Merge remote branch 'mst/for_anthony' into staging

[qemu.git] / fpu / softfloat.c
diff --git a/fpu/softfloat.c b/fpu/softfloat.c

index 8ebb69264830862602f27f8a2e7fcbe1a0e61c29..0b8279798cb1eeee70c70501e9b0d418cac858a0 100644 (file)
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -30,6 +30,8 @@ these four paragraphs for those parts of this code that are retained.
  
  =============================================================================*/
  
+/* FIXME: Flush-To-Zero only affects results.  Denormal inputs should also
+   be flushed to zero.  */
  #include "softfloat.h"
  
  /*----------------------------------------------------------------------------
@@ -175,7 +177,7 @@ static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 STATUS_PA
  INLINE bits32 extractFloat32Frac( float32 a )
  {
  
-    return a & 0x007FFFFF;
+    return float32_val(a) & 0x007FFFFF;
  
  }
  
@@ -186,7 +188,7 @@ INLINE bits32 extractFloat32Frac( float32 a )
  INLINE int16 extractFloat32Exp( float32 a )
  {
  
-    return ( a>>23 ) & 0xFF;
+    return ( float32_val(a)>>23 ) & 0xFF;
  
  }
  
@@ -197,7 +199,7 @@ INLINE int16 extractFloat32Exp( float32 a )
  INLINE flag extractFloat32Sign( float32 a )
  {
  
-    return a>>31;
+    return float32_val(a)>>31;
  
  }
  
@@ -233,7 +235,8 @@ static void
  INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
  {
  
-    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
+    return make_float32(
+          ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig);
  
  }
  
@@ -290,9 +293,10 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_P
                    && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
             ) {
              float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
-            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
+            return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
          }
          if ( zExp < 0 ) {
+            if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
              isTiny =
                     ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
                  || ( zExp < -1 )
@@ -337,7 +341,7 @@ static float32
  INLINE bits64 extractFloat64Frac( float64 a )
  {
  
-    return a & LIT64( 0x000FFFFFFFFFFFFF );
+    return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
  
  }
  
@@ -348,7 +352,7 @@ INLINE bits64 extractFloat64Frac( float64 a )
  INLINE int16 extractFloat64Exp( float64 a )
  {
  
-    return ( a>>52 ) & 0x7FF;
+    return ( float64_val(a)>>52 ) & 0x7FF;
  
  }
  
@@ -359,7 +363,7 @@ INLINE int16 extractFloat64Exp( float64 a )
  INLINE flag extractFloat64Sign( float64 a )
  {
  
-    return a>>63;
+    return float64_val(a)>>63;
  
  }
  
@@ -395,7 +399,8 @@ static void
  INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
  {
  
-    return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
+    return make_float64(
+        ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig);
  
  }
  
@@ -452,9 +457,10 @@ static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_P
                    && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
             ) {
              float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
-            return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
+            return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
          }
          if ( zExp < 0 ) {
+            if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
              isTiny =
                     ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
                  || ( zExp < -1 )
@@ -633,6 +639,7 @@ static floatx80
              goto overflow;
          }
          if ( zExp <= 0 ) {
+            if ( STATUS(flush_to_zero) ) return packFloatx80( zSign, 0, 0 );
              isTiny =
                     ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
                  || ( zExp < 0 )
@@ -963,6 +970,7 @@ static float128
              return packFloat128( zSign, 0x7FFF, 0, 0 );
          }
          if ( zExp < 0 ) {
+            if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
              isTiny =
                     ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
                  || ( zExp < -1 )
@@ -1050,7 +1058,7 @@ float32 int32_to_float32( int32 a STATUS_PARAM )
  {
      flag zSign;
  
-    if ( a == 0 ) return 0;
+    if ( a == 0 ) return float32_zero;
      if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
      zSign = ( a < 0 );
      return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
@@ -1070,7 +1078,7 @@ float64 int32_to_float64( int32 a STATUS_PARAM )
      int8 shiftCount;
      bits64 zSig;
  
-    if ( a == 0 ) return 0;
+    if ( a == 0 ) return float64_zero;
      zSign = ( a < 0 );
      absA = zSign ? - a : a;
      shiftCount = countLeadingZeros32( absA ) + 21;
@@ -1144,7 +1152,7 @@ float32 int64_to_float32( int64 a STATUS_PARAM )
      uint64 absA;
      int8 shiftCount;
  
-    if ( a == 0 ) return 0;
+    if ( a == 0 ) return float32_zero;
      zSign = ( a < 0 );
      absA = zSign ? - a : a;
      shiftCount = countLeadingZeros64( absA ) - 40;
@@ -1168,7 +1176,7 @@ float32 uint64_to_float32( uint64 a STATUS_PARAM )
  {
      int8 shiftCount;
  
-    if ( a == 0 ) return 0;
+    if ( a == 0 ) return float32_zero;
      shiftCount = countLeadingZeros64( a ) - 40;
      if ( 0 <= shiftCount ) {
          return packFloat32( 1 > 0, 0x95 - shiftCount, a<<shiftCount );
@@ -1195,7 +1203,7 @@ float64 int64_to_float64( int64 a STATUS_PARAM )
  {
      flag zSign;
  
-    if ( a == 0 ) return 0;
+    if ( a == 0 ) return float64_zero;
      if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
          return packFloat64( 1, 0x43E, 0 );
      }
@@ -1206,7 +1214,7 @@ float64 int64_to_float64( int64 a STATUS_PARAM )
  
  float64 uint64_to_float64( uint64 a STATUS_PARAM )
  {
-    if ( a == 0 ) return 0;
+    if ( a == 0 ) return float64_zero;
      return normalizeRoundAndPackFloat64( 0, 0x43C, a STATUS_VAR );
  
  }
@@ -1325,7 +1333,7 @@ int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
      aSign = extractFloat32Sign( a );
      shiftCount = aExp - 0x9E;
      if ( 0 <= shiftCount ) {
-        if ( a != 0xCF000000 ) {
+        if ( float32_val(a) != 0xCF000000 ) {
              float_raise( float_flag_invalid STATUS_VAR);
              if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
          }
@@ -1404,7 +1412,7 @@ int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
      aSign = extractFloat32Sign( a );
      shiftCount = aExp - 0xBE;
      if ( 0 <= shiftCount ) {
-        if ( a != 0xDF000000 ) {
+        if ( float32_val(a) != 0xDF000000 ) {
              float_raise( float_flag_invalid STATUS_VAR);
              if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
                  return LIT64( 0x7FFFFFFFFFFFFFFF );
@@ -1535,7 +1543,7 @@ float32 float32_round_to_int( float32 a STATUS_PARAM)
      int16 aExp;
      bits32 lastBitMask, roundBitsMask;
      int8 roundingMode;
-    float32 z;
+    bits32 z;
  
      aExp = extractFloat32Exp( a );
      if ( 0x96 <= aExp ) {
@@ -1545,7 +1553,7 @@ float32 float32_round_to_int( float32 a STATUS_PARAM)
          return a;
      }
      if ( aExp <= 0x7E ) {
-        if ( (bits32) ( a<<1 ) == 0 ) return a;
+        if ( (bits32) ( float32_val(a)<<1 ) == 0 ) return a;
          STATUS(float_exception_flags) |= float_flag_inexact;
          aSign = extractFloat32Sign( a );
          switch ( STATUS(float_rounding_mode) ) {
@@ -1555,29 +1563,29 @@ float32 float32_round_to_int( float32 a STATUS_PARAM)
              }
              break;
           case float_round_down:
-            return aSign ? 0xBF800000 : 0;
+            return make_float32(aSign ? 0xBF800000 : 0);
           case float_round_up:
-            return aSign ? 0x80000000 : 0x3F800000;
+            return make_float32(aSign ? 0x80000000 : 0x3F800000);
          }
          return packFloat32( aSign, 0, 0 );
      }
      lastBitMask = 1;
      lastBitMask <<= 0x96 - aExp;
      roundBitsMask = lastBitMask - 1;
-    z = a;
+    z = float32_val(a);
      roundingMode = STATUS(float_rounding_mode);
      if ( roundingMode == float_round_nearest_even ) {
          z += lastBitMask>>1;
          if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
      }
      else if ( roundingMode != float_round_to_zero ) {
-        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+        if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
              z += roundBitsMask;
          }
      }
      z &= ~ roundBitsMask;
-    if ( z != a ) STATUS(float_exception_flags) |= float_flag_inexact;
-    return z;
+    if ( z != float32_val(a) ) STATUS(float_exception_flags) |= float_flag_inexact;
+    return make_float32(z);
  
  }
  
@@ -1635,7 +1643,10 @@ static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
              if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
              return a;
          }
-        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
+        if ( aExp == 0 ) {
+            if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
+            return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
+        }
          zSig = 0x40000000 + aSig + bSig;
          zExp = aExp;
          goto roundAndPack;
@@ -1899,7 +1910,7 @@ float32 float32_div( float32 a, float32 b STATUS_PARAM )
  
  float32 float32_rem( float32 a, float32 b STATUS_PARAM )
  {
-    flag aSign, bSign, zSign;
+    flag aSign, zSign;
      int16 aExp, bExp, expDiff;
      bits32 aSig, bSig;
      bits32 q;
@@ -1912,7 +1923,6 @@ float32 float32_rem( float32 a, float32 b STATUS_PARAM )
      aSign = extractFloat32Sign( a );
      bSig = extractFloat32Frac( b );
      bExp = extractFloat32Exp( b );
-    bSign = extractFloat32Sign( b );
      if ( aExp == 0xFF ) {
          if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
              return propagateFloat32NaN( a, b STATUS_VAR );
@@ -2008,7 +2018,7 @@ float32 float32_sqrt( float32 a STATUS_PARAM )
      aExp = extractFloat32Exp( a );
      aSign = extractFloat32Sign( a );
      if ( aExp == 0xFF ) {
-        if ( aSig ) return propagateFloat32NaN( a, 0 STATUS_VAR );
+        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
          if ( ! aSign ) return a;
          float_raise( float_flag_invalid STATUS_VAR);
          return float32_default_nan;
@@ -2019,7 +2029,7 @@ float32 float32_sqrt( float32 a STATUS_PARAM )
          return float32_default_nan;
      }
      if ( aExp == 0 ) {
-        if ( aSig == 0 ) return 0;
+        if ( aSig == 0 ) return float32_zero;
          normalizeFloat32Subnormal( aSig, &aExp, &aSig );
      }
      zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
@@ -2045,6 +2055,132 @@ float32 float32_sqrt( float32 a STATUS_PARAM )
  
  }
  
+/*----------------------------------------------------------------------------
+| Returns the binary exponential (2 raised to the power `a') of the
+| single-precision floating-point value `a'.  The result is an approximation
+| evaluated in double precision and then converted to single precision.
+|
+| Special cases handled before the approximation:
+|   - NaN input: the NaN is propagated.
+|   - +infinity: `a' is returned unchanged; -infinity: zero is returned.
+|   - zero (either sign): one is returned.
+| Every other input raises the inexact flag.
+|
+| Uses the following identities:
+|
+| 1. -------------------------------------------------------------------------
+|      x    x*ln(2)
+|     2  = e
+|
+| 2. -------------------------------------------------------------------------
+|                      2     3     4     5           n
+|      x        x     x     x     x     x           x
+|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
+|               1!    2!    3!    4!    5!          n!
+*----------------------------------------------------------------------------*/
+
+/* Double-precision bit patterns of 1/n! for n = 1..15; the trailing comment
+   on each entry is n.  */
+static const float64 float32_exp2_coefficients[15] =
+{
+    make_float64( 0x3ff0000000000000ll ), /*  1 */
+    make_float64( 0x3fe0000000000000ll ), /*  2 */
+    make_float64( 0x3fc5555555555555ll ), /*  3 */
+    make_float64( 0x3fa5555555555555ll ), /*  4 */
+    make_float64( 0x3f81111111111111ll ), /*  5 */
+    make_float64( 0x3f56c16c16c16c17ll ), /*  6 */
+    make_float64( 0x3f2a01a01a01a01all ), /*  7 */
+    make_float64( 0x3efa01a01a01a01all ), /*  8 */
+    make_float64( 0x3ec71de3a556c734ll ), /*  9 */
+    make_float64( 0x3e927e4fb7789f5cll ), /* 10 */
+    make_float64( 0x3e5ae64567f544e4ll ), /* 11 */
+    make_float64( 0x3e21eed8eff8d898ll ), /* 12 */
+    make_float64( 0x3de6124613a86d09ll ), /* 13 */
+    make_float64( 0x3da93974a8c07c9dll ), /* 14 */
+    make_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
+};
+
+float32 float32_exp2( float32 a STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig;
+    float64 r, x, xn;
+    int i;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+
+    if ( aExp == 0xFF) {
+        /* NaN propagates; -inf gives zero, +inf is returned as is.  */
+        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
+        return (aSign) ? float32_zero : a;
+    }
+    if (aExp == 0) {
+        /* Zero of either sign gives exactly one; denormals fall through.  */
+        if (aSig == 0) return float32_one;
+    }
+
+    float_raise( float_flag_inexact STATUS_VAR);
+
+    /* ******************************* */
+    /* using float64 for approximation */
+    /* ******************************* */
+    x = float32_to_float64(a STATUS_VAR);
+    x = float64_mul(x, float64_ln2 STATUS_VAR);
+
+    /* Accumulate r = 1 + sum(x^n / n!) with xn holding the running power.  */
+    xn = x;
+    r = float64_one;
+    for (i = 0 ; i < 15 ; i++) {
+        float64 f;
+
+        f = float64_mul(xn, float32_exp2_coefficients[i] STATUS_VAR);
+        r = float64_add(r, f STATUS_VAR);
+
+        xn = float64_mul(xn, x STATUS_VAR);
+    }
+
+    /* Pass the status through STATUS_VAR like every other call here instead
+       of naming the parameter directly.  */
+    return float64_to_float32(r STATUS_VAR);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the binary log of the single-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+|
+| Special cases, in the order they are tested:
+|   - zero (either sign): negative infinity is returned.
+|   - any other input with the sign bit set: the invalid flag is raised and
+|     the default NaN is returned.
+|   - NaN: propagated; +infinity: returned unchanged.
+| Otherwise the result is built as a fixed-point value with 23 fraction
+| bits: the unbiased exponent supplies the integer part and each fraction
+| bit is obtained by squaring the significand once.
+*----------------------------------------------------------------------------*/
+float32 float32_log2( float32 a STATUS_PARAM )
+{
+    flag aSign, zSign;
+    int16 aExp;
+    bits32 aSig, zSig, i;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    if ( aSign ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return float32_default_nan;
+    }
+    if ( aExp == 0xFF ) {
+        if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
+        return a;
+    }
+
+    aExp -= 0x7F;
+    aSig |= 0x00800000;
+    zSign = aExp < 0;
+    /* Shift in the unsigned type: aExp may be negative here, and left
+       shifting a negative signed value is undefined behaviour.  This mirrors
+       the cast used by float64_log2.  */
+    zSig = (bits32)aExp << 23;
+
+    /* One result bit per iteration, most significant fraction bit first:
+       square the significand and, if the square reached 2.0 (bit 24 set),
+       halve it and record a one in the current bit position.  */
+    for (i = 1 << 22; i > 0; i >>= 1) {
+        aSig = ( (bits64)aSig * aSig ) >> 23;
+        if ( aSig & 0x01000000 ) {
+            aSig >>= 1;
+            zSig |= i;
+        }
+    }
+
+    /* zSig is a two's-complement value; hand its magnitude to the packer.  */
+    if ( zSign )
+        zSig = -zSig;
+
+    return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
+}
+
  /*----------------------------------------------------------------------------
  | Returns 1 if the single-precision floating-point value `a' is equal to
  | the corresponding value `b', and 0 otherwise.  The comparison is performed
@@ -2062,7 +2198,8 @@ int float32_eq( float32 a, float32 b STATUS_PARAM )
          }
          return 0;
      }
-    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
+    return ( float32_val(a) == float32_val(b) ) ||
+            ( (bits32) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
  
  }
  
@@ -2076,6 +2213,7 @@ int float32_eq( float32 a, float32 b STATUS_PARAM )
  int float32_le( float32 a, float32 b STATUS_PARAM )
  {
      flag aSign, bSign;
+    bits32 av, bv;
  
      if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
           || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
@@ -2085,8 +2223,10 @@ int float32_le( float32 a, float32 b STATUS_PARAM )
      }
      aSign = extractFloat32Sign( a );
      bSign = extractFloat32Sign( b );
-    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
-    return ( a == b ) || ( aSign ^ ( a < b ) );
+    av = float32_val(a);
+    bv = float32_val(b);
+    if ( aSign != bSign ) return aSign || ( (bits32) ( ( av | bv )<<1 ) == 0 );
+    return ( av == bv ) || ( aSign ^ ( av < bv ) );
  
  }
  
@@ -2099,6 +2239,7 @@ int float32_le( float32 a, float32 b STATUS_PARAM )
  int float32_lt( float32 a, float32 b STATUS_PARAM )
  {
      flag aSign, bSign;
+    bits32 av, bv;
  
      if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
           || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
@@ -2108,8 +2249,10 @@ int float32_lt( float32 a, float32 b STATUS_PARAM )
      }
      aSign = extractFloat32Sign( a );
      bSign = extractFloat32Sign( b );
-    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
-    return ( a != b ) && ( aSign ^ ( a < b ) );
+    av = float32_val(a);
+    bv = float32_val(b);
+    if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 );
+    return ( av != bv ) && ( aSign ^ ( av < bv ) );
  
  }
  
@@ -2122,6 +2265,7 @@ int float32_lt( float32 a, float32 b STATUS_PARAM )
  
  int float32_eq_signaling( float32 a, float32 b STATUS_PARAM )
  {
+    bits32 av, bv;
  
      if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
           || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
@@ -2129,7 +2273,9 @@ int float32_eq_signaling( float32 a, float32 b STATUS_PARAM )
          float_raise( float_flag_invalid STATUS_VAR);
          return 0;
      }
-    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
+    av = float32_val(a);
+    bv = float32_val(b);
+    return ( av == bv ) || ( (bits32) ( ( av | bv )<<1 ) == 0 );
  
  }
  
@@ -2143,6 +2289,7 @@ int float32_eq_signaling( float32 a, float32 b STATUS_PARAM )
  int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
  {
      flag aSign, bSign;
+    bits32 av, bv;
  
      if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
           || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
@@ -2154,8 +2301,10 @@ int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
      }
      aSign = extractFloat32Sign( a );
      bSign = extractFloat32Sign( b );
-    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
-    return ( a == b ) || ( aSign ^ ( a < b ) );
+    av = float32_val(a);
+    bv = float32_val(b);
+    if ( aSign != bSign ) return aSign || ( (bits32) ( ( av | bv )<<1 ) == 0 );
+    return ( av == bv ) || ( aSign ^ ( av < bv ) );
  
  }
  
@@ -2169,6 +2318,7 @@ int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
  int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
  {
      flag aSign, bSign;
+    bits32 av, bv;
  
      if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
           || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
@@ -2180,8 +2330,10 @@ int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
      }
      aSign = extractFloat32Sign( a );
      bSign = extractFloat32Sign( b );
-    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
-    return ( a != b ) && ( aSign ^ ( a < b ) );
+    av = float32_val(a);
+    bv = float32_val(b);
+    if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 );
+    return ( av != bv ) && ( aSign ^ ( av < bv ) );
  
  }
  
@@ -2324,7 +2476,7 @@ int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
      shiftCount = aExp - 0x433;
      if ( 0 <= shiftCount ) {
          if ( 0x43E <= aExp ) {
-            if ( a != LIT64( 0xC3E0000000000000 ) ) {
+            if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
                  float_raise( float_flag_invalid STATUS_VAR);
                  if (    ! aSign
                       || (    ( aExp == 0x7FF )
@@ -2383,6 +2535,144 @@ float32 float64_to_float32( float64 a STATUS_PARAM )
  
  }
  
+
+/*----------------------------------------------------------------------------
+| Assembles a half-precision bit pattern from the sign `zSign', the exponent
+| `zExp' and the significand `zSig'.  The sign is moved to bit 15, the
+| exponent to bit 10, and the three fields are then summed rather than
+| OR-ed.  Consequently any integer portion of `zSig' (bit 10 and above)
+| carries into the exponent field: when `zSig' is a complete, normalized
+| significand whose integer bit is 1, pass a `zExp' that is 1 less than the
+| desired result exponent.
+*----------------------------------------------------------------------------*/
+static bits16 packFloat16(flag zSign, int16 zExp, bits16 zSig)
+{
+    bits32 signField = ((bits32)zSign) << 15;
+    bits32 expField = ((bits32)zExp) << 10;
+
+    return signField + expField + zSig;
+}
+
+/* Half precision floats come in two formats: standard IEEE and "ARM" format.
+   The latter gains extra exponent range by omitting the NaN/Inf encodings.  */
+  
+/* Converts the half-precision bit pattern `a' to single precision.
+   When `ieee' is non-zero an exponent field of 0x1f encodes infinity or NaN;
+   otherwise it is treated as an ordinary exponent.  Half-precision
+   denormals are normalized before packing; zero keeps its sign.  */
+float32 float16_to_float32( bits16 a, flag ieee STATUS_PARAM )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig;
+
+    aSign = a >> 15;
+    aExp = (a >> 10) & 0x1f;
+    aSig = a & 0x3ff;
+
+    if (aExp == 0x1f && ieee) {
+        if (aSig) {
+            /* Make sure correct exceptions are raised.  The helper examines
+               a float32 encoding, so hand it the float32 NaN that carries
+               the same sign and fraction bits; the raw 16-bit pattern has a
+               zero float32 exponent field and would never be seen as a NaN.
+               NOTE(review): the helper is defined elsewhere - confirm it
+               raises invalid for signaling NaNs.  */
+            float32ToCommonNaN(packFloat32(aSign, 0xff, aSig << 13) STATUS_VAR);
+            /* Set the top fraction bit in the returned NaN.  */
+            aSig |= 0x200;
+        }
+        return packFloat32(aSign, 0xff, aSig << 13);
+    }
+    if (aExp == 0) {
+        int8 shiftCount;
+
+        if (aSig == 0) {
+            return packFloat32(aSign, 0, 0);
+        }
+
+        /* Denormal: shift the leading one up to bit 10 (the implicit-one
+           position) and lower the exponent by the same amount.  */
+        shiftCount = countLeadingZeros32( aSig ) - 21;
+        aSig = aSig << shiftCount;
+        aExp = -shiftCount;
+    }
+    /* 0x70 is the difference between the two exponent biases (127 - 15).
+       For a normalized denormal, bit 10 of aSig lands on bit 23 and so adds
+       one to the exponent field inside packFloat32.  */
+    return packFloat32( aSign, aExp + 0x70, aSig << 13);
+}
+
+/* Converts the single-precision value `a' to a half-precision bit pattern,
+   rounding according to the current rounding mode.
+   `ieee' selects the overflow behaviour: when non-zero, results whose
+   exponent exceeds 15 become infinity; otherwise the limit is 16 and the
+   result saturates to exponent 0x1f with an all-ones fraction.  Overflow
+   raises overflow and inexact in both cases.
+   NOTE(review): the NaN/infinity input path does not consult `ieee';
+   confirm the intended result for the alternative format.  */
+bits16 float32_to_float16( float32 a, flag ieee STATUS_PARAM)
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig;
+    bits32 mask;
+    bits32 increment;
+    int8 roundingMode;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    if ( aExp == 0xFF ) {
+        if (aSig) {
+            /* Make sure correct exceptions are raised.  */
+            float32ToCommonNaN(a STATUS_VAR);
+            aSig |= 0x00400000;
+        }
+        return packFloat16(aSign, 0x1f, aSig >> 13);
+    }
+    /* Exact zero of either sign converts directly.  The test must look at
+       the fraction, not the sign: checking the sign would silently flush
+       positive denormals to zero and send negative zero through the
+       rounding/underflow logic below.  */
+    if (aExp == 0 && aSig == 0) {
+        return packFloat16(aSign, 0, 0);
+    }
+    /* Decimal point between bits 22 and 23.  */
+    aSig |= 0x00800000;
+    aExp -= 0x7f;
+    /* mask covers the low bits of aSig that the final shifts discard:
+       13 bits for a normal result, more when the result is denormal.  */
+    if (aExp < -14) {
+        mask = 0x007fffff;
+        if (aExp < -24) {
+            aExp = -25;
+        } else {
+            mask >>= 24 + aExp;
+        }
+    } else {
+        mask = 0x00001fff;
+    }
+    if (aSig & mask) {
+        float_raise( float_flag_underflow STATUS_VAR );
+        roundingMode = STATUS(float_rounding_mode);
+        switch (roundingMode) {
+        case float_round_nearest_even:
+            increment = (mask + 1) >> 1;
+            /* Exact tie: round up only if the kept low bit is set.  */
+            if ((aSig & mask) == increment) {
+                increment = aSig & (increment << 1);
+            }
+            break;
+        case float_round_up:
+            increment = aSign ? 0 : mask;
+            break;
+        case float_round_down:
+            increment = aSign ? mask : 0;
+            break;
+        default: /* round_to_zero */
+            increment = 0;
+            break;
+        }
+        aSig += increment;
+        /* Rounding carried out of the significand: renormalize.  */
+        if (aSig >= 0x01000000) {
+            aSig >>= 1;
+            aExp++;
+        }
+    } else if (aExp < -14
+          && STATUS(float_detect_tininess) == float_tininess_before_rounding) {
+        float_raise( float_flag_underflow STATUS_VAR);
+    }
+
+    if (ieee) {
+        if (aExp > 15) {
+            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+            return packFloat16(aSign, 0x1f, 0);
+        }
+    } else {
+        if (aExp > 16) {
+            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+            return packFloat16(aSign, 0x1f, 0x3ff);
+        }
+    }
+    if (aExp < -24) {
+        return packFloat16(aSign, 0, 0);
+    }
+    if (aExp < -14) {
+        /* Denormal result: shift the significand down to exponent -14.  */
+        aSig >>= -14 - aExp;
+        aExp = -14;
+    }
+    /* The integer bit of aSig (bit 23) ends up at bit 10 and adds one to
+       the exponent field inside packFloat16, hence the bias of 14 here.  */
+    return packFloat16(aSign, aExp + 14, aSig >> 13);
+}
+
  #ifdef FLOATX80
  
  /*----------------------------------------------------------------------------
@@ -2464,7 +2754,7 @@ float64 float64_round_to_int( float64 a STATUS_PARAM )
      int16 aExp;
      bits64 lastBitMask, roundBitsMask;
      int8 roundingMode;
-    float64 z;
+    bits64 z;
  
      aExp = extractFloat64Exp( a );
      if ( 0x433 <= aExp ) {
@@ -2474,7 +2764,7 @@ float64 float64_round_to_int( float64 a STATUS_PARAM )
          return a;
      }
      if ( aExp < 0x3FF ) {
-        if ( (bits64) ( a<<1 ) == 0 ) return a;
+        if ( (bits64) ( float64_val(a)<<1 ) == 0 ) return a;
          STATUS(float_exception_flags) |= float_flag_inexact;
          aSign = extractFloat64Sign( a );
          switch ( STATUS(float_rounding_mode) ) {
@@ -2484,30 +2774,31 @@ float64 float64_round_to_int( float64 a STATUS_PARAM )
              }
              break;
           case float_round_down:
-            return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
+            return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
           case float_round_up:
-            return
-            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
+            return make_float64(
+            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
          }
          return packFloat64( aSign, 0, 0 );
      }
      lastBitMask = 1;
      lastBitMask <<= 0x433 - aExp;
      roundBitsMask = lastBitMask - 1;
-    z = a;
+    z = float64_val(a);
      roundingMode = STATUS(float_rounding_mode);
      if ( roundingMode == float_round_nearest_even ) {
          z += lastBitMask>>1;
          if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
      }
      else if ( roundingMode != float_round_to_zero ) {
-        if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+        if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
              z += roundBitsMask;
          }
      }
      z &= ~ roundBitsMask;
-    if ( z != a ) STATUS(float_exception_flags) |= float_flag_inexact;
-    return z;
+    if ( z != float64_val(a) )
+        STATUS(float_exception_flags) |= float_flag_inexact;
+    return make_float64(z);
  
  }
  
@@ -2576,7 +2867,10 @@ static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
              if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
              return a;
          }
-        if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
+        if ( aExp == 0 ) {
+            if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
+            return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
+        }
          zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
          zExp = aExp;
          goto roundAndPack;
@@ -2846,7 +3140,7 @@ float64 float64_div( float64 a, float64 b STATUS_PARAM )
  
  float64 float64_rem( float64 a, float64 b STATUS_PARAM )
  {
-    flag aSign, bSign, zSign;
+    flag aSign, zSign;
      int16 aExp, bExp, expDiff;
      bits64 aSig, bSig;
      bits64 q, alternateASig;
@@ -2857,7 +3151,6 @@ float64 float64_rem( float64 a, float64 b STATUS_PARAM )
      aSign = extractFloat64Sign( a );
      bSig = extractFloat64Frac( b );
      bExp = extractFloat64Exp( b );
-    bSign = extractFloat64Sign( b );
      if ( aExp == 0x7FF ) {
          if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
              return propagateFloat64NaN( a, b STATUS_VAR );
@@ -2951,7 +3244,7 @@ float64 float64_sqrt( float64 a STATUS_PARAM )
          return float64_default_nan;
      }
      if ( aExp == 0 ) {
-        if ( aSig == 0 ) return 0;
+        if ( aSig == 0 ) return float64_zero;
          normalizeFloat64Subnormal( aSig, &aExp, &aSig );
      }
      zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
@@ -2974,6 +3267,52 @@ float64 float64_sqrt( float64 a STATUS_PARAM )
  
  }
  
+/*----------------------------------------------------------------------------
+| Computes the base-2 logarithm of the double-precision floating-point value
+| `a'.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+|
+| A zero input yields negative infinity.  Any other input with the sign bit
+| set raises the invalid flag and yields the default NaN.  NaNs are
+| propagated and positive infinity is returned unchanged.  For the remaining
+| inputs the unbiased exponent forms the integer part of a fixed-point result
+| with 52 fraction bits, each obtained by squaring the significand once.
+*----------------------------------------------------------------------------*/
+float64 float64_log2( float64 a STATUS_PARAM )
+{
+    flag aSign, zSign;
+    int16 aExp;
+    bits64 aSig, sqHi, sqLo, zSig, bit;
+
+    aSign = extractFloat64Sign( a );
+    aExp = extractFloat64Exp( a );
+    aSig = extractFloat64Frac( a );
+
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) {
+            return packFloat64( 1, 0x7FF, 0 );
+        }
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    if ( aSign ) {
+        float_raise( float_flag_invalid STATUS_VAR);
+        return float64_default_nan;
+    }
+    if ( aExp == 0x7FF ) {
+        if ( aSig == 0 ) {
+            return a;
+        }
+        return propagateFloat64NaN( a, float64_zero STATUS_VAR );
+    }
+
+    aExp -= 0x3FF;
+    aSig |= LIT64( 0x0010000000000000 );
+    zSign = aExp < 0;
+    zSig = (bits64)aExp << 52;
+
+    /* Produce the fraction bits from the most significant one downwards:
+       square the significand, and whenever the square reaches 2.0 (bit 53
+       set) halve it and set the current result bit.  */
+    bit = 1LL << 51;
+    while ( bit > 0 ) {
+        mul64To128( aSig, aSig, &sqHi, &sqLo );
+        aSig = ( sqHi << 12 ) | ( sqLo >> 52 );
+        if ( aSig & LIT64( 0x0020000000000000 ) ) {
+            aSig >>= 1;
+            zSig |= bit;
+        }
+        bit >>= 1;
+    }
+
+    /* The packer takes a magnitude plus a separate sign.  */
+    if ( zSign ) {
+        zSig = -zSig;
+    }
+    return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
+}
+
  /*----------------------------------------------------------------------------
  | Returns 1 if the double-precision floating-point value `a' is equal to the
  | corresponding value `b', and 0 otherwise.  The comparison is performed
@@ -2982,6 +3321,7 @@ float64 float64_sqrt( float64 a STATUS_PARAM )
  
  int float64_eq( float64 a, float64 b STATUS_PARAM )
  {
+    bits64 av, bv;
  
      if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
           || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
@@ -2991,7 +3331,9 @@ int float64_eq( float64 a, float64 b STATUS_PARAM )
          }
          return 0;
      }
-    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
+    av = float64_val(a);
+    bv = float64_val(b);
+    return ( av == bv ) || ( (bits64) ( ( av | bv )<<1 ) == 0 );
  
  }
  
@@ -3005,6 +3347,7 @@ int float64_eq( float64 a, float64 b STATUS_PARAM )
  int float64_le( float64 a, float64 b STATUS_PARAM )
  {
      flag aSign, bSign;
+    bits64 av, bv;
  
      if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
           || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
@@ -3014,8 +3357,10 @@ int float64_le( float64 a, float64 b STATUS_PARAM )
      }
      aSign = extractFloat64Sign( a );
      bSign = extractFloat64Sign( b );
-    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
-    return ( a == b ) || ( aSign ^ ( a < b ) );
+    av = float64_val(a);
+    bv = float64_val(b);
+    if ( aSign != bSign ) return aSign || ( (bits64) ( ( av | bv )<<1 ) == 0 );
+    return ( av == bv ) || ( aSign ^ ( av < bv ) );
  
  }
  
@@ -3028,6 +3373,7 @@ int float64_le( float64 a, float64 b STATUS_PARAM )
  int float64_lt( float64 a, float64 b STATUS_PARAM )
  {
      flag aSign, bSign;
+    bits64 av, bv;
  
      if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
           || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
@@ -3037,8 +3383,10 @@ int float64_lt( float64 a, float64 b STATUS_PARAM )
      }
      aSign = extractFloat64Sign( a );
      bSign = extractFloat64Sign( b );
-    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
-    return ( a != b ) && ( aSign ^ ( a < b ) );
+    av = float64_val(a);
+    bv = float64_val(b);
+    if ( aSign != bSign ) return aSign && ( (bits64) ( ( av | bv )<<1 ) != 0 );
+    return ( av != bv ) && ( aSign ^ ( av < bv ) );
  
  }
  
@@ -3051,6 +3399,7 @@ int float64_lt( float64 a, float64 b STATUS_PARAM )
  
  int float64_eq_signaling( float64 a, float64 b STATUS_PARAM )
  {
+    bits64 av, bv;
  
      if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
           || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
@@ -3058,7 +3407,9 @@ int float64_eq_signaling( float64 a, float64 b STATUS_PARAM )
          float_raise( float_flag_invalid STATUS_VAR);
          return 0;
      }
-    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
+    av = float64_val(a);
+    bv = float64_val(b);
+    return ( av == bv ) || ( (bits64) ( ( av | bv )<<1 ) == 0 );
  
  }
  
@@ -3072,6 +3423,7 @@ int float64_eq_signaling( float64 a, float64 b STATUS_PARAM )
  int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
  {
      flag aSign, bSign;
+    bits64 av, bv;
  
      if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
           || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
@@ -3083,8 +3435,10 @@ int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
      }
      aSign = extractFloat64Sign( a );
      bSign = extractFloat64Sign( b );
-    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
-    return ( a == b ) || ( aSign ^ ( a < b ) );
+    av = float64_val(a);
+    bv = float64_val(b);
+    if ( aSign != bSign ) return aSign || ( (bits64) ( ( av | bv )<<1 ) == 0 );
+    return ( av == bv ) || ( aSign ^ ( av < bv ) );
  
  }
  
@@ -3098,6 +3452,7 @@ int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
  int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
  {
      flag aSign, bSign;
+    bits64 av, bv;
  
      if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
           || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
@@ -3109,8 +3464,10 @@ int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
      }
      aSign = extractFloat64Sign( a );
      bSign = extractFloat64Sign( b );
-    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
-    return ( a != b ) && ( aSign ^ ( a < b ) );
+    av = float64_val(a);
+    bv = float64_val(b);
+    if ( aSign != bSign ) return aSign && ( (bits64) ( ( av | bv )<<1 ) != 0 );
+    return ( av != bv ) && ( aSign ^ ( av < bv ) );
  
  }
  
@@ -3752,7 +4109,7 @@ floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
  
  floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
  {
-    flag aSign, bSign, zSign;
+    flag aSign, zSign;
      int32 aExp, bExp, expDiff;
      bits64 aSig0, aSig1, bSig;
      bits64 q, term0, term1, alternateASig0, alternateASig1;
@@ -3763,7 +4120,6 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
      aSign = extractFloatx80Sign( a );
      bSig = extractFloatx80Frac( b );
      bExp = extractFloatx80Exp( b );
-    bSign = extractFloatx80Sign( b );
      if ( aExp == 0x7FFF ) {
          if (    (bits64) ( aSig0<<1 )
               || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
@@ -4560,7 +4916,10 @@ static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM
              return a;
          }
          add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
-        if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
+        if ( aExp == 0 ) {
+            if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
+            return packFloat128( zSign, 0, zSig0, zSig1 );
+        }
          zSig2 = 0;
          zSig0 |= LIT64( 0x0002000000000000 );
          zExp = aExp;
@@ -4861,7 +5220,7 @@ float128 float128_div( float128 a, float128 b STATUS_PARAM )
  
  float128 float128_rem( float128 a, float128 b STATUS_PARAM )
  {
-    flag aSign, bSign, zSign;
+    flag aSign, zSign;
      int32 aExp, bExp, expDiff;
      bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
      bits64 allZero, alternateASig0, alternateASig1, sigMean1;
@@ -4875,7 +5234,6 @@ float128 float128_rem( float128 a, float128 b STATUS_PARAM )
      bSig1 = extractFloat128Frac1( b );
      bSig0 = extractFloat128Frac0( b );
      bExp = extractFloat128Exp( b );
-    bSign = extractFloat128Sign( b );
      if ( aExp == 0x7FFF ) {
          if (    ( aSig0 | aSig1 )
               || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
@@ -4950,7 +5308,7 @@ float128 float128_rem( float128 a, float128 b STATUS_PARAM )
          sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
      } while ( 0 <= (sbits64) aSig0 );
      add128(
-        aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
+        aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
      if (    ( sigMean0 < 0 )
           || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
          aSig0 = alternateASig0;
@@ -5310,12 +5668,14 @@ unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
      return res;
  }
  
+/* FIXME: This looks broken: the INT64_MIN bias and `a' are summed with integer "+=" on the values returned by float64_val(), which is not a floating-point addition.  */
  uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
  {
      int64_t v;
  
-    v = int64_to_float64(INT64_MIN STATUS_VAR);
-    v = float64_to_int64((a + v) STATUS_VAR);
+    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR)); /* integer value of the float64 produced from INT64_MIN */
+    v += float64_val(a); /* plain integer add of the two float64_val() results -- see FIXME */
+    v = float64_to_int64(make_float64(v) STATUS_VAR); /* wrap the integer sum back into a float64 and convert it to int64 */
  
      return v - INT64_MIN;
  }
@@ -5324,8 +5684,9 @@ uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
  {
      int64_t v;
  
-    v = int64_to_float64(INT64_MIN STATUS_VAR);
-    v = float64_to_int64_round_to_zero((a + v) STATUS_VAR);
+    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
+    v += float64_val(a);
+    v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
  
      return v - INT64_MIN;
  }
@@ -5335,6 +5696,7 @@ INLINE int float ## s ## _compare_internal( float ## s a, float ## s b,      \
                                        int is_quiet STATUS_PARAM )            \
  {                                                                            \
      flag aSign, bSign;                                                       \
+    bits ## s av, bv;                                                        \
                                                                               \
      if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) &&                    \
           extractFloat ## s ## Frac( a ) ) ||                                 \
@@ -5349,18 +5711,20 @@ INLINE int float ## s ## _compare_internal( float ## s a, float ## s b,      \
      }                                                                        \
      aSign = extractFloat ## s ## Sign( a );                                  \
      bSign = extractFloat ## s ## Sign( b );                                  \
+    av = float ## s ## _val(a);                                              \
+    bv = float ## s ## _val(b);                                              \
      if ( aSign != bSign ) {                                                  \
-        if ( (bits ## s) ( ( a | b )<<1 ) == 0 ) {                           \
+        if ( (bits ## s) ( ( av | bv )<<1 ) == 0 ) {                         \
              /* zero case */                                                  \
              return float_relation_equal;                                     \
          } else {                                                             \
              return 1 - (2 * aSign);                                          \
          }                                                                    \
      } else {                                                                 \
-        if (a == b) {                                                        \
+        if (av == bv) {                                                      \
              return float_relation_equal;                                     \
          } else {                                                             \
-            return 1 - 2 * (aSign ^ ( a < b ));                              \
+            return 1 - 2 * (aSign ^ ( av < bv ));                            \
          }                                                                    \
      }                                                                        \
  }                                                                            \
@@ -5378,6 +5742,50 @@ int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM )  \
  COMPARE(32, 0xff)
  COMPARE(64, 0x7ff)
  
+INLINE int float128_compare_internal( float128 a, float128 b,
+                                      int is_quiet STATUS_PARAM )
+{   /* Three-way comparison of `a' and `b'; returns float_relation_unordered, float_relation_equal, or 1 / -1 (presumably greater / less -- confirm in softfloat.h).  */
+    flag aSign, bSign;
+
+    if (( ( extractFloat128Exp( a ) == 0x7fff ) && /* exponent field 0x7fff ... */
+          ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) || /* ... with a nonzero fraction: `a' is a NaN encoding */
+        ( ( extractFloat128Exp( b ) == 0x7fff ) && /* same test applied to `b' */
+          ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
+        if (!is_quiet || /* non-quiet compare: always raise invalid here */
+            float128_is_signaling_nan( a ) || /* quiet compare: raise only if an operand is a signaling NaN */
+            float128_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        }
+        return float_relation_unordered;
+    }
+    aSign = extractFloat128Sign( a );
+    bSign = extractFloat128Sign( b );
+    if ( aSign != bSign ) { /* operands have different signs */
+        if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) { /* the shift discards the top bit of `high'; every other bit of both operands is zero */
+            /* zero case: both operands are zero and differ only in sign, so they compare equal */
+            return float_relation_equal;
+        } else {
+            return 1 - (2 * aSign); /* 1 when aSign is 0, -1 when aSign is 1 */
+        }
+    } else { /* same sign */
+        if (a.low == b.low && a.high == b.high) { /* identical encodings */
+            return float_relation_equal;
+        } else {
+            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) )); /* lt128 is presumably an unsigned 128-bit "less than" (confirm); XOR with aSign inverts its result when the sign flag is set; yields -1 or 1 */
+        }
+    }
+}
+
+int float128_compare( float128 a, float128 b STATUS_PARAM )
+{   /* Compare `a' and `b' with is_quiet = 0, so float128_compare_internal raises float_flag_invalid whenever it detects a NaN operand.  */
+    return float128_compare_internal(a, b, 0 STATUS_VAR);
+}
+
+int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
+{   /* Compare `a' and `b' with is_quiet = 1, so float_flag_invalid is raised only when float128_is_signaling_nan() is true for an operand.  */
+    return float128_compare_internal(a, b, 1 STATUS_VAR);
+}
+
  /* Multiply A by 2 raised to the power N.  */
  float32 float32_scalbn( float32 a, int n STATUS_PARAM )
  {
@@ -5392,8 +5800,14 @@ float32 float32_scalbn( float32 a, int n STATUS_PARAM )
      if ( aExp == 0xFF ) {
          return a;
      }
-    aExp += n;
-    return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
+    if ( aExp != 0 )
+        aSig |= 0x00800000;
+    else if ( aSig == 0 )
+        return a;
+
+    aExp += n - 1;
+    aSig <<= 7;
+    return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
  }
  
  float64 float64_scalbn( float64 a, int n STATUS_PARAM )
@@ -5409,8 +5823,14 @@ float64 float64_scalbn( float64 a, int n STATUS_PARAM )
      if ( aExp == 0x7FF ) {
          return a;
      }
-    aExp += n;
-    return roundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
+    if ( aExp != 0 )
+        aSig |= LIT64( 0x0010000000000000 );
+    else if ( aSig == 0 )
+        return a;
+
+    aExp += n - 1;
+    aSig <<= 10;
+    return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
  }
  
  #ifdef FLOATX80
@@ -5427,9 +5847,12 @@ floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
      if ( aExp == 0x7FF ) {
          return a;
      }
+    if (aExp == 0 && aSig == 0)
+        return a;
+
      aExp += n;
-    return roundAndPackFloatx80( STATUS(floatx80_rounding_precision),
-                                 aSign, aExp, aSig, 0 STATUS_VAR );
+    return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
+                                          aSign, aExp, aSig, 0 STATUS_VAR );
  }
  #endif
  
@@ -5447,8 +5870,14 @@ float128 float128_scalbn( float128 a, int n STATUS_PARAM )
      if ( aExp == 0x7FFF ) {
          return a;
      }
-    aExp += n;
-    return roundAndPackFloat128( aSign, aExp, aSig0, aSig1, 0 STATUS_VAR );
+    if ( aExp != 0 )
+        aSig0 |= LIT64( 0x0001000000000000 );
+    else if ( aSig0 == 0 && aSig1 == 0 )
+        return a;
+
+    aExp += n - 1;
+    return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
+                                          STATUS_VAR );
  
  }
  #endif