ArmPkg/Library/ArmSoftFloatLib/bits32/softfloat.c

   1 /* $NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $ */
   2
   3 /*
   4  * This version hacked for use with gcc -msoft-float by bjh21.
   5  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
   6  *  itself).
   7  */
   8
   9 /*
  10  * Things you may want to define:
  11  *
  12  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
  13  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
  14  *   properly renamed.
  15  */
  16
  17 /*
  18  * This differs from the standard bits32/softfloat.c in that float64
  19  * is defined to be a 64-bit integer rather than a structure.  The
  20  * structure is float64s, with translation between the two going via
  21  * float64u.
  22  */
  23
  24 /*
  25 ===============================================================================
  26
  27 This C source file is part of the SoftFloat IEC/IEEE Floating-Point
  28 Arithmetic Package, Release 2a.
  29
  30 Written by John R. Hauser.  This work was made possible in part by the
  31 International Computer Science Institute, located at Suite 600, 1947 Center
  32 Street, Berkeley, California 94704.  Funding was partially provided by the
  33 National Science Foundation under grant MIP-9311980.  The original version
  34 of this code was written as part of a project to build a fixed-point vector
  35 processor in collaboration with the University of California at Berkeley,
  36 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  37 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  38 arithmetic/SoftFloat.html'.
  39
  40 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  41 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  42 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  43 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  44 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  45
  46 Derivative works are acceptable, even for commercial purposes, so long as
  47 (1) they include prominent notice that the work is derivative, and (2) they
  48 include prominent notice akin to these four paragraphs for those parts of
  49 this code that are retained.
  50
  51 ===============================================================================
  52 */
  53
  54 #if defined(LIBC_SCCS) && !defined(lint)
  55 __RCSID("$NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $");
  56 #endif /* LIBC_SCCS and not lint */
  57
  58 #ifdef SOFTFLOAT_FOR_GCC
  59 #include "softfloat-for-gcc.h"
  60 #endif
  61
  62 #include "milieu.h"
  63 #include "softfloat.h"
  64
  65 /*
  66  * Conversions between floats as stored in memory and floats as
  67  * SoftFloat uses them
  68  */
  69 #ifndef FLOAT64_DEMANGLE
  70 #define FLOAT64_DEMANGLE(a) (a)
  71 #endif
  72 #ifndef FLOAT64_MANGLE
  73 #define FLOAT64_MANGLE(a)   (a)
  74 #endif
  75
  76 /*
  77 -------------------------------------------------------------------------------
  78 Floating-point rounding mode and exception flags.
  79 -------------------------------------------------------------------------------
  80 */
  81 #ifndef set_float_rounding_mode
  82 fp_rnd float_rounding_mode = float_round_nearest_even;
  83 fp_except float_exception_flags = 0;
  84 #endif
  85 #ifndef set_float_exception_inexact_flag
  86 #define set_float_exception_inexact_flag() \
  87     ((void)(float_exception_flags |= float_flag_inexact))
  88 #endif
  89
  90 /*
  91 -------------------------------------------------------------------------------
  92 Primitive arithmetic functions, including multi-word arithmetic, and
  93 division and square root approximations.  (Can be specialized to target if
  94 desired.)
  95 -------------------------------------------------------------------------------
  96 */
  97 #include "softfloat-macros"
  98
  99 /*
 100 -------------------------------------------------------------------------------
 101 Functions and definitions to determine:  (1) whether tininess for underflow
 102 is detected before or after rounding by default, (2) what (if anything)
 103 happens when exceptions are raised, (3) how signaling NaNs are distinguished
 104 from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
 105 are propagated from function inputs to output.  These details are target-
 106 specific.
 107 -------------------------------------------------------------------------------
 108 */
 109 #include "softfloat-specialize"
 110
 111 /*
 112 -------------------------------------------------------------------------------
 113 Returns the fraction bits of the single-precision floating-point value `a'.
 114 -------------------------------------------------------------------------------
 115 */
 116 INLINE bits32 extractFloat32Frac( float32 a )
 117 {
 118
 119     return a & 0x007FFFFF;
 120
 121 }
 122
 123 /*
 124 -------------------------------------------------------------------------------
 125 Returns the exponent bits of the single-precision floating-point value `a'.
 126 -------------------------------------------------------------------------------
 127 */
 128 INLINE int16 extractFloat32Exp( float32 a )
 129 {
 130
 131     return ( a>>23 ) & 0xFF;
 132
 133 }
 134
 135 /*
 136 -------------------------------------------------------------------------------
 137 Returns the sign bit of the single-precision floating-point value `a'.
 138 -------------------------------------------------------------------------------
 139 */
 140 INLINE flag extractFloat32Sign( float32 a )
 141 {
 142
 143     return a>>31;
 144
 145 }
 146
 147 /*
 148 -------------------------------------------------------------------------------
 149 Normalizes the subnormal single-precision floating-point value represented
 150 by the denormalized significand `aSig'.  The normalized exponent and
 151 significand are stored at the locations pointed to by `zExpPtr' and
 152 `zSigPtr', respectively.
 153 -------------------------------------------------------------------------------
 154 */
 155 static void
 156  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
 157 {
 158     int8 shiftCount;
 159
 160     shiftCount = countLeadingZeros32( aSig ) - 8;
 161     *zSigPtr = aSig<<shiftCount;
 162     *zExpPtr = 1 - shiftCount;
 163
 164 }
 165
 166 /*
 167 -------------------------------------------------------------------------------
 168 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
 169 single-precision floating-point value, returning the result.  After being
 170 shifted into the proper positions, the three fields are simply added
 171 together to form the result.  This means that any integer portion of `zSig'
 172 will be added into the exponent.  Since a properly normalized significand
 173 will have an integer portion equal to 1, the `zExp' input should be 1 less
 174 than the desired result exponent whenever `zSig' is a complete, normalized
 175 significand.
 176 -------------------------------------------------------------------------------
 177 */
 178 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
 179 {
 180
 181     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
 182
 183 }
 184
 185 /*
 186 -------------------------------------------------------------------------------
 187 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 188 and significand `zSig', and returns the proper single-precision floating-
 189 point value corresponding to the abstract input.  Ordinarily, the abstract
 190 value is simply rounded and packed into the single-precision format, with
 191 the inexact exception raised if the abstract input cannot be represented
 192 exactly.  However, if the abstract value is too large, the overflow and
 193 inexact exceptions are raised and an infinity or maximal finite value is
 194 returned.  If the abstract value is too small, the input value is rounded to
 195 a subnormal number, and the underflow and inexact exceptions are raised if
 196 the abstract input cannot be represented exactly as a subnormal single-
 197 precision floating-point number.
 198     The input significand `zSig' has its binary point between bits 30
 199 and 29, which is 7 bits to the left of the usual location.  This shifted
 200 significand must be normalized or smaller.  If `zSig' is not normalized,
 201 `zExp' must be 0; in that case, the result returned is a subnormal number,
 202 and it must not require rounding.  In the usual case that `zSig' is
 203 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
 204 The handling of underflow and overflow follows the IEC/IEEE Standard for
 205 Binary Floating-Point Arithmetic.
 206 -------------------------------------------------------------------------------
 207 */
 208 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
 209 {
 210     int8 roundingMode;
 211     flag roundNearestEven;
 212     int8 roundIncrement, roundBits;
 213     flag isTiny;
 214
 215     roundingMode = float_rounding_mode;
 216     roundNearestEven = roundingMode == float_round_nearest_even;
 217     roundIncrement = 0x40;
 218     if ( ! roundNearestEven ) {
 219         if ( roundingMode == float_round_to_zero ) {
 220             roundIncrement = 0;
 221         }
 222         else {
 223             roundIncrement = 0x7F;
 224             if ( zSign ) {
 225                 if ( roundingMode == float_round_up ) roundIncrement = 0;
 226             }
 227             else {
 228                 if ( roundingMode == float_round_down ) roundIncrement = 0;
 229             }
 230         }
 231     }
 232     roundBits = zSig & 0x7F;
 233     if ( 0xFD <= (bits16) zExp ) {
 234         if (    ( 0xFD < zExp )
 235              || (    ( zExp == 0xFD )
 236                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
 237            ) {
 238             float_raise( float_flag_overflow | float_flag_inexact );
 239             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
 240         }
 241         if ( zExp < 0 ) {
 242             isTiny =
 243                    ( float_detect_tininess == float_tininess_before_rounding )
 244                 || ( zExp < -1 )
 245                 || ( zSig + roundIncrement < (uint32)0x80000000 );
 246             shift32RightJamming( zSig, - zExp, &zSig );
 247             zExp = 0;
 248             roundBits = zSig & 0x7F;
 249             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
 250         }
 251     }
 252     if ( roundBits ) set_float_exception_inexact_flag();
 253     zSig = ( zSig + roundIncrement )>>7;
 254     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
 255     if ( zSig == 0 ) zExp = 0;
 256     return packFloat32( zSign, zExp, zSig );
 257
 258 }
 259
 260 /*
 261 -------------------------------------------------------------------------------
 262 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 263 and significand `zSig', and returns the proper single-precision floating-
 264 point value corresponding to the abstract input.  This routine is just like
 265 `roundAndPackFloat32' except that `zSig' does not have to be normalized.
 266 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
 267 floating-point exponent.
 268 -------------------------------------------------------------------------------
 269 */
 270 static float32
 271  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
 272 {
 273     int8 shiftCount;
 274
 275     shiftCount = countLeadingZeros32( zSig ) - 1;
 276     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
 277
 278 }
 279
 280 /*
 281 -------------------------------------------------------------------------------
 282 Returns the least-significant 32 fraction bits of the double-precision
 283 floating-point value `a'.
 284 -------------------------------------------------------------------------------
 285 */
 286 INLINE bits32 extractFloat64Frac1( float64 a )
 287 {
 288
 289     return (bits32)(FLOAT64_DEMANGLE(a) & LIT64(0x00000000FFFFFFFF));
 290
 291 }
 292
 293 /*
 294 -------------------------------------------------------------------------------
 295 Returns the most-significant 20 fraction bits of the double-precision
 296 floating-point value `a'.
 297 -------------------------------------------------------------------------------
 298 */
 299 INLINE bits32 extractFloat64Frac0( float64 a )
 300 {
 301
 302     return (bits32)((FLOAT64_DEMANGLE(a) >> 32) & 0x000FFFFF);
 303
 304 }
 305
 306 /*
 307 -------------------------------------------------------------------------------
 308 Returns the exponent bits of the double-precision floating-point value `a'.
 309 -------------------------------------------------------------------------------
 310 */
 311 INLINE int16 extractFloat64Exp( float64 a )
 312 {
 313
 314     return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF);
 315
 316 }
 317
 318 /*
 319 -------------------------------------------------------------------------------
 320 Returns the sign bit of the double-precision floating-point value `a'.
 321 -------------------------------------------------------------------------------
 322 */
 323 INLINE flag extractFloat64Sign( float64 a )
 324 {
 325
 326     return (flag)(FLOAT64_DEMANGLE(a) >> 63);
 327
 328 }
 329
 330 /*
 331 -------------------------------------------------------------------------------
 332 Normalizes the subnormal double-precision floating-point value represented
 333 by the denormalized significand formed by the concatenation of `aSig0' and
 334 `aSig1'.  The normalized exponent is stored at the location pointed to by
 335 `zExpPtr'.  The most significant 21 bits of the normalized significand are
 336 stored at the location pointed to by `zSig0Ptr', and the least significant
 337 32 bits of the normalized significand are stored at the location pointed to
 338 by `zSig1Ptr'.
 339 -------------------------------------------------------------------------------
 340 */
 341 static void
 342  normalizeFloat64Subnormal(
 343      bits32 aSig0,
 344      bits32 aSig1,
 345      int16 *zExpPtr,
 346      bits32 *zSig0Ptr,
 347      bits32 *zSig1Ptr
 348  )
 349 {
 350     int8 shiftCount;
 351
 352     if ( aSig0 == 0 ) {
 353         shiftCount = countLeadingZeros32( aSig1 ) - 11;
 354         if ( shiftCount < 0 ) {
 355             *zSig0Ptr = aSig1>>( - shiftCount );
 356             *zSig1Ptr = aSig1<<( shiftCount & 31 );
 357         }
 358         else {
 359             *zSig0Ptr = aSig1<<shiftCount;
 360             *zSig1Ptr = 0;
 361         }
 362         *zExpPtr = - shiftCount - 31;
 363     }
 364     else {
 365         shiftCount = countLeadingZeros32( aSig0 ) - 11;
 366         shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
 367         *zExpPtr = 1 - shiftCount;
 368     }
 369
 370 }
 371
 372 /*
 373 -------------------------------------------------------------------------------
 374 Packs the sign `zSign', the exponent `zExp', and the significand formed by
 375 the concatenation of `zSig0' and `zSig1' into a double-precision floating-
 376 point value, returning the result.  After being shifted into the proper
 377 positions, the three fields `zSign', `zExp', and `zSig0' are simply added
 378 together to form the most significant 32 bits of the result.  This means
 379 that any integer portion of `zSig0' will be added into the exponent.  Since
 380 a properly normalized significand will have an integer portion equal to 1,
 381 the `zExp' input should be 1 less than the desired result exponent whenever
 382 `zSig0' and `zSig1' concatenated form a complete, normalized significand.
 383 -------------------------------------------------------------------------------
 384 */
 385 INLINE float64
 386  packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
 387 {
 388
 389     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
 390                            ( ( (bits64) zExp )<<52 ) +
 391                            ( ( (bits64) zSig0 )<<32 ) + zSig1 );
 392
 393
 394 }
 395
 396 /*
 397 -------------------------------------------------------------------------------
 398 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 399 and extended significand formed by the concatenation of `zSig0', `zSig1',
 400 and `zSig2', and returns the proper double-precision floating-point value
 401 corresponding to the abstract input.  Ordinarily, the abstract value is
 402 simply rounded and packed into the double-precision format, with the inexact
 403 exception raised if the abstract input cannot be represented exactly.
 404 However, if the abstract value is too large, the overflow and inexact
 405 exceptions are raised and an infinity or maximal finite value is returned.
 406 If the abstract value is too small, the input value is rounded to a
 407 subnormal number, and the underflow and inexact exceptions are raised if the
 408 abstract input cannot be represented exactly as a subnormal double-precision
 409 floating-point number.
 410     The input significand must be normalized or smaller.  If the input
 411 significand is not normalized, `zExp' must be 0; in that case, the result
 412 returned is a subnormal number, and it must not require rounding.  In the
 413 usual case that the input significand is normalized, `zExp' must be 1 less
 414 than the ``true'' floating-point exponent.  The handling of underflow and
 415 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 416 -------------------------------------------------------------------------------
 417 */
 418 static float64
 419  roundAndPackFloat64(
 420      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
 421 {
 422     int8 roundingMode;
 423     flag roundNearestEven, increment, isTiny;
 424
 425     roundingMode = float_rounding_mode;
 426     roundNearestEven = ( roundingMode == float_round_nearest_even );
 427     increment = ( (sbits32) zSig2 < 0 );
 428     if ( ! roundNearestEven ) {
 429         if ( roundingMode == float_round_to_zero ) {
 430             increment = 0;
 431         }
 432         else {
 433             if ( zSign ) {
 434                 increment = ( roundingMode == float_round_down ) && zSig2;
 435             }
 436             else {
 437                 increment = ( roundingMode == float_round_up ) && zSig2;
 438             }
 439         }
 440     }
 441     if ( 0x7FD <= (bits16) zExp ) {
 442         if (    ( 0x7FD < zExp )
 443              || (    ( zExp == 0x7FD )
 444                   && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
 445                   && increment
 446                 )
 447            ) {
 448             float_raise( float_flag_overflow | float_flag_inexact );
 449             if (    ( roundingMode == float_round_to_zero )
 450                  || ( zSign && ( roundingMode == float_round_up ) )
 451                  || ( ! zSign && ( roundingMode == float_round_down ) )
 452                ) {
 453                 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
 454             }
 455             return packFloat64( zSign, 0x7FF, 0, 0 );
 456         }
 457         if ( zExp < 0 ) {
 458             isTiny =
 459                    ( float_detect_tininess == float_tininess_before_rounding )
 460                 || ( zExp < -1 )
 461                 || ! increment
 462                 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
 463             shift64ExtraRightJamming(
 464                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
 465             zExp = 0;
 466             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
 467             if ( roundNearestEven ) {
 468                 increment = ( (sbits32) zSig2 < 0 );
 469             }
 470             else {
 471                 if ( zSign ) {
 472                     increment = ( roundingMode == float_round_down ) && zSig2;
 473                 }
 474                 else {
 475                     increment = ( roundingMode == float_round_up ) && zSig2;
 476                 }
 477             }
 478         }
 479     }
 480     if ( zSig2 ) set_float_exception_inexact_flag();
 481     if ( increment ) {
 482         add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
 483         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
 484     }
 485     else {
 486         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
 487     }
 488     return packFloat64( zSign, zExp, zSig0, zSig1 );
 489
 490 }
 491
 492 /*
 493 -------------------------------------------------------------------------------
 494 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 495 and significand formed by the concatenation of `zSig0' and `zSig1', and
 496 returns the proper double-precision floating-point value corresponding
 497 to the abstract input.  This routine is just like `roundAndPackFloat64'
 498 except that the input significand has fewer bits and does not have to be
 499 normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
 500 point exponent.
 501 -------------------------------------------------------------------------------
 502 */
 503 static float64
 504  normalizeRoundAndPackFloat64(
 505      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
 506 {
 507     int8 shiftCount;
 508     bits32 zSig2;
 509
 510     if ( zSig0 == 0 ) {
 511         zSig0 = zSig1;
 512         zSig1 = 0;
 513         zExp -= 32;
 514     }
 515     shiftCount = countLeadingZeros32( zSig0 ) - 11;
 516     if ( 0 <= shiftCount ) {
 517         zSig2 = 0;
 518         shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
 519     }
 520     else {
 521         shift64ExtraRightJamming(
 522             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
 523     }
 524     zExp -= shiftCount;
 525     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
 526
 527 }
 528
 529 /*
 530 -------------------------------------------------------------------------------
 531 Returns the result of converting the 32-bit two's complement integer `a' to
 532 the single-precision floating-point format.  The conversion is performed
 533 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 534 -------------------------------------------------------------------------------
 535 */
 536 float32 int32_to_float32( int32 a )
 537 {
 538     flag zSign;
 539
 540     if ( a == 0 ) return 0;
 541     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
 542     zSign = ( a < 0 );
 543     return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a));
 544
 545 }
 546
 547 /*
 548 -------------------------------------------------------------------------------
 549 Returns the result of converting the 32-bit two's complement integer `a' to
 550 the double-precision floating-point format.  The conversion is performed
 551 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 552 -------------------------------------------------------------------------------
 553 */
 554 float64 int32_to_float64( int32 a )
 555 {
 556     flag zSign;
 557     bits32 absA;
 558     int8 shiftCount;
 559     bits32 zSig0, zSig1;
 560
 561     if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
 562     zSign = ( a < 0 );
 563     absA = zSign ? - a : a;
 564     shiftCount = countLeadingZeros32( absA ) - 11;
 565     if ( 0 <= shiftCount ) {
 566         zSig0 = absA<<shiftCount;
 567         zSig1 = 0;
 568     }
 569     else {
 570         shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
 571     }
 572     return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
 573
 574 }
 575
 576 #ifndef SOFTFLOAT_FOR_GCC
 577 /*
 578 -------------------------------------------------------------------------------
 579 Returns the result of converting the single-precision floating-point value
 580 `a' to the 32-bit two's complement integer format.  The conversion is
 581 performed according to the IEC/IEEE Standard for Binary Floating-Point
 582 Arithmetic---which means in particular that the conversion is rounded
 583 according to the current rounding mode.  If `a' is a NaN, the largest
 584 positive integer is returned.  Otherwise, if the conversion overflows, the
 585 largest integer with the same sign as `a' is returned.
 586 -------------------------------------------------------------------------------
 587 */
 588 int32 float32_to_int32( float32 a )
 589 {
 590     flag aSign;
 591     int16 aExp, shiftCount;
 592     bits32 aSig, aSigExtra;
 593     int32 z;
 594     int8 roundingMode;
 595
 596     aSig = extractFloat32Frac( a );
 597     aExp = extractFloat32Exp( a );
 598     aSign = extractFloat32Sign( a );
 599     shiftCount = aExp - 0x96;
 600     if ( 0 <= shiftCount ) {
 601         if ( 0x9E <= aExp ) {
 602             if ( a != 0xCF000000 ) {
 603                 float_raise( float_flag_invalid );
 604                 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
 605                     return 0x7FFFFFFF;
 606                 }
 607             }
 608             return (sbits32) 0x80000000;
 609         }
 610         z = ( aSig | 0x00800000 )<<shiftCount;
 611         if ( aSign ) z = - z;
 612     }
 613     else {
 614         if ( aExp < 0x7E ) {
 615             aSigExtra = aExp | aSig;
 616             z = 0;
 617         }
 618         else {
 619             aSig |= 0x00800000;
 620             aSigExtra = aSig<<( shiftCount & 31 );
 621             z = aSig>>( - shiftCount );
 622         }
 623         if ( aSigExtra ) set_float_exception_inexact_flag();
 624         roundingMode = float_rounding_mode;
 625         if ( roundingMode == float_round_nearest_even ) {
 626             if ( (sbits32) aSigExtra < 0 ) {
 627                 ++z;
 628                 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
 629             }
 630             if ( aSign ) z = - z;
 631         }
 632         else {
 633             aSigExtra = ( aSigExtra != 0 );
 634             if ( aSign ) {
 635                 z += ( roundingMode == float_round_down ) & aSigExtra;
 636                 z = - z;
 637             }
 638             else {
 639                 z += ( roundingMode == float_round_up ) & aSigExtra;
 640             }
 641         }
 642     }
 643     return z;
 644
 645 }
 646 #endif
 647
 648 /*
 649 -------------------------------------------------------------------------------
 650 Returns the result of converting the single-precision floating-point value
 651 `a' to the 32-bit two's complement integer format.  The conversion is
 652 performed according to the IEC/IEEE Standard for Binary Floating-Point
 653 Arithmetic, except that the conversion is always rounded toward zero.
 654 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
 655 the conversion overflows, the largest integer with the same sign as `a' is
 656 returned.
 657 -------------------------------------------------------------------------------
 658 */
 659 int32 float32_to_int32_round_to_zero( float32 a )
 660 {
 661     flag aSign;
 662     int16 aExp, shiftCount;
 663     bits32 aSig;
 664     int32 z;
 665
 666     aSig = extractFloat32Frac( a );
 667     aExp = extractFloat32Exp( a );
 668     aSign = extractFloat32Sign( a );
 669     shiftCount = aExp - 0x9E;
 670     if ( 0 <= shiftCount ) {
 671         if ( a != 0xCF000000 ) {
 672             float_raise( float_flag_invalid );
 673             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
 674         }
 675         return (sbits32) 0x80000000;
 676     }
 677     else if ( aExp <= 0x7E ) {
 678         if ( aExp | aSig ) set_float_exception_inexact_flag();
 679         return 0;
 680     }
 681     aSig = ( aSig | 0x00800000 )<<8;
 682     z = aSig>>( - shiftCount );
 683     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
 684         set_float_exception_inexact_flag();
 685     }
 686     if ( aSign ) z = - z;
 687     return z;
 688
 689 }
 690
 691 /*
 692 -------------------------------------------------------------------------------
 693 Returns the result of converting the single-precision floating-point value
 694 `a' to the double-precision floating-point format.  The conversion is
 695 performed according to the IEC/IEEE Standard for Binary Floating-Point
 696 Arithmetic.
 697 -------------------------------------------------------------------------------
 698 */
 699 float64 float32_to_float64( float32 a )
 700 {
 701     flag aSign;
 702     int16 aExp;
 703     bits32 aSig, zSig0, zSig1;
 704
 705     aSig = extractFloat32Frac( a );
 706     aExp = extractFloat32Exp( a );
 707     aSign = extractFloat32Sign( a );
 708     if ( aExp == 0xFF ) {
 709         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
 710         return packFloat64( aSign, 0x7FF, 0, 0 );
 711     }
 712     if ( aExp == 0 ) {
 713         if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
 714         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
 715         --aExp;
 716     }
 717     shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
 718     return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
 719
 720 }
 721
 722 #ifndef SOFTFLOAT_FOR_GCC
 723 /*
 724 -------------------------------------------------------------------------------
 725 Rounds the single-precision floating-point value `a' to an integer,
 726 and returns the result as a single-precision floating-point value.  The
 727 operation is performed according to the IEC/IEEE Standard for Binary
 728 Floating-Point Arithmetic.
 729 -------------------------------------------------------------------------------
 730 */
 731 float32 float32_round_to_int( float32 a )
 732 {
 733     flag aSign;
 734     int16 aExp;
 735     bits32 lastBitMask, roundBitsMask;
 736     int8 roundingMode;
 737     float32 z;
 738
 739     aExp = extractFloat32Exp( a );
 740     if ( 0x96 <= aExp ) {
 741         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
 742             return propagateFloat32NaN( a, a );
 743         }
 744         return a;
 745     }
 746     if ( aExp <= 0x7E ) {
 747         if ( (bits32) ( a<<1 ) == 0 ) return a;
 748         set_float_exception_inexact_flag();
 749         aSign = extractFloat32Sign( a );
 750         switch ( float_rounding_mode ) {
 751          case float_round_nearest_even:
 752             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
 753                 return packFloat32( aSign, 0x7F, 0 );
 754             }
 755             break;
 756          case float_round_to_zero:
 757             break;
 758          case float_round_down:
 759             return aSign ? 0xBF800000 : 0;
 760          case float_round_up:
 761             return aSign ? 0x80000000 : 0x3F800000;
 762         }
 763         return packFloat32( aSign, 0, 0 );
 764     }
 765     lastBitMask = 1;
 766     lastBitMask <<= 0x96 - aExp;
 767     roundBitsMask = lastBitMask - 1;
 768     z = a;
 769     roundingMode = float_rounding_mode;
 770     if ( roundingMode == float_round_nearest_even ) {
 771         z += lastBitMask>>1;
 772         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
 773     }
 774     else if ( roundingMode != float_round_to_zero ) {
 775         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
 776             z += roundBitsMask;
 777         }
 778     }
 779     z &= ~ roundBitsMask;
 780     if ( z != a ) set_float_exception_inexact_flag();
 781     return z;
 782
 783 }
 784 #endif
 785
 786 /*
 787 -------------------------------------------------------------------------------
 788 Returns the result of adding the absolute values of the single-precision
 789 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
 790 before being returned.  `zSign' is ignored if the result is a NaN.
 791 The addition is performed according to the IEC/IEEE Standard for Binary
 792 Floating-Point Arithmetic.
 793 -------------------------------------------------------------------------------
 794 */
 795 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
 796 {
 797     int16 aExp, bExp, zExp;
 798     bits32 aSig, bSig, zSig;
 799     int16 expDiff;
 800
 801     aSig = extractFloat32Frac( a );
 802     aExp = extractFloat32Exp( a );
 803     bSig = extractFloat32Frac( b );
 804     bExp = extractFloat32Exp( b );
 805     expDiff = aExp - bExp;
 806     aSig <<= 6;
 807     bSig <<= 6;
 808     if ( 0 < expDiff ) {
 809         if ( aExp == 0xFF ) {
 810             if ( aSig ) return propagateFloat32NaN( a, b );
 811             return a;
 812         }
 813         if ( bExp == 0 ) {
 814             --expDiff;
 815         }
 816         else {
 817             bSig |= 0x20000000;
 818         }
 819         shift32RightJamming( bSig, expDiff, &bSig );
 820         zExp = aExp;
 821     }
 822     else if ( expDiff < 0 ) {
 823         if ( bExp == 0xFF ) {
 824             if ( bSig ) return propagateFloat32NaN( a, b );
 825             return packFloat32( zSign, 0xFF, 0 );
 826         }
 827         if ( aExp == 0 ) {
 828             ++expDiff;
 829         }
 830         else {
 831             aSig |= 0x20000000;
 832         }
 833         shift32RightJamming( aSig, - expDiff, &aSig );
 834         zExp = bExp;
 835     }
 836     else {
 837         if ( aExp == 0xFF ) {
 838             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
 839             return a;
 840         }
 841         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
 842         zSig = 0x40000000 + aSig + bSig;
 843         zExp = aExp;
 844         goto roundAndPack;
 845     }
 846     aSig |= 0x20000000;
 847     zSig = ( aSig + bSig )<<1;
 848     --zExp;
 849     if ( (sbits32) zSig < 0 ) {
 850         zSig = aSig + bSig;
 851         ++zExp;
 852     }
 853  roundAndPack:
 854     return roundAndPackFloat32( zSign, zExp, zSig );
 855
 856 }
 857
 858 /*
 859 -------------------------------------------------------------------------------
 860 Returns the result of subtracting the absolute values of the single-
 861 precision floating-point values `a' and `b'.  If `zSign' is 1, the
 862 difference is negated before being returned.  `zSign' is ignored if the
 863 result is a NaN.  The subtraction is performed according to the IEC/IEEE
 864 Standard for Binary Floating-Point Arithmetic.
 865 -------------------------------------------------------------------------------
 866 */
 867 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
 868 {
 869     int16 aExp, bExp, zExp;
 870     bits32 aSig, bSig, zSig;
 871     int16 expDiff;
 872
 873     aSig = extractFloat32Frac( a );
 874     aExp = extractFloat32Exp( a );
 875     bSig = extractFloat32Frac( b );
 876     bExp = extractFloat32Exp( b );
 877     expDiff = aExp - bExp;
 878     aSig <<= 7;
 879     bSig <<= 7;
 880     if ( 0 < expDiff ) goto aExpBigger;
 881     if ( expDiff < 0 ) goto bExpBigger;
 882     if ( aExp == 0xFF ) {
 883         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
 884         float_raise( float_flag_invalid );
 885         return float32_default_nan;
 886     }
 887     if ( aExp == 0 ) {
 888         aExp = 1;
 889         bExp = 1;
 890     }
 891     if ( bSig < aSig ) goto aBigger;
 892     if ( aSig < bSig ) goto bBigger;
 893     return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
 894  bExpBigger:
 895     if ( bExp == 0xFF ) {
 896         if ( bSig ) return propagateFloat32NaN( a, b );
 897         return packFloat32( zSign ^ 1, 0xFF, 0 );
 898     }
 899     if ( aExp == 0 ) {
 900         ++expDiff;
 901     }
 902     else {
 903         aSig |= 0x40000000;
 904     }
 905     shift32RightJamming( aSig, - expDiff, &aSig );
 906     bSig |= 0x40000000;
 907  bBigger:
 908     zSig = bSig - aSig;
 909     zExp = bExp;
 910     zSign ^= 1;
 911     goto normalizeRoundAndPack;
 912  aExpBigger:
 913     if ( aExp == 0xFF ) {
 914         if ( aSig ) return propagateFloat32NaN( a, b );
 915         return a;
 916     }
 917     if ( bExp == 0 ) {
 918         --expDiff;
 919     }
 920     else {
 921         bSig |= 0x40000000;
 922     }
 923     shift32RightJamming( bSig, expDiff, &bSig );
 924     aSig |= 0x40000000;
 925  aBigger:
 926     zSig = aSig - bSig;
 927     zExp = aExp;
 928  normalizeRoundAndPack:
 929     --zExp;
 930     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
 931
 932 }
 933
 934 /*
 935 -------------------------------------------------------------------------------
 936 Returns the result of adding the single-precision floating-point values `a'
 937 and `b'.  The operation is performed according to the IEC/IEEE Standard for
 938 Binary Floating-Point Arithmetic.
 939 -------------------------------------------------------------------------------
 940 */
 941 float32 float32_add( float32 a, float32 b )
 942 {
 943     flag aSign, bSign;
 944
 945     aSign = extractFloat32Sign( a );
 946     bSign = extractFloat32Sign( b );
 947     if ( aSign == bSign ) {
 948         return addFloat32Sigs( a, b, aSign );
 949     }
 950     else {
 951         return subFloat32Sigs( a, b, aSign );
 952     }
 953
 954 }
 955
 956 /*
 957 -------------------------------------------------------------------------------
 958 Returns the result of subtracting the single-precision floating-point values
 959 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
 960 for Binary Floating-Point Arithmetic.
 961 -------------------------------------------------------------------------------
 962 */
 963 float32 float32_sub( float32 a, float32 b )
 964 {
 965     flag aSign, bSign;
 966
 967     aSign = extractFloat32Sign( a );
 968     bSign = extractFloat32Sign( b );
 969     if ( aSign == bSign ) {
 970         return subFloat32Sigs( a, b, aSign );
 971     }
 972     else {
 973         return addFloat32Sigs( a, b, aSign );
 974     }
 975
 976 }
 977
 978 /*
 979 -------------------------------------------------------------------------------
 980 Returns the result of multiplying the single-precision floating-point values
 981 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
 982 for Binary Floating-Point Arithmetic.
 983 -------------------------------------------------------------------------------
 984 */
 985 float32 float32_mul( float32 a, float32 b )
 986 {
 987     flag aSign, bSign, zSign;
 988     int16 aExp, bExp, zExp;
 989     bits32 aSig, bSig, zSig0, zSig1;
 990
 991     aSig = extractFloat32Frac( a );
 992     aExp = extractFloat32Exp( a );
 993     aSign = extractFloat32Sign( a );
 994     bSig = extractFloat32Frac( b );
 995     bExp = extractFloat32Exp( b );
 996     bSign = extractFloat32Sign( b );
 997     zSign = aSign ^ bSign;
 998     if ( aExp == 0xFF ) {
 999         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1000             return propagateFloat32NaN( a, b );
1001         }
1002         if ( ( bExp | bSig ) == 0 ) {
1003             float_raise( float_flag_invalid );
1004             return float32_default_nan;
1005         }
1006         return packFloat32( zSign, 0xFF, 0 );
1007     }
1008     if ( bExp == 0xFF ) {
1009         if ( bSig ) return propagateFloat32NaN( a, b );
1010         if ( ( aExp | aSig ) == 0 ) {
1011             float_raise( float_flag_invalid );
1012             return float32_default_nan;
1013         }
1014         return packFloat32( zSign, 0xFF, 0 );
1015     }
1016     if ( aExp == 0 ) {
1017         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1018         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1019     }
1020     if ( bExp == 0 ) {
1021         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1022         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1023     }
1024     zExp = aExp + bExp - 0x7F;
1025     aSig = ( aSig | 0x00800000 )<<7;
1026     bSig = ( bSig | 0x00800000 )<<8;
1027     mul32To64( aSig, bSig, &zSig0, &zSig1 );
1028     zSig0 |= ( zSig1 != 0 );
1029     if ( 0 <= (sbits32) ( zSig0<<1 ) ) {
1030         zSig0 <<= 1;
1031         --zExp;
1032     }
1033     return roundAndPackFloat32( zSign, zExp, zSig0 );
1034
1035 }
1036
1037 /*
1038 -------------------------------------------------------------------------------
1039 Returns the result of dividing the single-precision floating-point value `a'
1040 by the corresponding value `b'.  The operation is performed according to the
1041 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1042 -------------------------------------------------------------------------------
1043 */
1044 float32 float32_div( float32 a, float32 b )
1045 {
1046     flag aSign, bSign, zSign;
1047     int16 aExp, bExp, zExp;
1048     bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;
1049
1050     aSig = extractFloat32Frac( a );
1051     aExp = extractFloat32Exp( a );
1052     aSign = extractFloat32Sign( a );
1053     bSig = extractFloat32Frac( b );
1054     bExp = extractFloat32Exp( b );
1055     bSign = extractFloat32Sign( b );
1056     zSign = aSign ^ bSign;
1057     if ( aExp == 0xFF ) {
1058         if ( aSig ) return propagateFloat32NaN( a, b );
1059         if ( bExp == 0xFF ) {
1060             if ( bSig ) return propagateFloat32NaN( a, b );
1061             float_raise( float_flag_invalid );
1062             return float32_default_nan;
1063         }
1064         return packFloat32( zSign, 0xFF, 0 );
1065     }
1066     if ( bExp == 0xFF ) {
1067         if ( bSig ) return propagateFloat32NaN( a, b );
1068         return packFloat32( zSign, 0, 0 );
1069     }
1070     if ( bExp == 0 ) {
1071         if ( bSig == 0 ) {
1072             if ( ( aExp | aSig ) == 0 ) {
1073                 float_raise( float_flag_invalid );
1074                 return float32_default_nan;
1075             }
1076             float_raise( float_flag_divbyzero );
1077             return packFloat32( zSign, 0xFF, 0 );
1078         }
1079         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1080     }
1081     if ( aExp == 0 ) {
1082         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1083         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1084     }
1085     zExp = aExp - bExp + 0x7D;
1086     aSig = ( aSig | 0x00800000 )<<7;
1087     bSig = ( bSig | 0x00800000 )<<8;
1088     if ( bSig <= ( aSig + aSig ) ) {
1089         aSig >>= 1;
1090         ++zExp;
1091     }
1092     zSig = estimateDiv64To32( aSig, 0, bSig );
1093     if ( ( zSig & 0x3F ) <= 2 ) {
1094         mul32To64( bSig, zSig, &term0, &term1 );
1095         sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1096         while ( (sbits32) rem0 < 0 ) {
1097             --zSig;
1098             add64( rem0, rem1, 0, bSig, &rem0, &rem1 );
1099         }
1100         zSig |= ( rem1 != 0 );
1101     }
1102     return roundAndPackFloat32( zSign, zExp, zSig );
1103
1104 }
1105
1106 #ifndef SOFTFLOAT_FOR_GCC
1107 /*
1108 -------------------------------------------------------------------------------
1109 Returns the remainder of the single-precision floating-point value `a'
1110 with respect to the corresponding value `b'.  The operation is performed
1111 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1112 -------------------------------------------------------------------------------
1113 */
1114 float32 float32_rem( float32 a, float32 b )
1115 {
1116     flag aSign, bSign, zSign;
1117     int16 aExp, bExp, expDiff;
1118     bits32 aSig, bSig, q, allZero, alternateASig;
1119     sbits32 sigMean;
1120
1121     aSig = extractFloat32Frac( a );
1122     aExp = extractFloat32Exp( a );
1123     aSign = extractFloat32Sign( a );
1124     bSig = extractFloat32Frac( b );
1125     bExp = extractFloat32Exp( b );
1126     bSign = extractFloat32Sign( b );
1127     if ( aExp == 0xFF ) {
1128         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1129             return propagateFloat32NaN( a, b );
1130         }
1131         float_raise( float_flag_invalid );
1132         return float32_default_nan;
1133     }
1134     if ( bExp == 0xFF ) {
1135         if ( bSig ) return propagateFloat32NaN( a, b );
1136         return a;
1137     }
1138     if ( bExp == 0 ) {
1139         if ( bSig == 0 ) {
1140             float_raise( float_flag_invalid );
1141             return float32_default_nan;
1142         }
1143         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1144     }
1145     if ( aExp == 0 ) {
1146         if ( aSig == 0 ) return a;
1147         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1148     }
1149     expDiff = aExp - bExp;
1150     aSig = ( aSig | 0x00800000 )<<8;
1151     bSig = ( bSig | 0x00800000 )<<8;
1152     if ( expDiff < 0 ) {
1153         if ( expDiff < -1 ) return a;
1154         aSig >>= 1;
1155     }
1156     q = ( bSig <= aSig );
1157     if ( q ) aSig -= bSig;
1158     expDiff -= 32;
1159     while ( 0 < expDiff ) {
1160         q = estimateDiv64To32( aSig, 0, bSig );
1161         q = ( 2 < q ) ? q - 2 : 0;
1162         aSig = - ( ( bSig>>2 ) * q );
1163         expDiff -= 30;
1164     }
1165     expDiff += 32;
1166     if ( 0 < expDiff ) {
1167         q = estimateDiv64To32( aSig, 0, bSig );
1168         q = ( 2 < q ) ? q - 2 : 0;
1169         q >>= 32 - expDiff;
1170         bSig >>= 2;
1171         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1172     }
1173     else {
1174         aSig >>= 2;
1175         bSig >>= 2;
1176     }
1177     do {
1178         alternateASig = aSig;
1179         ++q;
1180         aSig -= bSig;
1181     } while ( 0 <= (sbits32) aSig );
1182     sigMean = aSig + alternateASig;
1183     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1184         aSig = alternateASig;
1185     }
1186     zSign = ( (sbits32) aSig < 0 );
1187     if ( zSign ) aSig = - aSig;
1188     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
1189
1190 }
1191 #endif
1192
1193 #ifndef SOFTFLOAT_FOR_GCC
1194 /*
1195 -------------------------------------------------------------------------------
1196 Returns the square root of the single-precision floating-point value `a'.
1197 The operation is performed according to the IEC/IEEE Standard for Binary
1198 Floating-Point Arithmetic.
1199 -------------------------------------------------------------------------------
1200 */
1201 float32 float32_sqrt( float32 a )
1202 {
1203     flag aSign;
1204     int16 aExp, zExp;
1205     bits32 aSig, zSig, rem0, rem1, term0, term1;
1206
1207     aSig = extractFloat32Frac( a );
1208     aExp = extractFloat32Exp( a );
1209     aSign = extractFloat32Sign( a );
1210     if ( aExp == 0xFF ) {
1211         if ( aSig ) return propagateFloat32NaN( a, 0 );
1212         if ( ! aSign ) return a;
1213         float_raise( float_flag_invalid );
1214         return float32_default_nan;
1215     }
1216     if ( aSign ) {
1217         if ( ( aExp | aSig ) == 0 ) return a;
1218         float_raise( float_flag_invalid );
1219         return float32_default_nan;
1220     }
1221     if ( aExp == 0 ) {
1222         if ( aSig == 0 ) return 0;
1223         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1224     }
1225     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1226     aSig = ( aSig | 0x00800000 )<<8;
1227     zSig = estimateSqrt32( aExp, aSig ) + 2;
1228     if ( ( zSig & 0x7F ) <= 5 ) {
1229         if ( zSig < 2 ) {
1230             zSig = 0x7FFFFFFF;
1231             goto roundAndPack;
1232         }
1233         else {
1234             aSig >>= aExp & 1;
1235             mul32To64( zSig, zSig, &term0, &term1 );
1236             sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1237             while ( (sbits32) rem0 < 0 ) {
1238                 --zSig;
1239                 shortShift64Left( 0, zSig, 1, &term0, &term1 );
1240                 term1 |= 1;
1241                 add64( rem0, rem1, term0, term1, &rem0, &rem1 );
1242             }
1243             zSig |= ( ( rem0 | rem1 ) != 0 );
1244         }
1245     }
1246     shift32RightJamming( zSig, 1, &zSig );
1247  roundAndPack:
1248     return roundAndPackFloat32( 0, zExp, zSig );
1249
1250 }
1251 #endif
1252
1253 /*
1254 -------------------------------------------------------------------------------
1255 Returns 1 if the single-precision floating-point value `a' is equal to
1256 the corresponding value `b', and 0 otherwise.  The comparison is performed
1257 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1258 -------------------------------------------------------------------------------
1259 */
1260 flag float32_eq( float32 a, float32 b )
1261 {
1262
1263     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1264          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1265        ) {
1266         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1267             float_raise( float_flag_invalid );
1268         }
1269         return 0;
1270     }
1271     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1272
1273 }
1274
1275 /*
1276 -------------------------------------------------------------------------------
1277 Returns 1 if the single-precision floating-point value `a' is less than
1278 or equal to the corresponding value `b', and 0 otherwise.  The comparison
1279 is performed according to the IEC/IEEE Standard for Binary Floating-Point
1280 Arithmetic.
1281 -------------------------------------------------------------------------------
1282 */
1283 flag float32_le( float32 a, float32 b )
1284 {
1285     flag aSign, bSign;
1286
1287     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1288          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1289        ) {
1290         float_raise( float_flag_invalid );
1291         return 0;
1292     }
1293     aSign = extractFloat32Sign( a );
1294     bSign = extractFloat32Sign( b );
1295     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1296     return ( a == b ) || ( aSign ^ ( a < b ) );
1297
1298 }
1299
1300 /*
1301 -------------------------------------------------------------------------------
1302 Returns 1 if the single-precision floating-point value `a' is less than
1303 the corresponding value `b', and 0 otherwise.  The comparison is performed
1304 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1305 -------------------------------------------------------------------------------
1306 */
1307 flag float32_lt( float32 a, float32 b )
1308 {
1309     flag aSign, bSign;
1310
1311     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1312          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1313        ) {
1314         float_raise( float_flag_invalid );
1315         return 0;
1316     }
1317     aSign = extractFloat32Sign( a );
1318     bSign = extractFloat32Sign( b );
1319     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1320     return ( a != b ) && ( aSign ^ ( a < b ) );
1321
1322 }
1323
1324 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1325 /*
1326 -------------------------------------------------------------------------------
1327 Returns 1 if the single-precision floating-point value `a' is equal to
1328 the corresponding value `b', and 0 otherwise.  The invalid exception is
1329 raised if either operand is a NaN.  Otherwise, the comparison is performed
1330 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1331 -------------------------------------------------------------------------------
1332 */
1333 flag float32_eq_signaling( float32 a, float32 b )
1334 {
1335
1336     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1337          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1338        ) {
1339         float_raise( float_flag_invalid );
1340         return 0;
1341     }
1342     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1343
1344 }
1345
1346 /*
1347 -------------------------------------------------------------------------------
1348 Returns 1 if the single-precision floating-point value `a' is less than or
1349 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
1350 cause an exception.  Otherwise, the comparison is performed according to the
1351 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1352 -------------------------------------------------------------------------------
1353 */
1354 flag float32_le_quiet( float32 a, float32 b )
1355 {
1356     flag aSign, bSign;
1357     int16 aExp, bExp;
1358
1359     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1360          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1361        ) {
1362         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1363             float_raise( float_flag_invalid );
1364         }
1365         return 0;
1366     }
1367     aSign = extractFloat32Sign( a );
1368     bSign = extractFloat32Sign( b );
1369     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1370     return ( a == b ) || ( aSign ^ ( a < b ) );
1371
1372 }
1373
1374 /*
1375 -------------------------------------------------------------------------------
1376 Returns 1 if the single-precision floating-point value `a' is less than
1377 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
1378 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
1379 Standard for Binary Floating-Point Arithmetic.
1380 -------------------------------------------------------------------------------
1381 */
1382 flag float32_lt_quiet( float32 a, float32 b )
1383 {
1384     flag aSign, bSign;
1385
1386     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1387          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1388        ) {
1389         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1390             float_raise( float_flag_invalid );
1391         }
1392         return 0;
1393     }
1394     aSign = extractFloat32Sign( a );
1395     bSign = extractFloat32Sign( b );
1396     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1397     return ( a != b ) && ( aSign ^ ( a < b ) );
1398
1399 }
1400 #endif /* !SOFTFLOAT_FOR_GCC */
1401
1402 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1403 /*
1404 -------------------------------------------------------------------------------
1405 Returns the result of converting the double-precision floating-point value
1406 `a' to the 32-bit two's complement integer format.  The conversion is
1407 performed according to the IEC/IEEE Standard for Binary Floating-Point
1408 Arithmetic---which means in particular that the conversion is rounded
1409 according to the current rounding mode.  If `a' is a NaN, the largest
1410 positive integer is returned.  Otherwise, if the conversion overflows, the
1411 largest integer with the same sign as `a' is returned.
1412 -------------------------------------------------------------------------------
1413 */
1414 int32 float64_to_int32( float64 a )
1415 {
1416     flag aSign;
1417     int16 aExp, shiftCount;
1418     bits32 aSig0, aSig1, absZ, aSigExtra;
1419     int32 z;
1420     int8 roundingMode;
1421
1422     aSig1 = extractFloat64Frac1( a );
1423     aSig0 = extractFloat64Frac0( a );
1424     aExp = extractFloat64Exp( a );
1425     aSign = extractFloat64Sign( a );
1426     shiftCount = aExp - 0x413;
1427     if ( 0 <= shiftCount ) {
1428         if ( 0x41E < aExp ) {
1429             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1430             goto invalid;
1431         }
1432         shortShift64Left(
1433             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1434         if ( 0x80000000 < absZ ) goto invalid;
1435     }
1436     else {
1437         aSig1 = ( aSig1 != 0 );
1438         if ( aExp < 0x3FE ) {
1439             aSigExtra = aExp | aSig0 | aSig1;
1440             absZ = 0;
1441         }
1442         else {
1443             aSig0 |= 0x00100000;
1444             aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1445             absZ = aSig0>>( - shiftCount );
1446         }
1447     }
1448     roundingMode = float_rounding_mode;
1449     if ( roundingMode == float_round_nearest_even ) {
1450         if ( (sbits32) aSigExtra < 0 ) {
1451             ++absZ;
1452             if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
1453         }
1454         z = aSign ? - absZ : absZ;
1455     }
1456     else {
1457         aSigExtra = ( aSigExtra != 0 );
1458         if ( aSign ) {
1459             z = - (   absZ
1460                     + ( ( roundingMode == float_round_down ) & aSigExtra ) );
1461         }
1462         else {
1463             z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
1464         }
1465     }
1466     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1467  invalid:
1468         float_raise( float_flag_invalid );
1469         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1470     }
1471     if ( aSigExtra ) set_float_exception_inexact_flag();
1472     return z;
1473
1474 }
1475 #endif /* !SOFTFLOAT_FOR_GCC */
1476
1477 /*
1478 -------------------------------------------------------------------------------
1479 Returns the result of converting the double-precision floating-point value
1480 `a' to the 32-bit two's complement integer format.  The conversion is
1481 performed according to the IEC/IEEE Standard for Binary Floating-Point
1482 Arithmetic, except that the conversion is always rounded toward zero.
1483 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1484 the conversion overflows, the largest integer with the same sign as `a' is
1485 returned.
1486 -------------------------------------------------------------------------------
1487 */
1488 int32 float64_to_int32_round_to_zero( float64 a )
1489 {
1490     flag aSign;
1491     int16 aExp, shiftCount;
1492     bits32 aSig0, aSig1, absZ, aSigExtra;
1493     int32 z;
1494
1495     aSig1 = extractFloat64Frac1( a );
1496     aSig0 = extractFloat64Frac0( a );
1497     aExp = extractFloat64Exp( a );
1498     aSign = extractFloat64Sign( a );
1499     shiftCount = aExp - 0x413;
1500     if ( 0 <= shiftCount ) {
1501         if ( 0x41E < aExp ) {
1502             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1503             goto invalid;
1504         }
1505         shortShift64Left(
1506             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1507     }
1508     else {
1509         if ( aExp < 0x3FF ) {
1510             if ( aExp | aSig0 | aSig1 ) {
1511                 set_float_exception_inexact_flag();
1512             }
1513             return 0;
1514         }
1515         aSig0 |= 0x00100000;
1516         aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1517         absZ = aSig0>>( - shiftCount );
1518     }
1519     z = aSign ? - absZ : absZ;
1520     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1521  invalid:
1522         float_raise( float_flag_invalid );
1523         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1524     }
1525     if ( aSigExtra ) set_float_exception_inexact_flag();
1526     return z;
1527
1528 }
1529
1530 /*
1531 -------------------------------------------------------------------------------
1532 Returns the result of converting the double-precision floating-point value
1533 `a' to the single-precision floating-point format.  The conversion is
1534 performed according to the IEC/IEEE Standard for Binary Floating-Point
1535 Arithmetic.
1536 -------------------------------------------------------------------------------
1537 */
1538 float32 float64_to_float32( float64 a )
1539 {
1540     flag aSign;
1541     int16 aExp;
1542     bits32 aSig0, aSig1, zSig;
1543     bits32 allZero;
1544
1545     aSig1 = extractFloat64Frac1( a );
1546     aSig0 = extractFloat64Frac0( a );
1547     aExp = extractFloat64Exp( a );
1548     aSign = extractFloat64Sign( a );
1549     if ( aExp == 0x7FF ) {
1550         if ( aSig0 | aSig1 ) {
1551             return commonNaNToFloat32( float64ToCommonNaN( a ) );
1552         }
1553         return packFloat32( aSign, 0xFF, 0 );
1554     }
1555     shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
1556     if ( aExp ) zSig |= 0x40000000;
1557     return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
1558
1559 }
1560
1561 #ifndef SOFTFLOAT_FOR_GCC
1562 /*
1563 -------------------------------------------------------------------------------
1564 Rounds the double-precision floating-point value `a' to an integer,
1565 and returns the result as a double-precision floating-point value.  The
1566 operation is performed according to the IEC/IEEE Standard for Binary
1567 Floating-Point Arithmetic.
1568 -------------------------------------------------------------------------------
1569 */
1570 float64 float64_round_to_int( float64 a )
1571 {
1572     flag aSign;
1573     int16 aExp;
1574     bits32 lastBitMask, roundBitsMask;
1575     int8 roundingMode;
1576     float64 z;
1577
1578     aExp = extractFloat64Exp( a );
1579     if ( 0x413 <= aExp ) {
1580         if ( 0x433 <= aExp ) {
1581             if (    ( aExp == 0x7FF )
1582                  && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
1583                 return propagateFloat64NaN( a, a );
1584             }
1585             return a;
1586         }
1587         lastBitMask = 1;
1588         lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
1589         roundBitsMask = lastBitMask - 1;
1590         z = a;
1591         roundingMode = float_rounding_mode;
1592         if ( roundingMode == float_round_nearest_even ) {
1593             if ( lastBitMask ) {
1594                 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
1595                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
1596             }
1597             else {
1598                 if ( (sbits32) z.low < 0 ) {
1599                     ++z.high;
1600                     if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
1601                 }
1602             }
1603         }
1604         else if ( roundingMode != float_round_to_zero ) {
1605             if (   extractFloat64Sign( z )
1606                  ^ ( roundingMode == float_round_up ) ) {
1607                 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
1608             }
1609         }
1610         z.low &= ~ roundBitsMask;
1611     }
1612     else {
1613         if ( aExp <= 0x3FE ) {
1614             if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
1615             set_float_exception_inexact_flag();
1616             aSign = extractFloat64Sign( a );
1617             switch ( float_rounding_mode ) {
1618              case float_round_nearest_even:
1619                 if (    ( aExp == 0x3FE )
1620                      && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
1621                    ) {
1622                     return packFloat64( aSign, 0x3FF, 0, 0 );
1623                 }
1624                 break;
1625              case float_round_down:
1626                 return
1627                       aSign ? packFloat64( 1, 0x3FF, 0, 0 )
1628                     : packFloat64( 0, 0, 0, 0 );
1629              case float_round_up:
1630                 return
1631                       aSign ? packFloat64( 1, 0, 0, 0 )
1632                     : packFloat64( 0, 0x3FF, 0, 0 );
1633             }
1634             return packFloat64( aSign, 0, 0, 0 );
1635         }
1636         lastBitMask = 1;
1637         lastBitMask <<= 0x413 - aExp;
1638         roundBitsMask = lastBitMask - 1;
1639         z.low = 0;
1640         z.high = a.high;
1641         roundingMode = float_rounding_mode;
1642         if ( roundingMode == float_round_nearest_even ) {
1643             z.high += lastBitMask>>1;
1644             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
1645                 z.high &= ~ lastBitMask;
1646             }
1647         }
1648         else if ( roundingMode != float_round_to_zero ) {
1649             if (   extractFloat64Sign( z )
1650                  ^ ( roundingMode == float_round_up ) ) {
1651                 z.high |= ( a.low != 0 );
1652                 z.high += roundBitsMask;
1653             }
1654         }
1655         z.high &= ~ roundBitsMask;
1656     }
1657     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
1658         set_float_exception_inexact_flag();
1659     }
1660     return z;
1661
1662 }
1663 #endif
1664
1665 /*
1666 -------------------------------------------------------------------------------
1667 Returns the result of adding the absolute values of the double-precision
1668 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1669 before being returned.  `zSign' is ignored if the result is a NaN.
1670 The addition is performed according to the IEC/IEEE Standard for Binary
1671 Floating-Point Arithmetic.
1672 -------------------------------------------------------------------------------
1673 */
1674 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
1675 {
1676     int16 aExp, bExp, zExp;
1677     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1678     int16 expDiff;
1679
1680     aSig1 = extractFloat64Frac1( a );
1681     aSig0 = extractFloat64Frac0( a );
1682     aExp = extractFloat64Exp( a );
1683     bSig1 = extractFloat64Frac1( b );
1684     bSig0 = extractFloat64Frac0( b );
1685     bExp = extractFloat64Exp( b );
1686     expDiff = aExp - bExp;
1687     if ( 0 < expDiff ) {
1688         if ( aExp == 0x7FF ) {
1689             if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1690             return a;
1691         }
1692         if ( bExp == 0 ) {
1693             --expDiff;
1694         }
1695         else {
1696             bSig0 |= 0x00100000;
1697         }
1698         shift64ExtraRightJamming(
1699             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
1700         zExp = aExp;
1701     }
1702     else if ( expDiff < 0 ) {
1703         if ( bExp == 0x7FF ) {
1704             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1705             return packFloat64( zSign, 0x7FF, 0, 0 );
1706         }
1707         if ( aExp == 0 ) {
1708             ++expDiff;
1709         }
1710         else {
1711             aSig0 |= 0x00100000;
1712         }
1713         shift64ExtraRightJamming(
1714             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
1715         zExp = bExp;
1716     }
1717     else {
1718         if ( aExp == 0x7FF ) {
1719             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1720                 return propagateFloat64NaN( a, b );
1721             }
1722             return a;
1723         }
1724         add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1725         if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
1726         zSig2 = 0;
1727         zSig0 |= 0x00200000;
1728         zExp = aExp;
1729         goto shiftRight1;
1730     }
1731     aSig0 |= 0x00100000;
1732     add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1733     --zExp;
1734     if ( zSig0 < 0x00200000 ) goto roundAndPack;
1735     ++zExp;
1736  shiftRight1:
1737     shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1738  roundAndPack:
1739     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1740
1741 }
1742
1743 /*
1744 -------------------------------------------------------------------------------
1745 Returns the result of subtracting the absolute values of the double-
1746 precision floating-point values `a' and `b'.  If `zSign' is 1, the
1747 difference is negated before being returned.  `zSign' is ignored if the
1748 result is a NaN.  The subtraction is performed according to the IEC/IEEE
1749 Standard for Binary Floating-Point Arithmetic.
1750 -------------------------------------------------------------------------------
1751 */
1752 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
1753 {
1754     int16 aExp, bExp, zExp;
1755     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
1756     int16 expDiff;
1757
1758     aSig1 = extractFloat64Frac1( a );
1759     aSig0 = extractFloat64Frac0( a );
1760     aExp = extractFloat64Exp( a );
1761     bSig1 = extractFloat64Frac1( b );
1762     bSig0 = extractFloat64Frac0( b );
1763     bExp = extractFloat64Exp( b );
1764     expDiff = aExp - bExp;
1765     shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
1766     shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
1767     if ( 0 < expDiff ) goto aExpBigger;
1768     if ( expDiff < 0 ) goto bExpBigger;
1769     if ( aExp == 0x7FF ) {
1770         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1771             return propagateFloat64NaN( a, b );
1772         }
1773         float_raise( float_flag_invalid );
1774         return float64_default_nan;
1775     }
1776     if ( aExp == 0 ) {
1777         aExp = 1;
1778         bExp = 1;
1779     }
1780     if ( bSig0 < aSig0 ) goto aBigger;
1781     if ( aSig0 < bSig0 ) goto bBigger;
1782     if ( bSig1 < aSig1 ) goto aBigger;
1783     if ( aSig1 < bSig1 ) goto bBigger;
1784     return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
1785  bExpBigger:
1786     if ( bExp == 0x7FF ) {
1787         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1788         return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
1789     }
1790     if ( aExp == 0 ) {
1791         ++expDiff;
1792     }
1793     else {
1794         aSig0 |= 0x40000000;
1795     }
1796     shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
1797     bSig0 |= 0x40000000;
1798  bBigger:
1799     sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
1800     zExp = bExp;
1801     zSign ^= 1;
1802     goto normalizeRoundAndPack;
1803  aExpBigger:
1804     if ( aExp == 0x7FF ) {
1805         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1806         return a;
1807     }
1808     if ( bExp == 0 ) {
1809         --expDiff;
1810     }
1811     else {
1812         bSig0 |= 0x40000000;
1813     }
1814     shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
1815     aSig0 |= 0x40000000;
1816  aBigger:
1817     sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1818     zExp = aExp;
1819  normalizeRoundAndPack:
1820     --zExp;
1821     return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
1822
1823 }
1824
1825 /*
1826 -------------------------------------------------------------------------------
1827 Returns the result of adding the double-precision floating-point values `a'
1828 and `b'.  The operation is performed according to the IEC/IEEE Standard for
1829 Binary Floating-Point Arithmetic.
1830 -------------------------------------------------------------------------------
1831 */
1832 float64 float64_add( float64 a, float64 b )
1833 {
1834     flag aSign, bSign;
1835
1836     aSign = extractFloat64Sign( a );
1837     bSign = extractFloat64Sign( b );
1838     if ( aSign == bSign ) {
1839         return addFloat64Sigs( a, b, aSign );
1840     }
1841     else {
1842         return subFloat64Sigs( a, b, aSign );
1843     }
1844
1845 }
1846
1847 /*
1848 -------------------------------------------------------------------------------
1849 Returns the result of subtracting the double-precision floating-point values
1850 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1851 for Binary Floating-Point Arithmetic.
1852 -------------------------------------------------------------------------------
1853 */
1854 float64 float64_sub( float64 a, float64 b )
1855 {
1856     flag aSign, bSign;
1857
1858     aSign = extractFloat64Sign( a );
1859     bSign = extractFloat64Sign( b );
1860     if ( aSign == bSign ) {
1861         return subFloat64Sigs( a, b, aSign );
1862     }
1863     else {
1864         return addFloat64Sigs( a, b, aSign );
1865     }
1866
1867 }
1868
1869 /*
1870 -------------------------------------------------------------------------------
1871 Returns the result of multiplying the double-precision floating-point values
1872 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1873 for Binary Floating-Point Arithmetic.
1874 -------------------------------------------------------------------------------
1875 */
1876 float64 float64_mul( float64 a, float64 b )
1877 {
1878     flag aSign, bSign, zSign;
1879     int16 aExp, bExp, zExp;
1880     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
1881
1882     aSig1 = extractFloat64Frac1( a );
1883     aSig0 = extractFloat64Frac0( a );
1884     aExp = extractFloat64Exp( a );
1885     aSign = extractFloat64Sign( a );
1886     bSig1 = extractFloat64Frac1( b );
1887     bSig0 = extractFloat64Frac0( b );
1888     bExp = extractFloat64Exp( b );
1889     bSign = extractFloat64Sign( b );
1890     zSign = aSign ^ bSign;
1891     if ( aExp == 0x7FF ) {
1892         if (    ( aSig0 | aSig1 )
1893              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
1894             return propagateFloat64NaN( a, b );
1895         }
1896         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
1897         return packFloat64( zSign, 0x7FF, 0, 0 );
1898     }
1899     if ( bExp == 0x7FF ) {
1900         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1901         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1902  invalid:
1903             float_raise( float_flag_invalid );
1904             return float64_default_nan;
1905         }
1906         return packFloat64( zSign, 0x7FF, 0, 0 );
1907     }
1908     if ( aExp == 0 ) {
1909         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1910         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1911     }
1912     if ( bExp == 0 ) {
1913         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1914         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1915     }
1916     zExp = aExp + bExp - 0x400;
1917     aSig0 |= 0x00100000;
1918     shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
1919     mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
1920     add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
1921     zSig2 |= ( zSig3 != 0 );
1922     if ( 0x00200000 <= zSig0 ) {
1923         shift64ExtraRightJamming(
1924             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1925         ++zExp;
1926     }
1927     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1928
1929 }
1930
1931 /*
1932 -------------------------------------------------------------------------------
1933 Returns the result of dividing the double-precision floating-point value `a'
1934 by the corresponding value `b'.  The operation is performed according to the
1935 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1936 -------------------------------------------------------------------------------
1937 */
1938 float64 float64_div( float64 a, float64 b )
1939 {
1940     flag aSign, bSign, zSign;
1941     int16 aExp, bExp, zExp;
1942     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1943     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
1944
1945     aSig1 = extractFloat64Frac1( a );
1946     aSig0 = extractFloat64Frac0( a );
1947     aExp = extractFloat64Exp( a );
1948     aSign = extractFloat64Sign( a );
1949     bSig1 = extractFloat64Frac1( b );
1950     bSig0 = extractFloat64Frac0( b );
1951     bExp = extractFloat64Exp( b );
1952     bSign = extractFloat64Sign( b );
1953     zSign = aSign ^ bSign;
1954     if ( aExp == 0x7FF ) {
1955         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1956         if ( bExp == 0x7FF ) {
1957             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1958             goto invalid;
1959         }
1960         return packFloat64( zSign, 0x7FF, 0, 0 );
1961     }
1962     if ( bExp == 0x7FF ) {
1963         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1964         return packFloat64( zSign, 0, 0, 0 );
1965     }
1966     if ( bExp == 0 ) {
1967         if ( ( bSig0 | bSig1 ) == 0 ) {
1968             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1969  invalid:
1970                 float_raise( float_flag_invalid );
1971                 return float64_default_nan;
1972             }
1973             float_raise( float_flag_divbyzero );
1974             return packFloat64( zSign, 0x7FF, 0, 0 );
1975         }
1976         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1977     }
1978     if ( aExp == 0 ) {
1979         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1980         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1981     }
1982     zExp = aExp - bExp + 0x3FD;
1983     shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
1984     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
1985     if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {
1986         shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
1987         ++zExp;
1988     }
1989     zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
1990     mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
1991     sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
1992     while ( (sbits32) rem0 < 0 ) {
1993         --zSig0;
1994         add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
1995     }
1996     zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
1997     if ( ( zSig1 & 0x3FF ) <= 4 ) {
1998         mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
1999         sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
2000         while ( (sbits32) rem1 < 0 ) {
2001             --zSig1;
2002             add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
2003         }
2004         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2005     }
2006     shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
2007     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
2008
2009 }
2010
2011 #ifndef SOFTFLOAT_FOR_GCC
2012 /*
2013 -------------------------------------------------------------------------------
2014 Returns the remainder of the double-precision floating-point value `a'
2015 with respect to the corresponding value `b'.  The operation is performed
2016 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2017 -------------------------------------------------------------------------------
2018 */
2019 float64 float64_rem( float64 a, float64 b )
2020 {
2021     flag aSign, bSign, zSign;
2022     int16 aExp, bExp, expDiff;
2023     bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
2024     bits32 allZero, alternateASig0, alternateASig1, sigMean1;
2025     sbits32 sigMean0;
2026     float64 z;
2027
2028     aSig1 = extractFloat64Frac1( a );
2029     aSig0 = extractFloat64Frac0( a );
2030     aExp = extractFloat64Exp( a );
2031     aSign = extractFloat64Sign( a );
2032     bSig1 = extractFloat64Frac1( b );
2033     bSig0 = extractFloat64Frac0( b );
2034     bExp = extractFloat64Exp( b );
2035     bSign = extractFloat64Sign( b );
2036     if ( aExp == 0x7FF ) {
2037         if (    ( aSig0 | aSig1 )
2038              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
2039             return propagateFloat64NaN( a, b );
2040         }
2041         goto invalid;
2042     }
2043     if ( bExp == 0x7FF ) {
2044         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
2045         return a;
2046     }
2047     if ( bExp == 0 ) {
2048         if ( ( bSig0 | bSig1 ) == 0 ) {
2049  invalid:
2050             float_raise( float_flag_invalid );
2051             return float64_default_nan;
2052         }
2053         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
2054     }
2055     if ( aExp == 0 ) {
2056         if ( ( aSig0 | aSig1 ) == 0 ) return a;
2057         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2058     }
2059     expDiff = aExp - bExp;
2060     if ( expDiff < -1 ) return a;
2061     shortShift64Left(
2062         aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
2063     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
2064     q = le64( bSig0, bSig1, aSig0, aSig1 );
2065     if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2066     expDiff -= 32;
2067     while ( 0 < expDiff ) {
2068         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2069         q = ( 4 < q ) ? q - 4 : 0;
2070         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2071         shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
2072         shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
2073         sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
2074         expDiff -= 29;
2075     }
2076     if ( -32 < expDiff ) {
2077         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2078         q = ( 4 < q ) ? q - 4 : 0;
2079         q >>= - expDiff;
2080         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2081         expDiff += 24;
2082         if ( expDiff < 0 ) {
2083             shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
2084         }
2085         else {
2086             shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
2087         }
2088         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2089         sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
2090     }
2091     else {
2092         shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
2093         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2094     }
2095     do {
2096         alternateASig0 = aSig0;
2097         alternateASig1 = aSig1;
2098         ++q;
2099         sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2100     } while ( 0 <= (sbits32) aSig0 );
2101     add64(
2102         aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
2103     if (    ( sigMean0 < 0 )
2104          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
2105         aSig0 = alternateASig0;
2106         aSig1 = alternateASig1;
2107     }
2108     zSign = ( (sbits32) aSig0 < 0 );
2109     if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
2110     return
2111         normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
2112
2113 }
2114 #endif
2115
2116 #ifndef SOFTFLOAT_FOR_GCC
2117 /*
2118 -------------------------------------------------------------------------------
2119 Returns the square root of the double-precision floating-point value `a'.
2120 The operation is performed according to the IEC/IEEE Standard for Binary
2121 Floating-Point Arithmetic.
2122 -------------------------------------------------------------------------------
2123 */
2124 float64 float64_sqrt( float64 a )
2125 {
2126     flag aSign;
2127     int16 aExp, zExp;
2128     bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
2129     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
2130     float64 z;
2131
2132     aSig1 = extractFloat64Frac1( a );
2133     aSig0 = extractFloat64Frac0( a );
2134     aExp = extractFloat64Exp( a );
2135     aSign = extractFloat64Sign( a );
2136     if ( aExp == 0x7FF ) {
2137         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
2138         if ( ! aSign ) return a;
2139         goto invalid;
2140     }
2141     if ( aSign ) {
2142         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
2143  invalid:
2144         float_raise( float_flag_invalid );
2145         return float64_default_nan;
2146     }
2147     if ( aExp == 0 ) {
2148         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
2149         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2150     }
2151     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2152     aSig0 |= 0x00100000;
2153     shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
2154     zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
2155     if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
2156     doubleZSig0 = zSig0 + zSig0;
2157     shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
2158     mul32To64( zSig0, zSig0, &term0, &term1 );
2159     sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
2160     while ( (sbits32) rem0 < 0 ) {
2161         --zSig0;
2162         doubleZSig0 -= 2;
2163         add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
2164     }
2165     zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
2166     if ( ( zSig1 & 0x1FF ) <= 5 ) {
2167         if ( zSig1 == 0 ) zSig1 = 1;
2168         mul32To64( doubleZSig0, zSig1, &term1, &term2 );
2169         sub64( rem1, 0, term1, term2, &rem1, &rem2 );
2170         mul32To64( zSig1, zSig1, &term2, &term3 );
2171         sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
2172         while ( (sbits32) rem1 < 0 ) {
2173             --zSig1;
2174             shortShift64Left( 0, zSig1, 1, &term2, &term3 );
2175             term3 |= 1;
2176             term2 |= doubleZSig0;
2177             add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
2178         }
2179         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2180     }
2181     shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
2182     return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
2183
2184 }
2185 #endif
2186
2187 /*
2188 -------------------------------------------------------------------------------
2189 Returns 1 if the double-precision floating-point value `a' is equal to
2190 the corresponding value `b', and 0 otherwise.  The comparison is performed
2191 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2192 -------------------------------------------------------------------------------
2193 */
2194 flag float64_eq( float64 a, float64 b )
2195 {
2196
2197     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2198               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2199          || (    ( extractFloat64Exp( b ) == 0x7FF )
2200               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2201        ) {
2202         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2203             float_raise( float_flag_invalid );
2204         }
2205         return 0;
2206     }
2207     return ( a == b ) ||
2208         ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
2209
2210 }
2211
2212 /*
2213 -------------------------------------------------------------------------------
2214 Returns 1 if the double-precision floating-point value `a' is less than
2215 or equal to the corresponding value `b', and 0 otherwise.  The comparison
2216 is performed according to the IEC/IEEE Standard for Binary Floating-Point
2217 Arithmetic.
2218 -------------------------------------------------------------------------------
2219 */
2220 flag float64_le( float64 a, float64 b )
2221 {
2222     flag aSign, bSign;
2223
2224     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2225               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2226          || (    ( extractFloat64Exp( b ) == 0x7FF )
2227               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2228        ) {
2229         float_raise( float_flag_invalid );
2230         return 0;
2231     }
2232     aSign = extractFloat64Sign( a );
2233     bSign = extractFloat64Sign( b );
2234     if ( aSign != bSign )
2235         return aSign ||
2236             ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
2237                 0 );
2238     return ( a == b ) ||
2239         ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2240 }
2241
2242 /*
2243 -------------------------------------------------------------------------------
2244 Returns 1 if the double-precision floating-point value `a' is less than
2245 the corresponding value `b', and 0 otherwise.  The comparison is performed
2246 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2247 -------------------------------------------------------------------------------
2248 */
2249 flag float64_lt( float64 a, float64 b )
2250 {
2251     flag aSign, bSign;
2252
2253     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2254               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2255          || (    ( extractFloat64Exp( b ) == 0x7FF )
2256               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2257        ) {
2258         float_raise( float_flag_invalid );
2259         return 0;
2260     }
2261     aSign = extractFloat64Sign( a );
2262     bSign = extractFloat64Sign( b );
2263     if ( aSign != bSign )
2264         return aSign &&
2265             ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
2266               0 );
2267     return ( a != b ) &&
2268            ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2269
2270 }
2271
2272 #ifndef SOFTFLOAT_FOR_GCC
2273 /*
2274 -------------------------------------------------------------------------------
2275 Returns 1 if the double-precision floating-point value `a' is equal to
2276 the corresponding value `b', and 0 otherwise.  The invalid exception is
2277 raised if either operand is a NaN.  Otherwise, the comparison is performed
2278 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2279 -------------------------------------------------------------------------------
2280 */
2281 flag float64_eq_signaling( float64 a, float64 b )
2282 {
2283
2284     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2285               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2286          || (    ( extractFloat64Exp( b ) == 0x7FF )
2287               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2288        ) {
2289         float_raise( float_flag_invalid );
2290         return 0;
2291     }
2292     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2293
2294 }
2295
2296 /*
2297 -------------------------------------------------------------------------------
2298 Returns 1 if the double-precision floating-point value `a' is less than or
2299 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2300 cause an exception.  Otherwise, the comparison is performed according to the
2301 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2302 -------------------------------------------------------------------------------
2303 */
2304 flag float64_le_quiet( float64 a, float64 b )
2305 {
2306     flag aSign, bSign;
2307
2308     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2309               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2310          || (    ( extractFloat64Exp( b ) == 0x7FF )
2311               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2312        ) {
2313         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2314             float_raise( float_flag_invalid );
2315         }
2316         return 0;
2317     }
2318     aSign = extractFloat64Sign( a );
2319     bSign = extractFloat64Sign( b );
2320     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2321     return ( a == b ) || ( aSign ^ ( a < b ) );
2322
2323 }
2324
2325 /*
2326 -------------------------------------------------------------------------------
2327 Returns 1 if the double-precision floating-point value `a' is less than
2328 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2329 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2330 Standard for Binary Floating-Point Arithmetic.
2331 -------------------------------------------------------------------------------
2332 */
2333 flag float64_lt_quiet( float64 a, float64 b )
2334 {
2335     flag aSign, bSign;
2336
2337     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2338               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2339          || (    ( extractFloat64Exp( b ) == 0x7FF )
2340               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2341        ) {
2342         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2343             float_raise( float_flag_invalid );
2344         }
2345         return 0;
2346     }
2347     aSign = extractFloat64Sign( a );
2348     bSign = extractFloat64Sign( b );
2349     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2350     return ( a != b ) && ( aSign ^ ( a < b ) );
2351
2352 }
2353
2354 #endif