ArmPkg/Library/ArmSoftFloatLib/bits32/softfloat.c

   1 /* $NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $ */
   2
   3 /*
   4  * This version hacked for use with gcc -msoft-float by bjh21.
   5  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
   6  *  itself).
   7  */
   8
   9 /*
  10  * Things you may want to define:
  11  *
  12  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
  13  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
  14  *   properly renamed.
  15  */
  16
  17 /*
  18  * This differs from the standard bits32/softfloat.c in that float64
  19  * is defined to be a 64-bit integer rather than a structure.  The
  20  * structure is float64s, with translation between the two going via
  21  * float64u.
  22  */
  23
  24 /*
  25 ===============================================================================
  26
  27 This C source file is part of the SoftFloat IEC/IEEE Floating-Point
  28 Arithmetic Package, Release 2a.
  29
  30 Written by John R. Hauser.  This work was made possible in part by the
  31 International Computer Science Institute, located at Suite 600, 1947 Center
  32 Street, Berkeley, California 94704.  Funding was partially provided by the
  33 National Science Foundation under grant MIP-9311980.  The original version
  34 of this code was written as part of a project to build a fixed-point vector
  35 processor in collaboration with the University of California at Berkeley,
  36 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  37 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  38 arithmetic/SoftFloat.html'.
  39
  40 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  41 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  42 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  43 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  44 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  45
  46 Derivative works are acceptable, even for commercial purposes, so long as
  47 (1) they include prominent notice that the work is derivative, and (2) they
  48 include prominent notice akin to these four paragraphs for those parts of
  49 this code that are retained.
  50
  51 ===============================================================================
  52 */
  53
  54 #include <sys/cdefs.h>
  55 #if defined(LIBC_SCCS) && !defined(lint)
  56 __RCSID("$NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $");
  57 #endif /* LIBC_SCCS and not lint */
  58
  59 #ifdef SOFTFLOAT_FOR_GCC
  60 #include "softfloat-for-gcc.h"
  61 #endif
  62
  63 #include "milieu.h"
  64 #include "softfloat.h"
  65
  66 /*
  67  * Conversions between floats as stored in memory and floats as
  68  * SoftFloat uses them
  69  */
  70 #ifndef FLOAT64_DEMANGLE
  71 #define FLOAT64_DEMANGLE(a) (a)
  72 #endif
  73 #ifndef FLOAT64_MANGLE
  74 #define FLOAT64_MANGLE(a)   (a)
  75 #endif
  76
  77 /*
  78 -------------------------------------------------------------------------------
  79 Floating-point rounding mode and exception flags.
  80 -------------------------------------------------------------------------------
  81 */
  82 #ifndef set_float_rounding_mode
  83 fp_rnd float_rounding_mode = float_round_nearest_even;
  84 fp_except float_exception_flags = 0;
  85 #endif
  86 #ifndef set_float_exception_inexact_flag
  87 #define set_float_exception_inexact_flag() \
  88     ((void)(float_exception_flags |= float_flag_inexact))
  89 #endif
  90
  91 /*
  92 -------------------------------------------------------------------------------
  93 Primitive arithmetic functions, including multi-word arithmetic, and
  94 division and square root approximations.  (Can be specialized to target if
  95 desired.)
  96 -------------------------------------------------------------------------------
  97 */
  98 #include "softfloat-macros"
  99
 100 /*
 101 -------------------------------------------------------------------------------
 102 Functions and definitions to determine:  (1) whether tininess for underflow
 103 is detected before or after rounding by default, (2) what (if anything)
 104 happens when exceptions are raised, (3) how signaling NaNs are distinguished
 105 from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
 106 are propagated from function inputs to output.  These details are target-
 107 specific.
 108 -------------------------------------------------------------------------------
 109 */
 110 #include "softfloat-specialize"
 111
 112 /*
 113 -------------------------------------------------------------------------------
 114 Returns the fraction bits of the single-precision floating-point value `a'.
 115 -------------------------------------------------------------------------------
 116 */
 117 INLINE bits32 extractFloat32Frac( float32 a )
 118 {
 119
 120     return a & 0x007FFFFF;
 121
 122 }
 123
 124 /*
 125 -------------------------------------------------------------------------------
 126 Returns the exponent bits of the single-precision floating-point value `a'.
 127 -------------------------------------------------------------------------------
 128 */
 129 INLINE int16 extractFloat32Exp( float32 a )
 130 {
 131
 132     return ( a>>23 ) & 0xFF;
 133
 134 }
 135
 136 /*
 137 -------------------------------------------------------------------------------
 138 Returns the sign bit of the single-precision floating-point value `a'.
 139 -------------------------------------------------------------------------------
 140 */
 141 INLINE flag extractFloat32Sign( float32 a )
 142 {
 143
 144     return a>>31;
 145
 146 }
 147
 148 /*
 149 -------------------------------------------------------------------------------
 150 Normalizes the subnormal single-precision floating-point value represented
 151 by the denormalized significand `aSig'.  The normalized exponent and
 152 significand are stored at the locations pointed to by `zExpPtr' and
 153 `zSigPtr', respectively.
 154 -------------------------------------------------------------------------------
 155 */
 156 static void
 157  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
 158 {
 159     int8 shiftCount;
 160
 161     shiftCount = countLeadingZeros32( aSig ) - 8;
 162     *zSigPtr = aSig<<shiftCount;
 163     *zExpPtr = 1 - shiftCount;
 164
 165 }
 166
 167 /*
 168 -------------------------------------------------------------------------------
 169 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
 170 single-precision floating-point value, returning the result.  After being
 171 shifted into the proper positions, the three fields are simply added
 172 together to form the result.  This means that any integer portion of `zSig'
 173 will be added into the exponent.  Since a properly normalized significand
 174 will have an integer portion equal to 1, the `zExp' input should be 1 less
 175 than the desired result exponent whenever `zSig' is a complete, normalized
 176 significand.
 177 -------------------------------------------------------------------------------
 178 */
 179 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
 180 {
 181
 182     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
 183
 184 }
 185
 186 /*
 187 -------------------------------------------------------------------------------
 188 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 189 and significand `zSig', and returns the proper single-precision floating-
 190 point value corresponding to the abstract input.  Ordinarily, the abstract
 191 value is simply rounded and packed into the single-precision format, with
 192 the inexact exception raised if the abstract input cannot be represented
 193 exactly.  However, if the abstract value is too large, the overflow and
 194 inexact exceptions are raised and an infinity or maximal finite value is
 195 returned.  If the abstract value is too small, the input value is rounded to
 196 a subnormal number, and the underflow and inexact exceptions are raised if
 197 the abstract input cannot be represented exactly as a subnormal single-
 198 precision floating-point number.
 199     The input significand `zSig' has its binary point between bits 30
 200 and 29, which is 7 bits to the left of the usual location.  This shifted
 201 significand must be normalized or smaller.  If `zSig' is not normalized,
 202 `zExp' must be 0; in that case, the result returned is a subnormal number,
 203 and it must not require rounding.  In the usual case that `zSig' is
 204 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
 205 The handling of underflow and overflow follows the IEC/IEEE Standard for
 206 Binary Floating-Point Arithmetic.
 207 -------------------------------------------------------------------------------
 208 */
 209 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
 210 {
 211     int8 roundingMode;
 212     flag roundNearestEven;
 213     int8 roundIncrement, roundBits;
 214     flag isTiny;
 215
 216     roundingMode = float_rounding_mode;
 217     roundNearestEven = roundingMode == float_round_nearest_even;
 218     roundIncrement = 0x40;
 219     if ( ! roundNearestEven ) {
 220         if ( roundingMode == float_round_to_zero ) {
 221             roundIncrement = 0;
 222         }
 223         else {
 224             roundIncrement = 0x7F;
 225             if ( zSign ) {
 226                 if ( roundingMode == float_round_up ) roundIncrement = 0;
 227             }
 228             else {
 229                 if ( roundingMode == float_round_down ) roundIncrement = 0;
 230             }
 231         }
 232     }
 233     roundBits = zSig & 0x7F;
 234     if ( 0xFD <= (bits16) zExp ) {
 235         if (    ( 0xFD < zExp )
 236              || (    ( zExp == 0xFD )
 237                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
 238            ) {
 239             float_raise( float_flag_overflow | float_flag_inexact );
 240             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
 241         }
 242         if ( zExp < 0 ) {
 243             isTiny =
 244                    ( float_detect_tininess == float_tininess_before_rounding )
 245                 || ( zExp < -1 )
 246                 || ( zSig + roundIncrement < (uint32)0x80000000 );
 247             shift32RightJamming( zSig, - zExp, &zSig );
 248             zExp = 0;
 249             roundBits = zSig & 0x7F;
 250             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
 251         }
 252     }
 253     if ( roundBits ) set_float_exception_inexact_flag();
 254     zSig = ( zSig + roundIncrement )>>7;
 255     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
 256     if ( zSig == 0 ) zExp = 0;
 257     return packFloat32( zSign, zExp, zSig );
 258
 259 }
 260
 261 /*
 262 -------------------------------------------------------------------------------
 263 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 264 and significand `zSig', and returns the proper single-precision floating-
 265 point value corresponding to the abstract input.  This routine is just like
 266 `roundAndPackFloat32' except that `zSig' does not have to be normalized.
 267 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
 268 floating-point exponent.
 269 -------------------------------------------------------------------------------
 270 */
 271 static float32
 272  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
 273 {
 274     int8 shiftCount;
 275
 276     shiftCount = countLeadingZeros32( zSig ) - 1;
 277     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
 278
 279 }
 280
 281 /*
 282 -------------------------------------------------------------------------------
 283 Returns the least-significant 32 fraction bits of the double-precision
 284 floating-point value `a'.
 285 -------------------------------------------------------------------------------
 286 */
 287 INLINE bits32 extractFloat64Frac1( float64 a )
 288 {
 289
 290     return (bits32)(FLOAT64_DEMANGLE(a) & LIT64(0x00000000FFFFFFFF));
 291
 292 }
 293
 294 /*
 295 -------------------------------------------------------------------------------
 296 Returns the most-significant 20 fraction bits of the double-precision
 297 floating-point value `a'.
 298 -------------------------------------------------------------------------------
 299 */
 300 INLINE bits32 extractFloat64Frac0( float64 a )
 301 {
 302
 303     return (bits32)((FLOAT64_DEMANGLE(a) >> 32) & 0x000FFFFF);
 304
 305 }
 306
 307 /*
 308 -------------------------------------------------------------------------------
 309 Returns the exponent bits of the double-precision floating-point value `a'.
 310 -------------------------------------------------------------------------------
 311 */
 312 INLINE int16 extractFloat64Exp( float64 a )
 313 {
 314
 315     return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF);
 316
 317 }
 318
 319 /*
 320 -------------------------------------------------------------------------------
 321 Returns the sign bit of the double-precision floating-point value `a'.
 322 -------------------------------------------------------------------------------
 323 */
 324 INLINE flag extractFloat64Sign( float64 a )
 325 {
 326
 327     return (flag)(FLOAT64_DEMANGLE(a) >> 63);
 328
 329 }
 330
 331 /*
 332 -------------------------------------------------------------------------------
 333 Normalizes the subnormal double-precision floating-point value represented
 334 by the denormalized significand formed by the concatenation of `aSig0' and
 335 `aSig1'.  The normalized exponent is stored at the location pointed to by
 336 `zExpPtr'.  The most significant 21 bits of the normalized significand are
 337 stored at the location pointed to by `zSig0Ptr', and the least significant
 338 32 bits of the normalized significand are stored at the location pointed to
 339 by `zSig1Ptr'.
 340 -------------------------------------------------------------------------------
 341 */
 342 static void
 343  normalizeFloat64Subnormal(
 344      bits32 aSig0,
 345      bits32 aSig1,
 346      int16 *zExpPtr,
 347      bits32 *zSig0Ptr,
 348      bits32 *zSig1Ptr
 349  )
 350 {
 351     int8 shiftCount;
 352
 353     if ( aSig0 == 0 ) {
 354         shiftCount = countLeadingZeros32( aSig1 ) - 11;
 355         if ( shiftCount < 0 ) {
 356             *zSig0Ptr = aSig1>>( - shiftCount );
 357             *zSig1Ptr = aSig1<<( shiftCount & 31 );
 358         }
 359         else {
 360             *zSig0Ptr = aSig1<<shiftCount;
 361             *zSig1Ptr = 0;
 362         }
 363         *zExpPtr = - shiftCount - 31;
 364     }
 365     else {
 366         shiftCount = countLeadingZeros32( aSig0 ) - 11;
 367         shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
 368         *zExpPtr = 1 - shiftCount;
 369     }
 370
 371 }
 372
 373 /*
 374 -------------------------------------------------------------------------------
 375 Packs the sign `zSign', the exponent `zExp', and the significand formed by
 376 the concatenation of `zSig0' and `zSig1' into a double-precision floating-
 377 point value, returning the result.  After being shifted into the proper
 378 positions, the three fields `zSign', `zExp', and `zSig0' are simply added
 379 together to form the most significant 32 bits of the result.  This means
 380 that any integer portion of `zSig0' will be added into the exponent.  Since
 381 a properly normalized significand will have an integer portion equal to 1,
 382 the `zExp' input should be 1 less than the desired result exponent whenever
 383 `zSig0' and `zSig1' concatenated form a complete, normalized significand.
 384 -------------------------------------------------------------------------------
 385 */
 386 INLINE float64
 387  packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
 388 {
 389
 390     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
 391                            ( ( (bits64) zExp )<<52 ) +
 392                            ( ( (bits64) zSig0 )<<32 ) + zSig1 );
 393
 394
 395 }
 396
 397 /*
 398 -------------------------------------------------------------------------------
 399 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 400 and extended significand formed by the concatenation of `zSig0', `zSig1',
 401 and `zSig2', and returns the proper double-precision floating-point value
 402 corresponding to the abstract input.  Ordinarily, the abstract value is
 403 simply rounded and packed into the double-precision format, with the inexact
 404 exception raised if the abstract input cannot be represented exactly.
 405 However, if the abstract value is too large, the overflow and inexact
 406 exceptions are raised and an infinity or maximal finite value is returned.
 407 If the abstract value is too small, the input value is rounded to a
 408 subnormal number, and the underflow and inexact exceptions are raised if the
 409 abstract input cannot be represented exactly as a subnormal double-precision
 410 floating-point number.
 411     The input significand must be normalized or smaller.  If the input
 412 significand is not normalized, `zExp' must be 0; in that case, the result
 413 returned is a subnormal number, and it must not require rounding.  In the
 414 usual case that the input significand is normalized, `zExp' must be 1 less
 415 than the ``true'' floating-point exponent.  The handling of underflow and
 416 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 417 -------------------------------------------------------------------------------
 418 */
 419 static float64
 420  roundAndPackFloat64(
 421      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
 422 {
 423     int8 roundingMode;
 424     flag roundNearestEven, increment, isTiny;
 425
 426     roundingMode = float_rounding_mode;
 427     roundNearestEven = ( roundingMode == float_round_nearest_even );
 428     increment = ( (sbits32) zSig2 < 0 );
 429     if ( ! roundNearestEven ) {
 430         if ( roundingMode == float_round_to_zero ) {
 431             increment = 0;
 432         }
 433         else {
 434             if ( zSign ) {
 435                 increment = ( roundingMode == float_round_down ) && zSig2;
 436             }
 437             else {
 438                 increment = ( roundingMode == float_round_up ) && zSig2;
 439             }
 440         }
 441     }
 442     if ( 0x7FD <= (bits16) zExp ) {
 443         if (    ( 0x7FD < zExp )
 444              || (    ( zExp == 0x7FD )
 445                   && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
 446                   && increment
 447                 )
 448            ) {
 449             float_raise( float_flag_overflow | float_flag_inexact );
 450             if (    ( roundingMode == float_round_to_zero )
 451                  || ( zSign && ( roundingMode == float_round_up ) )
 452                  || ( ! zSign && ( roundingMode == float_round_down ) )
 453                ) {
 454                 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
 455             }
 456             return packFloat64( zSign, 0x7FF, 0, 0 );
 457         }
 458         if ( zExp < 0 ) {
 459             isTiny =
 460                    ( float_detect_tininess == float_tininess_before_rounding )
 461                 || ( zExp < -1 )
 462                 || ! increment
 463                 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
 464             shift64ExtraRightJamming(
 465                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
 466             zExp = 0;
 467             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
 468             if ( roundNearestEven ) {
 469                 increment = ( (sbits32) zSig2 < 0 );
 470             }
 471             else {
 472                 if ( zSign ) {
 473                     increment = ( roundingMode == float_round_down ) && zSig2;
 474                 }
 475                 else {
 476                     increment = ( roundingMode == float_round_up ) && zSig2;
 477                 }
 478             }
 479         }
 480     }
 481     if ( zSig2 ) set_float_exception_inexact_flag();
 482     if ( increment ) {
 483         add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
 484         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
 485     }
 486     else {
 487         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
 488     }
 489     return packFloat64( zSign, zExp, zSig0, zSig1 );
 490
 491 }
 492
 493 /*
 494 -------------------------------------------------------------------------------
 495 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 496 and significand formed by the concatenation of `zSig0' and `zSig1', and
 497 returns the proper double-precision floating-point value corresponding
 498 to the abstract input.  This routine is just like `roundAndPackFloat64'
 499 except that the input significand has fewer bits and does not have to be
 500 normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
 501 point exponent.
 502 -------------------------------------------------------------------------------
 503 */
 504 static float64
 505  normalizeRoundAndPackFloat64(
 506      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
 507 {
 508     int8 shiftCount;
 509     bits32 zSig2;
 510
 511     if ( zSig0 == 0 ) {
 512         zSig0 = zSig1;
 513         zSig1 = 0;
 514         zExp -= 32;
 515     }
 516     shiftCount = countLeadingZeros32( zSig0 ) - 11;
 517     if ( 0 <= shiftCount ) {
 518         zSig2 = 0;
 519         shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
 520     }
 521     else {
 522         shift64ExtraRightJamming(
 523             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
 524     }
 525     zExp -= shiftCount;
 526     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
 527
 528 }
 529
 530 /*
 531 -------------------------------------------------------------------------------
 532 Returns the result of converting the 32-bit two's complement integer `a' to
 533 the single-precision floating-point format.  The conversion is performed
 534 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 535 -------------------------------------------------------------------------------
 536 */
 537 float32 int32_to_float32( int32 a )
 538 {
 539     flag zSign;
 540
 541     if ( a == 0 ) return 0;
 542     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
 543     zSign = ( a < 0 );
 544     return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a));
 545
 546 }
 547
 548 /*
 549 -------------------------------------------------------------------------------
 550 Returns the result of converting the 32-bit two's complement integer `a' to
 551 the double-precision floating-point format.  The conversion is performed
 552 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 553 -------------------------------------------------------------------------------
 554 */
 555 float64 int32_to_float64( int32 a )
 556 {
 557     flag zSign;
 558     bits32 absA;
 559     int8 shiftCount;
 560     bits32 zSig0, zSig1;
 561
 562     if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
 563     zSign = ( a < 0 );
 564     absA = zSign ? - a : a;
 565     shiftCount = countLeadingZeros32( absA ) - 11;
 566     if ( 0 <= shiftCount ) {
 567         zSig0 = absA<<shiftCount;
 568         zSig1 = 0;
 569     }
 570     else {
 571         shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
 572     }
 573     return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
 574
 575 }
 576
 577 #ifndef SOFTFLOAT_FOR_GCC
 578 /*
 579 -------------------------------------------------------------------------------
 580 Returns the result of converting the single-precision floating-point value
 581 `a' to the 32-bit two's complement integer format.  The conversion is
 582 performed according to the IEC/IEEE Standard for Binary Floating-Point
 583 Arithmetic---which means in particular that the conversion is rounded
 584 according to the current rounding mode.  If `a' is a NaN, the largest
 585 positive integer is returned.  Otherwise, if the conversion overflows, the
 586 largest integer with the same sign as `a' is returned.
 587 -------------------------------------------------------------------------------
 588 */
 589 int32 float32_to_int32( float32 a )
 590 {
 591     flag aSign;
 592     int16 aExp, shiftCount;
 593     bits32 aSig, aSigExtra;
 594     int32 z;
 595     int8 roundingMode;
 596
 597     aSig = extractFloat32Frac( a );
 598     aExp = extractFloat32Exp( a );
 599     aSign = extractFloat32Sign( a );
 600     shiftCount = aExp - 0x96;
 601     if ( 0 <= shiftCount ) {
 602         if ( 0x9E <= aExp ) {
 603             if ( a != 0xCF000000 ) {
 604                 float_raise( float_flag_invalid );
 605                 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
 606                     return 0x7FFFFFFF;
 607                 }
 608             }
 609             return (sbits32) 0x80000000;
 610         }
 611         z = ( aSig | 0x00800000 )<<shiftCount;
 612         if ( aSign ) z = - z;
 613     }
 614     else {
 615         if ( aExp < 0x7E ) {
 616             aSigExtra = aExp | aSig;
 617             z = 0;
 618         }
 619         else {
 620             aSig |= 0x00800000;
 621             aSigExtra = aSig<<( shiftCount & 31 );
 622             z = aSig>>( - shiftCount );
 623         }
 624         if ( aSigExtra ) set_float_exception_inexact_flag();
 625         roundingMode = float_rounding_mode;
 626         if ( roundingMode == float_round_nearest_even ) {
 627             if ( (sbits32) aSigExtra < 0 ) {
 628                 ++z;
 629                 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
 630             }
 631             if ( aSign ) z = - z;
 632         }
 633         else {
 634             aSigExtra = ( aSigExtra != 0 );
 635             if ( aSign ) {
 636                 z += ( roundingMode == float_round_down ) & aSigExtra;
 637                 z = - z;
 638             }
 639             else {
 640                 z += ( roundingMode == float_round_up ) & aSigExtra;
 641             }
 642         }
 643     }
 644     return z;
 645
 646 }
 647 #endif
 648
 649 /*
 650 -------------------------------------------------------------------------------
 651 Returns the result of converting the single-precision floating-point value
 652 `a' to the 32-bit two's complement integer format.  The conversion is
 653 performed according to the IEC/IEEE Standard for Binary Floating-Point
 654 Arithmetic, except that the conversion is always rounded toward zero.
 655 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
 656 the conversion overflows, the largest integer with the same sign as `a' is
 657 returned.
 658 -------------------------------------------------------------------------------
 659 */
 660 int32 float32_to_int32_round_to_zero( float32 a )
 661 {
 662     flag aSign;
 663     int16 aExp, shiftCount;
 664     bits32 aSig;
 665     int32 z;
 666
 667     aSig = extractFloat32Frac( a );
 668     aExp = extractFloat32Exp( a );
 669     aSign = extractFloat32Sign( a );
 670     shiftCount = aExp - 0x9E;
 671     if ( 0 <= shiftCount ) {
 672         if ( a != 0xCF000000 ) {
 673             float_raise( float_flag_invalid );
 674             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
 675         }
 676         return (sbits32) 0x80000000;
 677     }
 678     else if ( aExp <= 0x7E ) {
 679         if ( aExp | aSig ) set_float_exception_inexact_flag();
 680         return 0;
 681     }
 682     aSig = ( aSig | 0x00800000 )<<8;
 683     z = aSig>>( - shiftCount );
 684     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
 685         set_float_exception_inexact_flag();
 686     }
 687     if ( aSign ) z = - z;
 688     return z;
 689
 690 }
 691
 692 /*
 693 -------------------------------------------------------------------------------
 694 Returns the result of converting the single-precision floating-point value
 695 `a' to the double-precision floating-point format.  The conversion is
 696 performed according to the IEC/IEEE Standard for Binary Floating-Point
 697 Arithmetic.
 698 -------------------------------------------------------------------------------
 699 */
 700 float64 float32_to_float64( float32 a )
 701 {
 702     flag aSign;
 703     int16 aExp;
 704     bits32 aSig, zSig0, zSig1;
 705
 706     aSig = extractFloat32Frac( a );
 707     aExp = extractFloat32Exp( a );
 708     aSign = extractFloat32Sign( a );
 709     if ( aExp == 0xFF ) {
 710         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
 711         return packFloat64( aSign, 0x7FF, 0, 0 );
 712     }
 713     if ( aExp == 0 ) {
 714         if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
 715         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
 716         --aExp;
 717     }
 718     shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
 719     return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
 720
 721 }
 722
 723 #ifndef SOFTFLOAT_FOR_GCC
 724 /*
 725 -------------------------------------------------------------------------------
 726 Rounds the single-precision floating-point value `a' to an integer,
 727 and returns the result as a single-precision floating-point value.  The
 728 operation is performed according to the IEC/IEEE Standard for Binary
 729 Floating-Point Arithmetic.
 730 -------------------------------------------------------------------------------
 731 */
 732 float32 float32_round_to_int( float32 a )
 733 {
 734     flag aSign;
 735     int16 aExp;
 736     bits32 lastBitMask, roundBitsMask;
 737     int8 roundingMode;
 738     float32 z;
 739
 740     aExp = extractFloat32Exp( a );
 741     if ( 0x96 <= aExp ) {
 742         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
 743             return propagateFloat32NaN( a, a );
 744         }
 745         return a;
 746     }
 747     if ( aExp <= 0x7E ) {
 748         if ( (bits32) ( a<<1 ) == 0 ) return a;
 749         set_float_exception_inexact_flag();
 750         aSign = extractFloat32Sign( a );
 751         switch ( float_rounding_mode ) {
 752          case float_round_nearest_even:
 753             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
 754                 return packFloat32( aSign, 0x7F, 0 );
 755             }
 756             break;
 757          case float_round_to_zero:
 758             break;
 759          case float_round_down:
 760             return aSign ? 0xBF800000 : 0;
 761          case float_round_up:
 762             return aSign ? 0x80000000 : 0x3F800000;
 763         }
 764         return packFloat32( aSign, 0, 0 );
 765     }
 766     lastBitMask = 1;
 767     lastBitMask <<= 0x96 - aExp;
 768     roundBitsMask = lastBitMask - 1;
 769     z = a;
 770     roundingMode = float_rounding_mode;
 771     if ( roundingMode == float_round_nearest_even ) {
 772         z += lastBitMask>>1;
 773         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
 774     }
 775     else if ( roundingMode != float_round_to_zero ) {
 776         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
 777             z += roundBitsMask;
 778         }
 779     }
 780     z &= ~ roundBitsMask;
 781     if ( z != a ) set_float_exception_inexact_flag();
 782     return z;
 783
 784 }
 785 #endif
 786
 787 /*
 788 -------------------------------------------------------------------------------
 789 Returns the result of adding the absolute values of the single-precision
 790 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
 791 before being returned.  `zSign' is ignored if the result is a NaN.
 792 The addition is performed according to the IEC/IEEE Standard for Binary
 793 Floating-Point Arithmetic.
 794 -------------------------------------------------------------------------------
 795 */
 796 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
 797 {
 798     int16 aExp, bExp, zExp;
 799     bits32 aSig, bSig, zSig;
 800     int16 expDiff;
 801
 802     aSig = extractFloat32Frac( a );
 803     aExp = extractFloat32Exp( a );
 804     bSig = extractFloat32Frac( b );
 805     bExp = extractFloat32Exp( b );
 806     expDiff = aExp - bExp;
 807     aSig <<= 6;
 808     bSig <<= 6;
 809     if ( 0 < expDiff ) {
 810         if ( aExp == 0xFF ) {
 811             if ( aSig ) return propagateFloat32NaN( a, b );
 812             return a;
 813         }
 814         if ( bExp == 0 ) {
 815             --expDiff;
 816         }
 817         else {
 818             bSig |= 0x20000000;
 819         }
 820         shift32RightJamming( bSig, expDiff, &bSig );
 821         zExp = aExp;
 822     }
 823     else if ( expDiff < 0 ) {
 824         if ( bExp == 0xFF ) {
 825             if ( bSig ) return propagateFloat32NaN( a, b );
 826             return packFloat32( zSign, 0xFF, 0 );
 827         }
 828         if ( aExp == 0 ) {
 829             ++expDiff;
 830         }
 831         else {
 832             aSig |= 0x20000000;
 833         }
 834         shift32RightJamming( aSig, - expDiff, &aSig );
 835         zExp = bExp;
 836     }
 837     else {
 838         if ( aExp == 0xFF ) {
 839             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
 840             return a;
 841         }
 842         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
 843         zSig = 0x40000000 + aSig + bSig;
 844         zExp = aExp;
 845         goto roundAndPack;
 846     }
 847     aSig |= 0x20000000;
 848     zSig = ( aSig + bSig )<<1;
 849     --zExp;
 850     if ( (sbits32) zSig < 0 ) {
 851         zSig = aSig + bSig;
 852         ++zExp;
 853     }
 854  roundAndPack:
 855     return roundAndPackFloat32( zSign, zExp, zSig );
 856
 857 }
 858
 859 /*
 860 -------------------------------------------------------------------------------
 861 Returns the result of subtracting the absolute values of the single-
 862 precision floating-point values `a' and `b'.  If `zSign' is 1, the
 863 difference is negated before being returned.  `zSign' is ignored if the
 864 result is a NaN.  The subtraction is performed according to the IEC/IEEE
 865 Standard for Binary Floating-Point Arithmetic.
 866 -------------------------------------------------------------------------------
 867 */
 868 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
 869 {
 870     int16 aExp, bExp, zExp;
 871     bits32 aSig, bSig, zSig;
 872     int16 expDiff;
 873
 874     aSig = extractFloat32Frac( a );
 875     aExp = extractFloat32Exp( a );
 876     bSig = extractFloat32Frac( b );
 877     bExp = extractFloat32Exp( b );
 878     expDiff = aExp - bExp;
 879     aSig <<= 7;
 880     bSig <<= 7;
 881     if ( 0 < expDiff ) goto aExpBigger;
 882     if ( expDiff < 0 ) goto bExpBigger;
 883     if ( aExp == 0xFF ) {
 884         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
 885         float_raise( float_flag_invalid );
 886         return float32_default_nan;
 887     }
 888     if ( aExp == 0 ) {
 889         aExp = 1;
 890         bExp = 1;
 891     }
 892     if ( bSig < aSig ) goto aBigger;
 893     if ( aSig < bSig ) goto bBigger;
 894     return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
 895  bExpBigger:
 896     if ( bExp == 0xFF ) {
 897         if ( bSig ) return propagateFloat32NaN( a, b );
 898         return packFloat32( zSign ^ 1, 0xFF, 0 );
 899     }
 900     if ( aExp == 0 ) {
 901         ++expDiff;
 902     }
 903     else {
 904         aSig |= 0x40000000;
 905     }
 906     shift32RightJamming( aSig, - expDiff, &aSig );
 907     bSig |= 0x40000000;
 908  bBigger:
 909     zSig = bSig - aSig;
 910     zExp = bExp;
 911     zSign ^= 1;
 912     goto normalizeRoundAndPack;
 913  aExpBigger:
 914     if ( aExp == 0xFF ) {
 915         if ( aSig ) return propagateFloat32NaN( a, b );
 916         return a;
 917     }
 918     if ( bExp == 0 ) {
 919         --expDiff;
 920     }
 921     else {
 922         bSig |= 0x40000000;
 923     }
 924     shift32RightJamming( bSig, expDiff, &bSig );
 925     aSig |= 0x40000000;
 926  aBigger:
 927     zSig = aSig - bSig;
 928     zExp = aExp;
 929  normalizeRoundAndPack:
 930     --zExp;
 931     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
 932
 933 }
 934
 935 /*
 936 -------------------------------------------------------------------------------
 937 Returns the result of adding the single-precision floating-point values `a'
 938 and `b'.  The operation is performed according to the IEC/IEEE Standard for
 939 Binary Floating-Point Arithmetic.
 940 -------------------------------------------------------------------------------
 941 */
 942 float32 float32_add( float32 a, float32 b )
 943 {
 944     flag aSign, bSign;
 945
 946     aSign = extractFloat32Sign( a );
 947     bSign = extractFloat32Sign( b );
 948     if ( aSign == bSign ) {
 949         return addFloat32Sigs( a, b, aSign );
 950     }
 951     else {
 952         return subFloat32Sigs( a, b, aSign );
 953     }
 954
 955 }
 956
 957 /*
 958 -------------------------------------------------------------------------------
 959 Returns the result of subtracting the single-precision floating-point values
 960 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
 961 for Binary Floating-Point Arithmetic.
 962 -------------------------------------------------------------------------------
 963 */
 964 float32 float32_sub( float32 a, float32 b )
 965 {
 966     flag aSign, bSign;
 967
 968     aSign = extractFloat32Sign( a );
 969     bSign = extractFloat32Sign( b );
 970     if ( aSign == bSign ) {
 971         return subFloat32Sigs( a, b, aSign );
 972     }
 973     else {
 974         return addFloat32Sigs( a, b, aSign );
 975     }
 976
 977 }
 978
 979 /*
 980 -------------------------------------------------------------------------------
 981 Returns the result of multiplying the single-precision floating-point values
 982 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
 983 for Binary Floating-Point Arithmetic.
 984 -------------------------------------------------------------------------------
 985 */
 986 float32 float32_mul( float32 a, float32 b )
 987 {
 988     flag aSign, bSign, zSign;
 989     int16 aExp, bExp, zExp;
 990     bits32 aSig, bSig, zSig0, zSig1;
 991
 992     aSig = extractFloat32Frac( a );
 993     aExp = extractFloat32Exp( a );
 994     aSign = extractFloat32Sign( a );
 995     bSig = extractFloat32Frac( b );
 996     bExp = extractFloat32Exp( b );
 997     bSign = extractFloat32Sign( b );
 998     zSign = aSign ^ bSign;
 999     if ( aExp == 0xFF ) {
1000         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1001             return propagateFloat32NaN( a, b );
1002         }
1003         if ( ( bExp | bSig ) == 0 ) {
1004             float_raise( float_flag_invalid );
1005             return float32_default_nan;
1006         }
1007         return packFloat32( zSign, 0xFF, 0 );
1008     }
1009     if ( bExp == 0xFF ) {
1010         if ( bSig ) return propagateFloat32NaN( a, b );
1011         if ( ( aExp | aSig ) == 0 ) {
1012             float_raise( float_flag_invalid );
1013             return float32_default_nan;
1014         }
1015         return packFloat32( zSign, 0xFF, 0 );
1016     }
1017     if ( aExp == 0 ) {
1018         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1019         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1020     }
1021     if ( bExp == 0 ) {
1022         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1023         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1024     }
1025     zExp = aExp + bExp - 0x7F;
1026     aSig = ( aSig | 0x00800000 )<<7;
1027     bSig = ( bSig | 0x00800000 )<<8;
1028     mul32To64( aSig, bSig, &zSig0, &zSig1 );
1029     zSig0 |= ( zSig1 != 0 );
1030     if ( 0 <= (sbits32) ( zSig0<<1 ) ) {
1031         zSig0 <<= 1;
1032         --zExp;
1033     }
1034     return roundAndPackFloat32( zSign, zExp, zSig0 );
1035
1036 }
1037
1038 /*
1039 -------------------------------------------------------------------------------
1040 Returns the result of dividing the single-precision floating-point value `a'
1041 by the corresponding value `b'.  The operation is performed according to the
1042 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1043 -------------------------------------------------------------------------------
1044 */
1045 float32 float32_div( float32 a, float32 b )
1046 {
1047     flag aSign, bSign, zSign;
1048     int16 aExp, bExp, zExp;
1049     bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;
1050
1051     aSig = extractFloat32Frac( a );
1052     aExp = extractFloat32Exp( a );
1053     aSign = extractFloat32Sign( a );
1054     bSig = extractFloat32Frac( b );
1055     bExp = extractFloat32Exp( b );
1056     bSign = extractFloat32Sign( b );
1057     zSign = aSign ^ bSign;
1058     if ( aExp == 0xFF ) {
1059         if ( aSig ) return propagateFloat32NaN( a, b );
1060         if ( bExp == 0xFF ) {
1061             if ( bSig ) return propagateFloat32NaN( a, b );
1062             float_raise( float_flag_invalid );
1063             return float32_default_nan;
1064         }
1065         return packFloat32( zSign, 0xFF, 0 );
1066     }
1067     if ( bExp == 0xFF ) {
1068         if ( bSig ) return propagateFloat32NaN( a, b );
1069         return packFloat32( zSign, 0, 0 );
1070     }
1071     if ( bExp == 0 ) {
1072         if ( bSig == 0 ) {
1073             if ( ( aExp | aSig ) == 0 ) {
1074                 float_raise( float_flag_invalid );
1075                 return float32_default_nan;
1076             }
1077             float_raise( float_flag_divbyzero );
1078             return packFloat32( zSign, 0xFF, 0 );
1079         }
1080         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1081     }
1082     if ( aExp == 0 ) {
1083         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1084         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1085     }
1086     zExp = aExp - bExp + 0x7D;
1087     aSig = ( aSig | 0x00800000 )<<7;
1088     bSig = ( bSig | 0x00800000 )<<8;
1089     if ( bSig <= ( aSig + aSig ) ) {
1090         aSig >>= 1;
1091         ++zExp;
1092     }
1093     zSig = estimateDiv64To32( aSig, 0, bSig );
1094     if ( ( zSig & 0x3F ) <= 2 ) {
1095         mul32To64( bSig, zSig, &term0, &term1 );
1096         sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1097         while ( (sbits32) rem0 < 0 ) {
1098             --zSig;
1099             add64( rem0, rem1, 0, bSig, &rem0, &rem1 );
1100         }
1101         zSig |= ( rem1 != 0 );
1102     }
1103     return roundAndPackFloat32( zSign, zExp, zSig );
1104
1105 }
1106
1107 #ifndef SOFTFLOAT_FOR_GCC
1108 /*
1109 -------------------------------------------------------------------------------
1110 Returns the remainder of the single-precision floating-point value `a'
1111 with respect to the corresponding value `b'.  The operation is performed
1112 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1113 -------------------------------------------------------------------------------
1114 */
1115 float32 float32_rem( float32 a, float32 b )
1116 {
1117     flag aSign, bSign, zSign;
1118     int16 aExp, bExp, expDiff;
1119     bits32 aSig, bSig, q, allZero, alternateASig;
1120     sbits32 sigMean;
1121
1122     aSig = extractFloat32Frac( a );
1123     aExp = extractFloat32Exp( a );
1124     aSign = extractFloat32Sign( a );
1125     bSig = extractFloat32Frac( b );
1126     bExp = extractFloat32Exp( b );
1127     bSign = extractFloat32Sign( b );
1128     if ( aExp == 0xFF ) {
1129         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1130             return propagateFloat32NaN( a, b );
1131         }
1132         float_raise( float_flag_invalid );
1133         return float32_default_nan;
1134     }
1135     if ( bExp == 0xFF ) {
1136         if ( bSig ) return propagateFloat32NaN( a, b );
1137         return a;
1138     }
1139     if ( bExp == 0 ) {
1140         if ( bSig == 0 ) {
1141             float_raise( float_flag_invalid );
1142             return float32_default_nan;
1143         }
1144         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1145     }
1146     if ( aExp == 0 ) {
1147         if ( aSig == 0 ) return a;
1148         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1149     }
1150     expDiff = aExp - bExp;
1151     aSig = ( aSig | 0x00800000 )<<8;
1152     bSig = ( bSig | 0x00800000 )<<8;
1153     if ( expDiff < 0 ) {
1154         if ( expDiff < -1 ) return a;
1155         aSig >>= 1;
1156     }
1157     q = ( bSig <= aSig );
1158     if ( q ) aSig -= bSig;
1159     expDiff -= 32;
1160     while ( 0 < expDiff ) {
1161         q = estimateDiv64To32( aSig, 0, bSig );
1162         q = ( 2 < q ) ? q - 2 : 0;
1163         aSig = - ( ( bSig>>2 ) * q );
1164         expDiff -= 30;
1165     }
1166     expDiff += 32;
1167     if ( 0 < expDiff ) {
1168         q = estimateDiv64To32( aSig, 0, bSig );
1169         q = ( 2 < q ) ? q - 2 : 0;
1170         q >>= 32 - expDiff;
1171         bSig >>= 2;
1172         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1173     }
1174     else {
1175         aSig >>= 2;
1176         bSig >>= 2;
1177     }
1178     do {
1179         alternateASig = aSig;
1180         ++q;
1181         aSig -= bSig;
1182     } while ( 0 <= (sbits32) aSig );
1183     sigMean = aSig + alternateASig;
1184     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1185         aSig = alternateASig;
1186     }
1187     zSign = ( (sbits32) aSig < 0 );
1188     if ( zSign ) aSig = - aSig;
1189     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
1190
1191 }
1192 #endif
1193
1194 #ifndef SOFTFLOAT_FOR_GCC
1195 /*
1196 -------------------------------------------------------------------------------
1197 Returns the square root of the single-precision floating-point value `a'.
1198 The operation is performed according to the IEC/IEEE Standard for Binary
1199 Floating-Point Arithmetic.
1200 -------------------------------------------------------------------------------
1201 */
1202 float32 float32_sqrt( float32 a )
1203 {
1204     flag aSign;
1205     int16 aExp, zExp;
1206     bits32 aSig, zSig, rem0, rem1, term0, term1;
1207
1208     aSig = extractFloat32Frac( a );
1209     aExp = extractFloat32Exp( a );
1210     aSign = extractFloat32Sign( a );
1211     if ( aExp == 0xFF ) {
1212         if ( aSig ) return propagateFloat32NaN( a, 0 );
1213         if ( ! aSign ) return a;
1214         float_raise( float_flag_invalid );
1215         return float32_default_nan;
1216     }
1217     if ( aSign ) {
1218         if ( ( aExp | aSig ) == 0 ) return a;
1219         float_raise( float_flag_invalid );
1220         return float32_default_nan;
1221     }
1222     if ( aExp == 0 ) {
1223         if ( aSig == 0 ) return 0;
1224         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1225     }
1226     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1227     aSig = ( aSig | 0x00800000 )<<8;
1228     zSig = estimateSqrt32( aExp, aSig ) + 2;
1229     if ( ( zSig & 0x7F ) <= 5 ) {
1230         if ( zSig < 2 ) {
1231             zSig = 0x7FFFFFFF;
1232             goto roundAndPack;
1233         }
1234         else {
1235             aSig >>= aExp & 1;
1236             mul32To64( zSig, zSig, &term0, &term1 );
1237             sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1238             while ( (sbits32) rem0 < 0 ) {
1239                 --zSig;
1240                 shortShift64Left( 0, zSig, 1, &term0, &term1 );
1241                 term1 |= 1;
1242                 add64( rem0, rem1, term0, term1, &rem0, &rem1 );
1243             }
1244             zSig |= ( ( rem0 | rem1 ) != 0 );
1245         }
1246     }
1247     shift32RightJamming( zSig, 1, &zSig );
1248  roundAndPack:
1249     return roundAndPackFloat32( 0, zExp, zSig );
1250
1251 }
1252 #endif
1253
1254 /*
1255 -------------------------------------------------------------------------------
1256 Returns 1 if the single-precision floating-point value `a' is equal to
1257 the corresponding value `b', and 0 otherwise.  The comparison is performed
1258 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1259 -------------------------------------------------------------------------------
1260 */
1261 flag float32_eq( float32 a, float32 b )
1262 {
1263
1264     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1265          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1266        ) {
1267         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1268             float_raise( float_flag_invalid );
1269         }
1270         return 0;
1271     }
1272     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1273
1274 }
1275
1276 /*
1277 -------------------------------------------------------------------------------
1278 Returns 1 if the single-precision floating-point value `a' is less than
1279 or equal to the corresponding value `b', and 0 otherwise.  The comparison
1280 is performed according to the IEC/IEEE Standard for Binary Floating-Point
1281 Arithmetic.
1282 -------------------------------------------------------------------------------
1283 */
1284 flag float32_le( float32 a, float32 b )
1285 {
1286     flag aSign, bSign;
1287
1288     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1289          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1290        ) {
1291         float_raise( float_flag_invalid );
1292         return 0;
1293     }
1294     aSign = extractFloat32Sign( a );
1295     bSign = extractFloat32Sign( b );
1296     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1297     return ( a == b ) || ( aSign ^ ( a < b ) );
1298
1299 }
1300
1301 /*
1302 -------------------------------------------------------------------------------
1303 Returns 1 if the single-precision floating-point value `a' is less than
1304 the corresponding value `b', and 0 otherwise.  The comparison is performed
1305 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1306 -------------------------------------------------------------------------------
1307 */
1308 flag float32_lt( float32 a, float32 b )
1309 {
1310     flag aSign, bSign;
1311
1312     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1313          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1314        ) {
1315         float_raise( float_flag_invalid );
1316         return 0;
1317     }
1318     aSign = extractFloat32Sign( a );
1319     bSign = extractFloat32Sign( b );
1320     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1321     return ( a != b ) && ( aSign ^ ( a < b ) );
1322
1323 }
1324
1325 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1326 /*
1327 -------------------------------------------------------------------------------
1328 Returns 1 if the single-precision floating-point value `a' is equal to
1329 the corresponding value `b', and 0 otherwise.  The invalid exception is
1330 raised if either operand is a NaN.  Otherwise, the comparison is performed
1331 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1332 -------------------------------------------------------------------------------
1333 */
1334 flag float32_eq_signaling( float32 a, float32 b )
1335 {
1336
1337     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1338          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1339        ) {
1340         float_raise( float_flag_invalid );
1341         return 0;
1342     }
1343     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1344
1345 }
1346
1347 /*
1348 -------------------------------------------------------------------------------
1349 Returns 1 if the single-precision floating-point value `a' is less than or
1350 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
1351 cause an exception.  Otherwise, the comparison is performed according to the
1352 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1353 -------------------------------------------------------------------------------
1354 */
1355 flag float32_le_quiet( float32 a, float32 b )
1356 {
1357     flag aSign, bSign;
1358     int16 aExp, bExp;
1359
1360     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1361          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1362        ) {
1363         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1364             float_raise( float_flag_invalid );
1365         }
1366         return 0;
1367     }
1368     aSign = extractFloat32Sign( a );
1369     bSign = extractFloat32Sign( b );
1370     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1371     return ( a == b ) || ( aSign ^ ( a < b ) );
1372
1373 }
1374
1375 /*
1376 -------------------------------------------------------------------------------
1377 Returns 1 if the single-precision floating-point value `a' is less than
1378 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
1379 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
1380 Standard for Binary Floating-Point Arithmetic.
1381 -------------------------------------------------------------------------------
1382 */
1383 flag float32_lt_quiet( float32 a, float32 b )
1384 {
1385     flag aSign, bSign;
1386
1387     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1388          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1389        ) {
1390         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1391             float_raise( float_flag_invalid );
1392         }
1393         return 0;
1394     }
1395     aSign = extractFloat32Sign( a );
1396     bSign = extractFloat32Sign( b );
1397     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1398     return ( a != b ) && ( aSign ^ ( a < b ) );
1399
1400 }
1401 #endif /* !SOFTFLOAT_FOR_GCC */
1402
1403 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1404 /*
1405 -------------------------------------------------------------------------------
1406 Returns the result of converting the double-precision floating-point value
1407 `a' to the 32-bit two's complement integer format.  The conversion is
1408 performed according to the IEC/IEEE Standard for Binary Floating-Point
1409 Arithmetic---which means in particular that the conversion is rounded
1410 according to the current rounding mode.  If `a' is a NaN, the largest
1411 positive integer is returned.  Otherwise, if the conversion overflows, the
1412 largest integer with the same sign as `a' is returned.
1413 -------------------------------------------------------------------------------
1414 */
1415 int32 float64_to_int32( float64 a )
1416 {
1417     flag aSign;
1418     int16 aExp, shiftCount;
1419     bits32 aSig0, aSig1, absZ, aSigExtra;
1420     int32 z;
1421     int8 roundingMode;
1422
1423     aSig1 = extractFloat64Frac1( a );
1424     aSig0 = extractFloat64Frac0( a );
1425     aExp = extractFloat64Exp( a );
1426     aSign = extractFloat64Sign( a );
1427     shiftCount = aExp - 0x413;
1428     if ( 0 <= shiftCount ) {
1429         if ( 0x41E < aExp ) {
1430             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1431             goto invalid;
1432         }
1433         shortShift64Left(
1434             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1435         if ( 0x80000000 < absZ ) goto invalid;
1436     }
1437     else {
1438         aSig1 = ( aSig1 != 0 );
1439         if ( aExp < 0x3FE ) {
1440             aSigExtra = aExp | aSig0 | aSig1;
1441             absZ = 0;
1442         }
1443         else {
1444             aSig0 |= 0x00100000;
1445             aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1446             absZ = aSig0>>( - shiftCount );
1447         }
1448     }
1449     roundingMode = float_rounding_mode;
1450     if ( roundingMode == float_round_nearest_even ) {
1451         if ( (sbits32) aSigExtra < 0 ) {
1452             ++absZ;
1453             if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
1454         }
1455         z = aSign ? - absZ : absZ;
1456     }
1457     else {
1458         aSigExtra = ( aSigExtra != 0 );
1459         if ( aSign ) {
1460             z = - (   absZ
1461                     + ( ( roundingMode == float_round_down ) & aSigExtra ) );
1462         }
1463         else {
1464             z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
1465         }
1466     }
1467     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1468  invalid:
1469         float_raise( float_flag_invalid );
1470         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1471     }
1472     if ( aSigExtra ) set_float_exception_inexact_flag();
1473     return z;
1474
1475 }
1476 #endif /* !SOFTFLOAT_FOR_GCC */
1477
1478 /*
1479 -------------------------------------------------------------------------------
1480 Returns the result of converting the double-precision floating-point value
1481 `a' to the 32-bit two's complement integer format.  The conversion is
1482 performed according to the IEC/IEEE Standard for Binary Floating-Point
1483 Arithmetic, except that the conversion is always rounded toward zero.
1484 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1485 the conversion overflows, the largest integer with the same sign as `a' is
1486 returned.
1487 -------------------------------------------------------------------------------
1488 */
1489 int32 float64_to_int32_round_to_zero( float64 a )
1490 {
1491     flag aSign;
1492     int16 aExp, shiftCount;
1493     bits32 aSig0, aSig1, absZ, aSigExtra;
1494     int32 z;
1495
1496     aSig1 = extractFloat64Frac1( a );
1497     aSig0 = extractFloat64Frac0( a );
1498     aExp = extractFloat64Exp( a );
1499     aSign = extractFloat64Sign( a );
1500     shiftCount = aExp - 0x413;
1501     if ( 0 <= shiftCount ) {
1502         if ( 0x41E < aExp ) {
1503             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1504             goto invalid;
1505         }
1506         shortShift64Left(
1507             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1508     }
1509     else {
1510         if ( aExp < 0x3FF ) {
1511             if ( aExp | aSig0 | aSig1 ) {
1512                 set_float_exception_inexact_flag();
1513             }
1514             return 0;
1515         }
1516         aSig0 |= 0x00100000;
1517         aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1518         absZ = aSig0>>( - shiftCount );
1519     }
1520     z = aSign ? - absZ : absZ;
1521     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1522  invalid:
1523         float_raise( float_flag_invalid );
1524         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1525     }
1526     if ( aSigExtra ) set_float_exception_inexact_flag();
1527     return z;
1528
1529 }
1530
1531 /*
1532 -------------------------------------------------------------------------------
1533 Returns the result of converting the double-precision floating-point value
1534 `a' to the single-precision floating-point format.  The conversion is
1535 performed according to the IEC/IEEE Standard for Binary Floating-Point
1536 Arithmetic.
1537 -------------------------------------------------------------------------------
1538 */
1539 float32 float64_to_float32( float64 a )
1540 {
1541     flag aSign;
1542     int16 aExp;
1543     bits32 aSig0, aSig1, zSig;
1544     bits32 allZero;
1545
1546     aSig1 = extractFloat64Frac1( a );
1547     aSig0 = extractFloat64Frac0( a );
1548     aExp = extractFloat64Exp( a );
1549     aSign = extractFloat64Sign( a );
1550     if ( aExp == 0x7FF ) {
1551         if ( aSig0 | aSig1 ) {
1552             return commonNaNToFloat32( float64ToCommonNaN( a ) );
1553         }
1554         return packFloat32( aSign, 0xFF, 0 );
1555     }
1556     shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
1557     if ( aExp ) zSig |= 0x40000000;
1558     return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
1559
1560 }
1561
1562 #ifndef SOFTFLOAT_FOR_GCC
1563 /*
1564 -------------------------------------------------------------------------------
1565 Rounds the double-precision floating-point value `a' to an integer,
1566 and returns the result as a double-precision floating-point value.  The
1567 operation is performed according to the IEC/IEEE Standard for Binary
1568 Floating-Point Arithmetic.
1569 -------------------------------------------------------------------------------
1570 */
1571 float64 float64_round_to_int( float64 a )
1572 {
1573     flag aSign;
1574     int16 aExp;
1575     bits32 lastBitMask, roundBitsMask;
1576     int8 roundingMode;
1577     float64 z;
1578
1579     aExp = extractFloat64Exp( a );
1580     if ( 0x413 <= aExp ) {
1581         if ( 0x433 <= aExp ) {
1582             if (    ( aExp == 0x7FF )
1583                  && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
1584                 return propagateFloat64NaN( a, a );
1585             }
1586             return a;
1587         }
1588         lastBitMask = 1;
1589         lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
1590         roundBitsMask = lastBitMask - 1;
1591         z = a;
1592         roundingMode = float_rounding_mode;
1593         if ( roundingMode == float_round_nearest_even ) {
1594             if ( lastBitMask ) {
1595                 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
1596                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
1597             }
1598             else {
1599                 if ( (sbits32) z.low < 0 ) {
1600                     ++z.high;
1601                     if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
1602                 }
1603             }
1604         }
1605         else if ( roundingMode != float_round_to_zero ) {
1606             if (   extractFloat64Sign( z )
1607                  ^ ( roundingMode == float_round_up ) ) {
1608                 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
1609             }
1610         }
1611         z.low &= ~ roundBitsMask;
1612     }
1613     else {
1614         if ( aExp <= 0x3FE ) {
1615             if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
1616             set_float_exception_inexact_flag();
1617             aSign = extractFloat64Sign( a );
1618             switch ( float_rounding_mode ) {
1619              case float_round_nearest_even:
1620                 if (    ( aExp == 0x3FE )
1621                      && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
1622                    ) {
1623                     return packFloat64( aSign, 0x3FF, 0, 0 );
1624                 }
1625                 break;
1626              case float_round_down:
1627                 return
1628                       aSign ? packFloat64( 1, 0x3FF, 0, 0 )
1629                     : packFloat64( 0, 0, 0, 0 );
1630              case float_round_up:
1631                 return
1632                       aSign ? packFloat64( 1, 0, 0, 0 )
1633                     : packFloat64( 0, 0x3FF, 0, 0 );
1634             }
1635             return packFloat64( aSign, 0, 0, 0 );
1636         }
1637         lastBitMask = 1;
1638         lastBitMask <<= 0x413 - aExp;
1639         roundBitsMask = lastBitMask - 1;
1640         z.low = 0;
1641         z.high = a.high;
1642         roundingMode = float_rounding_mode;
1643         if ( roundingMode == float_round_nearest_even ) {
1644             z.high += lastBitMask>>1;
1645             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
1646                 z.high &= ~ lastBitMask;
1647             }
1648         }
1649         else if ( roundingMode != float_round_to_zero ) {
1650             if (   extractFloat64Sign( z )
1651                  ^ ( roundingMode == float_round_up ) ) {
1652                 z.high |= ( a.low != 0 );
1653                 z.high += roundBitsMask;
1654             }
1655         }
1656         z.high &= ~ roundBitsMask;
1657     }
1658     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
1659         set_float_exception_inexact_flag();
1660     }
1661     return z;
1662
1663 }
1664 #endif
1665
1666 /*
1667 -------------------------------------------------------------------------------
1668 Returns the result of adding the absolute values of the double-precision
1669 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1670 before being returned.  `zSign' is ignored if the result is a NaN.
1671 The addition is performed according to the IEC/IEEE Standard for Binary
1672 Floating-Point Arithmetic.
1673 -------------------------------------------------------------------------------
1674 */
1675 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
1676 {
1677     int16 aExp, bExp, zExp;
1678     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1679     int16 expDiff;
1680
1681     aSig1 = extractFloat64Frac1( a );
1682     aSig0 = extractFloat64Frac0( a );
1683     aExp = extractFloat64Exp( a );
1684     bSig1 = extractFloat64Frac1( b );
1685     bSig0 = extractFloat64Frac0( b );
1686     bExp = extractFloat64Exp( b );
1687     expDiff = aExp - bExp;
1688     if ( 0 < expDiff ) {
1689         if ( aExp == 0x7FF ) {
1690             if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1691             return a;
1692         }
1693         if ( bExp == 0 ) {
1694             --expDiff;
1695         }
1696         else {
1697             bSig0 |= 0x00100000;
1698         }
1699         shift64ExtraRightJamming(
1700             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
1701         zExp = aExp;
1702     }
1703     else if ( expDiff < 0 ) {
1704         if ( bExp == 0x7FF ) {
1705             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1706             return packFloat64( zSign, 0x7FF, 0, 0 );
1707         }
1708         if ( aExp == 0 ) {
1709             ++expDiff;
1710         }
1711         else {
1712             aSig0 |= 0x00100000;
1713         }
1714         shift64ExtraRightJamming(
1715             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
1716         zExp = bExp;
1717     }
1718     else {
1719         if ( aExp == 0x7FF ) {
1720             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1721                 return propagateFloat64NaN( a, b );
1722             }
1723             return a;
1724         }
1725         add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1726         if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
1727         zSig2 = 0;
1728         zSig0 |= 0x00200000;
1729         zExp = aExp;
1730         goto shiftRight1;
1731     }
1732     aSig0 |= 0x00100000;
1733     add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1734     --zExp;
1735     if ( zSig0 < 0x00200000 ) goto roundAndPack;
1736     ++zExp;
1737  shiftRight1:
1738     shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1739  roundAndPack:
1740     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1741
1742 }
1743
1744 /*
1745 -------------------------------------------------------------------------------
1746 Returns the result of subtracting the absolute values of the double-
1747 precision floating-point values `a' and `b'.  If `zSign' is 1, the
1748 difference is negated before being returned.  `zSign' is ignored if the
1749 result is a NaN.  The subtraction is performed according to the IEC/IEEE
1750 Standard for Binary Floating-Point Arithmetic.
1751 -------------------------------------------------------------------------------
1752 */
1753 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
1754 {
1755     int16 aExp, bExp, zExp;
1756     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
1757     int16 expDiff;
1758
1759     aSig1 = extractFloat64Frac1( a );
1760     aSig0 = extractFloat64Frac0( a );
1761     aExp = extractFloat64Exp( a );
1762     bSig1 = extractFloat64Frac1( b );
1763     bSig0 = extractFloat64Frac0( b );
1764     bExp = extractFloat64Exp( b );
1765     expDiff = aExp - bExp;
1766     shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
1767     shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
1768     if ( 0 < expDiff ) goto aExpBigger;
1769     if ( expDiff < 0 ) goto bExpBigger;
1770     if ( aExp == 0x7FF ) {
1771         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1772             return propagateFloat64NaN( a, b );
1773         }
1774         float_raise( float_flag_invalid );
1775         return float64_default_nan;
1776     }
1777     if ( aExp == 0 ) {
1778         aExp = 1;
1779         bExp = 1;
1780     }
1781     if ( bSig0 < aSig0 ) goto aBigger;
1782     if ( aSig0 < bSig0 ) goto bBigger;
1783     if ( bSig1 < aSig1 ) goto aBigger;
1784     if ( aSig1 < bSig1 ) goto bBigger;
1785     return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
1786  bExpBigger:
1787     if ( bExp == 0x7FF ) {
1788         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1789         return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
1790     }
1791     if ( aExp == 0 ) {
1792         ++expDiff;
1793     }
1794     else {
1795         aSig0 |= 0x40000000;
1796     }
1797     shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
1798     bSig0 |= 0x40000000;
1799  bBigger:
1800     sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
1801     zExp = bExp;
1802     zSign ^= 1;
1803     goto normalizeRoundAndPack;
1804  aExpBigger:
1805     if ( aExp == 0x7FF ) {
1806         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1807         return a;
1808     }
1809     if ( bExp == 0 ) {
1810         --expDiff;
1811     }
1812     else {
1813         bSig0 |= 0x40000000;
1814     }
1815     shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
1816     aSig0 |= 0x40000000;
1817  aBigger:
1818     sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1819     zExp = aExp;
1820  normalizeRoundAndPack:
1821     --zExp;
1822     return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
1823
1824 }
1825
1826 /*
1827 -------------------------------------------------------------------------------
1828 Returns the result of adding the double-precision floating-point values `a'
1829 and `b'.  The operation is performed according to the IEC/IEEE Standard for
1830 Binary Floating-Point Arithmetic.
1831 -------------------------------------------------------------------------------
1832 */
1833 float64 float64_add( float64 a, float64 b )
1834 {
1835     flag aSign, bSign;
1836
1837     aSign = extractFloat64Sign( a );
1838     bSign = extractFloat64Sign( b );
1839     if ( aSign == bSign ) {
1840         return addFloat64Sigs( a, b, aSign );
1841     }
1842     else {
1843         return subFloat64Sigs( a, b, aSign );
1844     }
1845
1846 }
1847
1848 /*
1849 -------------------------------------------------------------------------------
1850 Returns the result of subtracting the double-precision floating-point values
1851 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1852 for Binary Floating-Point Arithmetic.
1853 -------------------------------------------------------------------------------
1854 */
1855 float64 float64_sub( float64 a, float64 b )
1856 {
1857     flag aSign, bSign;
1858
1859     aSign = extractFloat64Sign( a );
1860     bSign = extractFloat64Sign( b );
1861     if ( aSign == bSign ) {
1862         return subFloat64Sigs( a, b, aSign );
1863     }
1864     else {
1865         return addFloat64Sigs( a, b, aSign );
1866     }
1867
1868 }
1869
1870 /*
1871 -------------------------------------------------------------------------------
1872 Returns the result of multiplying the double-precision floating-point values
1873 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1874 for Binary Floating-Point Arithmetic.
1875 -------------------------------------------------------------------------------
1876 */
1877 float64 float64_mul( float64 a, float64 b )
1878 {
1879     flag aSign, bSign, zSign;
1880     int16 aExp, bExp, zExp;
1881     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
1882
1883     aSig1 = extractFloat64Frac1( a );
1884     aSig0 = extractFloat64Frac0( a );
1885     aExp = extractFloat64Exp( a );
1886     aSign = extractFloat64Sign( a );
1887     bSig1 = extractFloat64Frac1( b );
1888     bSig0 = extractFloat64Frac0( b );
1889     bExp = extractFloat64Exp( b );
1890     bSign = extractFloat64Sign( b );
1891     zSign = aSign ^ bSign;
1892     if ( aExp == 0x7FF ) {
1893         if (    ( aSig0 | aSig1 )
1894              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
1895             return propagateFloat64NaN( a, b );
1896         }
1897         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
1898         return packFloat64( zSign, 0x7FF, 0, 0 );
1899     }
1900     if ( bExp == 0x7FF ) {
1901         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1902         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1903  invalid:
1904             float_raise( float_flag_invalid );
1905             return float64_default_nan;
1906         }
1907         return packFloat64( zSign, 0x7FF, 0, 0 );
1908     }
1909     if ( aExp == 0 ) {
1910         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1911         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1912     }
1913     if ( bExp == 0 ) {
1914         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1915         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1916     }
1917     zExp = aExp + bExp - 0x400;
1918     aSig0 |= 0x00100000;
1919     shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
1920     mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
1921     add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
1922     zSig2 |= ( zSig3 != 0 );
1923     if ( 0x00200000 <= zSig0 ) {
1924         shift64ExtraRightJamming(
1925             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1926         ++zExp;
1927     }
1928     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1929
1930 }
1931
1932 /*
1933 -------------------------------------------------------------------------------
1934 Returns the result of dividing the double-precision floating-point value `a'
1935 by the corresponding value `b'.  The operation is performed according to the
1936 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1937 -------------------------------------------------------------------------------
1938 */
1939 float64 float64_div( float64 a, float64 b )
1940 {
1941     flag aSign, bSign, zSign;
1942     int16 aExp, bExp, zExp;
1943     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1944     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
1945
1946     aSig1 = extractFloat64Frac1( a );
1947     aSig0 = extractFloat64Frac0( a );
1948     aExp = extractFloat64Exp( a );
1949     aSign = extractFloat64Sign( a );
1950     bSig1 = extractFloat64Frac1( b );
1951     bSig0 = extractFloat64Frac0( b );
1952     bExp = extractFloat64Exp( b );
1953     bSign = extractFloat64Sign( b );
1954     zSign = aSign ^ bSign;
1955     if ( aExp == 0x7FF ) {
1956         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1957         if ( bExp == 0x7FF ) {
1958             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1959             goto invalid;
1960         }
1961         return packFloat64( zSign, 0x7FF, 0, 0 );
1962     }
1963     if ( bExp == 0x7FF ) {
1964         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1965         return packFloat64( zSign, 0, 0, 0 );
1966     }
1967     if ( bExp == 0 ) {
1968         if ( ( bSig0 | bSig1 ) == 0 ) {
1969             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1970  invalid:
1971                 float_raise( float_flag_invalid );
1972                 return float64_default_nan;
1973             }
1974             float_raise( float_flag_divbyzero );
1975             return packFloat64( zSign, 0x7FF, 0, 0 );
1976         }
1977         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1978     }
1979     if ( aExp == 0 ) {
1980         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1981         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1982     }
1983     zExp = aExp - bExp + 0x3FD;
1984     shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
1985     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
1986     if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {
1987         shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
1988         ++zExp;
1989     }
1990     zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
1991     mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
1992     sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
1993     while ( (sbits32) rem0 < 0 ) {
1994         --zSig0;
1995         add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
1996     }
1997     zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
1998     if ( ( zSig1 & 0x3FF ) <= 4 ) {
1999         mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
2000         sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
2001         while ( (sbits32) rem1 < 0 ) {
2002             --zSig1;
2003             add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
2004         }
2005         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2006     }
2007     shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
2008     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
2009
2010 }
2011
2012 #ifndef SOFTFLOAT_FOR_GCC
2013 /*
2014 -------------------------------------------------------------------------------
2015 Returns the remainder of the double-precision floating-point value `a'
2016 with respect to the corresponding value `b'.  The operation is performed
2017 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2018 -------------------------------------------------------------------------------
2019 */
2020 float64 float64_rem( float64 a, float64 b )
2021 {
2022     flag aSign, bSign, zSign;
2023     int16 aExp, bExp, expDiff;
2024     bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
2025     bits32 allZero, alternateASig0, alternateASig1, sigMean1;
2026     sbits32 sigMean0;
2027     float64 z;
2028
2029     aSig1 = extractFloat64Frac1( a );
2030     aSig0 = extractFloat64Frac0( a );
2031     aExp = extractFloat64Exp( a );
2032     aSign = extractFloat64Sign( a );
2033     bSig1 = extractFloat64Frac1( b );
2034     bSig0 = extractFloat64Frac0( b );
2035     bExp = extractFloat64Exp( b );
2036     bSign = extractFloat64Sign( b );
2037     if ( aExp == 0x7FF ) {
2038         if (    ( aSig0 | aSig1 )
2039              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
2040             return propagateFloat64NaN( a, b );
2041         }
2042         goto invalid;
2043     }
2044     if ( bExp == 0x7FF ) {
2045         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
2046         return a;
2047     }
2048     if ( bExp == 0 ) {
2049         if ( ( bSig0 | bSig1 ) == 0 ) {
2050  invalid:
2051             float_raise( float_flag_invalid );
2052             return float64_default_nan;
2053         }
2054         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
2055     }
2056     if ( aExp == 0 ) {
2057         if ( ( aSig0 | aSig1 ) == 0 ) return a;
2058         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2059     }
2060     expDiff = aExp - bExp;
2061     if ( expDiff < -1 ) return a;
2062     shortShift64Left(
2063         aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
2064     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
2065     q = le64( bSig0, bSig1, aSig0, aSig1 );
2066     if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2067     expDiff -= 32;
2068     while ( 0 < expDiff ) {
2069         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2070         q = ( 4 < q ) ? q - 4 : 0;
2071         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2072         shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
2073         shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
2074         sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
2075         expDiff -= 29;
2076     }
2077     if ( -32 < expDiff ) {
2078         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2079         q = ( 4 < q ) ? q - 4 : 0;
2080         q >>= - expDiff;
2081         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2082         expDiff += 24;
2083         if ( expDiff < 0 ) {
2084             shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
2085         }
2086         else {
2087             shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
2088         }
2089         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2090         sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
2091     }
2092     else {
2093         shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
2094         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2095     }
2096     do {
2097         alternateASig0 = aSig0;
2098         alternateASig1 = aSig1;
2099         ++q;
2100         sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2101     } while ( 0 <= (sbits32) aSig0 );
2102     add64(
2103         aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
2104     if (    ( sigMean0 < 0 )
2105          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
2106         aSig0 = alternateASig0;
2107         aSig1 = alternateASig1;
2108     }
2109     zSign = ( (sbits32) aSig0 < 0 );
2110     if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
2111     return
2112         normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
2113
2114 }
2115 #endif
2116
2117 #ifndef SOFTFLOAT_FOR_GCC
2118 /*
2119 -------------------------------------------------------------------------------
2120 Returns the square root of the double-precision floating-point value `a'.
2121 The operation is performed according to the IEC/IEEE Standard for Binary
2122 Floating-Point Arithmetic.
2123 -------------------------------------------------------------------------------
2124 */
2125 float64 float64_sqrt( float64 a )
2126 {
2127     flag aSign;
2128     int16 aExp, zExp;
2129     bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
2130     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
2131     float64 z;
2132
2133     aSig1 = extractFloat64Frac1( a );
2134     aSig0 = extractFloat64Frac0( a );
2135     aExp = extractFloat64Exp( a );
2136     aSign = extractFloat64Sign( a );
2137     if ( aExp == 0x7FF ) {
2138         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
2139         if ( ! aSign ) return a;
2140         goto invalid;
2141     }
2142     if ( aSign ) {
2143         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
2144  invalid:
2145         float_raise( float_flag_invalid );
2146         return float64_default_nan;
2147     }
2148     if ( aExp == 0 ) {
2149         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
2150         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2151     }
2152     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2153     aSig0 |= 0x00100000;
2154     shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
2155     zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
2156     if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
2157     doubleZSig0 = zSig0 + zSig0;
2158     shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
2159     mul32To64( zSig0, zSig0, &term0, &term1 );
2160     sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
2161     while ( (sbits32) rem0 < 0 ) {
2162         --zSig0;
2163         doubleZSig0 -= 2;
2164         add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
2165     }
2166     zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
2167     if ( ( zSig1 & 0x1FF ) <= 5 ) {
2168         if ( zSig1 == 0 ) zSig1 = 1;
2169         mul32To64( doubleZSig0, zSig1, &term1, &term2 );
2170         sub64( rem1, 0, term1, term2, &rem1, &rem2 );
2171         mul32To64( zSig1, zSig1, &term2, &term3 );
2172         sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
2173         while ( (sbits32) rem1 < 0 ) {
2174             --zSig1;
2175             shortShift64Left( 0, zSig1, 1, &term2, &term3 );
2176             term3 |= 1;
2177             term2 |= doubleZSig0;
2178             add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
2179         }
2180         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2181     }
2182     shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
2183     return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
2184
2185 }
2186 #endif
2187
2188 /*
2189 -------------------------------------------------------------------------------
2190 Returns 1 if the double-precision floating-point value `a' is equal to
2191 the corresponding value `b', and 0 otherwise.  The comparison is performed
2192 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2193 -------------------------------------------------------------------------------
2194 */
2195 flag float64_eq( float64 a, float64 b )
2196 {
2197
2198     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2199               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2200          || (    ( extractFloat64Exp( b ) == 0x7FF )
2201               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2202        ) {
2203         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2204             float_raise( float_flag_invalid );
2205         }
2206         return 0;
2207     }
2208     return ( a == b ) ||
2209         ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
2210
2211 }
2212
2213 /*
2214 -------------------------------------------------------------------------------
2215 Returns 1 if the double-precision floating-point value `a' is less than
2216 or equal to the corresponding value `b', and 0 otherwise.  The comparison
2217 is performed according to the IEC/IEEE Standard for Binary Floating-Point
2218 Arithmetic.
2219 -------------------------------------------------------------------------------
2220 */
2221 flag float64_le( float64 a, float64 b )
2222 {
2223     flag aSign, bSign;
2224
2225     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2226               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2227          || (    ( extractFloat64Exp( b ) == 0x7FF )
2228               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2229        ) {
2230         float_raise( float_flag_invalid );
2231         return 0;
2232     }
2233     aSign = extractFloat64Sign( a );
2234     bSign = extractFloat64Sign( b );
2235     if ( aSign != bSign )
2236         return aSign ||
2237             ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
2238                 0 );
2239     return ( a == b ) ||
2240         ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2241 }
2242
2243 /*
2244 -------------------------------------------------------------------------------
2245 Returns 1 if the double-precision floating-point value `a' is less than
2246 the corresponding value `b', and 0 otherwise.  The comparison is performed
2247 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2248 -------------------------------------------------------------------------------
2249 */
2250 flag float64_lt( float64 a, float64 b )
2251 {
2252     flag aSign, bSign;
2253
2254     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2255               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2256          || (    ( extractFloat64Exp( b ) == 0x7FF )
2257               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2258        ) {
2259         float_raise( float_flag_invalid );
2260         return 0;
2261     }
2262     aSign = extractFloat64Sign( a );
2263     bSign = extractFloat64Sign( b );
2264     if ( aSign != bSign )
2265         return aSign &&
2266             ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
2267               0 );
2268     return ( a != b ) &&
2269            ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2270
2271 }
2272
2273 #ifndef SOFTFLOAT_FOR_GCC
2274 /*
2275 -------------------------------------------------------------------------------
2276 Returns 1 if the double-precision floating-point value `a' is equal to
2277 the corresponding value `b', and 0 otherwise.  The invalid exception is
2278 raised if either operand is a NaN.  Otherwise, the comparison is performed
2279 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2280 -------------------------------------------------------------------------------
2281 */
2282 flag float64_eq_signaling( float64 a, float64 b )
2283 {
2284
2285     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2286               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2287          || (    ( extractFloat64Exp( b ) == 0x7FF )
2288               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2289        ) {
2290         float_raise( float_flag_invalid );
2291         return 0;
2292     }
2293     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2294
2295 }
2296
2297 /*
2298 -------------------------------------------------------------------------------
2299 Returns 1 if the double-precision floating-point value `a' is less than or
2300 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2301 cause an exception.  Otherwise, the comparison is performed according to the
2302 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2303 -------------------------------------------------------------------------------
2304 */
2305 flag float64_le_quiet( float64 a, float64 b )
2306 {
2307     flag aSign, bSign;
2308
2309     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2310               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2311          || (    ( extractFloat64Exp( b ) == 0x7FF )
2312               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2313        ) {
2314         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2315             float_raise( float_flag_invalid );
2316         }
2317         return 0;
2318     }
2319     aSign = extractFloat64Sign( a );
2320     bSign = extractFloat64Sign( b );
2321     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2322     return ( a == b ) || ( aSign ^ ( a < b ) );
2323
2324 }
2325
2326 /*
2327 -------------------------------------------------------------------------------
2328 Returns 1 if the double-precision floating-point value `a' is less than
2329 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2330 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2331 Standard for Binary Floating-Point Arithmetic.
2332 -------------------------------------------------------------------------------
2333 */
2334 flag float64_lt_quiet( float64 a, float64 b )
2335 {
2336     flag aSign, bSign;
2337
2338     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2339               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2340          || (    ( extractFloat64Exp( b ) == 0x7FF )
2341               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2342        ) {
2343         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2344             float_raise( float_flag_invalid );
2345         }
2346         return 0;
2347     }
2348     aSign = extractFloat64Sign( a );
2349     bSign = extractFloat64Sign( b );
2350     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2351     return ( a != b ) && ( aSign ^ ( a < b ) );
2352
2353 }
2354
2355 #endif