]> git.proxmox.com Git - mirror_edk2.git/blame - ArmPkg/Library/ArmSoftFloatLib/bits32/softfloat.c
ArmPkg/ArmSoftFloatLib: add support for RVCT
[mirror_edk2.git] / ArmPkg / Library / ArmSoftFloatLib / bits32 / softfloat.c
CommitLineData
1dbda2b4
AB
1/* $NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $ */\r
2\r
3/*\r
4 * This version hacked for use with gcc -msoft-float by bjh21.\r
5 * (Mostly a case of #ifdefing out things GCC doesn't need or provides\r
6 * itself).\r
7 */\r
8\r
9/*\r
10 * Things you may want to define:\r
11 *\r
12 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with\r
13 * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them\r
14 * properly renamed.\r
15 */\r
16\r
17/*\r
18 * This differs from the standard bits32/softfloat.c in that float64\r
19 * is defined to be a 64-bit integer rather than a structure. The\r
20 * structure is float64s, with translation between the two going via\r
21 * float64u.\r
22 */\r
23\r
24/*\r
25===============================================================================\r
26\r
27This C source file is part of the SoftFloat IEC/IEEE Floating-Point\r
28Arithmetic Package, Release 2a.\r
29\r
30Written by John R. Hauser. This work was made possible in part by the\r
31International Computer Science Institute, located at Suite 600, 1947 Center\r
32Street, Berkeley, California 94704. Funding was partially provided by the\r
33National Science Foundation under grant MIP-9311980. The original version\r
34of this code was written as part of a project to build a fixed-point vector\r
35processor in collaboration with the University of California at Berkeley,\r
36overseen by Profs. Nelson Morgan and John Wawrzynek. More information\r
37is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/\r
38arithmetic/SoftFloat.html'.\r
39\r
40THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort\r
41has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT\r
42TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO\r
43PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY\r
44AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.\r
45\r
46Derivative works are acceptable, even for commercial purposes, so long as\r
47(1) they include prominent notice that the work is derivative, and (2) they\r
48include prominent notice akin to these four paragraphs for those parts of\r
49this code that are retained.\r
50\r
51===============================================================================\r
52*/\r
53\r
1dbda2b4
AB
54#if defined(LIBC_SCCS) && !defined(lint)\r
55__RCSID("$NetBSD: softfloat.c,v 1.3 2013/01/10 08:16:11 matt Exp $");\r
56#endif /* LIBC_SCCS and not lint */\r
57\r
58#ifdef SOFTFLOAT_FOR_GCC\r
59#include "softfloat-for-gcc.h"\r
60#endif\r
61\r
62#include "milieu.h"\r
63#include "softfloat.h"\r
64\r
65/*\r
66 * Conversions between floats as stored in memory and floats as\r
67 * SoftFloat uses them\r
68 */\r
69#ifndef FLOAT64_DEMANGLE\r
70#define FLOAT64_DEMANGLE(a) (a)\r
71#endif\r
72#ifndef FLOAT64_MANGLE\r
73#define FLOAT64_MANGLE(a) (a)\r
74#endif\r
75\r
76/*\r
77-------------------------------------------------------------------------------\r
78Floating-point rounding mode and exception flags.\r
79-------------------------------------------------------------------------------\r
80*/\r
81#ifndef set_float_rounding_mode\r
82fp_rnd float_rounding_mode = float_round_nearest_even;\r
83fp_except float_exception_flags = 0;\r
84#endif\r
85#ifndef set_float_exception_inexact_flag\r
86#define set_float_exception_inexact_flag() \\r
87 ((void)(float_exception_flags |= float_flag_inexact))\r
88#endif\r
89\r
90/*\r
91-------------------------------------------------------------------------------\r
92Primitive arithmetic functions, including multi-word arithmetic, and\r
93division and square root approximations. (Can be specialized to target if\r
94desired.)\r
95-------------------------------------------------------------------------------\r
96*/\r
97#include "softfloat-macros"\r
98\r
99/*\r
100-------------------------------------------------------------------------------\r
101Functions and definitions to determine: (1) whether tininess for underflow\r
102is detected before or after rounding by default, (2) what (if anything)\r
103happens when exceptions are raised, (3) how signaling NaNs are distinguished\r
104from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs\r
105are propagated from function inputs to output. These details are target-\r
106specific.\r
107-------------------------------------------------------------------------------\r
108*/\r
109#include "softfloat-specialize"\r
110\r
111/*\r
112-------------------------------------------------------------------------------\r
113Returns the fraction bits of the single-precision floating-point value `a'.\r
114-------------------------------------------------------------------------------\r
115*/\r
116INLINE bits32 extractFloat32Frac( float32 a )\r
117{\r
118\r
119 return a & 0x007FFFFF;\r
120\r
121}\r
122\r
123/*\r
124-------------------------------------------------------------------------------\r
125Returns the exponent bits of the single-precision floating-point value `a'.\r
126-------------------------------------------------------------------------------\r
127*/\r
128INLINE int16 extractFloat32Exp( float32 a )\r
129{\r
130\r
131 return ( a>>23 ) & 0xFF;\r
132\r
133}\r
134\r
135/*\r
136-------------------------------------------------------------------------------\r
137Returns the sign bit of the single-precision floating-point value `a'.\r
138-------------------------------------------------------------------------------\r
139*/\r
140INLINE flag extractFloat32Sign( float32 a )\r
141{\r
142\r
143 return a>>31;\r
144\r
145}\r
146\r
147/*\r
148-------------------------------------------------------------------------------\r
149Normalizes the subnormal single-precision floating-point value represented\r
150by the denormalized significand `aSig'. The normalized exponent and\r
151significand are stored at the locations pointed to by `zExpPtr' and\r
152`zSigPtr', respectively.\r
153-------------------------------------------------------------------------------\r
154*/\r
155static void\r
156 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )\r
157{\r
158 int8 shiftCount;\r
159\r
160 shiftCount = countLeadingZeros32( aSig ) - 8;\r
161 *zSigPtr = aSig<<shiftCount;\r
162 *zExpPtr = 1 - shiftCount;\r
163\r
164}\r
165\r
166/*\r
167-------------------------------------------------------------------------------\r
168Packs the sign `zSign', exponent `zExp', and significand `zSig' into a\r
169single-precision floating-point value, returning the result. After being\r
170shifted into the proper positions, the three fields are simply added\r
171together to form the result. This means that any integer portion of `zSig'\r
172will be added into the exponent. Since a properly normalized significand\r
173will have an integer portion equal to 1, the `zExp' input should be 1 less\r
174than the desired result exponent whenever `zSig' is a complete, normalized\r
175significand.\r
176-------------------------------------------------------------------------------\r
177*/\r
178INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )\r
179{\r
180\r
181 return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;\r
182\r
183}\r
184\r
185/*\r
186-------------------------------------------------------------------------------\r
187Takes an abstract floating-point value having sign `zSign', exponent `zExp',\r
188and significand `zSig', and returns the proper single-precision floating-\r
189point value corresponding to the abstract input. Ordinarily, the abstract\r
190value is simply rounded and packed into the single-precision format, with\r
191the inexact exception raised if the abstract input cannot be represented\r
192exactly. However, if the abstract value is too large, the overflow and\r
193inexact exceptions are raised and an infinity or maximal finite value is\r
194returned. If the abstract value is too small, the input value is rounded to\r
195a subnormal number, and the underflow and inexact exceptions are raised if\r
196the abstract input cannot be represented exactly as a subnormal single-\r
197precision floating-point number.\r
198 The input significand `zSig' has its binary point between bits 30\r
199and 29, which is 7 bits to the left of the usual location. This shifted\r
200significand must be normalized or smaller. If `zSig' is not normalized,\r
201`zExp' must be 0; in that case, the result returned is a subnormal number,\r
202and it must not require rounding. In the usual case that `zSig' is\r
203normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.\r
204The handling of underflow and overflow follows the IEC/IEEE Standard for\r
205Binary Floating-Point Arithmetic.\r
206-------------------------------------------------------------------------------\r
207*/\r
208static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )\r
209{\r
210 int8 roundingMode;\r
211 flag roundNearestEven;\r
212 int8 roundIncrement, roundBits;\r
213 flag isTiny;\r
214\r
215 roundingMode = float_rounding_mode;\r
216 roundNearestEven = roundingMode == float_round_nearest_even;\r
217 roundIncrement = 0x40;\r
218 if ( ! roundNearestEven ) {\r
219 if ( roundingMode == float_round_to_zero ) {\r
220 roundIncrement = 0;\r
221 }\r
222 else {\r
223 roundIncrement = 0x7F;\r
224 if ( zSign ) {\r
225 if ( roundingMode == float_round_up ) roundIncrement = 0;\r
226 }\r
227 else {\r
228 if ( roundingMode == float_round_down ) roundIncrement = 0;\r
229 }\r
230 }\r
231 }\r
232 roundBits = zSig & 0x7F;\r
233 if ( 0xFD <= (bits16) zExp ) {\r
234 if ( ( 0xFD < zExp )\r
235 || ( ( zExp == 0xFD )\r
236 && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )\r
237 ) {\r
238 float_raise( float_flag_overflow | float_flag_inexact );\r
239 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );\r
240 }\r
241 if ( zExp < 0 ) {\r
242 isTiny =\r
243 ( float_detect_tininess == float_tininess_before_rounding )\r
244 || ( zExp < -1 )\r
245 || ( zSig + roundIncrement < (uint32)0x80000000 );\r
246 shift32RightJamming( zSig, - zExp, &zSig );\r
247 zExp = 0;\r
248 roundBits = zSig & 0x7F;\r
249 if ( isTiny && roundBits ) float_raise( float_flag_underflow );\r
250 }\r
251 }\r
252 if ( roundBits ) set_float_exception_inexact_flag();\r
253 zSig = ( zSig + roundIncrement )>>7;\r
254 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );\r
255 if ( zSig == 0 ) zExp = 0;\r
256 return packFloat32( zSign, zExp, zSig );\r
257\r
258}\r
259\r
260/*\r
261-------------------------------------------------------------------------------\r
262Takes an abstract floating-point value having sign `zSign', exponent `zExp',\r
263and significand `zSig', and returns the proper single-precision floating-\r
264point value corresponding to the abstract input. This routine is just like\r
265`roundAndPackFloat32' except that `zSig' does not have to be normalized.\r
266Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''\r
267floating-point exponent.\r
268-------------------------------------------------------------------------------\r
269*/\r
270static float32\r
271 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )\r
272{\r
273 int8 shiftCount;\r
274\r
275 shiftCount = countLeadingZeros32( zSig ) - 1;\r
276 return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );\r
277\r
278}\r
279\r
280/*\r
281-------------------------------------------------------------------------------\r
282Returns the least-significant 32 fraction bits of the double-precision\r
283floating-point value `a'.\r
284-------------------------------------------------------------------------------\r
285*/\r
286INLINE bits32 extractFloat64Frac1( float64 a )\r
287{\r
288\r
289 return (bits32)(FLOAT64_DEMANGLE(a) & LIT64(0x00000000FFFFFFFF));\r
290\r
291}\r
292\r
293/*\r
294-------------------------------------------------------------------------------\r
295Returns the most-significant 20 fraction bits of the double-precision\r
296floating-point value `a'.\r
297-------------------------------------------------------------------------------\r
298*/\r
299INLINE bits32 extractFloat64Frac0( float64 a )\r
300{\r
301\r
302 return (bits32)((FLOAT64_DEMANGLE(a) >> 32) & 0x000FFFFF);\r
303\r
304}\r
305\r
306/*\r
307-------------------------------------------------------------------------------\r
308Returns the exponent bits of the double-precision floating-point value `a'.\r
309-------------------------------------------------------------------------------\r
310*/\r
311INLINE int16 extractFloat64Exp( float64 a )\r
312{\r
313\r
314 return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF);\r
315\r
316}\r
317\r
318/*\r
319-------------------------------------------------------------------------------\r
320Returns the sign bit of the double-precision floating-point value `a'.\r
321-------------------------------------------------------------------------------\r
322*/\r
323INLINE flag extractFloat64Sign( float64 a )\r
324{\r
325\r
326 return (flag)(FLOAT64_DEMANGLE(a) >> 63);\r
327\r
328}\r
329\r
330/*\r
331-------------------------------------------------------------------------------\r
332Normalizes the subnormal double-precision floating-point value represented\r
333by the denormalized significand formed by the concatenation of `aSig0' and\r
334`aSig1'. The normalized exponent is stored at the location pointed to by\r
335`zExpPtr'. The most significant 21 bits of the normalized significand are\r
336stored at the location pointed to by `zSig0Ptr', and the least significant\r
33732 bits of the normalized significand are stored at the location pointed to\r
338by `zSig1Ptr'.\r
339-------------------------------------------------------------------------------\r
340*/\r
341static void\r
342 normalizeFloat64Subnormal(\r
343 bits32 aSig0,\r
344 bits32 aSig1,\r
345 int16 *zExpPtr,\r
346 bits32 *zSig0Ptr,\r
347 bits32 *zSig1Ptr\r
348 )\r
349{\r
350 int8 shiftCount;\r
351\r
352 if ( aSig0 == 0 ) {\r
353 shiftCount = countLeadingZeros32( aSig1 ) - 11;\r
354 if ( shiftCount < 0 ) {\r
355 *zSig0Ptr = aSig1>>( - shiftCount );\r
356 *zSig1Ptr = aSig1<<( shiftCount & 31 );\r
357 }\r
358 else {\r
359 *zSig0Ptr = aSig1<<shiftCount;\r
360 *zSig1Ptr = 0;\r
361 }\r
362 *zExpPtr = - shiftCount - 31;\r
363 }\r
364 else {\r
365 shiftCount = countLeadingZeros32( aSig0 ) - 11;\r
366 shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );\r
367 *zExpPtr = 1 - shiftCount;\r
368 }\r
369\r
370}\r
371\r
372/*\r
373-------------------------------------------------------------------------------\r
374Packs the sign `zSign', the exponent `zExp', and the significand formed by\r
375the concatenation of `zSig0' and `zSig1' into a double-precision floating-\r
376point value, returning the result. After being shifted into the proper\r
377positions, the three fields `zSign', `zExp', and `zSig0' are simply added\r
378together to form the most significant 32 bits of the result. This means\r
379that any integer portion of `zSig0' will be added into the exponent. Since\r
380a properly normalized significand will have an integer portion equal to 1,\r
381the `zExp' input should be 1 less than the desired result exponent whenever\r
382`zSig0' and `zSig1' concatenated form a complete, normalized significand.\r
383-------------------------------------------------------------------------------\r
384*/\r
385INLINE float64\r
386 packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )\r
387{\r
388\r
389 return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +\r
390 ( ( (bits64) zExp )<<52 ) +\r
391 ( ( (bits64) zSig0 )<<32 ) + zSig1 );\r
392\r
393\r
394}\r
395\r
396/*\r
397-------------------------------------------------------------------------------\r
398Takes an abstract floating-point value having sign `zSign', exponent `zExp',\r
399and extended significand formed by the concatenation of `zSig0', `zSig1',\r
400and `zSig2', and returns the proper double-precision floating-point value\r
401corresponding to the abstract input. Ordinarily, the abstract value is\r
402simply rounded and packed into the double-precision format, with the inexact\r
403exception raised if the abstract input cannot be represented exactly.\r
404However, if the abstract value is too large, the overflow and inexact\r
405exceptions are raised and an infinity or maximal finite value is returned.\r
406If the abstract value is too small, the input value is rounded to a\r
407subnormal number, and the underflow and inexact exceptions are raised if the\r
408abstract input cannot be represented exactly as a subnormal double-precision\r
409floating-point number.\r
410 The input significand must be normalized or smaller. If the input\r
411significand is not normalized, `zExp' must be 0; in that case, the result\r
412returned is a subnormal number, and it must not require rounding. In the\r
413usual case that the input significand is normalized, `zExp' must be 1 less\r
414than the ``true'' floating-point exponent. The handling of underflow and\r
415overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.\r
416-------------------------------------------------------------------------------\r
417*/\r
418static float64\r
419 roundAndPackFloat64(\r
420 flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )\r
421{\r
422 int8 roundingMode;\r
423 flag roundNearestEven, increment, isTiny;\r
424\r
425 roundingMode = float_rounding_mode;\r
426 roundNearestEven = ( roundingMode == float_round_nearest_even );\r
427 increment = ( (sbits32) zSig2 < 0 );\r
428 if ( ! roundNearestEven ) {\r
429 if ( roundingMode == float_round_to_zero ) {\r
430 increment = 0;\r
431 }\r
432 else {\r
433 if ( zSign ) {\r
434 increment = ( roundingMode == float_round_down ) && zSig2;\r
435 }\r
436 else {\r
437 increment = ( roundingMode == float_round_up ) && zSig2;\r
438 }\r
439 }\r
440 }\r
441 if ( 0x7FD <= (bits16) zExp ) {\r
442 if ( ( 0x7FD < zExp )\r
443 || ( ( zExp == 0x7FD )\r
444 && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )\r
445 && increment\r
446 )\r
447 ) {\r
448 float_raise( float_flag_overflow | float_flag_inexact );\r
449 if ( ( roundingMode == float_round_to_zero )\r
450 || ( zSign && ( roundingMode == float_round_up ) )\r
451 || ( ! zSign && ( roundingMode == float_round_down ) )\r
452 ) {\r
453 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );\r
454 }\r
455 return packFloat64( zSign, 0x7FF, 0, 0 );\r
456 }\r
457 if ( zExp < 0 ) {\r
458 isTiny =\r
459 ( float_detect_tininess == float_tininess_before_rounding )\r
460 || ( zExp < -1 )\r
461 || ! increment\r
462 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );\r
463 shift64ExtraRightJamming(\r
464 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );\r
465 zExp = 0;\r
466 if ( isTiny && zSig2 ) float_raise( float_flag_underflow );\r
467 if ( roundNearestEven ) {\r
468 increment = ( (sbits32) zSig2 < 0 );\r
469 }\r
470 else {\r
471 if ( zSign ) {\r
472 increment = ( roundingMode == float_round_down ) && zSig2;\r
473 }\r
474 else {\r
475 increment = ( roundingMode == float_round_up ) && zSig2;\r
476 }\r
477 }\r
478 }\r
479 }\r
480 if ( zSig2 ) set_float_exception_inexact_flag();\r
481 if ( increment ) {\r
482 add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );\r
483 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );\r
484 }\r
485 else {\r
486 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;\r
487 }\r
488 return packFloat64( zSign, zExp, zSig0, zSig1 );\r
489\r
490}\r
491\r
492/*\r
493-------------------------------------------------------------------------------\r
494Takes an abstract floating-point value having sign `zSign', exponent `zExp',\r
495and significand formed by the concatenation of `zSig0' and `zSig1', and\r
496returns the proper double-precision floating-point value corresponding\r
497to the abstract input. This routine is just like `roundAndPackFloat64'\r
498except that the input significand has fewer bits and does not have to be\r
499normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-\r
500point exponent.\r
501-------------------------------------------------------------------------------\r
502*/\r
503static float64\r
504 normalizeRoundAndPackFloat64(\r
505 flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )\r
506{\r
507 int8 shiftCount;\r
508 bits32 zSig2;\r
509\r
510 if ( zSig0 == 0 ) {\r
511 zSig0 = zSig1;\r
512 zSig1 = 0;\r
513 zExp -= 32;\r
514 }\r
515 shiftCount = countLeadingZeros32( zSig0 ) - 11;\r
516 if ( 0 <= shiftCount ) {\r
517 zSig2 = 0;\r
518 shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );\r
519 }\r
520 else {\r
521 shift64ExtraRightJamming(\r
522 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );\r
523 }\r
524 zExp -= shiftCount;\r
525 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );\r
526\r
527}\r
528\r
529/*\r
530-------------------------------------------------------------------------------\r
531Returns the result of converting the 32-bit two's complement integer `a' to\r
532the single-precision floating-point format. The conversion is performed\r
533according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.\r
534-------------------------------------------------------------------------------\r
535*/\r
536float32 int32_to_float32( int32 a )\r
537{\r
538 flag zSign;\r
539\r
540 if ( a == 0 ) return 0;\r
541 if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );\r
542 zSign = ( a < 0 );\r
543 return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a));\r
544\r
545}\r
546\r
547/*\r
548-------------------------------------------------------------------------------\r
549Returns the result of converting the 32-bit two's complement integer `a' to\r
550the double-precision floating-point format. The conversion is performed\r
551according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.\r
552-------------------------------------------------------------------------------\r
553*/\r
554float64 int32_to_float64( int32 a )\r
555{\r
556 flag zSign;\r
557 bits32 absA;\r
558 int8 shiftCount;\r
559 bits32 zSig0, zSig1;\r
560\r
561 if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );\r
562 zSign = ( a < 0 );\r
563 absA = zSign ? - a : a;\r
564 shiftCount = countLeadingZeros32( absA ) - 11;\r
565 if ( 0 <= shiftCount ) {\r
566 zSig0 = absA<<shiftCount;\r
567 zSig1 = 0;\r
568 }\r
569 else {\r
570 shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );\r
571 }\r
572 return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );\r
573\r
574}\r
575\r
576#ifndef SOFTFLOAT_FOR_GCC\r
577/*\r
578-------------------------------------------------------------------------------\r
579Returns the result of converting the single-precision floating-point value\r
580`a' to the 32-bit two's complement integer format. The conversion is\r
581performed according to the IEC/IEEE Standard for Binary Floating-Point\r
582Arithmetic---which means in particular that the conversion is rounded\r
583according to the current rounding mode. If `a' is a NaN, the largest\r
584positive integer is returned. Otherwise, if the conversion overflows, the\r
585largest integer with the same sign as `a' is returned.\r
586-------------------------------------------------------------------------------\r
587*/\r
588int32 float32_to_int32( float32 a )\r
589{\r
590 flag aSign;\r
591 int16 aExp, shiftCount;\r
592 bits32 aSig, aSigExtra;\r
593 int32 z;\r
594 int8 roundingMode;\r
595\r
596 aSig = extractFloat32Frac( a );\r
597 aExp = extractFloat32Exp( a );\r
598 aSign = extractFloat32Sign( a );\r
599 shiftCount = aExp - 0x96;\r
600 if ( 0 <= shiftCount ) {\r
601 if ( 0x9E <= aExp ) {\r
602 if ( a != 0xCF000000 ) {\r
603 float_raise( float_flag_invalid );\r
604 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {\r
605 return 0x7FFFFFFF;\r
606 }\r
607 }\r
608 return (sbits32) 0x80000000;\r
609 }\r
610 z = ( aSig | 0x00800000 )<<shiftCount;\r
611 if ( aSign ) z = - z;\r
612 }\r
613 else {\r
614 if ( aExp < 0x7E ) {\r
615 aSigExtra = aExp | aSig;\r
616 z = 0;\r
617 }\r
618 else {\r
619 aSig |= 0x00800000;\r
620 aSigExtra = aSig<<( shiftCount & 31 );\r
621 z = aSig>>( - shiftCount );\r
622 }\r
623 if ( aSigExtra ) set_float_exception_inexact_flag();\r
624 roundingMode = float_rounding_mode;\r
625 if ( roundingMode == float_round_nearest_even ) {\r
626 if ( (sbits32) aSigExtra < 0 ) {\r
627 ++z;\r
628 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;\r
629 }\r
630 if ( aSign ) z = - z;\r
631 }\r
632 else {\r
633 aSigExtra = ( aSigExtra != 0 );\r
634 if ( aSign ) {\r
635 z += ( roundingMode == float_round_down ) & aSigExtra;\r
636 z = - z;\r
637 }\r
638 else {\r
639 z += ( roundingMode == float_round_up ) & aSigExtra;\r
640 }\r
641 }\r
642 }\r
643 return z;\r
644\r
645}\r
646#endif\r
647\r
648/*\r
649-------------------------------------------------------------------------------\r
650Returns the result of converting the single-precision floating-point value\r
651`a' to the 32-bit two's complement integer format. The conversion is\r
652performed according to the IEC/IEEE Standard for Binary Floating-Point\r
653Arithmetic, except that the conversion is always rounded toward zero.\r
654If `a' is a NaN, the largest positive integer is returned. Otherwise, if\r
655the conversion overflows, the largest integer with the same sign as `a' is\r
656returned.\r
657-------------------------------------------------------------------------------\r
658*/\r
659int32 float32_to_int32_round_to_zero( float32 a )\r
660{\r
661 flag aSign;\r
662 int16 aExp, shiftCount;\r
663 bits32 aSig;\r
664 int32 z;\r
665\r
666 aSig = extractFloat32Frac( a );\r
667 aExp = extractFloat32Exp( a );\r
668 aSign = extractFloat32Sign( a );\r
669 shiftCount = aExp - 0x9E;\r
670 if ( 0 <= shiftCount ) {\r
671 if ( a != 0xCF000000 ) {\r
672 float_raise( float_flag_invalid );\r
673 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;\r
674 }\r
675 return (sbits32) 0x80000000;\r
676 }\r
677 else if ( aExp <= 0x7E ) {\r
678 if ( aExp | aSig ) set_float_exception_inexact_flag();\r
679 return 0;\r
680 }\r
681 aSig = ( aSig | 0x00800000 )<<8;\r
682 z = aSig>>( - shiftCount );\r
683 if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {\r
684 set_float_exception_inexact_flag();\r
685 }\r
686 if ( aSign ) z = - z;\r
687 return z;\r
688\r
689}\r
690\r
691/*\r
692-------------------------------------------------------------------------------\r
693Returns the result of converting the single-precision floating-point value\r
694`a' to the double-precision floating-point format. The conversion is\r
695performed according to the IEC/IEEE Standard for Binary Floating-Point\r
696Arithmetic.\r
697-------------------------------------------------------------------------------\r
698*/\r
699float64 float32_to_float64( float32 a )\r
700{\r
701 flag aSign;\r
702 int16 aExp;\r
703 bits32 aSig, zSig0, zSig1;\r
704\r
705 aSig = extractFloat32Frac( a );\r
706 aExp = extractFloat32Exp( a );\r
707 aSign = extractFloat32Sign( a );\r
708 if ( aExp == 0xFF ) {\r
709 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );\r
710 return packFloat64( aSign, 0x7FF, 0, 0 );\r
711 }\r
712 if ( aExp == 0 ) {\r
713 if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );\r
714 normalizeFloat32Subnormal( aSig, &aExp, &aSig );\r
715 --aExp;\r
716 }\r
717 shift64Right( aSig, 0, 3, &zSig0, &zSig1 );\r
718 return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );\r
719\r
720}\r
721\r
722#ifndef SOFTFLOAT_FOR_GCC\r
723/*\r
724-------------------------------------------------------------------------------\r
725Rounds the single-precision floating-point value `a' to an integer,\r
726and returns the result as a single-precision floating-point value. The\r
727operation is performed according to the IEC/IEEE Standard for Binary\r
728Floating-Point Arithmetic.\r
729-------------------------------------------------------------------------------\r
730*/\r
731float32 float32_round_to_int( float32 a )\r
732{\r
733 flag aSign;\r
734 int16 aExp;\r
735 bits32 lastBitMask, roundBitsMask;\r
736 int8 roundingMode;\r
737 float32 z;\r
738\r
739 aExp = extractFloat32Exp( a );\r
740 if ( 0x96 <= aExp ) {\r
741 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {\r
742 return propagateFloat32NaN( a, a );\r
743 }\r
744 return a;\r
745 }\r
746 if ( aExp <= 0x7E ) {\r
747 if ( (bits32) ( a<<1 ) == 0 ) return a;\r
748 set_float_exception_inexact_flag();\r
749 aSign = extractFloat32Sign( a );\r
750 switch ( float_rounding_mode ) {\r
751 case float_round_nearest_even:\r
752 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {\r
753 return packFloat32( aSign, 0x7F, 0 );\r
754 }\r
755 break;\r
756 case float_round_to_zero:\r
757 break;\r
758 case float_round_down:\r
759 return aSign ? 0xBF800000 : 0;\r
760 case float_round_up:\r
761 return aSign ? 0x80000000 : 0x3F800000;\r
762 }\r
763 return packFloat32( aSign, 0, 0 );\r
764 }\r
765 lastBitMask = 1;\r
766 lastBitMask <<= 0x96 - aExp;\r
767 roundBitsMask = lastBitMask - 1;\r
768 z = a;\r
769 roundingMode = float_rounding_mode;\r
770 if ( roundingMode == float_round_nearest_even ) {\r
771 z += lastBitMask>>1;\r
772 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;\r
773 }\r
774 else if ( roundingMode != float_round_to_zero ) {\r
775 if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {\r
776 z += roundBitsMask;\r
777 }\r
778 }\r
779 z &= ~ roundBitsMask;\r
780 if ( z != a ) set_float_exception_inexact_flag();\r
781 return z;\r
782\r
783}\r
784#endif\r
785\r
786/*\r
787-------------------------------------------------------------------------------\r
788Returns the result of adding the absolute values of the single-precision\r
789floating-point values `a' and `b'. If `zSign' is 1, the sum is negated\r
790before being returned. `zSign' is ignored if the result is a NaN.\r
791The addition is performed according to the IEC/IEEE Standard for Binary\r
792Floating-Point Arithmetic.\r
793-------------------------------------------------------------------------------\r
794*/\r
795static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )\r
796{\r
797 int16 aExp, bExp, zExp;\r
798 bits32 aSig, bSig, zSig;\r
799 int16 expDiff;\r
800\r
801 aSig = extractFloat32Frac( a );\r
802 aExp = extractFloat32Exp( a );\r
803 bSig = extractFloat32Frac( b );\r
804 bExp = extractFloat32Exp( b );\r
805 expDiff = aExp - bExp;\r
806 aSig <<= 6;\r
807 bSig <<= 6;\r
808 if ( 0 < expDiff ) {\r
809 if ( aExp == 0xFF ) {\r
810 if ( aSig ) return propagateFloat32NaN( a, b );\r
811 return a;\r
812 }\r
813 if ( bExp == 0 ) {\r
814 --expDiff;\r
815 }\r
816 else {\r
817 bSig |= 0x20000000;\r
818 }\r
819 shift32RightJamming( bSig, expDiff, &bSig );\r
820 zExp = aExp;\r
821 }\r
822 else if ( expDiff < 0 ) {\r
823 if ( bExp == 0xFF ) {\r
824 if ( bSig ) return propagateFloat32NaN( a, b );\r
825 return packFloat32( zSign, 0xFF, 0 );\r
826 }\r
827 if ( aExp == 0 ) {\r
828 ++expDiff;\r
829 }\r
830 else {\r
831 aSig |= 0x20000000;\r
832 }\r
833 shift32RightJamming( aSig, - expDiff, &aSig );\r
834 zExp = bExp;\r
835 }\r
836 else {\r
837 if ( aExp == 0xFF ) {\r
838 if ( aSig | bSig ) return propagateFloat32NaN( a, b );\r
839 return a;\r
840 }\r
841 if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );\r
842 zSig = 0x40000000 + aSig + bSig;\r
843 zExp = aExp;\r
844 goto roundAndPack;\r
845 }\r
846 aSig |= 0x20000000;\r
847 zSig = ( aSig + bSig )<<1;\r
848 --zExp;\r
849 if ( (sbits32) zSig < 0 ) {\r
850 zSig = aSig + bSig;\r
851 ++zExp;\r
852 }\r
853 roundAndPack:\r
854 return roundAndPackFloat32( zSign, zExp, zSig );\r
855\r
856}\r
857\r
858/*\r
859-------------------------------------------------------------------------------\r
860Returns the result of subtracting the absolute values of the single-\r
861precision floating-point values `a' and `b'. If `zSign' is 1, the\r
862difference is negated before being returned. `zSign' is ignored if the\r
863result is a NaN. The subtraction is performed according to the IEC/IEEE\r
864Standard for Binary Floating-Point Arithmetic.\r
865-------------------------------------------------------------------------------\r
866*/\r
867static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )\r
868{\r
869 int16 aExp, bExp, zExp;\r
870 bits32 aSig, bSig, zSig;\r
871 int16 expDiff;\r
872\r
873 aSig = extractFloat32Frac( a );\r
874 aExp = extractFloat32Exp( a );\r
875 bSig = extractFloat32Frac( b );\r
876 bExp = extractFloat32Exp( b );\r
877 expDiff = aExp - bExp;\r
878 aSig <<= 7;\r
879 bSig <<= 7;\r
880 if ( 0 < expDiff ) goto aExpBigger;\r
881 if ( expDiff < 0 ) goto bExpBigger;\r
882 if ( aExp == 0xFF ) {\r
883 if ( aSig | bSig ) return propagateFloat32NaN( a, b );\r
884 float_raise( float_flag_invalid );\r
885 return float32_default_nan;\r
886 }\r
887 if ( aExp == 0 ) {\r
888 aExp = 1;\r
889 bExp = 1;\r
890 }\r
891 if ( bSig < aSig ) goto aBigger;\r
892 if ( aSig < bSig ) goto bBigger;\r
893 return packFloat32( float_rounding_mode == float_round_down, 0, 0 );\r
894 bExpBigger:\r
895 if ( bExp == 0xFF ) {\r
896 if ( bSig ) return propagateFloat32NaN( a, b );\r
897 return packFloat32( zSign ^ 1, 0xFF, 0 );\r
898 }\r
899 if ( aExp == 0 ) {\r
900 ++expDiff;\r
901 }\r
902 else {\r
903 aSig |= 0x40000000;\r
904 }\r
905 shift32RightJamming( aSig, - expDiff, &aSig );\r
906 bSig |= 0x40000000;\r
907 bBigger:\r
908 zSig = bSig - aSig;\r
909 zExp = bExp;\r
910 zSign ^= 1;\r
911 goto normalizeRoundAndPack;\r
912 aExpBigger:\r
913 if ( aExp == 0xFF ) {\r
914 if ( aSig ) return propagateFloat32NaN( a, b );\r
915 return a;\r
916 }\r
917 if ( bExp == 0 ) {\r
918 --expDiff;\r
919 }\r
920 else {\r
921 bSig |= 0x40000000;\r
922 }\r
923 shift32RightJamming( bSig, expDiff, &bSig );\r
924 aSig |= 0x40000000;\r
925 aBigger:\r
926 zSig = aSig - bSig;\r
927 zExp = aExp;\r
928 normalizeRoundAndPack:\r
929 --zExp;\r
930 return normalizeRoundAndPackFloat32( zSign, zExp, zSig );\r
931\r
932}\r
933\r
934/*\r
935-------------------------------------------------------------------------------\r
936Returns the result of adding the single-precision floating-point values `a'\r
937and `b'. The operation is performed according to the IEC/IEEE Standard for\r
938Binary Floating-Point Arithmetic.\r
939-------------------------------------------------------------------------------\r
940*/\r
941float32 float32_add( float32 a, float32 b )\r
942{\r
943 flag aSign, bSign;\r
944\r
945 aSign = extractFloat32Sign( a );\r
946 bSign = extractFloat32Sign( b );\r
947 if ( aSign == bSign ) {\r
948 return addFloat32Sigs( a, b, aSign );\r
949 }\r
950 else {\r
951 return subFloat32Sigs( a, b, aSign );\r
952 }\r
953\r
954}\r
955\r
956/*\r
957-------------------------------------------------------------------------------\r
958Returns the result of subtracting the single-precision floating-point values\r
959`a' and `b'. The operation is performed according to the IEC/IEEE Standard\r
960for Binary Floating-Point Arithmetic.\r
961-------------------------------------------------------------------------------\r
962*/\r
963float32 float32_sub( float32 a, float32 b )\r
964{\r
965 flag aSign, bSign;\r
966\r
967 aSign = extractFloat32Sign( a );\r
968 bSign = extractFloat32Sign( b );\r
969 if ( aSign == bSign ) {\r
970 return subFloat32Sigs( a, b, aSign );\r
971 }\r
972 else {\r
973 return addFloat32Sigs( a, b, aSign );\r
974 }\r
975\r
976}\r
977\r
978/*\r
979-------------------------------------------------------------------------------\r
980Returns the result of multiplying the single-precision floating-point values\r
981`a' and `b'. The operation is performed according to the IEC/IEEE Standard\r
982for Binary Floating-Point Arithmetic.\r
983-------------------------------------------------------------------------------\r
984*/\r
985float32 float32_mul( float32 a, float32 b )\r
986{\r
987 flag aSign, bSign, zSign;\r
988 int16 aExp, bExp, zExp;\r
989 bits32 aSig, bSig, zSig0, zSig1;\r
990\r
991 aSig = extractFloat32Frac( a );\r
992 aExp = extractFloat32Exp( a );\r
993 aSign = extractFloat32Sign( a );\r
994 bSig = extractFloat32Frac( b );\r
995 bExp = extractFloat32Exp( b );\r
996 bSign = extractFloat32Sign( b );\r
997 zSign = aSign ^ bSign;\r
998 if ( aExp == 0xFF ) {\r
999 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {\r
1000 return propagateFloat32NaN( a, b );\r
1001 }\r
1002 if ( ( bExp | bSig ) == 0 ) {\r
1003 float_raise( float_flag_invalid );\r
1004 return float32_default_nan;\r
1005 }\r
1006 return packFloat32( zSign, 0xFF, 0 );\r
1007 }\r
1008 if ( bExp == 0xFF ) {\r
1009 if ( bSig ) return propagateFloat32NaN( a, b );\r
1010 if ( ( aExp | aSig ) == 0 ) {\r
1011 float_raise( float_flag_invalid );\r
1012 return float32_default_nan;\r
1013 }\r
1014 return packFloat32( zSign, 0xFF, 0 );\r
1015 }\r
1016 if ( aExp == 0 ) {\r
1017 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );\r
1018 normalizeFloat32Subnormal( aSig, &aExp, &aSig );\r
1019 }\r
1020 if ( bExp == 0 ) {\r
1021 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );\r
1022 normalizeFloat32Subnormal( bSig, &bExp, &bSig );\r
1023 }\r
1024 zExp = aExp + bExp - 0x7F;\r
1025 aSig = ( aSig | 0x00800000 )<<7;\r
1026 bSig = ( bSig | 0x00800000 )<<8;\r
1027 mul32To64( aSig, bSig, &zSig0, &zSig1 );\r
1028 zSig0 |= ( zSig1 != 0 );\r
1029 if ( 0 <= (sbits32) ( zSig0<<1 ) ) {\r
1030 zSig0 <<= 1;\r
1031 --zExp;\r
1032 }\r
1033 return roundAndPackFloat32( zSign, zExp, zSig0 );\r
1034\r
1035}\r
1036\r
1037/*\r
1038-------------------------------------------------------------------------------\r
1039Returns the result of dividing the single-precision floating-point value `a'\r
1040by the corresponding value `b'. The operation is performed according to the\r
1041IEC/IEEE Standard for Binary Floating-Point Arithmetic.\r
1042-------------------------------------------------------------------------------\r
1043*/\r
1044float32 float32_div( float32 a, float32 b )\r
1045{\r
1046 flag aSign, bSign, zSign;\r
1047 int16 aExp, bExp, zExp;\r
1048 bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;\r
1049\r
1050 aSig = extractFloat32Frac( a );\r
1051 aExp = extractFloat32Exp( a );\r
1052 aSign = extractFloat32Sign( a );\r
1053 bSig = extractFloat32Frac( b );\r
1054 bExp = extractFloat32Exp( b );\r
1055 bSign = extractFloat32Sign( b );\r
1056 zSign = aSign ^ bSign;\r
1057 if ( aExp == 0xFF ) {\r
1058 if ( aSig ) return propagateFloat32NaN( a, b );\r
1059 if ( bExp == 0xFF ) {\r
1060 if ( bSig ) return propagateFloat32NaN( a, b );\r
1061 float_raise( float_flag_invalid );\r
1062 return float32_default_nan;\r
1063 }\r
1064 return packFloat32( zSign, 0xFF, 0 );\r
1065 }\r
1066 if ( bExp == 0xFF ) {\r
1067 if ( bSig ) return propagateFloat32NaN( a, b );\r
1068 return packFloat32( zSign, 0, 0 );\r
1069 }\r
1070 if ( bExp == 0 ) {\r
1071 if ( bSig == 0 ) {\r
1072 if ( ( aExp | aSig ) == 0 ) {\r
1073 float_raise( float_flag_invalid );\r
1074 return float32_default_nan;\r
1075 }\r
1076 float_raise( float_flag_divbyzero );\r
1077 return packFloat32( zSign, 0xFF, 0 );\r
1078 }\r
1079 normalizeFloat32Subnormal( bSig, &bExp, &bSig );\r
1080 }\r
1081 if ( aExp == 0 ) {\r
1082 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );\r
1083 normalizeFloat32Subnormal( aSig, &aExp, &aSig );\r
1084 }\r
1085 zExp = aExp - bExp + 0x7D;\r
1086 aSig = ( aSig | 0x00800000 )<<7;\r
1087 bSig = ( bSig | 0x00800000 )<<8;\r
1088 if ( bSig <= ( aSig + aSig ) ) {\r
1089 aSig >>= 1;\r
1090 ++zExp;\r
1091 }\r
1092 zSig = estimateDiv64To32( aSig, 0, bSig );\r
1093 if ( ( zSig & 0x3F ) <= 2 ) {\r
1094 mul32To64( bSig, zSig, &term0, &term1 );\r
1095 sub64( aSig, 0, term0, term1, &rem0, &rem1 );\r
1096 while ( (sbits32) rem0 < 0 ) {\r
1097 --zSig;\r
1098 add64( rem0, rem1, 0, bSig, &rem0, &rem1 );\r
1099 }\r
1100 zSig |= ( rem1 != 0 );\r
1101 }\r
1102 return roundAndPackFloat32( zSign, zExp, zSig );\r
1103\r
1104}\r
1105\r
1106#ifndef SOFTFLOAT_FOR_GCC\r
1107/*\r
1108-------------------------------------------------------------------------------\r
1109Returns the remainder of the single-precision floating-point value `a'\r
1110with respect to the corresponding value `b'. The operation is performed\r
1111according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.\r
1112-------------------------------------------------------------------------------\r
1113*/\r
1114float32 float32_rem( float32 a, float32 b )\r
1115{\r
1116 flag aSign, bSign, zSign;\r
1117 int16 aExp, bExp, expDiff;\r
1118 bits32 aSig, bSig, q, allZero, alternateASig;\r
1119 sbits32 sigMean;\r
1120\r
1121 aSig = extractFloat32Frac( a );\r
1122 aExp = extractFloat32Exp( a );\r
1123 aSign = extractFloat32Sign( a );\r
1124 bSig = extractFloat32Frac( b );\r
1125 bExp = extractFloat32Exp( b );\r
1126 bSign = extractFloat32Sign( b );\r
1127 if ( aExp == 0xFF ) {\r
1128 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {\r
1129 return propagateFloat32NaN( a, b );\r
1130 }\r
1131 float_raise( float_flag_invalid );\r
1132 return float32_default_nan;\r
1133 }\r
1134 if ( bExp == 0xFF ) {\r
1135 if ( bSig ) return propagateFloat32NaN( a, b );\r
1136 return a;\r
1137 }\r
1138 if ( bExp == 0 ) {\r
1139 if ( bSig == 0 ) {\r
1140 float_raise( float_flag_invalid );\r
1141 return float32_default_nan;\r
1142 }\r
1143 normalizeFloat32Subnormal( bSig, &bExp, &bSig );\r
1144 }\r
1145 if ( aExp == 0 ) {\r
1146 if ( aSig == 0 ) return a;\r
1147 normalizeFloat32Subnormal( aSig, &aExp, &aSig );\r
1148 }\r
1149 expDiff = aExp - bExp;\r
1150 aSig = ( aSig | 0x00800000 )<<8;\r
1151 bSig = ( bSig | 0x00800000 )<<8;\r
1152 if ( expDiff < 0 ) {\r
1153 if ( expDiff < -1 ) return a;\r
1154 aSig >>= 1;\r
1155 }\r
1156 q = ( bSig <= aSig );\r
1157 if ( q ) aSig -= bSig;\r
1158 expDiff -= 32;\r
1159 while ( 0 < expDiff ) {\r
1160 q = estimateDiv64To32( aSig, 0, bSig );\r
1161 q = ( 2 < q ) ? q - 2 : 0;\r
1162 aSig = - ( ( bSig>>2 ) * q );\r
1163 expDiff -= 30;\r
1164 }\r
1165 expDiff += 32;\r
1166 if ( 0 < expDiff ) {\r
1167 q = estimateDiv64To32( aSig, 0, bSig );\r
1168 q = ( 2 < q ) ? q - 2 : 0;\r
1169 q >>= 32 - expDiff;\r
1170 bSig >>= 2;\r
1171 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;\r
1172 }\r
1173 else {\r
1174 aSig >>= 2;\r
1175 bSig >>= 2;\r
1176 }\r
1177 do {\r
1178 alternateASig = aSig;\r
1179 ++q;\r
1180 aSig -= bSig;\r
1181 } while ( 0 <= (sbits32) aSig );\r
1182 sigMean = aSig + alternateASig;\r
1183 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {\r
1184 aSig = alternateASig;\r
1185 }\r
1186 zSign = ( (sbits32) aSig < 0 );\r
1187 if ( zSign ) aSig = - aSig;\r
1188 return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );\r
1189\r
1190}\r
1191#endif\r
1192\r
1193#ifndef SOFTFLOAT_FOR_GCC\r
1194/*\r
1195-------------------------------------------------------------------------------\r
1196Returns the square root of the single-precision floating-point value `a'.\r
1197The operation is performed according to the IEC/IEEE Standard for Binary\r
1198Floating-Point Arithmetic.\r
1199-------------------------------------------------------------------------------\r
1200*/\r
1201float32 float32_sqrt( float32 a )\r
1202{\r
1203 flag aSign;\r
1204 int16 aExp, zExp;\r
1205 bits32 aSig, zSig, rem0, rem1, term0, term1;\r
1206\r
1207 aSig = extractFloat32Frac( a );\r
1208 aExp = extractFloat32Exp( a );\r
1209 aSign = extractFloat32Sign( a );\r
1210 if ( aExp == 0xFF ) {\r
1211 if ( aSig ) return propagateFloat32NaN( a, 0 );\r
1212 if ( ! aSign ) return a;\r
1213 float_raise( float_flag_invalid );\r
1214 return float32_default_nan;\r
1215 }\r
1216 if ( aSign ) {\r
1217 if ( ( aExp | aSig ) == 0 ) return a;\r
1218 float_raise( float_flag_invalid );\r
1219 return float32_default_nan;\r
1220 }\r
1221 if ( aExp == 0 ) {\r
1222 if ( aSig == 0 ) return 0;\r
1223 normalizeFloat32Subnormal( aSig, &aExp, &aSig );\r
1224 }\r
1225 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;\r
1226 aSig = ( aSig | 0x00800000 )<<8;\r
1227 zSig = estimateSqrt32( aExp, aSig ) + 2;\r
1228 if ( ( zSig & 0x7F ) <= 5 ) {\r
1229 if ( zSig < 2 ) {\r
1230 zSig = 0x7FFFFFFF;\r
1231 goto roundAndPack;\r
1232 }\r
1233 else {\r
1234 aSig >>= aExp & 1;\r
1235 mul32To64( zSig, zSig, &term0, &term1 );\r
1236 sub64( aSig, 0, term0, term1, &rem0, &rem1 );\r
1237 while ( (sbits32) rem0 < 0 ) {\r
1238 --zSig;\r
1239 shortShift64Left( 0, zSig, 1, &term0, &term1 );\r
1240 term1 |= 1;\r
1241 add64( rem0, rem1, term0, term1, &rem0, &rem1 );\r
1242 }\r
1243 zSig |= ( ( rem0 | rem1 ) != 0 );\r
1244 }\r
1245 }\r
1246 shift32RightJamming( zSig, 1, &zSig );\r
1247 roundAndPack:\r
1248 return roundAndPackFloat32( 0, zExp, zSig );\r
1249\r
1250}\r
1251#endif\r
1252\r
1253/*\r
1254-------------------------------------------------------------------------------\r
1255Returns 1 if the single-precision floating-point value `a' is equal to\r
1256the corresponding value `b', and 0 otherwise. The comparison is performed\r
1257according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.\r
1258-------------------------------------------------------------------------------\r
1259*/\r
1260flag float32_eq( float32 a, float32 b )\r
1261{\r
1262\r
1263 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )\r
1264 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )\r
1265 ) {\r
1266 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {\r
1267 float_raise( float_flag_invalid );\r
1268 }\r
1269 return 0;\r
1270 }\r
1271 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );\r
1272\r
1273}\r
1274\r
1275/*\r
1276-------------------------------------------------------------------------------\r
1277Returns 1 if the single-precision floating-point value `a' is less than\r
1278or equal to the corresponding value `b', and 0 otherwise. The comparison\r
1279is performed according to the IEC/IEEE Standard for Binary Floating-Point\r
1280Arithmetic.\r
1281-------------------------------------------------------------------------------\r
1282*/\r
1283flag float32_le( float32 a, float32 b )\r
1284{\r
1285 flag aSign, bSign;\r
1286\r
1287 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )\r
1288 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )\r
1289 ) {\r
1290 float_raise( float_flag_invalid );\r
1291 return 0;\r
1292 }\r
1293 aSign = extractFloat32Sign( a );\r
1294 bSign = extractFloat32Sign( b );\r
1295 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );\r
1296 return ( a == b ) || ( aSign ^ ( a < b ) );\r
1297\r
1298}\r
1299\r
1300/*\r
1301-------------------------------------------------------------------------------\r
1302Returns 1 if the single-precision floating-point value `a' is less than\r
1303the corresponding value `b', and 0 otherwise. The comparison is performed\r
1304according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.\r
1305-------------------------------------------------------------------------------\r
1306*/\r
1307flag float32_lt( float32 a, float32 b )\r
1308{\r
1309 flag aSign, bSign;\r
1310\r
1311 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )\r
1312 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )\r
1313 ) {\r
1314 float_raise( float_flag_invalid );\r
1315 return 0;\r
1316 }\r
1317 aSign = extractFloat32Sign( a );\r
1318 bSign = extractFloat32Sign( b );\r
1319 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );\r
1320 return ( a != b ) && ( aSign ^ ( a < b ) );\r
1321\r
1322}\r
1323\r
1324#ifndef SOFTFLOAT_FOR_GCC /* Not needed */\r
1325/*\r
1326-------------------------------------------------------------------------------\r
1327Returns 1 if the single-precision floating-point value `a' is equal to\r
1328the corresponding value `b', and 0 otherwise. The invalid exception is\r
1329raised if either operand is a NaN. Otherwise, the comparison is performed\r
1330according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.\r
1331-------------------------------------------------------------------------------\r
1332*/\r
1333flag float32_eq_signaling( float32 a, float32 b )\r
1334{\r
1335\r
1336 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )\r
1337 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )\r
1338 ) {\r
1339 float_raise( float_flag_invalid );\r
1340 return 0;\r
1341 }\r
1342 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );\r
1343\r
1344}\r
1345\r
1346/*\r
1347-------------------------------------------------------------------------------\r
1348Returns 1 if the single-precision floating-point value `a' is less than or\r
1349equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not\r
1350cause an exception. Otherwise, the comparison is performed according to the\r
1351IEC/IEEE Standard for Binary Floating-Point Arithmetic.\r
1352-------------------------------------------------------------------------------\r
1353*/\r
1354flag float32_le_quiet( float32 a, float32 b )\r
1355{\r
1356 flag aSign, bSign;\r
1357 int16 aExp, bExp;\r
1358\r
1359 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )\r
1360 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )\r
1361 ) {\r
1362 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {\r
1363 float_raise( float_flag_invalid );\r
1364 }\r
1365 return 0;\r
1366 }\r
1367 aSign = extractFloat32Sign( a );\r
1368 bSign = extractFloat32Sign( b );\r
1369 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );\r
1370 return ( a == b ) || ( aSign ^ ( a < b ) );\r
1371\r
1372}\r
1373\r
1374/*\r
1375-------------------------------------------------------------------------------\r
1376Returns 1 if the single-precision floating-point value `a' is less than\r
1377the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an\r
1378exception. Otherwise, the comparison is performed according to the IEC/IEEE\r
1379Standard for Binary Floating-Point Arithmetic.\r
1380-------------------------------------------------------------------------------\r
1381*/\r
1382flag float32_lt_quiet( float32 a, float32 b )\r
1383{\r
1384 flag aSign, bSign;\r
1385\r
1386 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )\r
1387 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )\r
1388 ) {\r
1389 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {\r
1390 float_raise( float_flag_invalid );\r
1391 }\r
1392 return 0;\r
1393 }\r
1394 aSign = extractFloat32Sign( a );\r
1395 bSign = extractFloat32Sign( b );\r
1396 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );\r
1397 return ( a != b ) && ( aSign ^ ( a < b ) );\r
1398\r
1399}\r
1400#endif /* !SOFTFLOAT_FOR_GCC */\r
1401\r
1402#ifndef SOFTFLOAT_FOR_GCC /* Not needed */\r
1403/*\r
1404-------------------------------------------------------------------------------\r
1405Returns the result of converting the double-precision floating-point value\r
1406`a' to the 32-bit two's complement integer format. The conversion is\r
1407performed according to the IEC/IEEE Standard for Binary Floating-Point\r
1408Arithmetic---which means in particular that the conversion is rounded\r
1409according to the current rounding mode. If `a' is a NaN, the largest\r
1410positive integer is returned. Otherwise, if the conversion overflows, the\r
1411largest integer with the same sign as `a' is returned.\r
1412-------------------------------------------------------------------------------\r
1413*/\r
1414int32 float64_to_int32( float64 a )\r
1415{\r
1416 flag aSign;\r
1417 int16 aExp, shiftCount;\r
1418 bits32 aSig0, aSig1, absZ, aSigExtra;\r
1419 int32 z;\r
1420 int8 roundingMode;\r
1421\r
1422 aSig1 = extractFloat64Frac1( a );\r
1423 aSig0 = extractFloat64Frac0( a );\r
1424 aExp = extractFloat64Exp( a );\r
1425 aSign = extractFloat64Sign( a );\r
1426 shiftCount = aExp - 0x413;\r
1427 if ( 0 <= shiftCount ) {\r
1428 if ( 0x41E < aExp ) {\r
1429 if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;\r
1430 goto invalid;\r
1431 }\r
1432 shortShift64Left(\r
1433 aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );\r
1434 if ( 0x80000000 < absZ ) goto invalid;\r
1435 }\r
1436 else {\r
1437 aSig1 = ( aSig1 != 0 );\r
1438 if ( aExp < 0x3FE ) {\r
1439 aSigExtra = aExp | aSig0 | aSig1;\r
1440 absZ = 0;\r
1441 }\r
1442 else {\r
1443 aSig0 |= 0x00100000;\r
1444 aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;\r
1445 absZ = aSig0>>( - shiftCount );\r
1446 }\r
1447 }\r
1448 roundingMode = float_rounding_mode;\r
1449 if ( roundingMode == float_round_nearest_even ) {\r
1450 if ( (sbits32) aSigExtra < 0 ) {\r
1451 ++absZ;\r
1452 if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;\r
1453 }\r
1454 z = aSign ? - absZ : absZ;\r
1455 }\r
1456 else {\r
1457 aSigExtra = ( aSigExtra != 0 );\r
1458 if ( aSign ) {\r
1459 z = - ( absZ\r
1460 + ( ( roundingMode == float_round_down ) & aSigExtra ) );\r
1461 }\r
1462 else {\r
1463 z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );\r
1464 }\r
1465 }\r
1466 if ( ( aSign ^ ( z < 0 ) ) && z ) {\r
1467 invalid:\r
1468 float_raise( float_flag_invalid );\r
1469 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;\r
1470 }\r
1471 if ( aSigExtra ) set_float_exception_inexact_flag();\r
1472 return z;\r
1473\r
1474}\r
1475#endif /* !SOFTFLOAT_FOR_GCC */\r
1476\r
1477/*\r
1478-------------------------------------------------------------------------------\r
1479Returns the result of converting the double-precision floating-point value\r
1480`a' to the 32-bit two's complement integer format. The conversion is\r
1481performed according to the IEC/IEEE Standard for Binary Floating-Point\r
1482Arithmetic, except that the conversion is always rounded toward zero.\r
1483If `a' is a NaN, the largest positive integer is returned. Otherwise, if\r
1484the conversion overflows, the largest integer with the same sign as `a' is\r
1485returned.\r
1486-------------------------------------------------------------------------------\r
1487*/\r
1488int32 float64_to_int32_round_to_zero( float64 a )\r
1489{\r
1490 flag aSign;\r
1491 int16 aExp, shiftCount;\r
1492 bits32 aSig0, aSig1, absZ, aSigExtra;\r
1493 int32 z;\r
1494\r
1495 aSig1 = extractFloat64Frac1( a );\r
1496 aSig0 = extractFloat64Frac0( a );\r
1497 aExp = extractFloat64Exp( a );\r
1498 aSign = extractFloat64Sign( a );\r
1499 shiftCount = aExp - 0x413;\r
1500 if ( 0 <= shiftCount ) {\r
1501 if ( 0x41E < aExp ) {\r
1502 if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;\r
1503 goto invalid;\r
1504 }\r
1505 shortShift64Left(\r
1506 aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );\r
1507 }\r
1508 else {\r
1509 if ( aExp < 0x3FF ) {\r
1510 if ( aExp | aSig0 | aSig1 ) {\r
1511 set_float_exception_inexact_flag();\r
1512 }\r
1513 return 0;\r
1514 }\r
1515 aSig0 |= 0x00100000;\r
1516 aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;\r
1517 absZ = aSig0>>( - shiftCount );\r
1518 }\r
1519 z = aSign ? - absZ : absZ;\r
1520 if ( ( aSign ^ ( z < 0 ) ) && z ) {\r
1521 invalid:\r
1522 float_raise( float_flag_invalid );\r
1523 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;\r
1524 }\r
1525 if ( aSigExtra ) set_float_exception_inexact_flag();\r
1526 return z;\r
1527\r
1528}\r
1529\r
1530/*\r
1531-------------------------------------------------------------------------------\r
1532Returns the result of converting the double-precision floating-point value\r
1533`a' to the single-precision floating-point format. The conversion is\r
1534performed according to the IEC/IEEE Standard for Binary Floating-Point\r
1535Arithmetic.\r
1536-------------------------------------------------------------------------------\r
1537*/\r
1538float32 float64_to_float32( float64 a )\r
1539{\r
1540 flag aSign;\r
1541 int16 aExp;\r
1542 bits32 aSig0, aSig1, zSig;\r
1543 bits32 allZero;\r
1544\r
1545 aSig1 = extractFloat64Frac1( a );\r
1546 aSig0 = extractFloat64Frac0( a );\r
1547 aExp = extractFloat64Exp( a );\r
1548 aSign = extractFloat64Sign( a );\r
1549 if ( aExp == 0x7FF ) {\r
1550 if ( aSig0 | aSig1 ) {\r
1551 return commonNaNToFloat32( float64ToCommonNaN( a ) );\r
1552 }\r
1553 return packFloat32( aSign, 0xFF, 0 );\r
1554 }\r
1555 shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );\r
1556 if ( aExp ) zSig |= 0x40000000;\r
1557 return roundAndPackFloat32( aSign, aExp - 0x381, zSig );\r
1558\r
1559}\r
1560\r
1561#ifndef SOFTFLOAT_FOR_GCC\r
1562/*\r
1563-------------------------------------------------------------------------------\r
1564Rounds the double-precision floating-point value `a' to an integer,\r
1565and returns the result as a double-precision floating-point value. The\r
1566operation is performed according to the IEC/IEEE Standard for Binary\r
1567Floating-Point Arithmetic.\r
1568-------------------------------------------------------------------------------\r
1569*/\r
1570float64 float64_round_to_int( float64 a )\r
1571{\r
1572 flag aSign;\r
1573 int16 aExp;\r
1574 bits32 lastBitMask, roundBitsMask;\r
1575 int8 roundingMode;\r
1576 float64 z;\r
1577\r
1578 aExp = extractFloat64Exp( a );\r
1579 if ( 0x413 <= aExp ) {\r
1580 if ( 0x433 <= aExp ) {\r
1581 if ( ( aExp == 0x7FF )\r
1582 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {\r
1583 return propagateFloat64NaN( a, a );\r
1584 }\r
1585 return a;\r
1586 }\r
1587 lastBitMask = 1;\r
1588 lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;\r
1589 roundBitsMask = lastBitMask - 1;\r
1590 z = a;\r
1591 roundingMode = float_rounding_mode;\r
1592 if ( roundingMode == float_round_nearest_even ) {\r
1593 if ( lastBitMask ) {\r
1594 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );\r
1595 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;\r
1596 }\r
1597 else {\r
1598 if ( (sbits32) z.low < 0 ) {\r
1599 ++z.high;\r
1600 if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;\r
1601 }\r
1602 }\r
1603 }\r
1604 else if ( roundingMode != float_round_to_zero ) {\r
1605 if ( extractFloat64Sign( z )\r
1606 ^ ( roundingMode == float_round_up ) ) {\r
1607 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );\r
1608 }\r
1609 }\r
1610 z.low &= ~ roundBitsMask;\r
1611 }\r
1612 else {\r
1613 if ( aExp <= 0x3FE ) {\r
1614 if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;\r
1615 set_float_exception_inexact_flag();\r
1616 aSign = extractFloat64Sign( a );\r
1617 switch ( float_rounding_mode ) {\r
1618 case float_round_nearest_even:\r
1619 if ( ( aExp == 0x3FE )\r
1620 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )\r
1621 ) {\r
1622 return packFloat64( aSign, 0x3FF, 0, 0 );\r
1623 }\r
1624 break;\r
1625 case float_round_down:\r
1626 return\r
1627 aSign ? packFloat64( 1, 0x3FF, 0, 0 )\r
1628 : packFloat64( 0, 0, 0, 0 );\r
1629 case float_round_up:\r
1630 return\r
1631 aSign ? packFloat64( 1, 0, 0, 0 )\r
1632 : packFloat64( 0, 0x3FF, 0, 0 );\r
1633 }\r
1634 return packFloat64( aSign, 0, 0, 0 );\r
1635 }\r
1636 lastBitMask = 1;\r
1637 lastBitMask <<= 0x413 - aExp;\r
1638 roundBitsMask = lastBitMask - 1;\r
1639 z.low = 0;\r
1640 z.high = a.high;\r
1641 roundingMode = float_rounding_mode;\r
1642 if ( roundingMode == float_round_nearest_even ) {\r
1643 z.high += lastBitMask>>1;\r
1644 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {\r
1645 z.high &= ~ lastBitMask;\r
1646 }\r
1647 }\r
1648 else if ( roundingMode != float_round_to_zero ) {\r
1649 if ( extractFloat64Sign( z )\r
1650 ^ ( roundingMode == float_round_up ) ) {\r
1651 z.high |= ( a.low != 0 );\r
1652 z.high += roundBitsMask;\r
1653 }\r
1654 }\r
1655 z.high &= ~ roundBitsMask;\r
1656 }\r
1657 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {\r
1658 set_float_exception_inexact_flag();\r
1659 }\r
1660 return z;\r
1661\r
1662}\r
1663#endif\r
1664\r
1665/*\r
1666-------------------------------------------------------------------------------\r
1667Returns the result of adding the absolute values of the double-precision\r
1668floating-point values `a' and `b'. If `zSign' is 1, the sum is negated\r
1669before being returned. `zSign' is ignored if the result is a NaN.\r
1670The addition is performed according to the IEC/IEEE Standard for Binary\r
1671Floating-Point Arithmetic.\r
1672-------------------------------------------------------------------------------\r
1673*/\r
1674static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )\r
1675{\r
1676 int16 aExp, bExp, zExp;\r
1677 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;\r
1678 int16 expDiff;\r
1679\r
1680 aSig1 = extractFloat64Frac1( a );\r
1681 aSig0 = extractFloat64Frac0( a );\r
1682 aExp = extractFloat64Exp( a );\r
1683 bSig1 = extractFloat64Frac1( b );\r
1684 bSig0 = extractFloat64Frac0( b );\r
1685 bExp = extractFloat64Exp( b );\r
1686 expDiff = aExp - bExp;\r
1687 if ( 0 < expDiff ) {\r
1688 if ( aExp == 0x7FF ) {\r
1689 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );\r
1690 return a;\r
1691 }\r
1692 if ( bExp == 0 ) {\r
1693 --expDiff;\r
1694 }\r
1695 else {\r
1696 bSig0 |= 0x00100000;\r
1697 }\r
1698 shift64ExtraRightJamming(\r
1699 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );\r
1700 zExp = aExp;\r
1701 }\r
1702 else if ( expDiff < 0 ) {\r
1703 if ( bExp == 0x7FF ) {\r
1704 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );\r
1705 return packFloat64( zSign, 0x7FF, 0, 0 );\r
1706 }\r
1707 if ( aExp == 0 ) {\r
1708 ++expDiff;\r
1709 }\r
1710 else {\r
1711 aSig0 |= 0x00100000;\r
1712 }\r
1713 shift64ExtraRightJamming(\r
1714 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );\r
1715 zExp = bExp;\r
1716 }\r
1717 else {\r
1718 if ( aExp == 0x7FF ) {\r
1719 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {\r
1720 return propagateFloat64NaN( a, b );\r
1721 }\r
1722 return a;\r
1723 }\r
1724 add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );\r
1725 if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );\r
1726 zSig2 = 0;\r
1727 zSig0 |= 0x00200000;\r
1728 zExp = aExp;\r
1729 goto shiftRight1;\r
1730 }\r
1731 aSig0 |= 0x00100000;\r
1732 add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );\r
1733 --zExp;\r
1734 if ( zSig0 < 0x00200000 ) goto roundAndPack;\r
1735 ++zExp;\r
1736 shiftRight1:\r
1737 shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );\r
1738 roundAndPack:\r
1739 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );\r
1740\r
1741}\r
1742\r
1743/*\r
1744-------------------------------------------------------------------------------\r
1745Returns the result of subtracting the absolute values of the double-\r
1746precision floating-point values `a' and `b'. If `zSign' is 1, the\r
1747difference is negated before being returned. `zSign' is ignored if the\r
1748result is a NaN. The subtraction is performed according to the IEC/IEEE\r
1749Standard for Binary Floating-Point Arithmetic.\r
1750-------------------------------------------------------------------------------\r
1751*/\r
1752static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )\r
1753{\r
1754 int16 aExp, bExp, zExp;\r
1755 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;\r
1756 int16 expDiff;\r
1757\r
1758 aSig1 = extractFloat64Frac1( a );\r
1759 aSig0 = extractFloat64Frac0( a );\r
1760 aExp = extractFloat64Exp( a );\r
1761 bSig1 = extractFloat64Frac1( b );\r
1762 bSig0 = extractFloat64Frac0( b );\r
1763 bExp = extractFloat64Exp( b );\r
1764 expDiff = aExp - bExp;\r
1765 shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );\r
1766 shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );\r
1767 if ( 0 < expDiff ) goto aExpBigger;\r
1768 if ( expDiff < 0 ) goto bExpBigger;\r
1769 if ( aExp == 0x7FF ) {\r
1770 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {\r
1771 return propagateFloat64NaN( a, b );\r
1772 }\r
1773 float_raise( float_flag_invalid );\r
1774 return float64_default_nan;\r
1775 }\r
1776 if ( aExp == 0 ) {\r
1777 aExp = 1;\r
1778 bExp = 1;\r
1779 }\r
1780 if ( bSig0 < aSig0 ) goto aBigger;\r
1781 if ( aSig0 < bSig0 ) goto bBigger;\r
1782 if ( bSig1 < aSig1 ) goto aBigger;\r
1783 if ( aSig1 < bSig1 ) goto bBigger;\r
1784 return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );\r
1785 bExpBigger:\r
1786 if ( bExp == 0x7FF ) {\r
1787 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );\r
1788 return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );\r
1789 }\r
1790 if ( aExp == 0 ) {\r
1791 ++expDiff;\r
1792 }\r
1793 else {\r
1794 aSig0 |= 0x40000000;\r
1795 }\r
1796 shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );\r
1797 bSig0 |= 0x40000000;\r
1798 bBigger:\r
1799 sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );\r
1800 zExp = bExp;\r
1801 zSign ^= 1;\r
1802 goto normalizeRoundAndPack;\r
1803 aExpBigger:\r
1804 if ( aExp == 0x7FF ) {\r
1805 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );\r
1806 return a;\r
1807 }\r
1808 if ( bExp == 0 ) {\r
1809 --expDiff;\r
1810 }\r
1811 else {\r
1812 bSig0 |= 0x40000000;\r
1813 }\r
1814 shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );\r
1815 aSig0 |= 0x40000000;\r
1816 aBigger:\r
1817 sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );\r
1818 zExp = aExp;\r
1819 normalizeRoundAndPack:\r
1820 --zExp;\r
1821 return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );\r
1822\r
1823}\r
1824\r
1825/*\r
1826-------------------------------------------------------------------------------\r
1827Returns the result of adding the double-precision floating-point values `a'\r
1828and `b'. The operation is performed according to the IEC/IEEE Standard for\r
1829Binary Floating-Point Arithmetic.\r
1830-------------------------------------------------------------------------------\r
1831*/\r
1832float64 float64_add( float64 a, float64 b )\r
1833{\r
1834 flag aSign, bSign;\r
1835\r
1836 aSign = extractFloat64Sign( a );\r
1837 bSign = extractFloat64Sign( b );\r
1838 if ( aSign == bSign ) {\r
1839 return addFloat64Sigs( a, b, aSign );\r
1840 }\r
1841 else {\r
1842 return subFloat64Sigs( a, b, aSign );\r
1843 }\r
1844\r
1845}\r
1846\r
1847/*\r
1848-------------------------------------------------------------------------------\r
1849Returns the result of subtracting the double-precision floating-point values\r
1850`a' and `b'. The operation is performed according to the IEC/IEEE Standard\r
1851for Binary Floating-Point Arithmetic.\r
1852-------------------------------------------------------------------------------\r
1853*/\r
1854float64 float64_sub( float64 a, float64 b )\r
1855{\r
1856 flag aSign, bSign;\r
1857\r
1858 aSign = extractFloat64Sign( a );\r
1859 bSign = extractFloat64Sign( b );\r
1860 if ( aSign == bSign ) {\r
1861 return subFloat64Sigs( a, b, aSign );\r
1862 }\r
1863 else {\r
1864 return addFloat64Sigs( a, b, aSign );\r
1865 }\r
1866\r
1867}\r
1868\r
1869/*\r
1870-------------------------------------------------------------------------------\r
1871Returns the result of multiplying the double-precision floating-point values\r
1872`a' and `b'. The operation is performed according to the IEC/IEEE Standard\r
1873for Binary Floating-Point Arithmetic.\r
1874-------------------------------------------------------------------------------\r
1875*/\r
1876float64 float64_mul( float64 a, float64 b )\r
1877{\r
1878 flag aSign, bSign, zSign;\r
1879 int16 aExp, bExp, zExp;\r
1880 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;\r
1881\r
1882 aSig1 = extractFloat64Frac1( a );\r
1883 aSig0 = extractFloat64Frac0( a );\r
1884 aExp = extractFloat64Exp( a );\r
1885 aSign = extractFloat64Sign( a );\r
1886 bSig1 = extractFloat64Frac1( b );\r
1887 bSig0 = extractFloat64Frac0( b );\r
1888 bExp = extractFloat64Exp( b );\r
1889 bSign = extractFloat64Sign( b );\r
1890 zSign = aSign ^ bSign;\r
1891 if ( aExp == 0x7FF ) {\r
1892 if ( ( aSig0 | aSig1 )\r
1893 || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {\r
1894 return propagateFloat64NaN( a, b );\r
1895 }\r
1896 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;\r
1897 return packFloat64( zSign, 0x7FF, 0, 0 );\r
1898 }\r
1899 if ( bExp == 0x7FF ) {\r
1900 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );\r
1901 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {\r
1902 invalid:\r
1903 float_raise( float_flag_invalid );\r
1904 return float64_default_nan;\r
1905 }\r
1906 return packFloat64( zSign, 0x7FF, 0, 0 );\r
1907 }\r
1908 if ( aExp == 0 ) {\r
1909 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );\r
1910 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );\r
1911 }\r
1912 if ( bExp == 0 ) {\r
1913 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );\r
1914 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );\r
1915 }\r
1916 zExp = aExp + bExp - 0x400;\r
1917 aSig0 |= 0x00100000;\r
1918 shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );\r
1919 mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );\r
1920 add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );\r
1921 zSig2 |= ( zSig3 != 0 );\r
1922 if ( 0x00200000 <= zSig0 ) {\r
1923 shift64ExtraRightJamming(\r
1924 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );\r
1925 ++zExp;\r
1926 }\r
1927 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );\r
1928\r
1929}\r
1930\r
1931/*\r
1932-------------------------------------------------------------------------------\r
1933Returns the result of dividing the double-precision floating-point value `a'\r
1934by the corresponding value `b'. The operation is performed according to the\r
1935IEC/IEEE Standard for Binary Floating-Point Arithmetic.\r
1936-------------------------------------------------------------------------------\r
1937*/\r
1938float64 float64_div( float64 a, float64 b )\r
1939{\r
1940 flag aSign, bSign, zSign;\r
1941 int16 aExp, bExp, zExp;\r
1942 bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;\r
1943 bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;\r
1944\r
1945 aSig1 = extractFloat64Frac1( a );\r
1946 aSig0 = extractFloat64Frac0( a );\r
1947 aExp = extractFloat64Exp( a );\r
1948 aSign = extractFloat64Sign( a );\r
1949 bSig1 = extractFloat64Frac1( b );\r
1950 bSig0 = extractFloat64Frac0( b );\r
1951 bExp = extractFloat64Exp( b );\r
1952 bSign = extractFloat64Sign( b );\r
1953 zSign = aSign ^ bSign;\r
1954 if ( aExp == 0x7FF ) {\r
1955 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );\r
1956 if ( bExp == 0x7FF ) {\r
1957 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );\r
1958 goto invalid;\r
1959 }\r
1960 return packFloat64( zSign, 0x7FF, 0, 0 );\r
1961 }\r
1962 if ( bExp == 0x7FF ) {\r
1963 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );\r
1964 return packFloat64( zSign, 0, 0, 0 );\r
1965 }\r
1966 if ( bExp == 0 ) {\r
1967 if ( ( bSig0 | bSig1 ) == 0 ) {\r
1968 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {\r
1969 invalid:\r
1970 float_raise( float_flag_invalid );\r
1971 return float64_default_nan;\r
1972 }\r
1973 float_raise( float_flag_divbyzero );\r
1974 return packFloat64( zSign, 0x7FF, 0, 0 );\r
1975 }\r
1976 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );\r
1977 }\r
1978 if ( aExp == 0 ) {\r
1979 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );\r
1980 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );\r
1981 }\r
1982 zExp = aExp - bExp + 0x3FD;\r
1983 shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );\r
1984 shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );\r
1985 if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {\r
1986 shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );\r
1987 ++zExp;\r
1988 }\r
1989 zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );\r
1990 mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );\r
1991 sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );\r
1992 while ( (sbits32) rem0 < 0 ) {\r
1993 --zSig0;\r
1994 add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );\r
1995 }\r
1996 zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );\r
1997 if ( ( zSig1 & 0x3FF ) <= 4 ) {\r
1998 mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );\r
1999 sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );\r
2000 while ( (sbits32) rem1 < 0 ) {\r
2001 --zSig1;\r
2002 add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );\r
2003 }\r
2004 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );\r
2005 }\r
2006 shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );\r
2007 return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );\r
2008\r
2009}\r
2010\r
2011#ifndef SOFTFLOAT_FOR_GCC\r
2012/*\r
2013-------------------------------------------------------------------------------\r
2014Returns the remainder of the double-precision floating-point value `a'\r
2015with respect to the corresponding value `b'. The operation is performed\r
2016according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.\r
2017-------------------------------------------------------------------------------\r
2018*/\r
2019float64 float64_rem( float64 a, float64 b )\r
2020{\r
2021 flag aSign, bSign, zSign;\r
2022 int16 aExp, bExp, expDiff;\r
2023 bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;\r
2024 bits32 allZero, alternateASig0, alternateASig1, sigMean1;\r
2025 sbits32 sigMean0;\r
2026 float64 z;\r
2027\r
2028 aSig1 = extractFloat64Frac1( a );\r
2029 aSig0 = extractFloat64Frac0( a );\r
2030 aExp = extractFloat64Exp( a );\r
2031 aSign = extractFloat64Sign( a );\r
2032 bSig1 = extractFloat64Frac1( b );\r
2033 bSig0 = extractFloat64Frac0( b );\r
2034 bExp = extractFloat64Exp( b );\r
2035 bSign = extractFloat64Sign( b );\r
2036 if ( aExp == 0x7FF ) {\r
2037 if ( ( aSig0 | aSig1 )\r
2038 || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {\r
2039 return propagateFloat64NaN( a, b );\r
2040 }\r
2041 goto invalid;\r
2042 }\r
2043 if ( bExp == 0x7FF ) {\r
2044 if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );\r
2045 return a;\r
2046 }\r
2047 if ( bExp == 0 ) {\r
2048 if ( ( bSig0 | bSig1 ) == 0 ) {\r
2049 invalid:\r
2050 float_raise( float_flag_invalid );\r
2051 return float64_default_nan;\r
2052 }\r
2053 normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );\r
2054 }\r
2055 if ( aExp == 0 ) {\r
2056 if ( ( aSig0 | aSig1 ) == 0 ) return a;\r
2057 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );\r
2058 }\r
2059 expDiff = aExp - bExp;\r
2060 if ( expDiff < -1 ) return a;\r
2061 shortShift64Left(\r
2062 aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );\r
2063 shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );\r
2064 q = le64( bSig0, bSig1, aSig0, aSig1 );\r
2065 if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );\r
2066 expDiff -= 32;\r
2067 while ( 0 < expDiff ) {\r
2068 q = estimateDiv64To32( aSig0, aSig1, bSig0 );\r
2069 q = ( 4 < q ) ? q - 4 : 0;\r
2070 mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );\r
2071 shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );\r
2072 shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );\r
2073 sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );\r
2074 expDiff -= 29;\r
2075 }\r
2076 if ( -32 < expDiff ) {\r
2077 q = estimateDiv64To32( aSig0, aSig1, bSig0 );\r
2078 q = ( 4 < q ) ? q - 4 : 0;\r
2079 q >>= - expDiff;\r
2080 shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );\r
2081 expDiff += 24;\r
2082 if ( expDiff < 0 ) {\r
2083 shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );\r
2084 }\r
2085 else {\r
2086 shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );\r
2087 }\r
2088 mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );\r
2089 sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );\r
2090 }\r
2091 else {\r
2092 shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );\r
2093 shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );\r
2094 }\r
2095 do {\r
2096 alternateASig0 = aSig0;\r
2097 alternateASig1 = aSig1;\r
2098 ++q;\r
2099 sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );\r
2100 } while ( 0 <= (sbits32) aSig0 );\r
2101 add64(\r
2102 aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );\r
2103 if ( ( sigMean0 < 0 )\r
2104 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {\r
2105 aSig0 = alternateASig0;\r
2106 aSig1 = alternateASig1;\r
2107 }\r
2108 zSign = ( (sbits32) aSig0 < 0 );\r
2109 if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );\r
2110 return\r
2111 normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );\r
2112\r
2113}\r
2114#endif\r
2115\r
2116#ifndef SOFTFLOAT_FOR_GCC\r
2117/*\r
2118-------------------------------------------------------------------------------\r
2119Returns the square root of the double-precision floating-point value `a'.\r
2120The operation is performed according to the IEC/IEEE Standard for Binary\r
2121Floating-Point Arithmetic.\r
2122-------------------------------------------------------------------------------\r
2123*/\r
2124float64 float64_sqrt( float64 a )\r
2125{\r
2126 flag aSign;\r
2127 int16 aExp, zExp;\r
2128 bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;\r
2129 bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;\r
2130 float64 z;\r
2131\r
2132 aSig1 = extractFloat64Frac1( a );\r
2133 aSig0 = extractFloat64Frac0( a );\r
2134 aExp = extractFloat64Exp( a );\r
2135 aSign = extractFloat64Sign( a );\r
2136 if ( aExp == 0x7FF ) {\r
2137 if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );\r
2138 if ( ! aSign ) return a;\r
2139 goto invalid;\r
2140 }\r
2141 if ( aSign ) {\r
2142 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;\r
2143 invalid:\r
2144 float_raise( float_flag_invalid );\r
2145 return float64_default_nan;\r
2146 }\r
2147 if ( aExp == 0 ) {\r
2148 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );\r
2149 normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );\r
2150 }\r
2151 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;\r
2152 aSig0 |= 0x00100000;\r
2153 shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );\r
2154 zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;\r
2155 if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;\r
2156 doubleZSig0 = zSig0 + zSig0;\r
2157 shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );\r
2158 mul32To64( zSig0, zSig0, &term0, &term1 );\r
2159 sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );\r
2160 while ( (sbits32) rem0 < 0 ) {\r
2161 --zSig0;\r
2162 doubleZSig0 -= 2;\r
2163 add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );\r
2164 }\r
2165 zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );\r
2166 if ( ( zSig1 & 0x1FF ) <= 5 ) {\r
2167 if ( zSig1 == 0 ) zSig1 = 1;\r
2168 mul32To64( doubleZSig0, zSig1, &term1, &term2 );\r
2169 sub64( rem1, 0, term1, term2, &rem1, &rem2 );\r
2170 mul32To64( zSig1, zSig1, &term2, &term3 );\r
2171 sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );\r
2172 while ( (sbits32) rem1 < 0 ) {\r
2173 --zSig1;\r
2174 shortShift64Left( 0, zSig1, 1, &term2, &term3 );\r
2175 term3 |= 1;\r
2176 term2 |= doubleZSig0;\r
2177 add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );\r
2178 }\r
2179 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );\r
2180 }\r
2181 shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );\r
2182 return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );\r
2183\r
2184}\r
2185#endif\r
2186\r
2187/*\r
2188-------------------------------------------------------------------------------\r
2189Returns 1 if the double-precision floating-point value `a' is equal to\r
2190the corresponding value `b', and 0 otherwise. The comparison is performed\r
2191according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.\r
2192-------------------------------------------------------------------------------\r
2193*/\r
2194flag float64_eq( float64 a, float64 b )\r
2195{\r
2196\r
2197 if ( ( ( extractFloat64Exp( a ) == 0x7FF )\r
2198 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )\r
2199 || ( ( extractFloat64Exp( b ) == 0x7FF )\r
2200 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )\r
2201 ) {\r
2202 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {\r
2203 float_raise( float_flag_invalid );\r
2204 }\r
2205 return 0;\r
2206 }\r
2207 return ( a == b ) ||\r
2208 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );\r
2209\r
2210}\r
2211\r
2212/*\r
2213-------------------------------------------------------------------------------\r
2214Returns 1 if the double-precision floating-point value `a' is less than\r
2215or equal to the corresponding value `b', and 0 otherwise. The comparison\r
2216is performed according to the IEC/IEEE Standard for Binary Floating-Point\r
2217Arithmetic.\r
2218-------------------------------------------------------------------------------\r
2219*/\r
2220flag float64_le( float64 a, float64 b )\r
2221{\r
2222 flag aSign, bSign;\r
2223\r
2224 if ( ( ( extractFloat64Exp( a ) == 0x7FF )\r
2225 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )\r
2226 || ( ( extractFloat64Exp( b ) == 0x7FF )\r
2227 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )\r
2228 ) {\r
2229 float_raise( float_flag_invalid );\r
2230 return 0;\r
2231 }\r
2232 aSign = extractFloat64Sign( a );\r
2233 bSign = extractFloat64Sign( b );\r
2234 if ( aSign != bSign )\r
2235 return aSign ||\r
2236 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==\r
2237 0 );\r
2238 return ( a == b ) ||\r
2239 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );\r
2240}\r
2241\r
2242/*\r
2243-------------------------------------------------------------------------------\r
2244Returns 1 if the double-precision floating-point value `a' is less than\r
2245the corresponding value `b', and 0 otherwise. The comparison is performed\r
2246according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.\r
2247-------------------------------------------------------------------------------\r
2248*/\r
2249flag float64_lt( float64 a, float64 b )\r
2250{\r
2251 flag aSign, bSign;\r
2252\r
2253 if ( ( ( extractFloat64Exp( a ) == 0x7FF )\r
2254 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )\r
2255 || ( ( extractFloat64Exp( b ) == 0x7FF )\r
2256 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )\r
2257 ) {\r
2258 float_raise( float_flag_invalid );\r
2259 return 0;\r
2260 }\r
2261 aSign = extractFloat64Sign( a );\r
2262 bSign = extractFloat64Sign( b );\r
2263 if ( aSign != bSign )\r
2264 return aSign &&\r
2265 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=\r
2266 0 );\r
2267 return ( a != b ) &&\r
2268 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );\r
2269\r
2270}\r
2271\r
2272#ifndef SOFTFLOAT_FOR_GCC\r
2273/*\r
2274-------------------------------------------------------------------------------\r
2275Returns 1 if the double-precision floating-point value `a' is equal to\r
2276the corresponding value `b', and 0 otherwise. The invalid exception is\r
2277raised if either operand is a NaN. Otherwise, the comparison is performed\r
2278according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.\r
2279-------------------------------------------------------------------------------\r
2280*/\r
2281flag float64_eq_signaling( float64 a, float64 b )\r
2282{\r
2283\r
2284 if ( ( ( extractFloat64Exp( a ) == 0x7FF )\r
2285 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )\r
2286 || ( ( extractFloat64Exp( b ) == 0x7FF )\r
2287 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )\r
2288 ) {\r
2289 float_raise( float_flag_invalid );\r
2290 return 0;\r
2291 }\r
2292 return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );\r
2293\r
2294}\r
2295\r
2296/*\r
2297-------------------------------------------------------------------------------\r
2298Returns 1 if the double-precision floating-point value `a' is less than or\r
2299equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not\r
2300cause an exception. Otherwise, the comparison is performed according to the\r
2301IEC/IEEE Standard for Binary Floating-Point Arithmetic.\r
2302-------------------------------------------------------------------------------\r
2303*/\r
2304flag float64_le_quiet( float64 a, float64 b )\r
2305{\r
2306 flag aSign, bSign;\r
2307\r
2308 if ( ( ( extractFloat64Exp( a ) == 0x7FF )\r
2309 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )\r
2310 || ( ( extractFloat64Exp( b ) == 0x7FF )\r
2311 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )\r
2312 ) {\r
2313 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {\r
2314 float_raise( float_flag_invalid );\r
2315 }\r
2316 return 0;\r
2317 }\r
2318 aSign = extractFloat64Sign( a );\r
2319 bSign = extractFloat64Sign( b );\r
2320 if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );\r
2321 return ( a == b ) || ( aSign ^ ( a < b ) );\r
2322\r
2323}\r
2324\r
2325/*\r
2326-------------------------------------------------------------------------------\r
2327Returns 1 if the double-precision floating-point value `a' is less than\r
2328the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an\r
2329exception. Otherwise, the comparison is performed according to the IEC/IEEE\r
2330Standard for Binary Floating-Point Arithmetic.\r
2331-------------------------------------------------------------------------------\r
2332*/\r
2333flag float64_lt_quiet( float64 a, float64 b )\r
2334{\r
2335 flag aSign, bSign;\r
2336\r
2337 if ( ( ( extractFloat64Exp( a ) == 0x7FF )\r
2338 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )\r
2339 || ( ( extractFloat64Exp( b ) == 0x7FF )\r
2340 && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )\r
2341 ) {\r
2342 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {\r
2343 float_raise( float_flag_invalid );\r
2344 }\r
2345 return 0;\r
2346 }\r
2347 aSign = extractFloat64Sign( a );\r
2348 bSign = extractFloat64Sign( b );\r
2349 if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );\r
2350 return ( a != b ) && ( aSign ^ ( a < b ) );\r
2351\r
2352}\r
2353\r
2354#endif\r