]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
fpu: add mechanism to check for invalid long double formats
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
2ac8bd03 86
6b4c305c 87#include "fpu/softfloat.h"
158142c2 88
dc355b76 89/* We only need stdlib for abort() */
dc355b76 90
158142c2
FB
91/*----------------------------------------------------------------------------
92| Primitive arithmetic functions, including multi-word arithmetic, and
93| division and square root approximations. (Can be specialized to target if
94| desired.)
95*----------------------------------------------------------------------------*/
96#include "softfloat-macros.h"
97
98/*----------------------------------------------------------------------------
99| Functions and definitions to determine: (1) whether tininess for underflow
100| is detected before or after rounding by default, (2) what (if anything)
101| happens when exceptions are raised, (3) how signaling NaNs are distinguished
102| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
103| are propagated from function inputs to output. These details are target-
104| specific.
105*----------------------------------------------------------------------------*/
106#include "softfloat-specialize.h"
107
bb4d4bb3
PM
108/*----------------------------------------------------------------------------
109| Returns the fraction bits of the half-precision floating-point value `a'.
110*----------------------------------------------------------------------------*/
111
a49db98d 112static inline uint32_t extractFloat16Frac(float16 a)
bb4d4bb3
PM
113{
114 return float16_val(a) & 0x3ff;
115}
116
117/*----------------------------------------------------------------------------
118| Returns the exponent bits of the half-precision floating-point value `a'.
119*----------------------------------------------------------------------------*/
120
0c48262d 121static inline int extractFloat16Exp(float16 a)
bb4d4bb3
PM
122{
123 return (float16_val(a) >> 10) & 0x1f;
124}
125
126/*----------------------------------------------------------------------------
127| Returns the sign bit of the half-precision floating-point value `a'.
128*----------------------------------------------------------------------------*/
129
a49db98d 130static inline flag extractFloat16Sign(float16 a)
bb4d4bb3
PM
131{
132 return float16_val(a)>>15;
133}
134
158142c2
FB
135/*----------------------------------------------------------------------------
136| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
137| and 7, and returns the properly rounded 32-bit integer corresponding to the
138| input. If `zSign' is 1, the input is negated before being converted to an
139| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
140| is simply rounded to an integer, with the inexact exception raised if the
141| input cannot be represented exactly as an integer. However, if the fixed-
142| point input is too large, the invalid exception is raised and the largest
143| positive or negative integer is returned.
144*----------------------------------------------------------------------------*/
145
f4014512 146static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
158142c2 147{
8f506c70 148 int8_t roundingMode;
158142c2 149 flag roundNearestEven;
8f506c70 150 int8_t roundIncrement, roundBits;
760e1416 151 int32_t z;
158142c2 152
a2f2d288 153 roundingMode = status->float_rounding_mode;
158142c2 154 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
155 switch (roundingMode) {
156 case float_round_nearest_even:
f9288a76 157 case float_round_ties_away:
dc355b76
PM
158 roundIncrement = 0x40;
159 break;
160 case float_round_to_zero:
161 roundIncrement = 0;
162 break;
163 case float_round_up:
164 roundIncrement = zSign ? 0 : 0x7f;
165 break;
166 case float_round_down:
167 roundIncrement = zSign ? 0x7f : 0;
168 break;
169 default:
170 abort();
158142c2
FB
171 }
172 roundBits = absZ & 0x7F;
173 absZ = ( absZ + roundIncrement )>>7;
174 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
175 z = absZ;
176 if ( zSign ) z = - z;
177 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 178 float_raise(float_flag_invalid, status);
bb98fe42 179 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2 180 }
a2f2d288
PM
181 if (roundBits) {
182 status->float_exception_flags |= float_flag_inexact;
183 }
158142c2
FB
184 return z;
185
186}
187
188/*----------------------------------------------------------------------------
189| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
190| `absZ1', with binary point between bits 63 and 64 (between the input words),
191| and returns the properly rounded 64-bit integer corresponding to the input.
192| If `zSign' is 1, the input is negated before being converted to an integer.
193| Ordinarily, the fixed-point input is simply rounded to an integer, with
194| the inexact exception raised if the input cannot be represented exactly as
195| an integer. However, if the fixed-point input is too large, the invalid
196| exception is raised and the largest positive or negative integer is
197| returned.
198*----------------------------------------------------------------------------*/
199
f42c2224 200static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 201 float_status *status)
158142c2 202{
8f506c70 203 int8_t roundingMode;
158142c2 204 flag roundNearestEven, increment;
760e1416 205 int64_t z;
158142c2 206
a2f2d288 207 roundingMode = status->float_rounding_mode;
158142c2 208 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
209 switch (roundingMode) {
210 case float_round_nearest_even:
f9288a76 211 case float_round_ties_away:
dc355b76
PM
212 increment = ((int64_t) absZ1 < 0);
213 break;
214 case float_round_to_zero:
215 increment = 0;
216 break;
217 case float_round_up:
218 increment = !zSign && absZ1;
219 break;
220 case float_round_down:
221 increment = zSign && absZ1;
222 break;
223 default:
224 abort();
158142c2
FB
225 }
226 if ( increment ) {
227 ++absZ0;
228 if ( absZ0 == 0 ) goto overflow;
bb98fe42 229 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
230 }
231 z = absZ0;
232 if ( zSign ) z = - z;
233 if ( z && ( ( z < 0 ) ^ zSign ) ) {
234 overflow:
ff32e16e 235 float_raise(float_flag_invalid, status);
158142c2 236 return
bb98fe42 237 zSign ? (int64_t) LIT64( 0x8000000000000000 )
158142c2
FB
238 : LIT64( 0x7FFFFFFFFFFFFFFF );
239 }
a2f2d288
PM
240 if (absZ1) {
241 status->float_exception_flags |= float_flag_inexact;
242 }
158142c2
FB
243 return z;
244
245}
246
fb3ea83a
TM
247/*----------------------------------------------------------------------------
248| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
249| `absZ1', with binary point between bits 63 and 64 (between the input words),
250| and returns the properly rounded 64-bit unsigned integer corresponding to the
251| input. Ordinarily, the fixed-point input is simply rounded to an integer,
252| with the inexact exception raised if the input cannot be represented exactly
253| as an integer. However, if the fixed-point input is too large, the invalid
254| exception is raised and the largest unsigned integer is returned.
255*----------------------------------------------------------------------------*/
256
f42c2224 257static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
e5a41ffa 258 uint64_t absZ1, float_status *status)
fb3ea83a 259{
8f506c70 260 int8_t roundingMode;
fb3ea83a
TM
261 flag roundNearestEven, increment;
262
a2f2d288 263 roundingMode = status->float_rounding_mode;
fb3ea83a 264 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
265 switch (roundingMode) {
266 case float_round_nearest_even:
f9288a76 267 case float_round_ties_away:
dc355b76
PM
268 increment = ((int64_t)absZ1 < 0);
269 break;
270 case float_round_to_zero:
271 increment = 0;
272 break;
273 case float_round_up:
274 increment = !zSign && absZ1;
275 break;
276 case float_round_down:
277 increment = zSign && absZ1;
278 break;
279 default:
280 abort();
fb3ea83a
TM
281 }
282 if (increment) {
283 ++absZ0;
284 if (absZ0 == 0) {
ff32e16e 285 float_raise(float_flag_invalid, status);
fb3ea83a
TM
286 return LIT64(0xFFFFFFFFFFFFFFFF);
287 }
288 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
289 }
290
291 if (zSign && absZ0) {
ff32e16e 292 float_raise(float_flag_invalid, status);
fb3ea83a
TM
293 return 0;
294 }
295
296 if (absZ1) {
a2f2d288 297 status->float_exception_flags |= float_flag_inexact;
fb3ea83a
TM
298 }
299 return absZ0;
300}
301
158142c2
FB
302/*----------------------------------------------------------------------------
303| Returns the fraction bits of the single-precision floating-point value `a'.
304*----------------------------------------------------------------------------*/
305
a49db98d 306static inline uint32_t extractFloat32Frac( float32 a )
158142c2
FB
307{
308
f090c9d4 309 return float32_val(a) & 0x007FFFFF;
158142c2
FB
310
311}
312
313/*----------------------------------------------------------------------------
314| Returns the exponent bits of the single-precision floating-point value `a'.
315*----------------------------------------------------------------------------*/
316
0c48262d 317static inline int extractFloat32Exp(float32 a)
158142c2
FB
318{
319
f090c9d4 320 return ( float32_val(a)>>23 ) & 0xFF;
158142c2
FB
321
322}
323
324/*----------------------------------------------------------------------------
325| Returns the sign bit of the single-precision floating-point value `a'.
326*----------------------------------------------------------------------------*/
327
a49db98d 328static inline flag extractFloat32Sign( float32 a )
158142c2
FB
329{
330
f090c9d4 331 return float32_val(a)>>31;
158142c2
FB
332
333}
334
37d18660
PM
335/*----------------------------------------------------------------------------
336| If `a' is denormal and we are in flush-to-zero mode then set the
337| input-denormal exception and return zero. Otherwise just return the value.
338*----------------------------------------------------------------------------*/
e5a41ffa 339float32 float32_squash_input_denormal(float32 a, float_status *status)
37d18660 340{
a2f2d288 341 if (status->flush_inputs_to_zero) {
37d18660 342 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
ff32e16e 343 float_raise(float_flag_input_denormal, status);
37d18660
PM
344 return make_float32(float32_val(a) & 0x80000000);
345 }
346 }
347 return a;
348}
349
158142c2
FB
350/*----------------------------------------------------------------------------
351| Normalizes the subnormal single-precision floating-point value represented
352| by the denormalized significand `aSig'. The normalized exponent and
353| significand are stored at the locations pointed to by `zExpPtr' and
354| `zSigPtr', respectively.
355*----------------------------------------------------------------------------*/
356
static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /*
     * Left shift needed to leave 8 zero bits above the leading 1
     * (assuming countLeadingZeros32 returns the count of leading zero bits).
     */
    const int8_t leftShift = countLeadingZeros32(aSig) - 8;

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
367
368/*----------------------------------------------------------------------------
369| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
370| single-precision floating-point value, returning the result. After being
371| shifted into the proper positions, the three fields are simply added
372| together to form the result. This means that any integer portion of `zSig'
373| will be added into the exponent. Since a properly normalized significand
374| will have an integer portion equal to 1, the `zExp' input should be 1 less
375| than the desired result exponent whenever `zSig' is a complete, normalized
376| significand.
377*----------------------------------------------------------------------------*/
378
0c48262d 379static inline float32 packFloat32(flag zSign, int zExp, uint32_t zSig)
158142c2
FB
380{
381
f090c9d4 382 return make_float32(
bb98fe42 383 ( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
158142c2
FB
384
385}
386
387/*----------------------------------------------------------------------------
388| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
389| and significand `zSig', and returns the proper single-precision floating-
390| point value corresponding to the abstract input. Ordinarily, the abstract
391| value is simply rounded and packed into the single-precision format, with
392| the inexact exception raised if the abstract input cannot be represented
393| exactly. However, if the abstract value is too large, the overflow and
394| inexact exceptions are raised and an infinity or maximal finite value is
395| returned. If the abstract value is too small, the input value is rounded to
396| a subnormal number, and the underflow and inexact exceptions are raised if
397| the abstract input cannot be represented exactly as a subnormal single-
398| precision floating-point number.
399| The input significand `zSig' has its binary point between bits 30
400| and 29, which is 7 bits to the left of the usual location. This shifted
401| significand must be normalized or smaller. If `zSig' is not normalized,
402| `zExp' must be 0; in that case, the result returned is a subnormal number,
403| and it must not require rounding. In the usual case that `zSig' is
404| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
405| The handling of underflow and overflow follows the IEC/IEEE Standard for
406| Binary Floating-Point Arithmetic.
407*----------------------------------------------------------------------------*/
408
0c48262d 409static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 410 float_status *status)
158142c2 411{
8f506c70 412 int8_t roundingMode;
158142c2 413 flag roundNearestEven;
8f506c70 414 int8_t roundIncrement, roundBits;
158142c2
FB
415 flag isTiny;
416
a2f2d288 417 roundingMode = status->float_rounding_mode;
158142c2 418 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
419 switch (roundingMode) {
420 case float_round_nearest_even:
f9288a76 421 case float_round_ties_away:
dc355b76
PM
422 roundIncrement = 0x40;
423 break;
424 case float_round_to_zero:
425 roundIncrement = 0;
426 break;
427 case float_round_up:
428 roundIncrement = zSign ? 0 : 0x7f;
429 break;
430 case float_round_down:
431 roundIncrement = zSign ? 0x7f : 0;
432 break;
433 default:
434 abort();
435 break;
158142c2
FB
436 }
437 roundBits = zSig & 0x7F;
bb98fe42 438 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
439 if ( ( 0xFD < zExp )
440 || ( ( zExp == 0xFD )
bb98fe42 441 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 442 ) {
ff32e16e 443 float_raise(float_flag_overflow | float_flag_inexact, status);
f090c9d4 444 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
158142c2
FB
445 }
446 if ( zExp < 0 ) {
a2f2d288 447 if (status->flush_to_zero) {
ff32e16e 448 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
449 return packFloat32(zSign, 0, 0);
450 }
158142c2 451 isTiny =
a2f2d288
PM
452 (status->float_detect_tininess
453 == float_tininess_before_rounding)
158142c2
FB
454 || ( zExp < -1 )
455 || ( zSig + roundIncrement < 0x80000000 );
456 shift32RightJamming( zSig, - zExp, &zSig );
457 zExp = 0;
458 roundBits = zSig & 0x7F;
ff32e16e
PM
459 if (isTiny && roundBits) {
460 float_raise(float_flag_underflow, status);
461 }
158142c2
FB
462 }
463 }
a2f2d288
PM
464 if (roundBits) {
465 status->float_exception_flags |= float_flag_inexact;
466 }
158142c2
FB
467 zSig = ( zSig + roundIncrement )>>7;
468 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
469 if ( zSig == 0 ) zExp = 0;
470 return packFloat32( zSign, zExp, zSig );
471
472}
473
474/*----------------------------------------------------------------------------
475| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
476| and significand `zSig', and returns the proper single-precision floating-
477| point value corresponding to the abstract input. This routine is just like
478| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
479| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
480| floating-point exponent.
481*----------------------------------------------------------------------------*/
482
483static float32
0c48262d 484 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 485 float_status *status)
158142c2 486{
8f506c70 487 int8_t shiftCount;
158142c2
FB
488
489 shiftCount = countLeadingZeros32( zSig ) - 1;
ff32e16e
PM
490 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
491 status);
158142c2
FB
492
493}
494
495/*----------------------------------------------------------------------------
496| Returns the fraction bits of the double-precision floating-point value `a'.
497*----------------------------------------------------------------------------*/
498
a49db98d 499static inline uint64_t extractFloat64Frac( float64 a )
158142c2
FB
500{
501
f090c9d4 502 return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
158142c2
FB
503
504}
505
506/*----------------------------------------------------------------------------
507| Returns the exponent bits of the double-precision floating-point value `a'.
508*----------------------------------------------------------------------------*/
509
0c48262d 510static inline int extractFloat64Exp(float64 a)
158142c2
FB
511{
512
f090c9d4 513 return ( float64_val(a)>>52 ) & 0x7FF;
158142c2
FB
514
515}
516
517/*----------------------------------------------------------------------------
518| Returns the sign bit of the double-precision floating-point value `a'.
519*----------------------------------------------------------------------------*/
520
a49db98d 521static inline flag extractFloat64Sign( float64 a )
158142c2
FB
522{
523
f090c9d4 524 return float64_val(a)>>63;
158142c2
FB
525
526}
527
37d18660
PM
528/*----------------------------------------------------------------------------
529| If `a' is denormal and we are in flush-to-zero mode then set the
530| input-denormal exception and return zero. Otherwise just return the value.
531*----------------------------------------------------------------------------*/
e5a41ffa 532float64 float64_squash_input_denormal(float64 a, float_status *status)
37d18660 533{
a2f2d288 534 if (status->flush_inputs_to_zero) {
37d18660 535 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
ff32e16e 536 float_raise(float_flag_input_denormal, status);
37d18660
PM
537 return make_float64(float64_val(a) & (1ULL << 63));
538 }
539 }
540 return a;
541}
542
158142c2
FB
543/*----------------------------------------------------------------------------
544| Normalizes the subnormal double-precision floating-point value represented
545| by the denormalized significand `aSig'. The normalized exponent and
546| significand are stored at the locations pointed to by `zExpPtr' and
547| `zSigPtr', respectively.
548*----------------------------------------------------------------------------*/
549
static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /*
     * Left shift needed to leave 11 zero bits above the leading 1
     * (assuming countLeadingZeros64 returns the count of leading zero bits).
     */
    const int8_t leftShift = countLeadingZeros64(aSig) - 11;

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
560
561/*----------------------------------------------------------------------------
562| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
563| double-precision floating-point value, returning the result. After being
564| shifted into the proper positions, the three fields are simply added
565| together to form the result. This means that any integer portion of `zSig'
566| will be added into the exponent. Since a properly normalized significand
567| will have an integer portion equal to 1, the `zExp' input should be 1 less
568| than the desired result exponent whenever `zSig' is a complete, normalized
569| significand.
570*----------------------------------------------------------------------------*/
571
0c48262d 572static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
158142c2
FB
573{
574
f090c9d4 575 return make_float64(
bb98fe42 576 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
577
578}
579
580/*----------------------------------------------------------------------------
581| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
582| and significand `zSig', and returns the proper double-precision floating-
583| point value corresponding to the abstract input. Ordinarily, the abstract
584| value is simply rounded and packed into the double-precision format, with
585| the inexact exception raised if the abstract input cannot be represented
586| exactly. However, if the abstract value is too large, the overflow and
587| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
588| returned. If the abstract value is too small, the input value is rounded to
589| a subnormal number, and the underflow and inexact exceptions are raised if
590| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
591| precision floating-point number.
592| The input significand `zSig' has its binary point between bits 62
593| and 61, which is 10 bits to the left of the usual location. This shifted
594| significand must be normalized or smaller. If `zSig' is not normalized,
595| `zExp' must be 0; in that case, the result returned is a subnormal number,
596| and it must not require rounding. In the usual case that `zSig' is
597| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
598| The handling of underflow and overflow follows the IEC/IEEE Standard for
599| Binary Floating-Point Arithmetic.
600*----------------------------------------------------------------------------*/
601
0c48262d 602static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 603 float_status *status)
158142c2 604{
8f506c70 605 int8_t roundingMode;
158142c2 606 flag roundNearestEven;
0c48262d 607 int roundIncrement, roundBits;
158142c2
FB
608 flag isTiny;
609
a2f2d288 610 roundingMode = status->float_rounding_mode;
158142c2 611 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
612 switch (roundingMode) {
613 case float_round_nearest_even:
f9288a76 614 case float_round_ties_away:
dc355b76
PM
615 roundIncrement = 0x200;
616 break;
617 case float_round_to_zero:
618 roundIncrement = 0;
619 break;
620 case float_round_up:
621 roundIncrement = zSign ? 0 : 0x3ff;
622 break;
623 case float_round_down:
624 roundIncrement = zSign ? 0x3ff : 0;
625 break;
626 default:
627 abort();
158142c2
FB
628 }
629 roundBits = zSig & 0x3FF;
bb98fe42 630 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
631 if ( ( 0x7FD < zExp )
632 || ( ( zExp == 0x7FD )
bb98fe42 633 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 634 ) {
ff32e16e 635 float_raise(float_flag_overflow | float_flag_inexact, status);
f090c9d4 636 return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
158142c2
FB
637 }
638 if ( zExp < 0 ) {
a2f2d288 639 if (status->flush_to_zero) {
ff32e16e 640 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
641 return packFloat64(zSign, 0, 0);
642 }
158142c2 643 isTiny =
a2f2d288
PM
644 (status->float_detect_tininess
645 == float_tininess_before_rounding)
158142c2
FB
646 || ( zExp < -1 )
647 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
648 shift64RightJamming( zSig, - zExp, &zSig );
649 zExp = 0;
650 roundBits = zSig & 0x3FF;
ff32e16e
PM
651 if (isTiny && roundBits) {
652 float_raise(float_flag_underflow, status);
653 }
158142c2
FB
654 }
655 }
a2f2d288
PM
656 if (roundBits) {
657 status->float_exception_flags |= float_flag_inexact;
658 }
158142c2
FB
659 zSig = ( zSig + roundIncrement )>>10;
660 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
661 if ( zSig == 0 ) zExp = 0;
662 return packFloat64( zSign, zExp, zSig );
663
664}
665
666/*----------------------------------------------------------------------------
667| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
668| and significand `zSig', and returns the proper double-precision floating-
669| point value corresponding to the abstract input. This routine is just like
670| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
671| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
672| floating-point exponent.
673*----------------------------------------------------------------------------*/
674
675static float64
0c48262d 676 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 677 float_status *status)
158142c2 678{
8f506c70 679 int8_t shiftCount;
158142c2
FB
680
681 shiftCount = countLeadingZeros64( zSig ) - 1;
ff32e16e
PM
682 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
683 status);
158142c2
FB
684
685}
686
158142c2
FB
687/*----------------------------------------------------------------------------
688| Returns the fraction bits of the extended double-precision floating-point
689| value `a'.
690*----------------------------------------------------------------------------*/
691
a49db98d 692static inline uint64_t extractFloatx80Frac( floatx80 a )
158142c2
FB
693{
694
695 return a.low;
696
697}
698
699/*----------------------------------------------------------------------------
700| Returns the exponent bits of the extended double-precision floating-point
701| value `a'.
702*----------------------------------------------------------------------------*/
703
f4014512 704static inline int32_t extractFloatx80Exp( floatx80 a )
158142c2
FB
705{
706
707 return a.high & 0x7FFF;
708
709}
710
711/*----------------------------------------------------------------------------
712| Returns the sign bit of the extended double-precision floating-point value
713| `a'.
714*----------------------------------------------------------------------------*/
715
a49db98d 716static inline flag extractFloatx80Sign( floatx80 a )
158142c2
FB
717{
718
719 return a.high>>15;
720
721}
722
723/*----------------------------------------------------------------------------
724| Normalizes the subnormal extended double-precision floating-point value
725| represented by the denormalized significand `aSig'. The normalized exponent
726| and significand are stored at the locations pointed to by `zExpPtr' and
727| `zSigPtr', respectively.
728*----------------------------------------------------------------------------*/
729
static void
 normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr)
{
    /*
     * Shift by the full count reported by countLeadingZeros64
     * (presumably moving the leading 1 to bit 63 -- helper not shown here).
     */
    const int8_t leftShift = countLeadingZeros64(aSig);

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
740
741/*----------------------------------------------------------------------------
742| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
743| extended double-precision floating-point value, returning the result.
744*----------------------------------------------------------------------------*/
745
f4014512 746static inline floatx80 packFloatx80( flag zSign, int32_t zExp, uint64_t zSig )
158142c2
FB
747{
748 floatx80 z;
749
750 z.low = zSig;
bb98fe42 751 z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
158142c2
FB
752 return z;
753
754}
755
756/*----------------------------------------------------------------------------
757| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
758| and extended significand formed by the concatenation of `zSig0' and `zSig1',
759| and returns the proper extended double-precision floating-point value
760| corresponding to the abstract input. Ordinarily, the abstract value is
761| rounded and packed into the extended double-precision format, with the
762| inexact exception raised if the abstract input cannot be represented
763| exactly. However, if the abstract value is too large, the overflow and
764| inexact exceptions are raised and an infinity or maximal finite value is
765| returned. If the abstract value is too small, the input value is rounded to
766| a subnormal number, and the underflow and inexact exceptions are raised if
767| the abstract input cannot be represented exactly as a subnormal extended
768| double-precision floating-point number.
769| If `roundingPrecision' is 32 or 64, the result is rounded to the same
770| number of bits as single or double precision, respectively. Otherwise, the
771| result is rounded to the full precision of the extended double-precision
772| format.
773| The input significand must be normalized or smaller. If the input
774| significand is not normalized, `zExp' must be 0; in that case, the result
775| returned is a subnormal number, and it must not require rounding. The
776| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
777| Floating-Point Arithmetic.
778*----------------------------------------------------------------------------*/
779
8f506c70 780static floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
f4014512 781 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
e5a41ffa 782 float_status *status)
158142c2 783{
8f506c70 784 int8_t roundingMode;
158142c2 785 flag roundNearestEven, increment, isTiny;
f42c2224 786 int64_t roundIncrement, roundMask, roundBits;
158142c2 787
a2f2d288 788 roundingMode = status->float_rounding_mode;
158142c2
FB
789 roundNearestEven = ( roundingMode == float_round_nearest_even );
/* Only the values 64 and 32 select the reduced-precision path; 80 and
 * every other value are rounded at full precision (label precision80). */
790 if ( roundingPrecision == 80 ) goto precision80;
791 if ( roundingPrecision == 64 ) {
/* roundMask covers the low 11 bits of zSig0 that are rounded away;
 * roundIncrement is half of (roundMask + 1). */
792 roundIncrement = LIT64( 0x0000000000000400 );
793 roundMask = LIT64( 0x00000000000007FF );
794 }
795 else if ( roundingPrecision == 32 ) {
/* Same scheme, with the low 40 bits of zSig0 rounded away. */
796 roundIncrement = LIT64( 0x0000008000000000 );
797 roundMask = LIT64( 0x000000FFFFFFFFFF );
798 }
799 else {
800 goto precision80;
801 }
/* Fold any nonzero bit of zSig1 into bit 0 of zSig0 (sticky bit), so the
 * rest of this path only has to look at zSig0. */
802 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
/* Nearest modes keep the half-way increment; round-to-zero adds nothing;
 * the directed modes add the whole mask, but only for the sign that must
 * move away from zero. */
803 switch (roundingMode) {
804 case float_round_nearest_even:
f9288a76 805 case float_round_ties_away:
dc355b76
PM
806 break;
807 case float_round_to_zero:
808 roundIncrement = 0;
809 break;
810 case float_round_up:
811 roundIncrement = zSign ? 0 : roundMask;
812 break;
813 case float_round_down:
814 roundIncrement = zSign ? roundMask : 0;
815 break;
816 default:
817 abort();
158142c2
FB
818 }
819 roundBits = zSig0 & roundMask;
/* Unsigned comparison: true when zExp >= 0x7FFE and also when zExp <= 0
 * (zExp - 1 is then negative and wraps to a large unsigned value). */
bb98fe42 820 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
/* Overflow: exponent above 0x7FFE, or equal to 0x7FFE with the rounding
 * addition carrying out of the 64-bit significand. */
821 if ( ( 0x7FFE < zExp )
822 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
823 ) {
824 goto overflow;
825 }
826 if ( zExp <= 0 ) {
/* With flush_to_zero set, raise output_denormal and return a signed zero. */
a2f2d288 827 if (status->flush_to_zero) {
ff32e16e 828 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
829 return packFloatx80(zSign, 0, 0);
830 }
/* The value is tiny if tininess is detected before rounding, if the
 * exponent is negative, or if the rounding addition does not carry out
 * of zSig0. */
158142c2 831 isTiny =
a2f2d288
PM
832 (status->float_detect_tininess
833 == float_tininess_before_rounding)
158142c2
FB
834 || ( zExp < 0 )
835 || ( zSig0 <= zSig0 + roundIncrement );
/* NOTE(review): shift64RightJamming is defined elsewhere; presumably a
 * right shift that keeps a sticky bit -- confirm. */
836 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
837 zExp = 0;
838 roundBits = zSig0 & roundMask;
ff32e16e
PM
839 if (isTiny && roundBits) {
840 float_raise(float_flag_underflow, status);
841 }
a2f2d288
PM
842 if (roundBits) {
843 status->float_exception_flags |= float_flag_inexact;
844 }
158142c2 845 zSig0 += roundIncrement;
/* If rounding set the top bit of the significand, the exponent becomes 1. */
bb98fe42 846 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
847 roundIncrement = roundMask + 1;
/* Exact tie under round-to-nearest-even: widen the mask by one bit so the
 * lowest kept bit is cleared as well. */
848 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
849 roundMask |= roundIncrement;
850 }
851 zSig0 &= ~ roundMask;
852 return packFloatx80( zSign, zExp, zSig0 );
853 }
854 }
a2f2d288
PM
855 if (roundBits) {
856 status->float_exception_flags |= float_flag_inexact;
857 }
158142c2
FB
858 zSig0 += roundIncrement;
/* A result smaller than the addend means the addition carried out of 64
 * bits: bump the exponent and set the significand to 0x8000000000000000. */
859 if ( zSig0 < roundIncrement ) {
860 ++zExp;
861 zSig0 = LIT64( 0x8000000000000000 );
862 }
863 roundIncrement = roundMask + 1;
/* Tie under round-to-nearest-even: also clear the lowest kept bit. */
864 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
865 roundMask |= roundIncrement;
866 }
867 zSig0 &= ~ roundMask;
868 if ( zSig0 == 0 ) zExp = 0;
869 return packFloatx80( zSign, zExp, zSig0 );
870 precision80:
dc355b76
PM
/* Full precision: zSig1 holds the bits below the result.  Nearest modes
 * increment when its top bit is set; directed modes increment when it is
 * nonzero and the sign calls for rounding away from zero. */
871 switch (roundingMode) {
872 case float_round_nearest_even:
f9288a76 873 case float_round_ties_away:
dc355b76
PM
874 increment = ((int64_t)zSig1 < 0);
875 break;
876 case float_round_to_zero:
877 increment = 0;
878 break;
879 case float_round_up:
880 increment = !zSign && zSig1;
881 break;
882 case float_round_down:
883 increment = zSign && zSig1;
884 break;
885 default:
886 abort();
158142c2 887 }
/* Same unsigned range test as above: zExp >= 0x7FFE or zExp <= 0. */
bb98fe42 888 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
889 if ( ( 0x7FFE < zExp )
890 || ( ( zExp == 0x7FFE )
891 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
892 && increment
893 )
894 ) {
/* Reached from here, roundMask is zeroed so ~roundMask below is all ones.
 * When the reduced-precision path jumps to `overflow', roundMask keeps its
 * precision mask and the maximal finite value has those low bits clear. */
895 roundMask = 0;
896 overflow:
ff32e16e 897 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
/* Modes that round toward zero for this sign return the exponent 0x7FFE
 * with significand ~roundMask; all other cases return exponent 0x7FFF with
 * significand 0x8000000000000000. */
898 if ( ( roundingMode == float_round_to_zero )
899 || ( zSign && ( roundingMode == float_round_up ) )
900 || ( ! zSign && ( roundingMode == float_round_down ) )
901 ) {
902 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
903 }
904 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
905 }
906 if ( zExp <= 0 ) {
/* Note: unlike the reduced-precision path, no flush_to_zero check is made
 * here. */
907 isTiny =
a2f2d288
PM
908 (status->float_detect_tininess
909 == float_tininess_before_rounding)
158142c2
FB
910 || ( zExp < 0 )
911 || ! increment
912 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
/* NOTE(review): shift64ExtraRightJamming is defined elsewhere; presumably
 * shifts zSig0:zSig1 right while keeping a sticky bit -- confirm. */
913 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
914 zExp = 0;
ff32e16e
PM
915 if (isTiny && zSig1) {
916 float_raise(float_flag_underflow, status);
917 }
a2f2d288
PM
918 if (zSig1) {
919 status->float_exception_flags |= float_flag_inexact;
920 }
dc355b76
PM
/* The shift changed zSig1, so the increment decision is recomputed. */
921 switch (roundingMode) {
922 case float_round_nearest_even:
f9288a76 923 case float_round_ties_away:
dc355b76
PM
924 increment = ((int64_t)zSig1 < 0);
925 break;
926 case float_round_to_zero:
927 increment = 0;
928 break;
929 case float_round_up:
930 increment = !zSign && zSig1;
931 break;
932 case float_round_down:
933 increment = zSign && zSig1;
934 break;
935 default:
936 abort();
158142c2
FB
937 }
938 if ( increment ) {
939 ++zSig0;
/* Clears bit 0 of zSig0 when zSig1 has no bits set below its top bit and
 * the mode is round-to-nearest-even (tie rounds to even). */
940 zSig0 &=
bb98fe42
AF
941 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
942 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
943 }
944 return packFloatx80( zSign, zExp, zSig0 );
945 }
946 }
a2f2d288
PM
947 if (zSig1) {
948 status->float_exception_flags |= float_flag_inexact;
949 }
158142c2
FB
950 if ( increment ) {
951 ++zSig0;
/* Wrap to zero means the increment carried out of 64 bits. */
952 if ( zSig0 == 0 ) {
953 ++zExp;
954 zSig0 = LIT64( 0x8000000000000000 );
955 }
956 else {
/* Tie under round-to-nearest-even: clear bit 0 of the result. */
bb98fe42 957 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
958 }
959 }
960 else {
961 if ( zSig0 == 0 ) zExp = 0;
962 }
963 return packFloatx80( zSign, zExp, zSig0 );
964
965}
966
967/*----------------------------------------------------------------------------
968| Takes an abstract floating-point value having sign `zSign', exponent
969| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
970| and returns the proper extended double-precision floating-point value
971| corresponding to the abstract input. This routine is just like
972| `roundAndPackFloatx80' except that the input significand does not have to be
973| normalized.
974*----------------------------------------------------------------------------*/
975
8f506c70 976static floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
f4014512 977 flag zSign, int32_t zExp,
e5a41ffa
PM
978 uint64_t zSig0, uint64_t zSig1,
979 float_status *status)
158142c2 980{
8f506c70 981 int8_t shiftCount;
158142c2
FB
982
983 if ( zSig0 == 0 ) {
984 zSig0 = zSig1;
985 zSig1 = 0;
986 zExp -= 64;
987 }
988 shiftCount = countLeadingZeros64( zSig0 );
989 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
990 zExp -= shiftCount;
ff32e16e
PM
991 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
992 zSig0, zSig1, status);
158142c2
FB
993
994}
995
158142c2
FB
996/*----------------------------------------------------------------------------
997| Returns the least-significant 64 fraction bits of the quadruple-precision
998| floating-point value `a'.
999*----------------------------------------------------------------------------*/
1000
a49db98d 1001static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
1002{
1003
1004 return a.low;
1005
1006}
1007
1008/*----------------------------------------------------------------------------
1009| Returns the most-significant 48 fraction bits of the quadruple-precision
1010| floating-point value `a'.
1011*----------------------------------------------------------------------------*/
1012
a49db98d 1013static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
1014{
1015
1016 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
1017
1018}
1019
1020/*----------------------------------------------------------------------------
1021| Returns the exponent bits of the quadruple-precision floating-point value
1022| `a'.
1023*----------------------------------------------------------------------------*/
1024
f4014512 1025static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
1026{
1027
1028 return ( a.high>>48 ) & 0x7FFF;
1029
1030}
1031
1032/*----------------------------------------------------------------------------
1033| Returns the sign bit of the quadruple-precision floating-point value `a'.
1034*----------------------------------------------------------------------------*/
1035
a49db98d 1036static inline flag extractFloat128Sign( float128 a )
158142c2
FB
1037{
1038
1039 return a.high>>63;
1040
1041}
1042
/*----------------------------------------------------------------------------
| Normalizes a subnormal quadruple-precision significand given as the
| concatenation of `aSig0' and `aSig1'.  The normalized exponent is written
| through `zExpPtr'; the most significant 49 bits of the normalized
| significand go to `zSig0Ptr' and the least significant 64 bits to
| `zSig1Ptr'.
*----------------------------------------------------------------------------*/

static void normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                                       int32_t *zExpPtr, uint64_t *zSig0Ptr,
                                       uint64_t *zSig1Ptr)
{
    int8_t shift;

    if (aSig0 != 0) {
        /* Leading bit is in the high word: a short left shift suffices. */
        shift = countLeadingZeros64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /* High word is zero: everything comes from the low word. */
    shift = countLeadingZeros64(aSig1) - 15;
    if (shift >= 0) {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        /* The leading bit sits above bit 48: split aSig1 across both words. */
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
1083
1084/*----------------------------------------------------------------------------
1085| Packs the sign `zSign', the exponent `zExp', and the significand formed
1086| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
1087| floating-point value, returning the result. After being shifted into the
1088| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
1089| added together to form the most significant 32 bits of the result. This
1090| means that any integer portion of `zSig0' will be added into the exponent.
1091| Since a properly normalized significand will have an integer portion equal
1092| to 1, the `zExp' input should be 1 less than the desired result exponent
1093| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
1094| significand.
1095*----------------------------------------------------------------------------*/
1096
a49db98d 1097static inline float128
f4014512 1098 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
158142c2
FB
1099{
1100 float128 z;
1101
1102 z.low = zSig1;
bb98fe42 1103 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
158142c2
FB
1104 return z;
1105
1106}
1107
1108/*----------------------------------------------------------------------------
1109| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1110| and extended significand formed by the concatenation of `zSig0', `zSig1',
1111| and `zSig2', and returns the proper quadruple-precision floating-point value
1112| corresponding to the abstract input. Ordinarily, the abstract value is
1113| simply rounded and packed into the quadruple-precision format, with the
1114| inexact exception raised if the abstract input cannot be represented
1115| exactly. However, if the abstract value is too large, the overflow and
1116| inexact exceptions are raised and an infinity or maximal finite value is
1117| returned. If the abstract value is too small, the input value is rounded to
1118| a subnormal number, and the underflow and inexact exceptions are raised if
1119| the abstract input cannot be represented exactly as a subnormal quadruple-
1120| precision floating-point number.
1121| The input significand must be normalized or smaller. If the input
1122| significand is not normalized, `zExp' must be 0; in that case, the result
1123| returned is a subnormal number, and it must not require rounding. In the
1124| usual case that the input significand is normalized, `zExp' must be 1 less
1125| than the ``true'' floating-point exponent. The handling of underflow and
1126| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1127*----------------------------------------------------------------------------*/
1128
f4014512 1129static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
1130 uint64_t zSig0, uint64_t zSig1,
1131 uint64_t zSig2, float_status *status)
158142c2 1132{
8f506c70 1133 int8_t roundingMode;
158142c2
FB
1134 flag roundNearestEven, increment, isTiny;
1135
a2f2d288 1136 roundingMode = status->float_rounding_mode;
158142c2 1137 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
/* zSig2 holds the bits below the result.  Nearest modes increment when its
 * top bit is set; directed modes increment when it is nonzero and the sign
 * calls for rounding away from zero; round-to-zero never increments. */
1138 switch (roundingMode) {
1139 case float_round_nearest_even:
f9288a76 1140 case float_round_ties_away:
dc355b76
PM
1141 increment = ((int64_t)zSig2 < 0);
1142 break;
1143 case float_round_to_zero:
1144 increment = 0;
1145 break;
1146 case float_round_up:
1147 increment = !zSign && zSig2;
1148 break;
1149 case float_round_down:
1150 increment = zSign && zSig2;
1151 break;
1152 default:
1153 abort();
158142c2 1154 }
/* Unsigned comparison: true when zExp >= 0x7FFD and also when zExp is
 * negative (which converts to a large unsigned value). */
bb98fe42 1155 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
/* Overflow: exponent above 0x7FFD, or equal to 0x7FFD with the significand
 * at 0x0001FFFFFFFFFFFF:0xFFFFFFFFFFFFFFFF and an increment pending.
 * NOTE(review): eq128 is defined elsewhere; presumably a 128-bit equality
 * test -- confirm. */
1156 if ( ( 0x7FFD < zExp )
1157 || ( ( zExp == 0x7FFD )
1158 && eq128(
1159 LIT64( 0x0001FFFFFFFFFFFF ),
1160 LIT64( 0xFFFFFFFFFFFFFFFF ),
1161 zSig0,
1162 zSig1
1163 )
1164 && increment
1165 )
1166 ) {
ff32e16e 1167 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
/* Modes that round toward zero for this sign return exponent 0x7FFE with
 * an all-ones fraction; all other cases return exponent 0x7FFF with a zero
 * fraction. */
1168 if ( ( roundingMode == float_round_to_zero )
1169 || ( zSign && ( roundingMode == float_round_up ) )
1170 || ( ! zSign && ( roundingMode == float_round_down ) )
1171 ) {
1172 return
1173 packFloat128(
1174 zSign,
1175 0x7FFE,
1176 LIT64( 0x0000FFFFFFFFFFFF ),
1177 LIT64( 0xFFFFFFFFFFFFFFFF )
1178 );
1179 }
1180 return packFloat128( zSign, 0x7FFF, 0, 0 );
1181 }
1182 if ( zExp < 0 ) {
/* With flush_to_zero set, raise output_denormal and return a signed zero. */
a2f2d288 1183 if (status->flush_to_zero) {
ff32e16e 1184 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
1185 return packFloat128(zSign, 0, 0, 0);
1186 }
/* The value is tiny if tininess is detected before rounding, if zExp is
 * below -1, if no increment is pending, or if the significand is below
 * 0x0001FFFFFFFFFFFF:0xFFFFFFFFFFFFFFFF.
 * NOTE(review): lt128 is defined elsewhere; presumably a 128-bit
 * less-than test -- confirm. */
158142c2 1187 isTiny =
a2f2d288
PM
1188 (status->float_detect_tininess
1189 == float_tininess_before_rounding)
158142c2
FB
1190 || ( zExp < -1 )
1191 || ! increment
1192 || lt128(
1193 zSig0,
1194 zSig1,
1195 LIT64( 0x0001FFFFFFFFFFFF ),
1196 LIT64( 0xFFFFFFFFFFFFFFFF )
1197 );
/* NOTE(review): shift128ExtraRightJamming is defined elsewhere; presumably
 * shifts zSig0:zSig1:zSig2 right while keeping a sticky bit -- confirm. */
1198 shift128ExtraRightJamming(
1199 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1200 zExp = 0;
ff32e16e
PM
1201 if (isTiny && zSig2) {
1202 float_raise(float_flag_underflow, status);
1203 }
dc355b76
PM
/* The shift changed zSig2, so the increment decision is recomputed. */
1204 switch (roundingMode) {
1205 case float_round_nearest_even:
f9288a76 1206 case float_round_ties_away:
dc355b76
PM
1207 increment = ((int64_t)zSig2 < 0);
1208 break;
1209 case float_round_to_zero:
1210 increment = 0;
1211 break;
1212 case float_round_up:
1213 increment = !zSign && zSig2;
1214 break;
1215 case float_round_down:
1216 increment = zSign && zSig2;
1217 break;
1218 default:
1219 abort();
158142c2
FB
1220 }
1221 }
1222 }
a2f2d288
PM
/* Any nonzero discarded bits make the result inexact. */
1223 if (zSig2) {
1224 status->float_exception_flags |= float_flag_inexact;
1225 }
158142c2
FB
1226 if ( increment ) {
1227 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
/* zSig2 + zSig2 == 0 holds when zSig2 has no bits set below its top bit;
 * under round-to-nearest-even that is a tie, so bit 0 of zSig1 is cleared. */
1228 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1229 }
1230 else {
1231 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1232 }
1233 return packFloat128( zSign, zExp, zSig0, zSig1 );
1234
1235}
1236
1237/*----------------------------------------------------------------------------
1238| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1239| and significand formed by the concatenation of `zSig0' and `zSig1', and
1240| returns the proper quadruple-precision floating-point value corresponding
1241| to the abstract input. This routine is just like `roundAndPackFloat128'
1242| except that the input significand has fewer bits and does not have to be
1243| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
1244| point exponent.
1245*----------------------------------------------------------------------------*/
1246
f4014512 1247static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
1248 uint64_t zSig0, uint64_t zSig1,
1249 float_status *status)
158142c2 1250{
8f506c70 1251 int8_t shiftCount;
bb98fe42 1252 uint64_t zSig2;
158142c2
FB
1253
1254 if ( zSig0 == 0 ) {
1255 zSig0 = zSig1;
1256 zSig1 = 0;
1257 zExp -= 64;
1258 }
1259 shiftCount = countLeadingZeros64( zSig0 ) - 15;
1260 if ( 0 <= shiftCount ) {
1261 zSig2 = 0;
1262 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1263 }
1264 else {
1265 shift128ExtraRightJamming(
1266 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1267 }
1268 zExp -= shiftCount;
ff32e16e 1269 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
1270
1271}
1272
158142c2
FB
1273/*----------------------------------------------------------------------------
1274| Returns the result of converting the 32-bit two's complement integer `a'
1275| to the single-precision floating-point format. The conversion is performed
1276| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1277*----------------------------------------------------------------------------*/
1278
e5a41ffa 1279float32 int32_to_float32(int32_t a, float_status *status)
158142c2
FB
1280{
1281 flag zSign;
1282
f090c9d4 1283 if ( a == 0 ) return float32_zero;
bb98fe42 1284 if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
158142c2 1285 zSign = ( a < 0 );
ff32e16e 1286 return normalizeRoundAndPackFloat32(zSign, 0x9C, zSign ? -a : a, status);
158142c2
FB
1287}
1288
1289/*----------------------------------------------------------------------------
1290| Returns the result of converting the 32-bit two's complement integer `a'
1291| to the double-precision floating-point format. The conversion is performed
1292| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1293*----------------------------------------------------------------------------*/
1294
e5a41ffa 1295float64 int32_to_float64(int32_t a, float_status *status)
158142c2
FB
1296{
1297 flag zSign;
3a87d009 1298 uint32_t absA;
8f506c70 1299 int8_t shiftCount;
bb98fe42 1300 uint64_t zSig;
158142c2 1301
f090c9d4 1302 if ( a == 0 ) return float64_zero;
158142c2
FB
1303 zSign = ( a < 0 );
1304 absA = zSign ? - a : a;
1305 shiftCount = countLeadingZeros32( absA ) + 21;
1306 zSig = absA;
1307 return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1308
1309}
1310
158142c2
FB
1311/*----------------------------------------------------------------------------
1312| Returns the result of converting the 32-bit two's complement integer `a'
1313| to the extended double-precision floating-point format. The conversion
1314| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1315| Arithmetic.
1316*----------------------------------------------------------------------------*/
1317
e5a41ffa 1318floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2
FB
1319{
1320 flag zSign;
3a87d009 1321 uint32_t absA;
8f506c70 1322 int8_t shiftCount;
bb98fe42 1323 uint64_t zSig;
158142c2
FB
1324
1325 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1326 zSign = ( a < 0 );
1327 absA = zSign ? - a : a;
1328 shiftCount = countLeadingZeros32( absA ) + 32;
1329 zSig = absA;
1330 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1331
1332}
1333
158142c2
FB
1334/*----------------------------------------------------------------------------
1335| Returns the result of converting the 32-bit two's complement integer `a' to
1336| the quadruple-precision floating-point format. The conversion is performed
1337| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1338*----------------------------------------------------------------------------*/
1339
e5a41ffa 1340float128 int32_to_float128(int32_t a, float_status *status)
158142c2
FB
1341{
1342 flag zSign;
3a87d009 1343 uint32_t absA;
8f506c70 1344 int8_t shiftCount;
bb98fe42 1345 uint64_t zSig0;
158142c2
FB
1346
1347 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1348 zSign = ( a < 0 );
1349 absA = zSign ? - a : a;
1350 shiftCount = countLeadingZeros32( absA ) + 17;
1351 zSig0 = absA;
1352 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1353
1354}
1355
158142c2
FB
1356/*----------------------------------------------------------------------------
1357| Returns the result of converting the 64-bit two's complement integer `a'
1358| to the single-precision floating-point format. The conversion is performed
1359| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1360*----------------------------------------------------------------------------*/
1361
e5a41ffa 1362float32 int64_to_float32(int64_t a, float_status *status)
158142c2
FB
1363{
1364 flag zSign;
182f42fd 1365 uint64_t absA;
8f506c70 1366 int8_t shiftCount;
158142c2 1367
f090c9d4 1368 if ( a == 0 ) return float32_zero;
158142c2
FB
1369 zSign = ( a < 0 );
1370 absA = zSign ? - a : a;
1371 shiftCount = countLeadingZeros64( absA ) - 40;
1372 if ( 0 <= shiftCount ) {
1373 return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1374 }
1375 else {
1376 shiftCount += 7;
1377 if ( shiftCount < 0 ) {
1378 shift64RightJamming( absA, - shiftCount, &absA );
1379 }
1380 else {
1381 absA <<= shiftCount;
1382 }
ff32e16e 1383 return roundAndPackFloat32(zSign, 0x9C - shiftCount, absA, status);
158142c2
FB
1384 }
1385
1386}
1387
1388/*----------------------------------------------------------------------------
1389| Returns the result of converting the 64-bit two's complement integer `a'
1390| to the double-precision floating-point format. The conversion is performed
1391| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1392*----------------------------------------------------------------------------*/
1393
e5a41ffa 1394float64 int64_to_float64(int64_t a, float_status *status)
158142c2
FB
1395{
1396 flag zSign;
1397
f090c9d4 1398 if ( a == 0 ) return float64_zero;
bb98fe42 1399 if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
158142c2
FB
1400 return packFloat64( 1, 0x43E, 0 );
1401 }
1402 zSign = ( a < 0 );
ff32e16e 1403 return normalizeRoundAndPackFloat64(zSign, 0x43C, zSign ? -a : a, status);
158142c2
FB
1404}
1405
158142c2
FB
1406/*----------------------------------------------------------------------------
1407| Returns the result of converting the 64-bit two's complement integer `a'
1408| to the extended double-precision floating-point format. The conversion
1409| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1410| Arithmetic.
1411*----------------------------------------------------------------------------*/
1412
e5a41ffa 1413floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2
FB
1414{
1415 flag zSign;
182f42fd 1416 uint64_t absA;
8f506c70 1417 int8_t shiftCount;
158142c2
FB
1418
1419 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1420 zSign = ( a < 0 );
1421 absA = zSign ? - a : a;
1422 shiftCount = countLeadingZeros64( absA );
1423 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1424
1425}
1426
158142c2
FB
1427/*----------------------------------------------------------------------------
1428| Returns the result of converting the 64-bit two's complement integer `a' to
1429| the quadruple-precision floating-point format. The conversion is performed
1430| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1431*----------------------------------------------------------------------------*/
1432
e5a41ffa 1433float128 int64_to_float128(int64_t a, float_status *status)
158142c2
FB
1434{
1435 flag zSign;
182f42fd 1436 uint64_t absA;
8f506c70 1437 int8_t shiftCount;
f4014512 1438 int32_t zExp;
bb98fe42 1439 uint64_t zSig0, zSig1;
158142c2
FB
1440
1441 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1442 zSign = ( a < 0 );
1443 absA = zSign ? - a : a;
1444 shiftCount = countLeadingZeros64( absA ) + 49;
1445 zExp = 0x406E - shiftCount;
1446 if ( 64 <= shiftCount ) {
1447 zSig1 = 0;
1448 zSig0 = absA;
1449 shiftCount -= 64;
1450 }
1451 else {
1452 zSig1 = absA;
1453 zSig0 = 0;
1454 }
1455 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1456 return packFloat128( zSign, zExp, zSig0, zSig1 );
1457
1458}
1459
6bb8e0f1
PM
1460/*----------------------------------------------------------------------------
1461| Returns the result of converting the 64-bit unsigned integer `a'
1462| to the single-precision floating-point format. The conversion is performed
1463| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1464*----------------------------------------------------------------------------*/
1465
e5a41ffa 1466float32 uint64_to_float32(uint64_t a, float_status *status)
6bb8e0f1
PM
1467{
1468 int shiftcount;
1469
1470 if (a == 0) {
1471 return float32_zero;
1472 }
1473
1474 /* Determine (left) shift needed to put first set bit into bit posn 23
1475 * (since packFloat32() expects the binary point between bits 23 and 22);
1476 * this is the fast case for smallish numbers.
1477 */
1478 shiftcount = countLeadingZeros64(a) - 40;
1479 if (shiftcount >= 0) {
1480 return packFloat32(0, 0x95 - shiftcount, a << shiftcount);
1481 }
1482 /* Otherwise we need to do a round-and-pack. roundAndPackFloat32()
1483 * expects the binary point between bits 30 and 29, hence the + 7.
1484 */
1485 shiftcount += 7;
1486 if (shiftcount < 0) {
1487 shift64RightJamming(a, -shiftcount, &a);
1488 } else {
1489 a <<= shiftcount;
1490 }
1491
ff32e16e 1492 return roundAndPackFloat32(0, 0x9c - shiftcount, a, status);
6bb8e0f1
PM
1493}
1494
1495/*----------------------------------------------------------------------------
1496| Returns the result of converting the 64-bit unsigned integer `a'
1497| to the double-precision floating-point format. The conversion is performed
1498| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1499*----------------------------------------------------------------------------*/
1500
e5a41ffa 1501float64 uint64_to_float64(uint64_t a, float_status *status)
6bb8e0f1
PM
1502{
1503 int exp = 0x43C;
1504 int shiftcount;
1505
1506 if (a == 0) {
1507 return float64_zero;
1508 }
1509
1510 shiftcount = countLeadingZeros64(a) - 1;
1511 if (shiftcount < 0) {
1512 shift64RightJamming(a, -shiftcount, &a);
1513 } else {
1514 a <<= shiftcount;
1515 }
ff32e16e 1516 return roundAndPackFloat64(0, exp - shiftcount, a, status);
6bb8e0f1
PM
1517}
1518
1519/*----------------------------------------------------------------------------
1520| Returns the result of converting the 64-bit unsigned integer `a'
1521| to the quadruple-precision floating-point format. The conversion is performed
1522| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1523*----------------------------------------------------------------------------*/
1524
e5a41ffa 1525float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
1526{
1527 if (a == 0) {
1528 return float128_zero;
1529 }
ff32e16e 1530 return normalizeRoundAndPackFloat128(0, 0x406E, a, 0, status);
1e397ead
RH
1531}
1532
158142c2
FB
1533/*----------------------------------------------------------------------------
1534| Returns the result of converting the single-precision floating-point value
1535| `a' to the 32-bit two's complement integer format. The conversion is
1536| performed according to the IEC/IEEE Standard for Binary Floating-Point
1537| Arithmetic---which means in particular that the conversion is rounded
1538| according to the current rounding mode. If `a' is a NaN, the largest
1539| positive integer is returned. Otherwise, if the conversion overflows, the
1540| largest integer with the same sign as `a' is returned.
1541*----------------------------------------------------------------------------*/
1542
f4014512 1543int32_t float32_to_int32(float32 a, float_status *status)
158142c2
FB
1544{
1545 flag aSign;
0c48262d 1546 int aExp;
07d792d2 1547 int shiftCount;
bb98fe42
AF
1548 uint32_t aSig;
1549 uint64_t aSig64;
158142c2 1550
ff32e16e 1551 a = float32_squash_input_denormal(a, status);
158142c2
FB
1552 aSig = extractFloat32Frac( a );
1553 aExp = extractFloat32Exp( a );
1554 aSign = extractFloat32Sign( a );
1555 if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1556 if ( aExp ) aSig |= 0x00800000;
1557 shiftCount = 0xAF - aExp;
1558 aSig64 = aSig;
1559 aSig64 <<= 32;
1560 if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
ff32e16e 1561 return roundAndPackInt32(aSign, aSig64, status);
158142c2
FB
1562
1563}
1564
1565/*----------------------------------------------------------------------------
1566| Returns the result of converting the single-precision floating-point value
1567| `a' to the 32-bit two's complement integer format. The conversion is
1568| performed according to the IEC/IEEE Standard for Binary Floating-Point
1569| Arithmetic, except that the conversion is always rounded toward zero.
1570| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
1571| the conversion overflows, the largest integer with the same sign as `a' is
1572| returned.
1573*----------------------------------------------------------------------------*/
1574
f4014512 1575int32_t float32_to_int32_round_to_zero(float32 a, float_status *status)
158142c2
FB
1576{
1577 flag aSign;
0c48262d 1578 int aExp;
07d792d2 1579 int shiftCount;
bb98fe42 1580 uint32_t aSig;
b3a6a2e0 1581 int32_t z;
ff32e16e 1582 a = float32_squash_input_denormal(a, status);
158142c2
FB
1583
1584 aSig = extractFloat32Frac( a );
1585 aExp = extractFloat32Exp( a );
1586 aSign = extractFloat32Sign( a );
1587 shiftCount = aExp - 0x9E;
1588 if ( 0 <= shiftCount ) {
f090c9d4 1589 if ( float32_val(a) != 0xCF000000 ) {
ff32e16e 1590 float_raise(float_flag_invalid, status);
158142c2
FB
1591 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1592 }
bb98fe42 1593 return (int32_t) 0x80000000;
158142c2
FB
1594 }
1595 else if ( aExp <= 0x7E ) {
a2f2d288
PM
1596 if (aExp | aSig) {
1597 status->float_exception_flags |= float_flag_inexact;
1598 }
158142c2
FB
1599 return 0;
1600 }
1601 aSig = ( aSig | 0x00800000 )<<8;
1602 z = aSig>>( - shiftCount );
bb98fe42 1603 if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
a2f2d288 1604 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
1605 }
1606 if ( aSign ) z = - z;
1607 return z;
1608
1609}
1610
cbcef455
PM
1611/*----------------------------------------------------------------------------
1612| Returns the result of converting the single-precision floating-point value
1613| `a' to the 16-bit two's complement integer format. The conversion is
1614| performed according to the IEC/IEEE Standard for Binary Floating-Point
1615| Arithmetic, except that the conversion is always rounded toward zero.
1616| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
1617| the conversion overflows, the largest integer with the same sign as `a' is
1618| returned.
1619*----------------------------------------------------------------------------*/
1620
0bb721d7 1621int16_t float32_to_int16_round_to_zero(float32 a, float_status *status)
cbcef455
PM
1622{
1623 flag aSign;
0c48262d 1624 int aExp;
07d792d2 1625 int shiftCount;
bb98fe42 1626 uint32_t aSig;
f4014512 1627 int32_t z;
cbcef455
PM
1628
1629 aSig = extractFloat32Frac( a );
1630 aExp = extractFloat32Exp( a );
1631 aSign = extractFloat32Sign( a );
1632 shiftCount = aExp - 0x8E;
1633 if ( 0 <= shiftCount ) {
1634 if ( float32_val(a) != 0xC7000000 ) {
ff32e16e 1635 float_raise(float_flag_invalid, status);
cbcef455
PM
1636 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1637 return 0x7FFF;
1638 }
1639 }
bb98fe42 1640 return (int32_t) 0xffff8000;
cbcef455
PM
1641 }
1642 else if ( aExp <= 0x7E ) {
1643 if ( aExp | aSig ) {
a2f2d288 1644 status->float_exception_flags |= float_flag_inexact;
cbcef455
PM
1645 }
1646 return 0;
1647 }
1648 shiftCount -= 0x10;
1649 aSig = ( aSig | 0x00800000 )<<8;
1650 z = aSig>>( - shiftCount );
bb98fe42 1651 if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
a2f2d288 1652 status->float_exception_flags |= float_flag_inexact;
cbcef455
PM
1653 }
1654 if ( aSign ) {
1655 z = - z;
1656 }
1657 return z;
1658
1659}
1660
158142c2
FB
1661/*----------------------------------------------------------------------------
1662| Returns the result of converting the single-precision floating-point value
1663| `a' to the 64-bit two's complement integer format. The conversion is
1664| performed according to the IEC/IEEE Standard for Binary Floating-Point
1665| Arithmetic---which means in particular that the conversion is rounded
1666| according to the current rounding mode. If `a' is a NaN, the largest
1667| positive integer is returned. Otherwise, if the conversion overflows, the
1668| largest integer with the same sign as `a' is returned.
1669*----------------------------------------------------------------------------*/
1670
f42c2224 1671int64_t float32_to_int64(float32 a, float_status *status)
158142c2
FB
1672{
1673 flag aSign;
0c48262d 1674 int aExp;
07d792d2 1675 int shiftCount;
bb98fe42
AF
1676 uint32_t aSig;
1677 uint64_t aSig64, aSigExtra;
ff32e16e 1678 a = float32_squash_input_denormal(a, status);
158142c2
FB
1679
1680 aSig = extractFloat32Frac( a );
1681 aExp = extractFloat32Exp( a );
1682 aSign = extractFloat32Sign( a );
1683 shiftCount = 0xBE - aExp;
1684 if ( shiftCount < 0 ) {
ff32e16e 1685 float_raise(float_flag_invalid, status);
158142c2
FB
1686 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1687 return LIT64( 0x7FFFFFFFFFFFFFFF );
1688 }
bb98fe42 1689 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
1690 }
1691 if ( aExp ) aSig |= 0x00800000;
1692 aSig64 = aSig;
1693 aSig64 <<= 40;
1694 shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
ff32e16e 1695 return roundAndPackInt64(aSign, aSig64, aSigExtra, status);
158142c2
FB
1696
1697}
1698
2f18bbf9
TM
1699/*----------------------------------------------------------------------------
1700| Returns the result of converting the single-precision floating-point value
1701| `a' to the 64-bit unsigned integer format. The conversion is
1702| performed according to the IEC/IEEE Standard for Binary Floating-Point
1703| Arithmetic---which means in particular that the conversion is rounded
1704| according to the current rounding mode. If `a' is a NaN, the largest
1705| unsigned integer is returned. Otherwise, if the conversion overflows, the
1706| largest unsigned integer is returned. If `a' is negative, the result
1707| is rounded and zero is returned; values that do not round to zero will
1708| raise the inexact exception flag.
1709*----------------------------------------------------------------------------*/
1710
182f42fd 1711uint64_t float32_to_uint64(float32 a, float_status *status)
2f18bbf9
TM
1712{
1713 flag aSign;
0c48262d 1714 int aExp;
07d792d2 1715 int shiftCount;
2f18bbf9
TM
1716 uint32_t aSig;
1717 uint64_t aSig64, aSigExtra;
ff32e16e 1718 a = float32_squash_input_denormal(a, status);
2f18bbf9
TM
1719
1720 aSig = extractFloat32Frac(a);
1721 aExp = extractFloat32Exp(a);
1722 aSign = extractFloat32Sign(a);
1723 if ((aSign) && (aExp > 126)) {
ff32e16e 1724 float_raise(float_flag_invalid, status);
2f18bbf9
TM
1725 if (float32_is_any_nan(a)) {
1726 return LIT64(0xFFFFFFFFFFFFFFFF);
1727 } else {
1728 return 0;
1729 }
1730 }
1731 shiftCount = 0xBE - aExp;
1732 if (aExp) {
1733 aSig |= 0x00800000;
1734 }
1735 if (shiftCount < 0) {
ff32e16e 1736 float_raise(float_flag_invalid, status);
2f18bbf9
TM
1737 return LIT64(0xFFFFFFFFFFFFFFFF);
1738 }
1739
1740 aSig64 = aSig;
1741 aSig64 <<= 40;
1742 shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra);
ff32e16e 1743 return roundAndPackUint64(aSign, aSig64, aSigExtra, status);
2f18bbf9
TM
1744}
1745
a13d4489
TM
1746/*----------------------------------------------------------------------------
1747| Returns the result of converting the single-precision floating-point value
1748| `a' to the 64-bit unsigned integer format. The conversion is
1749| performed according to the IEC/IEEE Standard for Binary Floating-Point
1750| Arithmetic, except that the conversion is always rounded toward zero. If
1751| `a' is a NaN, the largest unsigned integer is returned. Otherwise, if the
1752| conversion overflows, the largest unsigned integer is returned. If
1753| `a' is negative, the result is rounded and zero is returned; values that do
1754| not round to zero will raise the inexact flag.
1755*----------------------------------------------------------------------------*/
1756
182f42fd 1757uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *status)
a13d4489 1758{
a2f2d288 1759 signed char current_rounding_mode = status->float_rounding_mode;
ff32e16e
PM
1760 set_float_rounding_mode(float_round_to_zero, status);
1761 int64_t v = float32_to_uint64(a, status);
1762 set_float_rounding_mode(current_rounding_mode, status);
a13d4489
TM
1763 return v;
1764}
1765
158142c2
FB
1766/*----------------------------------------------------------------------------
1767| Returns the result of converting the single-precision floating-point value
1768| `a' to the 64-bit two's complement integer format. The conversion is
1769| performed according to the IEC/IEEE Standard for Binary Floating-Point
1770| Arithmetic, except that the conversion is always rounded toward zero. If
1771| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
1772| conversion overflows, the largest integer with the same sign as `a' is
1773| returned.
1774*----------------------------------------------------------------------------*/
1775
f42c2224 1776int64_t float32_to_int64_round_to_zero(float32 a, float_status *status)
158142c2
FB
1777{
1778 flag aSign;
0c48262d 1779 int aExp;
07d792d2 1780 int shiftCount;
bb98fe42
AF
1781 uint32_t aSig;
1782 uint64_t aSig64;
f42c2224 1783 int64_t z;
ff32e16e 1784 a = float32_squash_input_denormal(a, status);
158142c2
FB
1785
1786 aSig = extractFloat32Frac( a );
1787 aExp = extractFloat32Exp( a );
1788 aSign = extractFloat32Sign( a );
1789 shiftCount = aExp - 0xBE;
1790 if ( 0 <= shiftCount ) {
f090c9d4 1791 if ( float32_val(a) != 0xDF000000 ) {
ff32e16e 1792 float_raise(float_flag_invalid, status);
158142c2
FB
1793 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1794 return LIT64( 0x7FFFFFFFFFFFFFFF );
1795 }
1796 }
bb98fe42 1797 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
1798 }
1799 else if ( aExp <= 0x7E ) {
a2f2d288
PM
1800 if (aExp | aSig) {
1801 status->float_exception_flags |= float_flag_inexact;
1802 }
158142c2
FB
1803 return 0;
1804 }
1805 aSig64 = aSig | 0x00800000;
1806 aSig64 <<= 40;
1807 z = aSig64>>( - shiftCount );
bb98fe42 1808 if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) {
a2f2d288 1809 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
1810 }
1811 if ( aSign ) z = - z;
1812 return z;
1813
1814}
1815
1816/*----------------------------------------------------------------------------
1817| Returns the result of converting the single-precision floating-point value
1818| `a' to the double-precision floating-point format. The conversion is
1819| performed according to the IEC/IEEE Standard for Binary Floating-Point
1820| Arithmetic.
1821*----------------------------------------------------------------------------*/
1822
e5a41ffa 1823float64 float32_to_float64(float32 a, float_status *status)
158142c2
FB
1824{
1825 flag aSign;
0c48262d 1826 int aExp;
bb98fe42 1827 uint32_t aSig;
ff32e16e 1828 a = float32_squash_input_denormal(a, status);
158142c2
FB
1829
1830 aSig = extractFloat32Frac( a );
1831 aExp = extractFloat32Exp( a );
1832 aSign = extractFloat32Sign( a );
1833 if ( aExp == 0xFF ) {
ff32e16e
PM
1834 if (aSig) {
1835 return commonNaNToFloat64(float32ToCommonNaN(a, status), status);
1836 }
158142c2
FB
1837 return packFloat64( aSign, 0x7FF, 0 );
1838 }
1839 if ( aExp == 0 ) {
1840 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1841 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1842 --aExp;
1843 }
bb98fe42 1844 return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
158142c2
FB
1845
1846}
1847
158142c2
FB
1848/*----------------------------------------------------------------------------
1849| Returns the result of converting the single-precision floating-point value
1850| `a' to the extended double-precision floating-point format. The conversion
1851| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1852| Arithmetic.
1853*----------------------------------------------------------------------------*/
1854
e5a41ffa 1855floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2
FB
1856{
1857 flag aSign;
0c48262d 1858 int aExp;
bb98fe42 1859 uint32_t aSig;
158142c2 1860
ff32e16e 1861 a = float32_squash_input_denormal(a, status);
158142c2
FB
1862 aSig = extractFloat32Frac( a );
1863 aExp = extractFloat32Exp( a );
1864 aSign = extractFloat32Sign( a );
1865 if ( aExp == 0xFF ) {
ff32e16e
PM
1866 if (aSig) {
1867 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
1868 }
158142c2
FB
1869 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1870 }
1871 if ( aExp == 0 ) {
1872 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1873 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1874 }
1875 aSig |= 0x00800000;
bb98fe42 1876 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
1877
1878}
1879
158142c2
FB
1880/*----------------------------------------------------------------------------
1881| Returns the result of converting the single-precision floating-point value
1882| `a' to the quadruple-precision floating-point format. The conversion is
1883| performed according to the IEC/IEEE Standard for Binary Floating-Point
1884| Arithmetic.
1885*----------------------------------------------------------------------------*/
1886
e5a41ffa 1887float128 float32_to_float128(float32 a, float_status *status)
158142c2
FB
1888{
1889 flag aSign;
0c48262d 1890 int aExp;
bb98fe42 1891 uint32_t aSig;
158142c2 1892
ff32e16e 1893 a = float32_squash_input_denormal(a, status);
158142c2
FB
1894 aSig = extractFloat32Frac( a );
1895 aExp = extractFloat32Exp( a );
1896 aSign = extractFloat32Sign( a );
1897 if ( aExp == 0xFF ) {
ff32e16e
PM
1898 if (aSig) {
1899 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
1900 }
158142c2
FB
1901 return packFloat128( aSign, 0x7FFF, 0, 0 );
1902 }
1903 if ( aExp == 0 ) {
1904 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1905 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1906 --aExp;
1907 }
bb98fe42 1908 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
1909
1910}
1911
158142c2
FB
1912/*----------------------------------------------------------------------------
1913| Rounds the single-precision floating-point value `a' to an integer, and
1914| returns the result as a single-precision floating-point value. The
1915| operation is performed according to the IEC/IEEE Standard for Binary
1916| Floating-Point Arithmetic.
1917*----------------------------------------------------------------------------*/
1918
e5a41ffa 1919float32 float32_round_to_int(float32 a, float_status *status)
158142c2
FB
1920{
1921 flag aSign;
0c48262d 1922 int aExp;
bb98fe42 1923 uint32_t lastBitMask, roundBitsMask;
bb98fe42 1924 uint32_t z;
ff32e16e 1925 a = float32_squash_input_denormal(a, status);
158142c2
FB
1926
1927 aExp = extractFloat32Exp( a );
1928 if ( 0x96 <= aExp ) {
1929 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
ff32e16e 1930 return propagateFloat32NaN(a, a, status);
158142c2
FB
1931 }
1932 return a;
1933 }
1934 if ( aExp <= 0x7E ) {
bb98fe42 1935 if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a;
a2f2d288 1936 status->float_exception_flags |= float_flag_inexact;
158142c2 1937 aSign = extractFloat32Sign( a );
a2f2d288 1938 switch (status->float_rounding_mode) {
158142c2
FB
1939 case float_round_nearest_even:
1940 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1941 return packFloat32( aSign, 0x7F, 0 );
1942 }
1943 break;
f9288a76
PM
1944 case float_round_ties_away:
1945 if (aExp == 0x7E) {
1946 return packFloat32(aSign, 0x7F, 0);
1947 }
1948 break;
158142c2 1949 case float_round_down:
f090c9d4 1950 return make_float32(aSign ? 0xBF800000 : 0);
158142c2 1951 case float_round_up:
f090c9d4 1952 return make_float32(aSign ? 0x80000000 : 0x3F800000);
158142c2
FB
1953 }
1954 return packFloat32( aSign, 0, 0 );
1955 }
1956 lastBitMask = 1;
1957 lastBitMask <<= 0x96 - aExp;
1958 roundBitsMask = lastBitMask - 1;
f090c9d4 1959 z = float32_val(a);
a2f2d288 1960 switch (status->float_rounding_mode) {
dc355b76 1961 case float_round_nearest_even:
158142c2 1962 z += lastBitMask>>1;
dc355b76
PM
1963 if ((z & roundBitsMask) == 0) {
1964 z &= ~lastBitMask;
1965 }
1966 break;
f9288a76
PM
1967 case float_round_ties_away:
1968 z += lastBitMask >> 1;
1969 break;
dc355b76
PM
1970 case float_round_to_zero:
1971 break;
1972 case float_round_up:
1973 if (!extractFloat32Sign(make_float32(z))) {
1974 z += roundBitsMask;
1975 }
1976 break;
1977 case float_round_down:
1978 if (extractFloat32Sign(make_float32(z))) {
158142c2
FB
1979 z += roundBitsMask;
1980 }
dc355b76
PM
1981 break;
1982 default:
1983 abort();
158142c2
FB
1984 }
1985 z &= ~ roundBitsMask;
a2f2d288
PM
1986 if (z != float32_val(a)) {
1987 status->float_exception_flags |= float_flag_inexact;
1988 }
f090c9d4 1989 return make_float32(z);
158142c2
FB
1990
1991}
1992
1993/*----------------------------------------------------------------------------
1994| Returns the result of adding the absolute values of the single-precision
1995| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
1996| before being returned. `zSign' is ignored if the result is a NaN.
1997| The addition is performed according to the IEC/IEEE Standard for Binary
1998| Floating-Point Arithmetic.
1999*----------------------------------------------------------------------------*/
2000
e5a41ffa
PM
2001static float32 addFloat32Sigs(float32 a, float32 b, flag zSign,
2002 float_status *status)
158142c2 2003{
0c48262d 2004 int aExp, bExp, zExp;
bb98fe42 2005 uint32_t aSig, bSig, zSig;
0c48262d 2006 int expDiff;
158142c2
FB
2007
2008 aSig = extractFloat32Frac( a );
2009 aExp = extractFloat32Exp( a );
2010 bSig = extractFloat32Frac( b );
2011 bExp = extractFloat32Exp( b );
2012 expDiff = aExp - bExp;
2013 aSig <<= 6;
2014 bSig <<= 6;
2015 if ( 0 < expDiff ) {
2016 if ( aExp == 0xFF ) {
ff32e16e
PM
2017 if (aSig) {
2018 return propagateFloat32NaN(a, b, status);
2019 }
158142c2
FB
2020 return a;
2021 }
2022 if ( bExp == 0 ) {
2023 --expDiff;
2024 }
2025 else {
2026 bSig |= 0x20000000;
2027 }
2028 shift32RightJamming( bSig, expDiff, &bSig );
2029 zExp = aExp;
2030 }
2031 else if ( expDiff < 0 ) {
2032 if ( bExp == 0xFF ) {
ff32e16e
PM
2033 if (bSig) {
2034 return propagateFloat32NaN(a, b, status);
2035 }
158142c2
FB
2036 return packFloat32( zSign, 0xFF, 0 );
2037 }
2038 if ( aExp == 0 ) {
2039 ++expDiff;
2040 }
2041 else {
2042 aSig |= 0x20000000;
2043 }
2044 shift32RightJamming( aSig, - expDiff, &aSig );
2045 zExp = bExp;
2046 }
2047 else {
2048 if ( aExp == 0xFF ) {
ff32e16e
PM
2049 if (aSig | bSig) {
2050 return propagateFloat32NaN(a, b, status);
2051 }
158142c2
FB
2052 return a;
2053 }
fe76d976 2054 if ( aExp == 0 ) {
a2f2d288 2055 if (status->flush_to_zero) {
e6afc87f 2056 if (aSig | bSig) {
ff32e16e 2057 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2058 }
2059 return packFloat32(zSign, 0, 0);
2060 }
fe76d976
PB
2061 return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
2062 }
158142c2
FB
2063 zSig = 0x40000000 + aSig + bSig;
2064 zExp = aExp;
2065 goto roundAndPack;
2066 }
2067 aSig |= 0x20000000;
2068 zSig = ( aSig + bSig )<<1;
2069 --zExp;
bb98fe42 2070 if ( (int32_t) zSig < 0 ) {
158142c2
FB
2071 zSig = aSig + bSig;
2072 ++zExp;
2073 }
2074 roundAndPack:
ff32e16e 2075 return roundAndPackFloat32(zSign, zExp, zSig, status);
158142c2
FB
2076
2077}
2078
2079/*----------------------------------------------------------------------------
2080| Returns the result of subtracting the absolute values of the single-
2081| precision floating-point values `a' and `b'. If `zSign' is 1, the
2082| difference is negated before being returned. `zSign' is ignored if the
2083| result is a NaN. The subtraction is performed according to the IEC/IEEE
2084| Standard for Binary Floating-Point Arithmetic.
2085*----------------------------------------------------------------------------*/
2086
e5a41ffa
PM
2087static float32 subFloat32Sigs(float32 a, float32 b, flag zSign,
2088 float_status *status)
158142c2 2089{
0c48262d 2090 int aExp, bExp, zExp;
bb98fe42 2091 uint32_t aSig, bSig, zSig;
0c48262d 2092 int expDiff;
158142c2
FB
2093
2094 aSig = extractFloat32Frac( a );
2095 aExp = extractFloat32Exp( a );
2096 bSig = extractFloat32Frac( b );
2097 bExp = extractFloat32Exp( b );
2098 expDiff = aExp - bExp;
2099 aSig <<= 7;
2100 bSig <<= 7;
2101 if ( 0 < expDiff ) goto aExpBigger;
2102 if ( expDiff < 0 ) goto bExpBigger;
2103 if ( aExp == 0xFF ) {
ff32e16e
PM
2104 if (aSig | bSig) {
2105 return propagateFloat32NaN(a, b, status);
2106 }
2107 float_raise(float_flag_invalid, status);
af39bc8c 2108 return float32_default_nan(status);
158142c2
FB
2109 }
2110 if ( aExp == 0 ) {
2111 aExp = 1;
2112 bExp = 1;
2113 }
2114 if ( bSig < aSig ) goto aBigger;
2115 if ( aSig < bSig ) goto bBigger;
a2f2d288 2116 return packFloat32(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
2117 bExpBigger:
2118 if ( bExp == 0xFF ) {
ff32e16e
PM
2119 if (bSig) {
2120 return propagateFloat32NaN(a, b, status);
2121 }
158142c2
FB
2122 return packFloat32( zSign ^ 1, 0xFF, 0 );
2123 }
2124 if ( aExp == 0 ) {
2125 ++expDiff;
2126 }
2127 else {
2128 aSig |= 0x40000000;
2129 }
2130 shift32RightJamming( aSig, - expDiff, &aSig );
2131 bSig |= 0x40000000;
2132 bBigger:
2133 zSig = bSig - aSig;
2134 zExp = bExp;
2135 zSign ^= 1;
2136 goto normalizeRoundAndPack;
2137 aExpBigger:
2138 if ( aExp == 0xFF ) {
ff32e16e
PM
2139 if (aSig) {
2140 return propagateFloat32NaN(a, b, status);
2141 }
158142c2
FB
2142 return a;
2143 }
2144 if ( bExp == 0 ) {
2145 --expDiff;
2146 }
2147 else {
2148 bSig |= 0x40000000;
2149 }
2150 shift32RightJamming( bSig, expDiff, &bSig );
2151 aSig |= 0x40000000;
2152 aBigger:
2153 zSig = aSig - bSig;
2154 zExp = aExp;
2155 normalizeRoundAndPack:
2156 --zExp;
ff32e16e 2157 return normalizeRoundAndPackFloat32(zSign, zExp, zSig, status);
158142c2
FB
2158
2159}
2160
2161/*----------------------------------------------------------------------------
2162| Returns the result of adding the single-precision floating-point values `a'
2163| and `b'. The operation is performed according to the IEC/IEEE Standard for
2164| Binary Floating-Point Arithmetic.
2165*----------------------------------------------------------------------------*/
2166
e5a41ffa 2167float32 float32_add(float32 a, float32 b, float_status *status)
158142c2
FB
2168{
2169 flag aSign, bSign;
ff32e16e
PM
2170 a = float32_squash_input_denormal(a, status);
2171 b = float32_squash_input_denormal(b, status);
158142c2
FB
2172
2173 aSign = extractFloat32Sign( a );
2174 bSign = extractFloat32Sign( b );
2175 if ( aSign == bSign ) {
ff32e16e 2176 return addFloat32Sigs(a, b, aSign, status);
158142c2
FB
2177 }
2178 else {
ff32e16e 2179 return subFloat32Sigs(a, b, aSign, status);
158142c2
FB
2180 }
2181
2182}
2183
2184/*----------------------------------------------------------------------------
2185| Returns the result of subtracting the single-precision floating-point values
2186| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
2187| for Binary Floating-Point Arithmetic.
2188*----------------------------------------------------------------------------*/
2189
e5a41ffa 2190float32 float32_sub(float32 a, float32 b, float_status *status)
158142c2
FB
2191{
2192 flag aSign, bSign;
ff32e16e
PM
2193 a = float32_squash_input_denormal(a, status);
2194 b = float32_squash_input_denormal(b, status);
158142c2
FB
2195
2196 aSign = extractFloat32Sign( a );
2197 bSign = extractFloat32Sign( b );
2198 if ( aSign == bSign ) {
ff32e16e 2199 return subFloat32Sigs(a, b, aSign, status);
158142c2
FB
2200 }
2201 else {
ff32e16e 2202 return addFloat32Sigs(a, b, aSign, status);
158142c2
FB
2203 }
2204
2205}
2206
2207/*----------------------------------------------------------------------------
2208| Returns the result of multiplying the single-precision floating-point values
2209| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
2210| for Binary Floating-Point Arithmetic.
2211*----------------------------------------------------------------------------*/
2212
e5a41ffa 2213float32 float32_mul(float32 a, float32 b, float_status *status)
158142c2
FB
2214{
2215 flag aSign, bSign, zSign;
0c48262d 2216 int aExp, bExp, zExp;
bb98fe42
AF
2217 uint32_t aSig, bSig;
2218 uint64_t zSig64;
2219 uint32_t zSig;
158142c2 2220
ff32e16e
PM
2221 a = float32_squash_input_denormal(a, status);
2222 b = float32_squash_input_denormal(b, status);
37d18660 2223
158142c2
FB
2224 aSig = extractFloat32Frac( a );
2225 aExp = extractFloat32Exp( a );
2226 aSign = extractFloat32Sign( a );
2227 bSig = extractFloat32Frac( b );
2228 bExp = extractFloat32Exp( b );
2229 bSign = extractFloat32Sign( b );
2230 zSign = aSign ^ bSign;
2231 if ( aExp == 0xFF ) {
2232 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 2233 return propagateFloat32NaN(a, b, status);
158142c2
FB
2234 }
2235 if ( ( bExp | bSig ) == 0 ) {
ff32e16e 2236 float_raise(float_flag_invalid, status);
af39bc8c 2237 return float32_default_nan(status);
158142c2
FB
2238 }
2239 return packFloat32( zSign, 0xFF, 0 );
2240 }
2241 if ( bExp == 0xFF ) {
ff32e16e
PM
2242 if (bSig) {
2243 return propagateFloat32NaN(a, b, status);
2244 }
158142c2 2245 if ( ( aExp | aSig ) == 0 ) {
ff32e16e 2246 float_raise(float_flag_invalid, status);
af39bc8c 2247 return float32_default_nan(status);
158142c2
FB
2248 }
2249 return packFloat32( zSign, 0xFF, 0 );
2250 }
2251 if ( aExp == 0 ) {
2252 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2253 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2254 }
2255 if ( bExp == 0 ) {
2256 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
2257 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2258 }
2259 zExp = aExp + bExp - 0x7F;
2260 aSig = ( aSig | 0x00800000 )<<7;
2261 bSig = ( bSig | 0x00800000 )<<8;
bb98fe42 2262 shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 );
158142c2 2263 zSig = zSig64;
bb98fe42 2264 if ( 0 <= (int32_t) ( zSig<<1 ) ) {
158142c2
FB
2265 zSig <<= 1;
2266 --zExp;
2267 }
ff32e16e 2268 return roundAndPackFloat32(zSign, zExp, zSig, status);
158142c2
FB
2269
2270}
2271
2272/*----------------------------------------------------------------------------
2273| Returns the result of dividing the single-precision floating-point value `a'
2274| by the corresponding value `b'. The operation is performed according to the
2275| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2276*----------------------------------------------------------------------------*/
2277
e5a41ffa 2278float32 float32_div(float32 a, float32 b, float_status *status)
158142c2
FB
2279{
2280 flag aSign, bSign, zSign;
0c48262d 2281 int aExp, bExp, zExp;
bb98fe42 2282 uint32_t aSig, bSig, zSig;
ff32e16e
PM
2283 a = float32_squash_input_denormal(a, status);
2284 b = float32_squash_input_denormal(b, status);
158142c2
FB
2285
2286 aSig = extractFloat32Frac( a );
2287 aExp = extractFloat32Exp( a );
2288 aSign = extractFloat32Sign( a );
2289 bSig = extractFloat32Frac( b );
2290 bExp = extractFloat32Exp( b );
2291 bSign = extractFloat32Sign( b );
2292 zSign = aSign ^ bSign;
2293 if ( aExp == 0xFF ) {
ff32e16e
PM
2294 if (aSig) {
2295 return propagateFloat32NaN(a, b, status);
2296 }
158142c2 2297 if ( bExp == 0xFF ) {
ff32e16e
PM
2298 if (bSig) {
2299 return propagateFloat32NaN(a, b, status);
2300 }
2301 float_raise(float_flag_invalid, status);
af39bc8c 2302 return float32_default_nan(status);
158142c2
FB
2303 }
2304 return packFloat32( zSign, 0xFF, 0 );
2305 }
2306 if ( bExp == 0xFF ) {
ff32e16e
PM
2307 if (bSig) {
2308 return propagateFloat32NaN(a, b, status);
2309 }
158142c2
FB
2310 return packFloat32( zSign, 0, 0 );
2311 }
2312 if ( bExp == 0 ) {
2313 if ( bSig == 0 ) {
2314 if ( ( aExp | aSig ) == 0 ) {
ff32e16e 2315 float_raise(float_flag_invalid, status);
af39bc8c 2316 return float32_default_nan(status);
158142c2 2317 }
ff32e16e 2318 float_raise(float_flag_divbyzero, status);
158142c2
FB
2319 return packFloat32( zSign, 0xFF, 0 );
2320 }
2321 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2322 }
2323 if ( aExp == 0 ) {
2324 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2325 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2326 }
2327 zExp = aExp - bExp + 0x7D;
2328 aSig = ( aSig | 0x00800000 )<<7;
2329 bSig = ( bSig | 0x00800000 )<<8;
2330 if ( bSig <= ( aSig + aSig ) ) {
2331 aSig >>= 1;
2332 ++zExp;
2333 }
bb98fe42 2334 zSig = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2 2335 if ( ( zSig & 0x3F ) == 0 ) {
bb98fe42 2336 zSig |= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 );
158142c2 2337 }
ff32e16e 2338 return roundAndPackFloat32(zSign, zExp, zSig, status);
158142c2
FB
2339
2340}
2341
2342/*----------------------------------------------------------------------------
2343| Returns the remainder of the single-precision floating-point value `a'
2344| with respect to the corresponding value `b'. The operation is performed
2345| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2346*----------------------------------------------------------------------------*/
2347
e5a41ffa 2348float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 2349{
ed086f3d 2350 flag aSign, zSign;
0c48262d 2351 int aExp, bExp, expDiff;
bb98fe42
AF
2352 uint32_t aSig, bSig;
2353 uint32_t q;
2354 uint64_t aSig64, bSig64, q64;
2355 uint32_t alternateASig;
2356 int32_t sigMean;
ff32e16e
PM
2357 a = float32_squash_input_denormal(a, status);
2358 b = float32_squash_input_denormal(b, status);
158142c2
FB
2359
2360 aSig = extractFloat32Frac( a );
2361 aExp = extractFloat32Exp( a );
2362 aSign = extractFloat32Sign( a );
2363 bSig = extractFloat32Frac( b );
2364 bExp = extractFloat32Exp( b );
158142c2
FB
2365 if ( aExp == 0xFF ) {
2366 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 2367 return propagateFloat32NaN(a, b, status);
158142c2 2368 }
ff32e16e 2369 float_raise(float_flag_invalid, status);
af39bc8c 2370 return float32_default_nan(status);
158142c2
FB
2371 }
2372 if ( bExp == 0xFF ) {
ff32e16e
PM
2373 if (bSig) {
2374 return propagateFloat32NaN(a, b, status);
2375 }
158142c2
FB
2376 return a;
2377 }
2378 if ( bExp == 0 ) {
2379 if ( bSig == 0 ) {
ff32e16e 2380 float_raise(float_flag_invalid, status);
af39bc8c 2381 return float32_default_nan(status);
158142c2
FB
2382 }
2383 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2384 }
2385 if ( aExp == 0 ) {
2386 if ( aSig == 0 ) return a;
2387 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2388 }
2389 expDiff = aExp - bExp;
2390 aSig |= 0x00800000;
2391 bSig |= 0x00800000;
2392 if ( expDiff < 32 ) {
2393 aSig <<= 8;
2394 bSig <<= 8;
2395 if ( expDiff < 0 ) {
2396 if ( expDiff < -1 ) return a;
2397 aSig >>= 1;
2398 }
2399 q = ( bSig <= aSig );
2400 if ( q ) aSig -= bSig;
2401 if ( 0 < expDiff ) {
bb98fe42 2402 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
2403 q >>= 32 - expDiff;
2404 bSig >>= 2;
2405 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2406 }
2407 else {
2408 aSig >>= 2;
2409 bSig >>= 2;
2410 }
2411 }
2412 else {
2413 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
2414 aSig64 = ( (uint64_t) aSig )<<40;
2415 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
2416 expDiff -= 64;
2417 while ( 0 < expDiff ) {
2418 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2419 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2420 aSig64 = - ( ( bSig * q64 )<<38 );
2421 expDiff -= 62;
2422 }
2423 expDiff += 64;
2424 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2425 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2426 q = q64>>( 64 - expDiff );
2427 bSig <<= 6;
2428 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2429 }
2430 do {
2431 alternateASig = aSig;
2432 ++q;
2433 aSig -= bSig;
bb98fe42 2434 } while ( 0 <= (int32_t) aSig );
158142c2
FB
2435 sigMean = aSig + alternateASig;
2436 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2437 aSig = alternateASig;
2438 }
bb98fe42 2439 zSign = ( (int32_t) aSig < 0 );
158142c2 2440 if ( zSign ) aSig = - aSig;
ff32e16e 2441 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
2442}
2443
369be8f6
PM
2444/*----------------------------------------------------------------------------
2445| Returns the result of multiplying the single-precision floating-point values
2446| `a' and `b' then adding 'c', with no intermediate rounding step after the
2447| multiplication. The operation is performed according to the IEC/IEEE
2448| Standard for Binary Floating-Point Arithmetic 754-2008.
2449| The flags argument allows the caller to select negation of the
2450| addend, the intermediate product, or the final result. (The difference
2451| between this and having the caller do a separate negation is that negating
2452| externally will flip the sign bit on NaNs.)
2453*----------------------------------------------------------------------------*/
2454
e5a41ffa
PM
2455float32 float32_muladd(float32 a, float32 b, float32 c, int flags,
2456 float_status *status)
369be8f6
PM
2457{
2458 flag aSign, bSign, cSign, zSign;
0c48262d 2459 int aExp, bExp, cExp, pExp, zExp, expDiff;
369be8f6
PM
2460 uint32_t aSig, bSig, cSig;
2461 flag pInf, pZero, pSign;
2462 uint64_t pSig64, cSig64, zSig64;
2463 uint32_t pSig;
2464 int shiftcount;
2465 flag signflip, infzero;
2466
ff32e16e
PM
2467 a = float32_squash_input_denormal(a, status);
2468 b = float32_squash_input_denormal(b, status);
2469 c = float32_squash_input_denormal(c, status);
369be8f6
PM
2470 aSig = extractFloat32Frac(a);
2471 aExp = extractFloat32Exp(a);
2472 aSign = extractFloat32Sign(a);
2473 bSig = extractFloat32Frac(b);
2474 bExp = extractFloat32Exp(b);
2475 bSign = extractFloat32Sign(b);
2476 cSig = extractFloat32Frac(c);
2477 cExp = extractFloat32Exp(c);
2478 cSign = extractFloat32Sign(c);
2479
2480 infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) ||
2481 (aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));
2482
2483 /* It is implementation-defined whether the cases of (0,inf,qnan)
2484 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
2485 * they return if they do), so we have to hand this information
2486 * off to the target-specific pick-a-NaN routine.
2487 */
2488 if (((aExp == 0xff) && aSig) ||
2489 ((bExp == 0xff) && bSig) ||
2490 ((cExp == 0xff) && cSig)) {
ff32e16e 2491 return propagateFloat32MulAddNaN(a, b, c, infzero, status);
369be8f6
PM
2492 }
2493
2494 if (infzero) {
ff32e16e 2495 float_raise(float_flag_invalid, status);
af39bc8c 2496 return float32_default_nan(status);
369be8f6
PM
2497 }
2498
2499 if (flags & float_muladd_negate_c) {
2500 cSign ^= 1;
2501 }
2502
2503 signflip = (flags & float_muladd_negate_result) ? 1 : 0;
2504
2505 /* Work out the sign and type of the product */
2506 pSign = aSign ^ bSign;
2507 if (flags & float_muladd_negate_product) {
2508 pSign ^= 1;
2509 }
2510 pInf = (aExp == 0xff) || (bExp == 0xff);
2511 pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
2512
2513 if (cExp == 0xff) {
2514 if (pInf && (pSign ^ cSign)) {
2515 /* addition of opposite-signed infinities => InvalidOperation */
ff32e16e 2516 float_raise(float_flag_invalid, status);
af39bc8c 2517 return float32_default_nan(status);
369be8f6
PM
2518 }
2519 /* Otherwise generate an infinity of the same sign */
2520 return packFloat32(cSign ^ signflip, 0xff, 0);
2521 }
2522
2523 if (pInf) {
2524 return packFloat32(pSign ^ signflip, 0xff, 0);
2525 }
2526
2527 if (pZero) {
2528 if (cExp == 0) {
2529 if (cSig == 0) {
2530 /* Adding two exact zeroes */
2531 if (pSign == cSign) {
2532 zSign = pSign;
a2f2d288 2533 } else if (status->float_rounding_mode == float_round_down) {
369be8f6
PM
2534 zSign = 1;
2535 } else {
2536 zSign = 0;
2537 }
2538 return packFloat32(zSign ^ signflip, 0, 0);
2539 }
2540 /* Exact zero plus a denorm */
a2f2d288 2541 if (status->flush_to_zero) {
ff32e16e 2542 float_raise(float_flag_output_denormal, status);
369be8f6
PM
2543 return packFloat32(cSign ^ signflip, 0, 0);
2544 }
2545 }
2546 /* Zero plus something non-zero : just return the something */
67d43538
PM
2547 if (flags & float_muladd_halve_result) {
2548 if (cExp == 0) {
2549 normalizeFloat32Subnormal(cSig, &cExp, &cSig);
2550 }
2551 /* Subtract one to halve, and one again because roundAndPackFloat32
2552 * wants one less than the true exponent.
2553 */
2554 cExp -= 2;
2555 cSig = (cSig | 0x00800000) << 7;
ff32e16e 2556 return roundAndPackFloat32(cSign ^ signflip, cExp, cSig, status);
67d43538 2557 }
a6e7c184 2558 return packFloat32(cSign ^ signflip, cExp, cSig);
369be8f6
PM
2559 }
2560
2561 if (aExp == 0) {
2562 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
2563 }
2564 if (bExp == 0) {
2565 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
2566 }
2567
2568 /* Calculate the actual result a * b + c */
2569
2570 /* Multiply first; this is easy. */
2571 /* NB: we subtract 0x7e where float32_mul() subtracts 0x7f
2572 * because we want the true exponent, not the "one-less-than"
2573 * flavour that roundAndPackFloat32() takes.
2574 */
2575 pExp = aExp + bExp - 0x7e;
2576 aSig = (aSig | 0x00800000) << 7;
2577 bSig = (bSig | 0x00800000) << 8;
2578 pSig64 = (uint64_t)aSig * bSig;
2579 if ((int64_t)(pSig64 << 1) >= 0) {
2580 pSig64 <<= 1;
2581 pExp--;
2582 }
2583
2584 zSign = pSign ^ signflip;
2585
2586 /* Now pSig64 is the significand of the multiply, with the explicit bit in
2587 * position 62.
2588 */
2589 if (cExp == 0) {
2590 if (!cSig) {
2591 /* Throw out the special case of c being an exact zero now */
2592 shift64RightJamming(pSig64, 32, &pSig64);
2593 pSig = pSig64;
67d43538
PM
2594 if (flags & float_muladd_halve_result) {
2595 pExp--;
2596 }
369be8f6 2597 return roundAndPackFloat32(zSign, pExp - 1,
ff32e16e 2598 pSig, status);
369be8f6
PM
2599 }
2600 normalizeFloat32Subnormal(cSig, &cExp, &cSig);
2601 }
2602
2603 cSig64 = (uint64_t)cSig << (62 - 23);
2604 cSig64 |= LIT64(0x4000000000000000);
2605 expDiff = pExp - cExp;
2606
2607 if (pSign == cSign) {
2608 /* Addition */
2609 if (expDiff > 0) {
2610 /* scale c to match p */
2611 shift64RightJamming(cSig64, expDiff, &cSig64);
2612 zExp = pExp;
2613 } else if (expDiff < 0) {
2614 /* scale p to match c */
2615 shift64RightJamming(pSig64, -expDiff, &pSig64);
2616 zExp = cExp;
2617 } else {
2618 /* no scaling needed */
2619 zExp = cExp;
2620 }
2621 /* Add significands and make sure explicit bit ends up in posn 62 */
2622 zSig64 = pSig64 + cSig64;
2623 if ((int64_t)zSig64 < 0) {
2624 shift64RightJamming(zSig64, 1, &zSig64);
2625 } else {
2626 zExp--;
2627 }
2628 } else {
2629 /* Subtraction */
2630 if (expDiff > 0) {
2631 shift64RightJamming(cSig64, expDiff, &cSig64);
2632 zSig64 = pSig64 - cSig64;
2633 zExp = pExp;
2634 } else if (expDiff < 0) {
2635 shift64RightJamming(pSig64, -expDiff, &pSig64);
2636 zSig64 = cSig64 - pSig64;
2637 zExp = cExp;
2638 zSign ^= 1;
2639 } else {
2640 zExp = pExp;
2641 if (cSig64 < pSig64) {
2642 zSig64 = pSig64 - cSig64;
2643 } else if (pSig64 < cSig64) {
2644 zSig64 = cSig64 - pSig64;
2645 zSign ^= 1;
2646 } else {
2647 /* Exact zero */
2648 zSign = signflip;
a2f2d288 2649 if (status->float_rounding_mode == float_round_down) {
369be8f6
PM
2650 zSign ^= 1;
2651 }
2652 return packFloat32(zSign, 0, 0);
2653 }
2654 }
2655 --zExp;
2656 /* Normalize to put the explicit bit back into bit 62. */
2657 shiftcount = countLeadingZeros64(zSig64) - 1;
2658 zSig64 <<= shiftcount;
2659 zExp -= shiftcount;
2660 }
67d43538
PM
2661 if (flags & float_muladd_halve_result) {
2662 zExp--;
2663 }
2664
369be8f6 2665 shift64RightJamming(zSig64, 32, &zSig64);
ff32e16e 2666 return roundAndPackFloat32(zSign, zExp, zSig64, status);
369be8f6
PM
2667}
2668
2669
158142c2
FB
2670/*----------------------------------------------------------------------------
2671| Returns the square root of the single-precision floating-point value `a'.
2672| The operation is performed according to the IEC/IEEE Standard for Binary
2673| Floating-Point Arithmetic.
2674*----------------------------------------------------------------------------*/
2675
e5a41ffa 2676float32 float32_sqrt(float32 a, float_status *status)
158142c2
FB
2677{
2678 flag aSign;
0c48262d 2679 int aExp, zExp;
bb98fe42
AF
2680 uint32_t aSig, zSig;
2681 uint64_t rem, term;
ff32e16e 2682 a = float32_squash_input_denormal(a, status);
158142c2
FB
2683
2684 aSig = extractFloat32Frac( a );
2685 aExp = extractFloat32Exp( a );
2686 aSign = extractFloat32Sign( a );
2687 if ( aExp == 0xFF ) {
ff32e16e
PM
2688 if (aSig) {
2689 return propagateFloat32NaN(a, float32_zero, status);
2690 }
158142c2 2691 if ( ! aSign ) return a;
ff32e16e 2692 float_raise(float_flag_invalid, status);
af39bc8c 2693 return float32_default_nan(status);
158142c2
FB
2694 }
2695 if ( aSign ) {
2696 if ( ( aExp | aSig ) == 0 ) return a;
ff32e16e 2697 float_raise(float_flag_invalid, status);
af39bc8c 2698 return float32_default_nan(status);
158142c2
FB
2699 }
2700 if ( aExp == 0 ) {
f090c9d4 2701 if ( aSig == 0 ) return float32_zero;
158142c2
FB
2702 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2703 }
2704 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2705 aSig = ( aSig | 0x00800000 )<<8;
2706 zSig = estimateSqrt32( aExp, aSig ) + 2;
2707 if ( ( zSig & 0x7F ) <= 5 ) {
2708 if ( zSig < 2 ) {
2709 zSig = 0x7FFFFFFF;
2710 goto roundAndPack;
2711 }
2712 aSig >>= aExp & 1;
bb98fe42
AF
2713 term = ( (uint64_t) zSig ) * zSig;
2714 rem = ( ( (uint64_t) aSig )<<32 ) - term;
2715 while ( (int64_t) rem < 0 ) {
158142c2 2716 --zSig;
bb98fe42 2717 rem += ( ( (uint64_t) zSig )<<1 ) | 1;
158142c2
FB
2718 }
2719 zSig |= ( rem != 0 );
2720 }
2721 shift32RightJamming( zSig, 1, &zSig );
2722 roundAndPack:
ff32e16e 2723 return roundAndPackFloat32(0, zExp, zSig, status);
158142c2
FB
2724
2725}
2726
8229c991
AJ
2727/*----------------------------------------------------------------------------
2728| Returns the binary exponential of the single-precision floating-point value
2729| `a'. The operation is performed according to the IEC/IEEE Standard for
2730| Binary Floating-Point Arithmetic.
2731|
2732| Uses the following identities:
2733|
2734| 1. -------------------------------------------------------------------------
2735| x x*ln(2)
2736| 2 = e
2737|
2738| 2. -------------------------------------------------------------------------
2739| 2 3 4 5 n
2740| x x x x x x x
2741| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
2742| 1! 2! 3! 4! 5! n!
2743*----------------------------------------------------------------------------*/
2744
2745static const float64 float32_exp2_coefficients[15] =
2746{
d5138cf4
PM
2747 const_float64( 0x3ff0000000000000ll ), /* 1 */
2748 const_float64( 0x3fe0000000000000ll ), /* 2 */
2749 const_float64( 0x3fc5555555555555ll ), /* 3 */
2750 const_float64( 0x3fa5555555555555ll ), /* 4 */
2751 const_float64( 0x3f81111111111111ll ), /* 5 */
2752 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
2753 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
2754 const_float64( 0x3efa01a01a01a01all ), /* 8 */
2755 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
2756 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
2757 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
2758 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
2759 const_float64( 0x3de6124613a86d09ll ), /* 13 */
2760 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
2761 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
2762};
2763
e5a41ffa 2764float32 float32_exp2(float32 a, float_status *status)
8229c991
AJ
2765{
2766 flag aSign;
0c48262d 2767 int aExp;
bb98fe42 2768 uint32_t aSig;
8229c991
AJ
2769 float64 r, x, xn;
2770 int i;
ff32e16e 2771 a = float32_squash_input_denormal(a, status);
8229c991
AJ
2772
2773 aSig = extractFloat32Frac( a );
2774 aExp = extractFloat32Exp( a );
2775 aSign = extractFloat32Sign( a );
2776
2777 if ( aExp == 0xFF) {
ff32e16e
PM
2778 if (aSig) {
2779 return propagateFloat32NaN(a, float32_zero, status);
2780 }
8229c991
AJ
2781 return (aSign) ? float32_zero : a;
2782 }
2783 if (aExp == 0) {
2784 if (aSig == 0) return float32_one;
2785 }
2786
ff32e16e 2787 float_raise(float_flag_inexact, status);
8229c991
AJ
2788
2789 /* ******************************* */
2790 /* using float64 for approximation */
2791 /* ******************************* */
ff32e16e
PM
2792 x = float32_to_float64(a, status);
2793 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
2794
2795 xn = x;
2796 r = float64_one;
2797 for (i = 0 ; i < 15 ; i++) {
2798 float64 f;
2799
ff32e16e
PM
2800 f = float64_mul(xn, float32_exp2_coefficients[i], status);
2801 r = float64_add(r, f, status);
8229c991 2802
ff32e16e 2803 xn = float64_mul(xn, x, status);
8229c991
AJ
2804 }
2805
2806 return float64_to_float32(r, status);
2807}
2808
374dfc33
AJ
2809/*----------------------------------------------------------------------------
2810| Returns the binary log of the single-precision floating-point value `a'.
2811| The operation is performed according to the IEC/IEEE Standard for Binary
2812| Floating-Point Arithmetic.
2813*----------------------------------------------------------------------------*/
e5a41ffa 2814float32 float32_log2(float32 a, float_status *status)
374dfc33
AJ
2815{
2816 flag aSign, zSign;
0c48262d 2817 int aExp;
bb98fe42 2818 uint32_t aSig, zSig, i;
374dfc33 2819
ff32e16e 2820 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
2821 aSig = extractFloat32Frac( a );
2822 aExp = extractFloat32Exp( a );
2823 aSign = extractFloat32Sign( a );
2824
2825 if ( aExp == 0 ) {
2826 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
2827 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2828 }
2829 if ( aSign ) {
ff32e16e 2830 float_raise(float_flag_invalid, status);
af39bc8c 2831 return float32_default_nan(status);
374dfc33
AJ
2832 }
2833 if ( aExp == 0xFF ) {
ff32e16e
PM
2834 if (aSig) {
2835 return propagateFloat32NaN(a, float32_zero, status);
2836 }
374dfc33
AJ
2837 return a;
2838 }
2839
2840 aExp -= 0x7F;
2841 aSig |= 0x00800000;
2842 zSign = aExp < 0;
2843 zSig = aExp << 23;
2844
2845 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 2846 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
2847 if ( aSig & 0x01000000 ) {
2848 aSig >>= 1;
2849 zSig |= i;
2850 }
2851 }
2852
2853 if ( zSign )
2854 zSig = -zSig;
2855
ff32e16e 2856 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
2857}
2858
158142c2
FB
2859/*----------------------------------------------------------------------------
2860| Returns 1 if the single-precision floating-point value `a' is equal to
b689362d
AJ
2861| the corresponding value `b', and 0 otherwise. The invalid exception is
2862| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
2863| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2864*----------------------------------------------------------------------------*/
2865
e5a41ffa 2866int float32_eq(float32 a, float32 b, float_status *status)
158142c2 2867{
b689362d 2868 uint32_t av, bv;
ff32e16e
PM
2869 a = float32_squash_input_denormal(a, status);
2870 b = float32_squash_input_denormal(b, status);
158142c2
FB
2871
2872 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2873 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2874 ) {
ff32e16e 2875 float_raise(float_flag_invalid, status);
158142c2
FB
2876 return 0;
2877 }
b689362d
AJ
2878 av = float32_val(a);
2879 bv = float32_val(b);
2880 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
2881}
2882
2883/*----------------------------------------------------------------------------
2884| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
2885| or equal to the corresponding value `b', and 0 otherwise. The invalid
2886| exception is raised if either operand is a NaN. The comparison is performed
2887| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
2888*----------------------------------------------------------------------------*/
2889
e5a41ffa 2890int float32_le(float32 a, float32 b, float_status *status)
158142c2
FB
2891{
2892 flag aSign, bSign;
bb98fe42 2893 uint32_t av, bv;
ff32e16e
PM
2894 a = float32_squash_input_denormal(a, status);
2895 b = float32_squash_input_denormal(b, status);
158142c2
FB
2896
2897 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2898 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2899 ) {
ff32e16e 2900 float_raise(float_flag_invalid, status);
158142c2
FB
2901 return 0;
2902 }
2903 aSign = extractFloat32Sign( a );
2904 bSign = extractFloat32Sign( b );
f090c9d4
PB
2905 av = float32_val(a);
2906 bv = float32_val(b);
bb98fe42 2907 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 2908 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
2909
2910}
2911
2912/*----------------------------------------------------------------------------
2913| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
2914| the corresponding value `b', and 0 otherwise. The invalid exception is
2915| raised if either operand is a NaN. The comparison is performed according
2916| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
2917*----------------------------------------------------------------------------*/
2918
e5a41ffa 2919int float32_lt(float32 a, float32 b, float_status *status)
158142c2
FB
2920{
2921 flag aSign, bSign;
bb98fe42 2922 uint32_t av, bv;
ff32e16e
PM
2923 a = float32_squash_input_denormal(a, status);
2924 b = float32_squash_input_denormal(b, status);
158142c2
FB
2925
2926 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2927 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2928 ) {
ff32e16e 2929 float_raise(float_flag_invalid, status);
158142c2
FB
2930 return 0;
2931 }
2932 aSign = extractFloat32Sign( a );
2933 bSign = extractFloat32Sign( b );
f090c9d4
PB
2934 av = float32_val(a);
2935 bv = float32_val(b);
bb98fe42 2936 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 2937 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
2938
2939}
2940
67b7861d
AJ
2941/*----------------------------------------------------------------------------
2942| Returns 1 if the single-precision floating-point values `a' and `b' cannot
f5a64251
AJ
2943| be compared, and 0 otherwise. The invalid exception is raised if either
2944| operand is a NaN. The comparison is performed according to the IEC/IEEE
2945| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
2946*----------------------------------------------------------------------------*/
2947
e5a41ffa 2948int float32_unordered(float32 a, float32 b, float_status *status)
67b7861d 2949{
ff32e16e
PM
2950 a = float32_squash_input_denormal(a, status);
2951 b = float32_squash_input_denormal(b, status);
67b7861d
AJ
2952
2953 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2954 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2955 ) {
ff32e16e 2956 float_raise(float_flag_invalid, status);
67b7861d
AJ
2957 return 1;
2958 }
2959 return 0;
2960}
b689362d 2961
158142c2
FB
2962/*----------------------------------------------------------------------------
2963| Returns 1 if the single-precision floating-point value `a' is equal to
f5a64251
AJ
2964| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
2965| exception. The comparison is performed according to the IEC/IEEE Standard
2966| for Binary Floating-Point Arithmetic.
158142c2
FB
2967*----------------------------------------------------------------------------*/
2968
e5a41ffa 2969int float32_eq_quiet(float32 a, float32 b, float_status *status)
158142c2 2970{
ff32e16e
PM
2971 a = float32_squash_input_denormal(a, status);
2972 b = float32_squash_input_denormal(b, status);
158142c2
FB
2973
2974 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2975 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2976 ) {
af39bc8c
AM
2977 if (float32_is_signaling_nan(a, status)
2978 || float32_is_signaling_nan(b, status)) {
ff32e16e 2979 float_raise(float_flag_invalid, status);
b689362d 2980 }
158142c2
FB
2981 return 0;
2982 }
b689362d
AJ
2983 return ( float32_val(a) == float32_val(b) ) ||
2984 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
158142c2
FB
2985}
2986
2987/*----------------------------------------------------------------------------
2988| Returns 1 if the single-precision floating-point value `a' is less than or
2989| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
2990| cause an exception. Otherwise, the comparison is performed according to the
2991| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2992*----------------------------------------------------------------------------*/
2993
e5a41ffa 2994int float32_le_quiet(float32 a, float32 b, float_status *status)
158142c2
FB
2995{
2996 flag aSign, bSign;
bb98fe42 2997 uint32_t av, bv;
ff32e16e
PM
2998 a = float32_squash_input_denormal(a, status);
2999 b = float32_squash_input_denormal(b, status);
158142c2
FB
3000
3001 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3002 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3003 ) {
af39bc8c
AM
3004 if (float32_is_signaling_nan(a, status)
3005 || float32_is_signaling_nan(b, status)) {
ff32e16e 3006 float_raise(float_flag_invalid, status);
158142c2
FB
3007 }
3008 return 0;
3009 }
3010 aSign = extractFloat32Sign( a );
3011 bSign = extractFloat32Sign( b );
f090c9d4
PB
3012 av = float32_val(a);
3013 bv = float32_val(b);
bb98fe42 3014 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 3015 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
3016
3017}
3018
3019/*----------------------------------------------------------------------------
3020| Returns 1 if the single-precision floating-point value `a' is less than
3021| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
3022| exception. Otherwise, the comparison is performed according to the IEC/IEEE
3023| Standard for Binary Floating-Point Arithmetic.
3024*----------------------------------------------------------------------------*/
3025
e5a41ffa 3026int float32_lt_quiet(float32 a, float32 b, float_status *status)
158142c2
FB
3027{
3028 flag aSign, bSign;
bb98fe42 3029 uint32_t av, bv;
ff32e16e
PM
3030 a = float32_squash_input_denormal(a, status);
3031 b = float32_squash_input_denormal(b, status);
158142c2
FB
3032
3033 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3034 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3035 ) {
af39bc8c
AM
3036 if (float32_is_signaling_nan(a, status)
3037 || float32_is_signaling_nan(b, status)) {
ff32e16e 3038 float_raise(float_flag_invalid, status);
158142c2
FB
3039 }
3040 return 0;
3041 }
3042 aSign = extractFloat32Sign( a );
3043 bSign = extractFloat32Sign( b );
f090c9d4
PB
3044 av = float32_val(a);
3045 bv = float32_val(b);
bb98fe42 3046 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 3047 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
3048
3049}
3050
67b7861d
AJ
3051/*----------------------------------------------------------------------------
3052| Returns 1 if the single-precision floating-point values `a' and `b' cannot
3053| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
3054| comparison is performed according to the IEC/IEEE Standard for Binary
3055| Floating-Point Arithmetic.
3056*----------------------------------------------------------------------------*/
3057
e5a41ffa 3058int float32_unordered_quiet(float32 a, float32 b, float_status *status)
67b7861d 3059{
ff32e16e
PM
3060 a = float32_squash_input_denormal(a, status);
3061 b = float32_squash_input_denormal(b, status);
67b7861d
AJ
3062
3063 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3064 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3065 ) {
af39bc8c
AM
3066 if (float32_is_signaling_nan(a, status)
3067 || float32_is_signaling_nan(b, status)) {
ff32e16e 3068 float_raise(float_flag_invalid, status);
67b7861d
AJ
3069 }
3070 return 1;
3071 }
3072 return 0;
3073}
3074
158142c2
FB
3075/*----------------------------------------------------------------------------
3076| Returns the result of converting the double-precision floating-point value
3077| `a' to the 32-bit two's complement integer format. The conversion is
3078| performed according to the IEC/IEEE Standard for Binary Floating-Point
3079| Arithmetic---which means in particular that the conversion is rounded
3080| according to the current rounding mode. If `a' is a NaN, the largest
3081| positive integer is returned. Otherwise, if the conversion overflows, the
3082| largest integer with the same sign as `a' is returned.
3083*----------------------------------------------------------------------------*/
3084
f4014512 3085int32_t float64_to_int32(float64 a, float_status *status)
158142c2
FB
3086{
3087 flag aSign;
0c48262d 3088 int aExp;
07d792d2 3089 int shiftCount;
bb98fe42 3090 uint64_t aSig;
ff32e16e 3091 a = float64_squash_input_denormal(a, status);
158142c2
FB
3092
3093 aSig = extractFloat64Frac( a );
3094 aExp = extractFloat64Exp( a );
3095 aSign = extractFloat64Sign( a );
3096 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
3097 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3098 shiftCount = 0x42C - aExp;
3099 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 3100 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
3101
3102}
3103
3104/*----------------------------------------------------------------------------
3105| Returns the result of converting the double-precision floating-point value
3106| `a' to the 32-bit two's complement integer format. The conversion is
3107| performed according to the IEC/IEEE Standard for Binary Floating-Point
3108| Arithmetic, except that the conversion is always rounded toward zero.
3109| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
3110| the conversion overflows, the largest integer with the same sign as `a' is
3111| returned.
3112*----------------------------------------------------------------------------*/
3113
f4014512 3114int32_t float64_to_int32_round_to_zero(float64 a, float_status *status)
158142c2
FB
3115{
3116 flag aSign;
0c48262d 3117 int aExp;
07d792d2 3118 int shiftCount;
bb98fe42 3119 uint64_t aSig, savedASig;
b3a6a2e0 3120 int32_t z;
ff32e16e 3121 a = float64_squash_input_denormal(a, status);
158142c2
FB
3122
3123 aSig = extractFloat64Frac( a );
3124 aExp = extractFloat64Exp( a );
3125 aSign = extractFloat64Sign( a );
3126 if ( 0x41E < aExp ) {
3127 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
3128 goto invalid;
3129 }
3130 else if ( aExp < 0x3FF ) {
a2f2d288
PM
3131 if (aExp || aSig) {
3132 status->float_exception_flags |= float_flag_inexact;
3133 }
158142c2
FB
3134 return 0;
3135 }
3136 aSig |= LIT64( 0x0010000000000000 );
3137 shiftCount = 0x433 - aExp;
3138 savedASig = aSig;
3139 aSig >>= shiftCount;
3140 z = aSig;
3141 if ( aSign ) z = - z;
3142 if ( ( z < 0 ) ^ aSign ) {
3143 invalid:
ff32e16e 3144 float_raise(float_flag_invalid, status);
bb98fe42 3145 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
3146 }
3147 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 3148 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
3149 }
3150 return z;
3151
3152}
3153
cbcef455
PM
3154/*----------------------------------------------------------------------------
3155| Returns the result of converting the double-precision floating-point value
3156| `a' to the 16-bit two's complement integer format. The conversion is
3157| performed according to the IEC/IEEE Standard for Binary Floating-Point
3158| Arithmetic, except that the conversion is always rounded toward zero.
3159| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
3160| the conversion overflows, the largest integer with the same sign as `a' is
3161| returned.
3162*----------------------------------------------------------------------------*/
3163
0bb721d7 3164int16_t float64_to_int16_round_to_zero(float64 a, float_status *status)
cbcef455
PM
3165{
3166 flag aSign;
0c48262d 3167 int aExp;
07d792d2 3168 int shiftCount;
bb98fe42 3169 uint64_t aSig, savedASig;
f4014512 3170 int32_t z;
cbcef455
PM
3171
3172 aSig = extractFloat64Frac( a );
3173 aExp = extractFloat64Exp( a );
3174 aSign = extractFloat64Sign( a );
3175 if ( 0x40E < aExp ) {
3176 if ( ( aExp == 0x7FF ) && aSig ) {
3177 aSign = 0;
3178 }
3179 goto invalid;
3180 }
3181 else if ( aExp < 0x3FF ) {
3182 if ( aExp || aSig ) {
a2f2d288 3183 status->float_exception_flags |= float_flag_inexact;
cbcef455
PM
3184 }
3185 return 0;
3186 }
3187 aSig |= LIT64( 0x0010000000000000 );
3188 shiftCount = 0x433 - aExp;
3189 savedASig = aSig;
3190 aSig >>= shiftCount;
3191 z = aSig;
3192 if ( aSign ) {
3193 z = - z;
3194 }
3195 if ( ( (int16_t)z < 0 ) ^ aSign ) {
3196 invalid:
ff32e16e 3197 float_raise(float_flag_invalid, status);
bb98fe42 3198 return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
cbcef455
PM
3199 }
3200 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 3201 status->float_exception_flags |= float_flag_inexact;
cbcef455
PM
3202 }
3203 return z;
3204}
3205
158142c2
FB
3206/*----------------------------------------------------------------------------
3207| Returns the result of converting the double-precision floating-point value
3208| `a' to the 64-bit two's complement integer format. The conversion is
3209| performed according to the IEC/IEEE Standard for Binary Floating-Point
3210| Arithmetic---which means in particular that the conversion is rounded
3211| according to the current rounding mode. If `a' is a NaN, the largest
3212| positive integer is returned. Otherwise, if the conversion overflows, the
3213| largest integer with the same sign as `a' is returned.
3214*----------------------------------------------------------------------------*/
3215
f42c2224 3216int64_t float64_to_int64(float64 a, float_status *status)
158142c2
FB
3217{
3218 flag aSign;
0c48262d 3219 int aExp;
07d792d2 3220 int shiftCount;
bb98fe42 3221 uint64_t aSig, aSigExtra;
ff32e16e 3222 a = float64_squash_input_denormal(a, status);
158142c2
FB
3223
3224 aSig = extractFloat64Frac( a );
3225 aExp = extractFloat64Exp( a );
3226 aSign = extractFloat64Sign( a );
3227 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3228 shiftCount = 0x433 - aExp;
3229 if ( shiftCount <= 0 ) {
3230 if ( 0x43E < aExp ) {
ff32e16e 3231 float_raise(float_flag_invalid, status);
158142c2
FB
3232 if ( ! aSign
3233 || ( ( aExp == 0x7FF )
3234 && ( aSig != LIT64( 0x0010000000000000 ) ) )
3235 ) {
3236 return LIT64( 0x7FFFFFFFFFFFFFFF );
3237 }
bb98fe42 3238 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
3239 }
3240 aSigExtra = 0;
3241 aSig <<= - shiftCount;
3242 }
3243 else {
3244 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3245 }
ff32e16e 3246 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
3247
3248}
3249
3250/*----------------------------------------------------------------------------
3251| Returns the result of converting the double-precision floating-point value
3252| `a' to the 64-bit two's complement integer format. The conversion is
3253| performed according to the IEC/IEEE Standard for Binary Floating-Point
3254| Arithmetic, except that the conversion is always rounded toward zero.
3255| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
3256| the conversion overflows, the largest integer with the same sign as `a' is
3257| returned.
3258*----------------------------------------------------------------------------*/
3259
f42c2224 3260int64_t float64_to_int64_round_to_zero(float64 a, float_status *status)
158142c2
FB
3261{
3262 flag aSign;
0c48262d 3263 int aExp;
07d792d2 3264 int shiftCount;
bb98fe42 3265 uint64_t aSig;
f42c2224 3266 int64_t z;
ff32e16e 3267 a = float64_squash_input_denormal(a, status);
158142c2
FB
3268
3269 aSig = extractFloat64Frac( a );
3270 aExp = extractFloat64Exp( a );
3271 aSign = extractFloat64Sign( a );
3272 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3273 shiftCount = aExp - 0x433;
3274 if ( 0 <= shiftCount ) {
3275 if ( 0x43E <= aExp ) {
f090c9d4 3276 if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
ff32e16e 3277 float_raise(float_flag_invalid, status);
158142c2
FB
3278 if ( ! aSign
3279 || ( ( aExp == 0x7FF )
3280 && ( aSig != LIT64( 0x0010000000000000 ) ) )
3281 ) {
3282 return LIT64( 0x7FFFFFFFFFFFFFFF );
3283 }
3284 }
bb98fe42 3285 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
3286 }
3287 z = aSig<<shiftCount;
3288 }
3289 else {
3290 if ( aExp < 0x3FE ) {
a2f2d288
PM
3291 if (aExp | aSig) {
3292 status->float_exception_flags |= float_flag_inexact;
3293 }
158142c2
FB
3294 return 0;
3295 }
3296 z = aSig>>( - shiftCount );
bb98fe42 3297 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 3298 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
3299 }
3300 }
3301 if ( aSign ) z = - z;
3302 return z;
3303
3304}
3305
3306/*----------------------------------------------------------------------------
3307| Returns the result of converting the double-precision floating-point value
3308| `a' to the single-precision floating-point format. The conversion is
3309| performed according to the IEC/IEEE Standard for Binary Floating-Point
3310| Arithmetic.
3311*----------------------------------------------------------------------------*/
3312
e5a41ffa 3313float32 float64_to_float32(float64 a, float_status *status)
158142c2
FB
3314{
3315 flag aSign;
0c48262d 3316 int aExp;
bb98fe42
AF
3317 uint64_t aSig;
3318 uint32_t zSig;
ff32e16e 3319 a = float64_squash_input_denormal(a, status);
158142c2
FB
3320
3321 aSig = extractFloat64Frac( a );
3322 aExp = extractFloat64Exp( a );
3323 aSign = extractFloat64Sign( a );
3324 if ( aExp == 0x7FF ) {
ff32e16e
PM
3325 if (aSig) {
3326 return commonNaNToFloat32(float64ToCommonNaN(a, status), status);
3327 }
158142c2
FB
3328 return packFloat32( aSign, 0xFF, 0 );
3329 }
3330 shift64RightJamming( aSig, 22, &aSig );
3331 zSig = aSig;
3332 if ( aExp || zSig ) {
3333 zSig |= 0x40000000;
3334 aExp -= 0x381;
3335 }
ff32e16e 3336 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
3337
3338}
3339
60011498
PB
3340
3341/*----------------------------------------------------------------------------
3342| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
3343| half-precision floating-point value, returning the result. After being
3344| shifted into the proper positions, the three fields are simply added
3345| together to form the result. This means that any integer portion of `zSig'
3346| will be added into the exponent. Since a properly normalized significand
3347| will have an integer portion equal to 1, the `zExp' input should be 1 less
3348| than the desired result exponent whenever `zSig' is a complete, normalized
3349| significand.
3350*----------------------------------------------------------------------------*/
0c48262d 3351static float16 packFloat16(flag zSign, int zExp, uint16_t zSig)
60011498 3352{
bb4d4bb3 3353 return make_float16(
bb98fe42 3354 (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
60011498
PB
3355}
3356
c4a1c5e7
PM
3357/*----------------------------------------------------------------------------
3358| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3359| and significand `zSig', and returns the proper half-precision floating-
3360| point value corresponding to the abstract input. Ordinarily, the abstract
3361| value is simply rounded and packed into the half-precision format, with
3362| the inexact exception raised if the abstract input cannot be represented
3363| exactly. However, if the abstract value is too large, the overflow and
3364| inexact exceptions are raised and an infinity or maximal finite value is
3365| returned. If the abstract value is too small, the input value is rounded to
3366| a subnormal number, and the underflow and inexact exceptions are raised if
3367| the abstract input cannot be represented exactly as a subnormal half-
3368| precision floating-point number.
3369| The `ieee' flag indicates whether to use IEEE standard half precision, or
3370| ARM-style "alternative representation", which omits the NaN and Inf
3371| encodings in order to raise the maximum representable exponent by one.
3372| The input significand `zSig' has its binary point between bits 22
3373| and 23, which is 13 bits to the left of the usual location. This shifted
3374| significand must be normalized or smaller. If `zSig' is not normalized,
3375| `zExp' must be 0; in that case, the result returned is a subnormal number,
3376| and it must not require rounding. In the usual case that `zSig' is
3377| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3378| Note the slightly odd position of the binary point in zSig compared with the
3379| other roundAndPackFloat functions. This should probably be fixed if we
3380| need to implement more float16 routines than just conversion.
3381| The handling of underflow and overflow follows the IEC/IEEE Standard for
3382| Binary Floating-Point Arithmetic.
3383*----------------------------------------------------------------------------*/
3384
0c48262d 3385static float16 roundAndPackFloat16(flag zSign, int zExp,
e5a41ffa
PM
3386 uint32_t zSig, flag ieee,
3387 float_status *status)
c4a1c5e7
PM
3388{
3389 int maxexp = ieee ? 29 : 30;
3390 uint32_t mask;
3391 uint32_t increment;
c4a1c5e7
PM
3392 bool rounding_bumps_exp;
3393 bool is_tiny = false;
3394
3395 /* Calculate the mask of bits of the mantissa which are not
3396 * representable in half-precision and will be lost.
3397 */
3398 if (zExp < 1) {
3399 /* Will be denormal in halfprec */
3400 mask = 0x00ffffff;
3401 if (zExp >= -11) {
3402 mask >>= 11 + zExp;
3403 }
3404 } else {
3405 /* Normal number in halfprec */
3406 mask = 0x00001fff;
3407 }
3408
a2f2d288 3409 switch (status->float_rounding_mode) {
c4a1c5e7
PM
3410 case float_round_nearest_even:
3411 increment = (mask + 1) >> 1;
3412 if ((zSig & mask) == increment) {
3413 increment = zSig & (increment << 1);
3414 }
3415 break;
f9288a76
PM
3416 case float_round_ties_away:
3417 increment = (mask + 1) >> 1;
3418 break;
c4a1c5e7
PM
3419 case float_round_up:
3420 increment = zSign ? 0 : mask;
3421 break;
3422 case float_round_down:
3423 increment = zSign ? mask : 0;
3424 break;
3425 default: /* round_to_zero */
3426 increment = 0;
3427 break;
3428 }
3429
3430 rounding_bumps_exp = (zSig + increment >= 0x01000000);
3431
3432 if (zExp > maxexp || (zExp == maxexp && rounding_bumps_exp)) {
3433 if (ieee) {
ff32e16e 3434 float_raise(float_flag_overflow | float_flag_inexact, status);
c4a1c5e7
PM
3435 return packFloat16(zSign, 0x1f, 0);
3436 } else {
ff32e16e 3437 float_raise(float_flag_invalid, status);
c4a1c5e7
PM
3438 return packFloat16(zSign, 0x1f, 0x3ff);
3439 }
3440 }
3441
3442 if (zExp < 0) {
3443 /* Note that flush-to-zero does not affect half-precision results */
3444 is_tiny =
a2f2d288 3445 (status->float_detect_tininess == float_tininess_before_rounding)
c4a1c5e7
PM
3446 || (zExp < -1)
3447 || (!rounding_bumps_exp);
3448 }
3449 if (zSig & mask) {
ff32e16e 3450 float_raise(float_flag_inexact, status);
c4a1c5e7 3451 if (is_tiny) {
ff32e16e 3452 float_raise(float_flag_underflow, status);
c4a1c5e7
PM
3453 }
3454 }
3455
3456 zSig += increment;
3457 if (rounding_bumps_exp) {
3458 zSig >>= 1;
3459 zExp++;
3460 }
3461
3462 if (zExp < -10) {
3463 return packFloat16(zSign, 0, 0);
3464 }
3465 if (zExp < 0) {
3466 zSig >>= -zExp;
3467 zExp = 0;
3468 }
3469 return packFloat16(zSign, zExp, zSig >> 13);
3470}
3471
/*----------------------------------------------------------------------------
| Normalizes the subnormal half-precision significand `aSig'.  The
| significand is shifted left by (leading-zero count - 21) bits, which for a
| nonzero input places its most significant set bit at bit 10.  The shifted
| significand is stored through `zSigPtr' and the value (1 - shift count) is
| stored through `zExpPtr'.
*----------------------------------------------------------------------------*/
static void normalizeFloat16Subnormal(uint32_t aSig, int *zExpPtr,
                                      uint32_t *zSigPtr)
{
    int8_t lshift = countLeadingZeros32(aSig) - 21;

    *zSigPtr = aSig << lshift;
    *zExpPtr = 1 - lshift;
}
3479
60011498
PB
3480/* Half precision floats come in two formats: standard IEEE and "ARM" format.
3481 The latter gains extra exponent range by omitting the NaN/Inf encodings. */
bb4d4bb3 3482
e5a41ffa 3483float32 float16_to_float32(float16 a, flag ieee, float_status *status)
60011498
PB
3484{
3485 flag aSign;
0c48262d 3486 int aExp;
bb98fe42 3487 uint32_t aSig;
60011498 3488
bb4d4bb3
PM
3489 aSign = extractFloat16Sign(a);
3490 aExp = extractFloat16Exp(a);
3491 aSig = extractFloat16Frac(a);
60011498
PB
3492
3493 if (aExp == 0x1f && ieee) {
3494 if (aSig) {
ff32e16e 3495 return commonNaNToFloat32(float16ToCommonNaN(a, status), status);
60011498 3496 }
4be8eeac 3497 return packFloat32(aSign, 0xff, 0);
60011498
PB
3498 }
3499 if (aExp == 0) {
60011498
PB
3500 if (aSig == 0) {
3501 return packFloat32(aSign, 0, 0);
3502 }
3503
c4a1c5e7
PM
3504 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3505 aExp--;
60011498
PB
3506 }
3507 return packFloat32( aSign, aExp + 0x70, aSig << 13);
3508}
3509
e5a41ffa 3510float16 float32_to_float16(float32 a, flag ieee, float_status *status)
60011498
PB
3511{
3512 flag aSign;
0c48262d 3513 int aExp;
bb98fe42 3514 uint32_t aSig;
38970efa 3515
ff32e16e 3516 a = float32_squash_input_denormal(a, status);
60011498
PB
3517
3518 aSig = extractFloat32Frac( a );
3519 aExp = extractFloat32Exp( a );
3520 aSign = extractFloat32Sign( a );
3521 if ( aExp == 0xFF ) {
3522 if (aSig) {
600e30d2 3523 /* Input is a NaN */
600e30d2 3524 if (!ieee) {
ff32e16e 3525 float_raise(float_flag_invalid, status);
600e30d2
PM
3526 return packFloat16(aSign, 0, 0);
3527 }
38970efa 3528 return commonNaNToFloat16(
ff32e16e 3529 float32ToCommonNaN(a, status), status);
60011498 3530 }
600e30d2
PM
3531 /* Infinity */
3532 if (!ieee) {
ff32e16e 3533 float_raise(float_flag_invalid, status);
600e30d2
PM
3534 return packFloat16(aSign, 0x1f, 0x3ff);
3535 }
3536 return packFloat16(aSign, 0x1f, 0);
60011498 3537 }
600e30d2 3538 if (aExp == 0 && aSig == 0) {
60011498
PB
3539 return packFloat16(aSign, 0, 0);
3540 }
38970efa
PM
3541 /* Decimal point between bits 22 and 23. Note that we add the 1 bit
3542 * even if the input is denormal; however this is harmless because
3543 * the largest possible single-precision denormal is still smaller
3544 * than the smallest representable half-precision denormal, and so we
3545 * will end up ignoring aSig and returning via the "always return zero"
3546 * codepath.
3547 */
60011498 3548 aSig |= 0x00800000;
c4a1c5e7 3549 aExp -= 0x71;
60011498 3550
ff32e16e 3551 return roundAndPackFloat16(aSign, aExp, aSig, ieee, status);
60011498
PB
3552}
3553
e5a41ffa 3554float64 float16_to_float64(float16 a, flag ieee, float_status *status)
14c9a07e
PM
3555{
3556 flag aSign;
0c48262d 3557 int aExp;
14c9a07e
PM
3558 uint32_t aSig;
3559
3560 aSign = extractFloat16Sign(a);
3561 aExp = extractFloat16Exp(a);
3562 aSig = extractFloat16Frac(a);
3563
3564 if (aExp == 0x1f && ieee) {
3565 if (aSig) {
3566 return commonNaNToFloat64(
ff32e16e 3567 float16ToCommonNaN(a, status), status);
14c9a07e
PM
3568 }
3569 return packFloat64(aSign, 0x7ff, 0);
3570 }
3571 if (aExp == 0) {
3572 if (aSig == 0) {
3573 return packFloat64(aSign, 0, 0);
3574 }
3575
3576 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3577 aExp--;
3578 }
3579 return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42);
3580}
3581
e5a41ffa 3582float16 float64_to_float16(float64 a, flag ieee, float_status *status)
14c9a07e
PM
3583{
3584 flag aSign;
0c48262d 3585 int aExp;
14c9a07e
PM
3586 uint64_t aSig;
3587 uint32_t zSig;
3588
ff32e16e 3589 a = float64_squash_input_denormal(a, status);
14c9a07e
PM
3590
3591 aSig = extractFloat64Frac(a);
3592 aExp = extractFloat64Exp(a);
3593 aSign = extractFloat64Sign(a);
3594 if (aExp == 0x7FF) {
3595 if (aSig) {
3596 /* Input is a NaN */
3597 if (!ieee) {
ff32e16e 3598 float_raise(float_flag_invalid, status);
14c9a07e
PM
3599 return packFloat16(aSign, 0, 0);
3600 }
3601 return commonNaNToFloat16(
ff32e16e 3602 float64ToCommonNaN(a, status), status);
14c9a07e
PM
3603 }
3604 /* Infinity */
3605 if (!ieee) {
ff32e16e 3606 float_raise(float_flag_invalid, status);
14c9a07e
PM
3607 return packFloat16(aSign, 0x1f, 0x3ff);
3608 }
3609 return packFloat16(aSign, 0x1f, 0);
3610 }
3611 shift64RightJamming(aSig, 29, &aSig);
3612 zSig = aSig;
3613 if (aExp == 0 && zSig == 0) {
3614 return packFloat16(aSign, 0, 0);
3615 }
3616 /* Decimal point between bits 22 and 23. Note that we add the 1 bit
3617 * even if the input is denormal; however this is harmless because
3618 * the largest possible single-precision denormal is still smaller
3619 * than the smallest representable half-precision denormal, and so we
3620 * will end up ignoring aSig and returning via the "always return zero"
3621 * codepath.
3622 */
3623 zSig |= 0x00800000;
3624 aExp -= 0x3F1;
3625
ff32e16e 3626 return roundAndPackFloat16(aSign, aExp, zSig, ieee, status);
14c9a07e
PM
3627}
3628
158142c2
FB
3629/*----------------------------------------------------------------------------
3630| Returns the result of converting the double-precision floating-point value
3631| `a' to the extended double-precision floating-point format. The conversion
3632| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3633| Arithmetic.
3634*----------------------------------------------------------------------------*/
3635
e5a41ffa 3636floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2
FB
3637{
3638 flag aSign;
0c48262d 3639 int aExp;
bb98fe42 3640 uint64_t aSig;
158142c2 3641
ff32e16e 3642 a = float64_squash_input_denormal(a, status);
158142c2
FB
3643 aSig = extractFloat64Frac( a );
3644 aExp = extractFloat64Exp( a );
3645 aSign = extractFloat64Sign( a );
3646 if ( aExp == 0x7FF ) {
ff32e16e
PM
3647 if (aSig) {
3648 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
3649 }
158142c2
FB
3650 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3651 }
3652 if ( aExp == 0 ) {
3653 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3654 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3655 }
3656 return
3657 packFloatx80(
3658 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
3659
3660}
3661
158142c2
FB
3662/*----------------------------------------------------------------------------
3663| Returns the result of converting the double-precision floating-point value
3664| `a' to the quadruple-precision floating-point format. The conversion is
3665| performed according to the IEC/IEEE Standard for Binary Floating-Point
3666| Arithmetic.
3667*----------------------------------------------------------------------------*/
3668
e5a41ffa 3669float128 float64_to_float128(float64 a, float_status *status)
158142c2
FB
3670{
3671 flag aSign;
0c48262d 3672 int aExp;
bb98fe42 3673 uint64_t aSig, zSig0, zSig1;
158142c2 3674
ff32e16e 3675 a = float64_squash_input_denormal(a, status);
158142c2
FB
3676 aSig = extractFloat64Frac( a );
3677 aExp = extractFloat64Exp( a );
3678 aSign = extractFloat64Sign( a );
3679 if ( aExp == 0x7FF ) {
ff32e16e
PM
3680 if (aSig) {
3681 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
3682 }
158142c2
FB
3683 return packFloat128( aSign, 0x7FFF, 0, 0 );
3684 }
3685 if ( aExp == 0 ) {
3686 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3687 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3688 --aExp;
3689 }
3690 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
3691 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
3692
3693}
3694
158142c2
FB
3695/*----------------------------------------------------------------------------
3696| Rounds the double-precision floating-point value `a' to an integer, and
3697| returns the result as a double-precision floating-point value. The
3698| operation is performed according to the IEC/IEEE Standard for Binary
3699| Floating-Point Arithmetic.
3700*----------------------------------------------------------------------------*/
3701
e5a41ffa 3702float64 float64_round_to_int(float64 a, float_status *status)
158142c2
FB
3703{
3704 flag aSign;
0c48262d 3705 int aExp;
bb98fe42 3706 uint64_t lastBitMask, roundBitsMask;
bb98fe42 3707 uint64_t z;
ff32e16e 3708 a = float64_squash_input_denormal(a, status);
158142c2
FB
3709
3710 aExp = extractFloat64Exp( a );
3711 if ( 0x433 <= aExp ) {
3712 if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
ff32e16e 3713 return propagateFloat64NaN(a, a, status);
158142c2
FB
3714 }
3715 return a;
3716 }
3717 if ( aExp < 0x3FF ) {
bb98fe42 3718 if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a;
a2f2d288 3719 status->float_exception_flags |= float_flag_inexact;
158142c2 3720 aSign = extractFloat64Sign( a );
a2f2d288 3721 switch (status->float_rounding_mode) {
158142c2
FB
3722 case float_round_nearest_even:
3723 if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
3724 return packFloat64( aSign, 0x3FF, 0 );
3725 }
3726 break;
f9288a76
PM
3727 case float_round_ties_away:
3728 if (aExp == 0x3FE) {
3729 return packFloat64(aSign, 0x3ff, 0);
3730 }
3731 break;
158142c2 3732 case float_round_down:
f090c9d4 3733 return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
158142c2 3734 case float_round_up:
f090c9d4
PB
3735 return make_float64(
3736 aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
158142c2
FB
3737 }
3738 return packFloat64( aSign, 0, 0 );
3739 }
3740 lastBitMask = 1;
3741 lastBitMask <<= 0x433 - aExp;
3742 roundBitsMask = lastBitMask - 1;
f090c9d4 3743 z = float64_val(a);
a2f2d288 3744 switch (status->float_rounding_mode) {
dc355b76
PM
3745 case float_round_nearest_even:
3746 z += lastBitMask >> 1;
3747 if ((z & roundBitsMask) == 0) {
3748 z &= ~lastBitMask;
3749 }
3750 break;
f9288a76
PM
3751 case float_round_ties_away:
3752 z += lastBitMask >> 1;
3753 break;
dc355b76
PM
3754 case float_round_to_zero:
3755 break;
3756 case float_round_up:
3757 if (!extractFloat64Sign(make_float64(z))) {
3758 z += roundBitsMask;
3759 }
3760 break;
3761 case float_round_down:
3762 if (extractFloat64Sign(make_float64(z))) {
158142c2
FB
3763 z += roundBitsMask;
3764 }
dc355b76
PM
3765 break;
3766 default:
3767 abort();
158142c2
FB
3768 }
3769 z &= ~ roundBitsMask;
a2f2d288
PM
3770 if (z != float64_val(a)) {
3771 status->float_exception_flags |= float_flag_inexact;
3772 }
f090c9d4 3773 return make_float64(z);
158142c2
FB
3774
3775}
3776
e5a41ffa 3777float64 float64_trunc_to_int(float64 a, float_status *status)
e6e5906b
PB
3778{
3779 int oldmode;
3780 float64 res;
a2f2d288
PM
3781 oldmode = status->float_rounding_mode;
3782 status->float_rounding_mode = float_round_to_zero;
ff32e16e 3783 res = float64_round_to_int(a, status);
a2f2d288 3784 status->float_rounding_mode = oldmode;
e6e5906b
PB
3785 return res;
3786}
3787
158142c2
FB
3788/*----------------------------------------------------------------------------
3789| Returns the result of adding the absolute values of the double-precision
3790| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
3791| before being returned. `zSign' is ignored if the result is a NaN.
3792| The addition is performed according to the IEC/IEEE Standard for Binary
3793| Floating-Point Arithmetic.
3794*----------------------------------------------------------------------------*/
3795
e5a41ffa
PM
3796static float64 addFloat64Sigs(float64 a, float64 b, flag zSign,
3797 float_status *status)
158142c2 3798{
0c48262d 3799 int aExp, bExp, zExp;
bb98fe42 3800 uint64_t aSig, bSig, zSig;
0c48262d 3801 int expDiff;
158142c2
FB
3802
3803 aSig = extractFloat64Frac( a );
3804 aExp = extractFloat64Exp( a );
3805 bSig = extractFloat64Frac( b );
3806 bExp = extractFloat64Exp( b );
3807 expDiff = aExp - bExp;
3808 aSig <<= 9;
3809 bSig <<= 9;
3810 if ( 0 < expDiff ) {
3811 if ( aExp == 0x7FF ) {
ff32e16e
PM
3812 if (aSig) {
3813 return propagateFloat64NaN(a, b, status);
3814 }
158142c2
FB
3815 return a;
3816 }
3817 if ( bExp == 0 ) {
3818 --expDiff;
3819 }
3820 else {
3821 bSig |= LIT64( 0x2000000000000000 );
3822 }
3823 shift64RightJamming( bSig, expDiff, &bSig );
3824 zExp = aExp;
3825 }
3826 else if ( expDiff < 0 ) {
3827 if ( bExp == 0x7FF ) {
ff32e16e
PM
3828 if (bSig) {
3829 return propagateFloat64NaN(a, b, status);
3830 }
158142c2
FB
3831 return packFloat64( zSign, 0x7FF, 0 );
3832 }
3833 if ( aExp == 0 ) {
3834 ++expDiff;
3835 }
3836 else {
3837 aSig |= LIT64( 0x2000000000000000 );
3838 }
3839 shift64RightJamming( aSig, - expDiff, &aSig );
3840 zExp = bExp;
3841 }
3842 else {
3843 if ( aExp == 0x7FF ) {
ff32e16e
PM
3844 if (aSig | bSig) {
3845 return propagateFloat64NaN(a, b, status);
3846 }
158142c2
FB
3847 return a;
3848 }
fe76d976 3849 if ( aExp == 0 ) {
a2f2d288 3850 if (status->flush_to_zero) {
e6afc87f 3851 if (aSig | bSig) {
ff32e16e 3852 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3853 }
3854 return packFloat64(zSign, 0, 0);
3855 }
fe76d976
PB
3856 return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
3857 }
158142c2
FB
3858 zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
3859 zExp = aExp;
3860 goto roundAndPack;
3861 }
3862 aSig |= LIT64( 0x2000000000000000 );
3863 zSig = ( aSig + bSig )<<1;
3864 --zExp;
bb98fe42 3865 if ( (int64_t) zSig < 0 ) {
158142c2
FB
3866 zSig = aSig + bSig;
3867 ++zExp;
3868 }
3869 roundAndPack:
ff32e16e 3870 return roundAndPackFloat64(zSign, zExp, zSig, status);
158142c2
FB
3871
3872}
3873
3874/*----------------------------------------------------------------------------
3875| Returns the result of subtracting the absolute values of the double-
3876| precision floating-point values `a' and `b'. If `zSign' is 1, the
3877| difference is negated before being returned. `zSign' is ignored if the
3878| result is a NaN. The subtraction is performed according to the IEC/IEEE
3879| Standard for Binary Floating-Point Arithmetic.
3880*----------------------------------------------------------------------------*/
3881
e5a41ffa
PM
3882static float64 subFloat64Sigs(float64 a, float64 b, flag zSign,
3883 float_status *status)
158142c2 3884{
0c48262d 3885 int aExp, bExp, zExp;
bb98fe42 3886 uint64_t aSig, bSig, zSig;
0c48262d 3887 int expDiff;
158142c2
FB
3888
3889 aSig = extractFloat64Frac( a );
3890 aExp = extractFloat64Exp( a );
3891 bSig = extractFloat64Frac( b );
3892 bExp = extractFloat64Exp( b );
3893 expDiff = aExp - bExp;
3894 aSig <<= 10;
3895 bSig <<= 10;
3896 if ( 0 < expDiff ) goto aExpBigger;
3897 if ( expDiff < 0 ) goto bExpBigger;
3898 if ( aExp == 0x7FF ) {
ff32e16e
PM
3899 if (aSig | bSig) {
3900 return propagateFloat64NaN(a, b, status);
3901 }
3902 float_raise(float_flag_invalid, status);
af39bc8c 3903 return float64_default_nan(status);
158142c2
FB
3904 }
3905 if ( aExp == 0 ) {
3906 aExp = 1;
3907 bExp = 1;
3908 }
3909 if ( bSig < aSig ) goto aBigger;
3910 if ( aSig < bSig ) goto bBigger;
a2f2d288 3911 return packFloat64(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
3912 bExpBigger:
3913 if ( bExp == 0x7FF ) {
ff32e16e
PM
3914 if (bSig) {
3915 return propagateFloat64NaN(a, b, status);
3916 }
158142c2
FB
3917 return packFloat64( zSign ^ 1, 0x7FF, 0 );
3918 }
3919 if ( aExp == 0 ) {
3920 ++expDiff;
3921 }
3922 else {
3923 aSig |= LIT64( 0x4000000000000000 );
3924 }
3925 shift64RightJamming( aSig, - expDiff, &aSig );
3926 bSig |= LIT64( 0x4000000000000000 );
3927 bBigger:
3928 zSig = bSig - aSig;
3929 zExp = bExp;
3930 zSign ^= 1;
3931 goto normalizeRoundAndPack;
3932 aExpBigger:
3933 if ( aExp == 0x7FF ) {
ff32e16e
PM
3934 if (aSig) {
3935 return propagateFloat64NaN(a, b, status);
3936 }
158142c2
FB
3937 return a;
3938 }
3939 if ( bExp == 0 ) {
3940 --expDiff;
3941 }
3942 else {
3943 bSig |= LIT64( 0x4000000000000000 );
3944 }
3945 shift64RightJamming( bSig, expDiff, &bSig );
3946 aSig |= LIT64( 0x4000000000000000 );
3947 aBigger:
3948 zSig = aSig - bSig;
3949 zExp = aExp;
3950 normalizeRoundAndPack:
3951 --zExp;
ff32e16e 3952 return normalizeRoundAndPackFloat64(zSign, zExp, zSig, status);
158142c2
FB
3953
3954}
3955
3956/*----------------------------------------------------------------------------
3957| Returns the result of adding the double-precision floating-point values `a'
3958| and `b'. The operation is performed according to the IEC/IEEE Standard for
3959| Binary Floating-Point Arithmetic.
3960*----------------------------------------------------------------------------*/
3961
e5a41ffa 3962float64 float64_add(float64 a, float64 b, float_status *status)
158142c2
FB
3963{
3964 flag aSign, bSign;
ff32e16e
PM
3965 a = float64_squash_input_denormal(a, status);
3966 b = float64_squash_input_denormal(b, status);
158142c2
FB
3967
3968 aSign = extractFloat64Sign( a );
3969 bSign = extractFloat64Sign( b );
3970 if ( aSign == bSign ) {
ff32e16e 3971 return addFloat64Sigs(a, b, aSign, status);
158142c2
FB
3972 }
3973 else {
ff32e16e 3974 return subFloat64Sigs(a, b, aSign, status);
158142c2
FB
3975 }
3976
3977}
3978
3979/*----------------------------------------------------------------------------
3980| Returns the result of subtracting the double-precision floating-point values
3981| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
3982| for Binary Floating-Point Arithmetic.
3983*----------------------------------------------------------------------------*/
3984
e5a41ffa 3985float64 float64_sub(float64 a, float64 b, float_status *status)
158142c2
FB
3986{
3987 flag aSign, bSign;
ff32e16e
PM
3988 a = float64_squash_input_denormal(a, status);
3989 b = float64_squash_input_denormal(b, status);
158142c2
FB
3990
3991 aSign = extractFloat64Sign( a );
3992 bSign = extractFloat64Sign( b );
3993 if ( aSign == bSign ) {
ff32e16e 3994 return subFloat64Sigs(a, b, aSign, status);
158142c2
FB
3995 }
3996 else {
ff32e16e 3997 return addFloat64Sigs(a, b, aSign, status);
158142c2
FB
3998 }
3999
4000}
4001
4002/*----------------------------------------------------------------------------
4003| Returns the result of multiplying the double-precision floating-point values
4004| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
4005| for Binary Floating-Point Arithmetic.
4006*----------------------------------------------------------------------------*/
4007
e5a41ffa 4008float64 float64_mul(float64 a, float64 b, float_status *status)
158142c2
FB
4009{
4010 flag aSign, bSign, zSign;
0c48262d 4011 int aExp, bExp, zExp;
bb98fe42 4012 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 4013
ff32e16e
PM
4014 a = float64_squash_input_denormal(a, status);
4015 b = float64_squash_input_denormal(b, status);
37d18660 4016
158142c2
FB
4017 aSig = extractFloat64Frac( a );
4018 aExp = extractFloat64Exp( a );
4019 aSign = extractFloat64Sign( a );
4020 bSig = extractFloat64Frac( b );
4021 bExp = extractFloat64Exp( b );
4022 bSign = extractFloat64Sign( b );
4023 zSign = aSign ^ bSign;
4024 if ( aExp == 0x7FF ) {
4025 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 4026 return propagateFloat64NaN(a, b, status);
158142c2
FB
4027 }
4028 if ( ( bExp | bSig ) == 0 ) {
ff32e16e 4029 float_raise(float_flag_invalid, status);
af39bc8c 4030 return float64_default_nan(status);
158142c2
FB
4031 }
4032 return packFloat64( zSign, 0x7FF, 0 );
4033 }
4034 if ( bExp == 0x7FF ) {
ff32e16e
PM
4035 if (bSig) {
4036 return propagateFloat64NaN(a, b, status);
4037 }
158142c2 4038 if ( ( aExp | aSig ) == 0 ) {
ff32e16e 4039 float_raise(float_flag_invalid, status);
af39bc8c 4040 return float64_default_nan(status);
158142c2
FB
4041 }
4042 return packFloat64( zSign, 0x7FF, 0 );
4043 }
4044 if ( aExp == 0 ) {
4045 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
4046 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4047 }
4048 if ( bExp == 0 ) {
4049 if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
4050 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4051 }
4052 zExp = aExp + bExp - 0x3FF;
4053 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
4054 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4055 mul64To128( aSig, bSig, &zSig0, &zSig1 );
4056 zSig0 |= ( zSig1 != 0 );
bb98fe42 4057 if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
158142c2
FB
4058 zSig0 <<= 1;
4059 --zExp;
4060 }
ff32e16e 4061 return roundAndPackFloat64(zSign, zExp, zSig0, status);
158142c2
FB
4062
4063}
4064
4065/*----------------------------------------------------------------------------
4066| Returns the result of dividing the double-precision floating-point value `a'
4067| by the corresponding value `b'. The operation is performed according to
4068| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4069*----------------------------------------------------------------------------*/
4070
e5a41ffa 4071float64 float64_div(float64 a, float64 b, float_status *status)
158142c2
FB
4072{
4073 flag aSign, bSign, zSign;
0c48262d 4074 int aExp, bExp, zExp;
bb98fe42
AF
4075 uint64_t aSig, bSig, zSig;
4076 uint64_t rem0, rem1;
4077 uint64_t term0, term1;
ff32e16e
PM
4078 a = float64_squash_input_denormal(a, status);
4079 b = float64_squash_input_denormal(b, status);
158142c2
FB
4080
4081 aSig = extractFloat64Frac( a );
4082 aExp = extractFloat64Exp( a );
4083 aSign = extractFloat64Sign( a );
4084 bSig = extractFloat64Frac( b );
4085 bExp = extractFloat64Exp( b );
4086 bSign = extractFloat64Sign( b );
4087 zSign = aSign ^ bSign;
4088 if ( aExp == 0x7FF ) {
ff32e16e
PM
4089 if (aSig) {
4090 return propagateFloat64NaN(a, b, status);
4091 }
158142c2 4092 if ( bExp == 0x7FF ) {
ff32e16e
PM
4093 if (bSig) {
4094 return propagateFloat64NaN(a, b, status);
4095 }
4096 float_raise(float_flag_invalid, status);
af39bc8c 4097 return float64_default_nan(status);
158142c2
FB
4098 }
4099 return packFloat64( zSign, 0x7FF, 0 );
4100 }
4101 if ( bExp == 0x7FF ) {
ff32e16e
PM
4102 if (bSig) {
4103 return propagateFloat64NaN(a, b, status);
4104 }
158142c2
FB
4105 return packFloat64( zSign, 0, 0 );
4106 }
4107 if ( bExp == 0 ) {
4108 if ( bSig == 0 ) {
4109 if ( ( aExp | aSig ) == 0 ) {
ff32e16e 4110 float_raise(float_flag_invalid, status);
af39bc8c 4111 return float64_default_nan(status);
158142c2 4112 }
ff32e16e 4113 float_raise(float_flag_divbyzero, status);
158142c2
FB
4114 return packFloat64( zSign, 0x7FF, 0 );
4115 }
4116 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4117 }
4118 if ( aExp == 0 ) {
4119 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
4120 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4121 }
4122 zExp = aExp - bExp + 0x3FD;
4123 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
4124 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4125 if ( bSig <= ( aSig + aSig ) ) {
4126 aSig >>= 1;
4127 ++zExp;
4128 }
4129 zSig = estimateDiv128To64( aSig, 0, bSig );
4130 if ( ( zSig & 0x1FF ) <= 2 ) {
4131 mul64To128( bSig, zSig, &term0, &term1 );
4132 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
bb98fe42 4133 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
4134 --zSig;
4135 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4136 }
4137 zSig |= ( rem1 != 0 );
4138 }
ff32e16e 4139 return roundAndPackFloat64(zSign, zExp, zSig, status);
158142c2
FB
4140
4141}
4142
4143/*----------------------------------------------------------------------------
4144| Returns the remainder of the double-precision floating-point value `a'
4145| with respect to the corresponding value `b'. The operation is performed
4146| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4147*----------------------------------------------------------------------------*/
4148
e5a41ffa 4149float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 4150{
ed086f3d 4151 flag aSign, zSign;
0c48262d 4152 int aExp, bExp, expDiff;
bb98fe42
AF
4153 uint64_t aSig, bSig;
4154 uint64_t q, alternateASig;
4155 int64_t sigMean;
158142c2 4156
ff32e16e
PM
4157 a = float64_squash_input_denormal(a, status);
4158 b = float64_squash_input_denormal(b, status);
158142c2
FB
4159 aSig = extractFloat64Frac( a );
4160 aExp = extractFloat64Exp( a );
4161 aSign = extractFloat64Sign( a );
4162 bSig = extractFloat64Frac( b );
4163 bExp = extractFloat64Exp( b );
158142c2
FB
4164 if ( aExp == 0x7FF ) {
4165 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 4166 return propagateFloat64NaN(a, b, status);
158142c2 4167 }
ff32e16e 4168 float_raise(float_flag_invalid, status);
af39bc8c 4169 return float64_default_nan(status);
158142c2
FB
4170 }
4171 if ( bExp == 0x7FF ) {
ff32e16e
PM
4172 if (bSig) {
4173 return propagateFloat64NaN(a, b, status);
4174 }
158142c2
FB
4175 return a;
4176 }
4177 if ( bExp == 0 ) {
4178 if ( bSig == 0 ) {
ff32e16e 4179 float_raise(float_flag_invalid, status);
af39bc8c 4180 return float64_default_nan(status);
158142c2
FB
4181 }
4182 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4183 }
4184 if ( aExp == 0 ) {
4185 if ( aSig == 0 ) return a;
4186 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4187 }
4188 expDiff = aExp - bExp;
4189 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
4190 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4191 if ( expDiff < 0 ) {
4192 if ( expDiff < -1 ) return a;
4193 aSig >>= 1;
4194 }
4195 q = ( bSig <= aSig );
4196 if ( q ) aSig -= bSig;
4197 expDiff -= 64;
4198 while ( 0 < expDiff ) {
4199 q = estimateDiv128To64( aSig, 0, bSig );
4200 q = ( 2 < q ) ? q - 2 : 0;
4201 aSig = - ( ( bSig>>2 ) * q );
4202 expDiff -= 62;
4203 }
4204 expDiff += 64;
4205 if ( 0 < expDiff ) {
4206 q = estimateDiv128To64( aSig, 0, bSig );
4207 q = ( 2 < q ) ? q - 2 : 0;
4208 q >>= 64 - expDiff;
4209 bSig >>= 2;
4210 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4211 }
4212 else {
4213 aSig >>= 2;
4214 bSig >>= 2;
4215 }
4216 do {
4217 alternateASig = aSig;
4218 ++q;
4219 aSig -= bSig;
bb98fe42 4220 } while ( 0 <= (int64_t) aSig );
158142c2
FB
4221 sigMean = aSig + alternateASig;
4222 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4223 aSig = alternateASig;
4224 }
bb98fe42 4225 zSign = ( (int64_t) aSig < 0 );
158142c2 4226 if ( zSign ) aSig = - aSig;
ff32e16e 4227 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
4228
4229}
4230
369be8f6
PM
4231/*----------------------------------------------------------------------------
4232| Returns the result of multiplying the double-precision floating-point values
4233| `a' and `b' then adding 'c', with no intermediate rounding step after the
4234| multiplication. The operation is performed according to the IEC/IEEE
4235| Standard for Binary Floating-Point Arithmetic 754-2008.
4236| The flags argument allows the caller to select negation of the
4237| addend, the intermediate product, or the final result. (The difference
4238| between this and having the caller do a separate negation is that negating
4239| externally will flip the sign bit on NaNs.)
4240*----------------------------------------------------------------------------*/
4241
e5a41ffa
PM
4242float64 float64_muladd(float64 a, float64 b, float64 c, int flags,
4243 float_status *status)
369be8f6
PM
4244{
4245 flag aSign, bSign, cSign, zSign;
0c48262d 4246 int aExp, bExp, cExp, pExp, zExp, expDiff;
369be8f6
PM
4247 uint64_t aSig, bSig, cSig;
4248 flag pInf, pZero, pSign;
4249 uint64_t pSig0, pSig1, cSig0, cSig1, zSig0, zSig1;
4250 int shiftcount;
4251 flag signflip, infzero;
4252
ff32e16e
PM
4253 a = float64_squash_input_denormal(a, status);
4254 b = float64_squash_input_denormal(b, status);
4255 c = float64_squash_input_denormal(c, status);
369be8f6
PM
4256 aSig = extractFloat64Frac(a);
4257 aExp = extractFloat64Exp(a);
4258 aSign = extractFloat64Sign(a);
4259 bSig = extractFloat64Frac(b);
4260 bExp = extractFloat64Exp(b);
4261 bSign = extractFloat64Sign(b);
4262 cSig = extractFloat64Frac(c);
4263 cExp = extractFloat64Exp(c);
4264 cSign = extractFloat64Sign(c);
4265
4266 infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) ||
4267 (aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0));
4268
4269 /* It is implementation-defined whether the cases of (0,inf,qnan)
4270 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
4271 * they return if they do), so we have to hand this information
4272 * off to the target-specific pick-a-NaN routine.
4273 */
4274 if (((aExp == 0x7ff) && aSig) ||
4275 ((bExp == 0x7ff) && bSig) ||
4276 ((cExp == 0x7ff) && cSig)) {
ff32e16e 4277 return propagateFloat64MulAddNaN(a, b, c, infzero, status);
369be8f6
PM
4278 }
4279
4280 if (infzero) {
ff32e16e 4281 float_raise(float_flag_invalid, status);
af39bc8c 4282 return float64_default_nan(status);
369be8f6
PM
4283 }
4284
4285 if (flags & float_muladd_negate_c) {
4286 cSign ^= 1;
4287 }
4288
4289 signflip = (flags & float_muladd_negate_result) ? 1 : 0;
4290
4291 /* Work out the sign and type of the product */
4292 pSign = aSign ^ bSign;
4293 if (flags & float_muladd_negate_product) {
4294 pSign ^= 1;
4295 }
4296 pInf = (aExp == 0x7ff) || (bExp == 0x7ff);
4297 pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
4298
4299 if (cExp == 0x7ff) {
4300 if (pInf && (pSign ^ cSign)) {
4301 /* addition of opposite-signed infinities => InvalidOperation */
ff32e16e 4302 float_raise(float_flag_invalid, status);
af39bc8c 4303 return float64_default_nan(status);
369be8f6
PM
4304 }
4305 /* Otherwise generate an infinity of the same sign */
4306 return packFloat64(cSign ^ signflip, 0x7ff, 0);
4307 }
4308
4309 if (pInf) {
4310 return packFloat64(pSign ^ signflip, 0x7ff, 0);
4311 }
4312
4313 if (pZero) {
4314 if (cExp == 0) {
4315 if (cSig == 0) {
4316 /* Adding two exact zeroes */
4317 if (pSign == cSign) {
4318 zSign = pSign;
a2f2d288 4319 } else if (status->float_rounding_mode == float_round_down) {
369be8f6
PM
4320 zSign = 1;
4321 } else {
4322 zSign = 0;
4323 }
4324 return packFloat64(zSign ^ signflip, 0, 0);
4325 }
4326 /* Exact zero plus a denorm */
a2f2d288 4327 if (status->flush_to_zero) {
ff32e16e 4328 float_raise(float_flag_output_denormal, status);
369be8f6
PM
4329 return packFloat64(cSign ^ signflip, 0, 0);
4330 }
4331 }
4332 /* Zero plus something non-zero : just return the something */
67d43538
PM
4333 if (flags & float_muladd_halve_result) {
4334 if (cExp == 0) {
4335 normalizeFloat64Subnormal(cSig, &cExp, &cSig);
4336 }
4337 /* Subtract one to halve, and one again because roundAndPackFloat64
4338 * wants one less than the true exponent.
4339 */
4340 cExp -= 2;
4341 cSig = (cSig | 0x0010000000000000ULL) << 10;
ff32e16e 4342 return roundAndPackFloat64(cSign ^ signflip, cExp, cSig, status);
67d43538 4343 }
a6e7c184 4344 return packFloat64(cSign ^ signflip, cExp, cSig);
369be8f6
PM
4345 }
4346
4347 if (aExp == 0) {
4348 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
4349 }
4350 if (bExp == 0) {
4351 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
4352 }
4353
4354 /* Calculate the actual result a * b + c */
4355
4356 /* Multiply first; this is easy. */
4357 /* NB: we subtract 0x3fe where float64_mul() subtracts 0x3ff
4358 * because we want the true exponent, not the "one-less-than"
4359 * flavour that roundAndPackFloat64() takes.
4360 */
4361 pExp = aExp + bExp - 0x3fe;
4362 aSig = (aSig | LIT64(0x0010000000000000))<<10;
4363 bSig = (bSig | LIT64(0x0010000000000000))<<11;
4364 mul64To128(aSig, bSig, &pSig0, &pSig1);
4365 if ((int64_t)(pSig0 << 1) >= 0) {
4366 shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
4367 pExp--;
4368 }
4369
4370 zSign = pSign ^ signflip;
4371
4372 /* Now [pSig0:pSig1] is the significand of the multiply, with the explicit
4373 * bit in position 126.
4374 */
4375 if (cExp == 0) {
4376 if (!cSig) {
4377 /* Throw out the special case of c being an exact zero now */
4378 shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
67d43538
PM
4379 if (flags & float_muladd_halve_result) {
4380 pExp--;
4381 }
369be8f6 4382 return roundAndPackFloat64(zSign, pExp - 1,
ff32e16e 4383 pSig1, status);
369be8f6
PM
4384 }
4385 normalizeFloat64Subnormal(cSig, &cExp, &cSig);
4386 }
4387
4388 /* Shift cSig and add the explicit bit so [cSig0:cSig1] is the
4389 * significand of the addend, with the explicit bit in position 126.
4390 */
4391 cSig0 = cSig << (126 - 64 - 52);
4392 cSig1 = 0;
4393 cSig0 |= LIT64(0x4000000000000000);
4394 expDiff = pExp - cExp;
4395
4396 if (pSign == cSign) {
4397 /* Addition */
4398 if (expDiff > 0) {
4399 /* scale c to match p */
4400 shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
4401 zExp = pExp;
4402 } else if (expDiff < 0) {
4403 /* scale p to match c */
4404 shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
4405 zExp = cExp;
4406 } else {
4407 /* no scaling needed */
4408 zExp = cExp;
4409 }
4410 /* Add significands and make sure explicit bit ends up in posn 126 */
4411 add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4412 if ((int64_t)zSig0 < 0) {
4413 shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
4414 } else {
4415 zExp--;
4416 }
4417 shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
67d43538
PM
4418 if (flags & float_muladd_halve_result) {
4419 zExp--;
4420 }
ff32e16e 4421 return roundAndPackFloat64(zSign, zExp, zSig1, status);
369be8f6
PM
4422 } else {
4423 /* Subtraction */
4424 if (expDiff > 0) {
4425 shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
4426 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4427 zExp = pExp;
4428 } else if (expDiff < 0) {
4429 shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
4430 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
4431 zExp = cExp;
4432 zSign ^= 1;
4433 } else {
4434 zExp = pExp;
4435 if (lt128(cSig0, cSig1, pSig0, pSig1)) {
4436 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4437 } else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
4438 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
4439 zSign ^= 1;
4440 } else {
4441 /* Exact zero */
4442 zSign = signflip;
a2f2d288 4443 if (status->float_rounding_mode == float_round_down) {
369be8f6
PM
4444 zSign ^= 1;
4445 }
4446 return packFloat64(zSign, 0, 0);
4447 }
4448 }
4449 --zExp;
4450 /* Do the equivalent of normalizeRoundAndPackFloat64() but
4451 * starting with the significand in a pair of uint64_t.
4452 */
4453 if (zSig0) {
4454 shiftcount = countLeadingZeros64(zSig0) - 1;
4455 shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1);
4456 if (zSig1) {
4457 zSig0 |= 1;
4458 }
4459 zExp -= shiftcount;
4460 } else {
e3d142d0
PM
4461 shiftcount = countLeadingZeros64(zSig1);
4462 if (shiftcount == 0) {
4463 zSig0 = (zSig1 >> 1) | (zSig1 & 1);
4464 zExp -= 63;
4465 } else {
4466 shiftcount--;
4467 zSig0 = zSig1 << shiftcount;
4468 zExp -= (shiftcount + 64);
4469 }
369be8f6 4470 }
67d43538
PM
4471 if (flags & float_muladd_halve_result) {
4472 zExp--;
4473 }
ff32e16e 4474 return roundAndPackFloat64(zSign, zExp, zSig0, status);
369be8f6
PM
4475 }
4476}
4477
158142c2
FB
4478/*----------------------------------------------------------------------------
4479| Returns the square root of the double-precision floating-point value `a'.
4480| The operation is performed according to the IEC/IEEE Standard for Binary
4481| Floating-Point Arithmetic.
4482*----------------------------------------------------------------------------*/
4483
e5a41ffa 4484float64 float64_sqrt(float64 a, float_status *status)
158142c2
FB
4485{
4486 flag aSign;
0c48262d 4487 int aExp, zExp;
bb98fe42
AF
4488 uint64_t aSig, zSig, doubleZSig;
4489 uint64_t rem0, rem1, term0, term1;
ff32e16e 4490 a = float64_squash_input_denormal(a, status);
158142c2
FB
4491
4492 aSig = extractFloat64Frac( a );
4493 aExp = extractFloat64Exp( a );
4494 aSign = extractFloat64Sign( a );
4495 if ( aExp == 0x7FF ) {
ff32e16e
PM
4496 if (aSig) {
4497 return propagateFloat64NaN(a, a, status);
4498 }
158142c2 4499 if ( ! aSign ) return a;
ff32e16e 4500 float_raise(float_flag_invalid, status);
af39bc8c 4501 return float64_default_nan(status);
158142c2
FB
4502 }
4503 if ( aSign ) {
4504 if ( ( aExp | aSig ) == 0 ) return a;
ff32e16e 4505 float_raise(float_flag_invalid, status);
af39bc8c 4506 return float64_default_nan(status);
158142c2
FB
4507 }
4508 if ( aExp == 0 ) {
f090c9d4 4509 if ( aSig == 0 ) return float64_zero;
158142c2
FB
4510 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4511 }
4512 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
4513 aSig |= LIT64( 0x0010000000000000 );
4514 zSig = estimateSqrt32( aExp, aSig>>21 );
4515 aSig <<= 9 - ( aExp & 1 );
4516 zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
4517 if ( ( zSig & 0x1FF ) <= 5 ) {
4518 doubleZSig = zSig<<1;
4519 mul64To128( zSig, zSig, &term0, &term1 );
4520 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
bb98fe42 4521 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
4522 --zSig;
4523 doubleZSig -= 2;
4524 add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
4525 }
4526 zSig |= ( ( rem0 | rem1 ) != 0 );
4527 }
ff32e16e 4528 return roundAndPackFloat64(0, zExp, zSig, status);
158142c2
FB
4529
4530}
4531
374dfc33
AJ
4532/*----------------------------------------------------------------------------
4533| Returns the binary log of the double-precision floating-point value `a'.
4534| The operation is performed according to the IEC/IEEE Standard for Binary
4535| Floating-Point Arithmetic.
4536*----------------------------------------------------------------------------*/
e5a41ffa 4537float64 float64_log2(float64 a, float_status *status)
374dfc33
AJ
4538{
4539 flag aSign, zSign;
0c48262d 4540 int aExp;
bb98fe42 4541 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 4542 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
4543
4544 aSig = extractFloat64Frac( a );
4545 aExp = extractFloat64Exp( a );
4546 aSign = extractFloat64Sign( a );
4547
4548 if ( aExp == 0 ) {
4549 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
4550 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4551 }
4552 if ( aSign ) {
ff32e16e 4553 float_raise(float_flag_invalid, status);
af39bc8c 4554 return float64_default_nan(status);
374dfc33
AJ
4555 }
4556 if ( aExp == 0x7FF ) {
ff32e16e
PM
4557 if (aSig) {
4558 return propagateFloat64NaN(a, float64_zero, status);
4559 }
374dfc33
AJ
4560 return a;
4561 }
4562
4563 aExp -= 0x3FF;
4564 aSig |= LIT64( 0x0010000000000000 );
4565 zSign = aExp < 0;
bb98fe42 4566 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
4567 for (i = 1LL << 51; i > 0; i >>= 1) {
4568 mul64To128( aSig, aSig, &aSig0, &aSig1 );
4569 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
4570 if ( aSig & LIT64( 0x0020000000000000 ) ) {
4571 aSig >>= 1;
4572 zSig |= i;
4573 }
4574 }
4575
4576 if ( zSign )
4577 zSig = -zSig;
ff32e16e 4578 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
4579}
4580
158142c2
FB
4581/*----------------------------------------------------------------------------
4582| Returns 1 if the double-precision floating-point value `a' is equal to the
b689362d
AJ
4583| corresponding value `b', and 0 otherwise. The invalid exception is raised
4584| if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
4585| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4586*----------------------------------------------------------------------------*/
4587
e5a41ffa 4588int float64_eq(float64 a, float64 b, float_status *status)
158142c2 4589{
bb98fe42 4590 uint64_t av, bv;
ff32e16e
PM
4591 a = float64_squash_input_denormal(a, status);
4592 b = float64_squash_input_denormal(b, status);
158142c2
FB
4593
4594 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4595 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4596 ) {
ff32e16e 4597 float_raise(float_flag_invalid, status);
158142c2
FB
4598 return 0;
4599 }
f090c9d4 4600 av = float64_val(a);
a1b91bb4 4601 bv = float64_val(b);
bb98fe42 4602 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4603
4604}
4605
4606/*----------------------------------------------------------------------------
4607| Returns 1 if the double-precision floating-point value `a' is less than or
f5a64251
AJ
4608| equal to the corresponding value `b', and 0 otherwise. The invalid
4609| exception is raised if either operand is a NaN. The comparison is performed
4610| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4611*----------------------------------------------------------------------------*/
4612
e5a41ffa 4613int float64_le(float64 a, float64 b, float_status *status)
158142c2
FB
4614{
4615 flag aSign, bSign;
bb98fe42 4616 uint64_t av, bv;
ff32e16e
PM
4617 a = float64_squash_input_denormal(a, status);
4618 b = float64_squash_input_denormal(b, status);
158142c2
FB
4619
4620 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4621 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4622 ) {
ff32e16e 4623 float_raise(float_flag_invalid, status);
158142c2
FB
4624 return 0;
4625 }
4626 aSign = extractFloat64Sign( a );
4627 bSign = extractFloat64Sign( b );
f090c9d4 4628 av = float64_val(a);
a1b91bb4 4629 bv = float64_val(b);
bb98fe42 4630 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4631 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4632
4633}
4634
4635/*----------------------------------------------------------------------------
4636| Returns 1 if the double-precision floating-point value `a' is less than
f5a64251
AJ
4637| the corresponding value `b', and 0 otherwise. The invalid exception is
4638| raised if either operand is a NaN. The comparison is performed according
4639| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4640*----------------------------------------------------------------------------*/
4641
e5a41ffa 4642int float64_lt(float64 a, float64 b, float_status *status)
158142c2
FB
4643{
4644 flag aSign, bSign;
bb98fe42 4645 uint64_t av, bv;
158142c2 4646
ff32e16e
PM
4647 a = float64_squash_input_denormal(a, status);
4648 b = float64_squash_input_denormal(b, status);
158142c2
FB
4649 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4650 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4651 ) {
ff32e16e 4652 float_raise(float_flag_invalid, status);
158142c2
FB
4653 return 0;
4654 }
4655 aSign = extractFloat64Sign( a );
4656 bSign = extractFloat64Sign( b );
f090c9d4 4657 av = float64_val(a);
a1b91bb4 4658 bv = float64_val(b);
bb98fe42 4659 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4660 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4661
4662}
4663
67b7861d
AJ
4664/*----------------------------------------------------------------------------
4665| Returns 1 if the double-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4666| be compared, and 0 otherwise. The invalid exception is raised if either
4667| operand is a NaN. The comparison is performed according to the IEC/IEEE
4668| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4669*----------------------------------------------------------------------------*/
4670
e5a41ffa 4671int float64_unordered(float64 a, float64 b, float_status *status)
67b7861d 4672{
ff32e16e
PM
4673 a = float64_squash_input_denormal(a, status);
4674 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4675
4676 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4677 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4678 ) {
ff32e16e 4679 float_raise(float_flag_invalid, status);
67b7861d
AJ
4680 return 1;
4681 }
4682 return 0;
4683}
4684
158142c2
FB
4685/*----------------------------------------------------------------------------
4686| Returns 1 if the double-precision floating-point value `a' is equal to the
f5a64251
AJ
4687| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4688| exception.The comparison is performed according to the IEC/IEEE Standard
4689| for Binary Floating-Point Arithmetic.
158142c2
FB
4690*----------------------------------------------------------------------------*/
4691
e5a41ffa 4692int float64_eq_quiet(float64 a, float64 b, float_status *status)
158142c2 4693{
bb98fe42 4694 uint64_t av, bv;
ff32e16e
PM
4695 a = float64_squash_input_denormal(a, status);
4696 b = float64_squash_input_denormal(b, status);
158142c2
FB
4697
4698 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4699 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4700 ) {
af39bc8c
AM
4701 if (float64_is_signaling_nan(a, status)
4702 || float64_is_signaling_nan(b, status)) {
ff32e16e 4703 float_raise(float_flag_invalid, status);
b689362d 4704 }
158142c2
FB
4705 return 0;
4706 }
f090c9d4 4707 av = float64_val(a);
a1b91bb4 4708 bv = float64_val(b);
bb98fe42 4709 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4710
4711}
4712
4713/*----------------------------------------------------------------------------
4714| Returns 1 if the double-precision floating-point value `a' is less than or
4715| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4716| cause an exception. Otherwise, the comparison is performed according to the
4717| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4718*----------------------------------------------------------------------------*/
4719
e5a41ffa 4720int float64_le_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4721{
4722 flag aSign, bSign;
bb98fe42 4723 uint64_t av, bv;
ff32e16e
PM
4724 a = float64_squash_input_denormal(a, status);
4725 b = float64_squash_input_denormal(b, status);
158142c2
FB
4726
4727 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4728 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4729 ) {
af39bc8c
AM
4730 if (float64_is_signaling_nan(a, status)
4731 || float64_is_signaling_nan(b, status)) {
ff32e16e 4732 float_raise(float_flag_invalid, status);
158142c2
FB
4733 }
4734 return 0;
4735 }
4736 aSign = extractFloat64Sign( a );
4737 bSign = extractFloat64Sign( b );
f090c9d4 4738 av = float64_val(a);
a1b91bb4 4739 bv = float64_val(b);
bb98fe42 4740 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4741 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4742
4743}
4744
4745/*----------------------------------------------------------------------------
4746| Returns 1 if the double-precision floating-point value `a' is less than
4747| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4748| exception. Otherwise, the comparison is performed according to the IEC/IEEE
4749| Standard for Binary Floating-Point Arithmetic.
4750*----------------------------------------------------------------------------*/
4751
e5a41ffa 4752int float64_lt_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4753{
4754 flag aSign, bSign;
bb98fe42 4755 uint64_t av, bv;
ff32e16e
PM
4756 a = float64_squash_input_denormal(a, status);
4757 b = float64_squash_input_denormal(b, status);
158142c2
FB
4758
4759 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4760 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4761 ) {
af39bc8c
AM
4762 if (float64_is_signaling_nan(a, status)
4763 || float64_is_signaling_nan(b, status)) {
ff32e16e 4764 float_raise(float_flag_invalid, status);
158142c2
FB
4765 }
4766 return 0;
4767 }
4768 aSign = extractFloat64Sign( a );
4769 bSign = extractFloat64Sign( b );
f090c9d4 4770 av = float64_val(a);
a1b91bb4 4771 bv = float64_val(b);
bb98fe42 4772 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4773 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4774
4775}
4776
67b7861d
AJ
4777/*----------------------------------------------------------------------------
4778| Returns 1 if the double-precision floating-point values `a' and `b' cannot
4779| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4780| comparison is performed according to the IEC/IEEE Standard for Binary
4781| Floating-Point Arithmetic.
4782*----------------------------------------------------------------------------*/
4783
e5a41ffa 4784int float64_unordered_quiet(float64 a, float64 b, float_status *status)
67b7861d 4785{
ff32e16e
PM
4786 a = float64_squash_input_denormal(a, status);
4787 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4788
4789 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4790 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4791 ) {
af39bc8c
AM
4792 if (float64_is_signaling_nan(a, status)
4793 || float64_is_signaling_nan(b, status)) {
ff32e16e 4794 float_raise(float_flag_invalid, status);
67b7861d
AJ
4795 }
4796 return 1;
4797 }
4798 return 0;
4799}
4800
158142c2
FB
4801/*----------------------------------------------------------------------------
4802| Returns the result of converting the extended double-precision floating-
4803| point value `a' to the 32-bit two's complement integer format. The
4804| conversion is performed according to the IEC/IEEE Standard for Binary
4805| Floating-Point Arithmetic---which means in particular that the conversion
4806| is rounded according to the current rounding mode. If `a' is a NaN, the
4807| largest positive integer is returned. Otherwise, if the conversion
4808| overflows, the largest integer with the same sign as `a' is returned.
4809*----------------------------------------------------------------------------*/
4810
f4014512 4811int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2
FB
4812{
4813 flag aSign;
f4014512 4814 int32_t aExp, shiftCount;
bb98fe42 4815 uint64_t aSig;
158142c2 4816
d1eb8f2a
AD
4817 if (floatx80_invalid_encoding(a)) {
4818 float_raise(float_flag_invalid, status);
4819 return 1 << 31;
4820 }
158142c2
FB
4821 aSig = extractFloatx80Frac( a );
4822 aExp = extractFloatx80Exp( a );
4823 aSign = extractFloatx80Sign( a );
bb98fe42 4824 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4825 shiftCount = 0x4037 - aExp;
4826 if ( shiftCount <= 0 ) shiftCount = 1;
4827 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 4828 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
4829
4830}
4831
4832/*----------------------------------------------------------------------------
4833| Returns the result of converting the extended double-precision floating-
4834| point value `a' to the 32-bit two's complement integer format. The
4835| conversion is performed according to the IEC/IEEE Standard for Binary
4836| Floating-Point Arithmetic, except that the conversion is always rounded
4837| toward zero. If `a' is a NaN, the largest positive integer is returned.
4838| Otherwise, if the conversion overflows, the largest integer with the same
4839| sign as `a' is returned.
4840*----------------------------------------------------------------------------*/
4841
f4014512 4842int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4843{
4844 flag aSign;
f4014512 4845 int32_t aExp, shiftCount;
bb98fe42 4846 uint64_t aSig, savedASig;
b3a6a2e0 4847 int32_t z;
158142c2 4848
d1eb8f2a
AD
4849 if (floatx80_invalid_encoding(a)) {
4850 float_raise(float_flag_invalid, status);
4851 return 1 << 31;
4852 }
158142c2
FB
4853 aSig = extractFloatx80Frac( a );
4854 aExp = extractFloatx80Exp( a );
4855 aSign = extractFloatx80Sign( a );
4856 if ( 0x401E < aExp ) {
bb98fe42 4857 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4858 goto invalid;
4859 }
4860 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4861 if (aExp || aSig) {
4862 status->float_exception_flags |= float_flag_inexact;
4863 }
158142c2
FB
4864 return 0;
4865 }
4866 shiftCount = 0x403E - aExp;
4867 savedASig = aSig;
4868 aSig >>= shiftCount;
4869 z = aSig;
4870 if ( aSign ) z = - z;
4871 if ( ( z < 0 ) ^ aSign ) {
4872 invalid:
ff32e16e 4873 float_raise(float_flag_invalid, status);
bb98fe42 4874 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
4875 }
4876 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 4877 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4878 }
4879 return z;
4880
4881}
4882
4883/*----------------------------------------------------------------------------
4884| Returns the result of converting the extended double-precision floating-
4885| point value `a' to the 64-bit two's complement integer format. The
4886| conversion is performed according to the IEC/IEEE Standard for Binary
4887| Floating-Point Arithmetic---which means in particular that the conversion
4888| is rounded according to the current rounding mode. If `a' is a NaN,
4889| the largest positive integer is returned. Otherwise, if the conversion
4890| overflows, the largest integer with the same sign as `a' is returned.
4891*----------------------------------------------------------------------------*/
4892
f42c2224 4893int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2
FB
4894{
4895 flag aSign;
f4014512 4896 int32_t aExp, shiftCount;
bb98fe42 4897 uint64_t aSig, aSigExtra;
158142c2 4898
d1eb8f2a
AD
4899 if (floatx80_invalid_encoding(a)) {
4900 float_raise(float_flag_invalid, status);
4901 return 1ULL << 63;
4902 }
158142c2
FB
4903 aSig = extractFloatx80Frac( a );
4904 aExp = extractFloatx80Exp( a );
4905 aSign = extractFloatx80Sign( a );
4906 shiftCount = 0x403E - aExp;
4907 if ( shiftCount <= 0 ) {
4908 if ( shiftCount ) {
ff32e16e 4909 float_raise(float_flag_invalid, status);
158142c2
FB
4910 if ( ! aSign
4911 || ( ( aExp == 0x7FFF )
4912 && ( aSig != LIT64( 0x8000000000000000 ) ) )
4913 ) {
4914 return LIT64( 0x7FFFFFFFFFFFFFFF );
4915 }
bb98fe42 4916 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4917 }
4918 aSigExtra = 0;
4919 }
4920 else {
4921 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4922 }
ff32e16e 4923 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
4924
4925}
4926
4927/*----------------------------------------------------------------------------
4928| Returns the result of converting the extended double-precision floating-
4929| point value `a' to the 64-bit two's complement integer format. The
4930| conversion is performed according to the IEC/IEEE Standard for Binary
4931| Floating-Point Arithmetic, except that the conversion is always rounded
4932| toward zero. If `a' is a NaN, the largest positive integer is returned.
4933| Otherwise, if the conversion overflows, the largest integer with the same
4934| sign as `a' is returned.
4935*----------------------------------------------------------------------------*/
4936
f42c2224 4937int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4938{
4939 flag aSign;
f4014512 4940 int32_t aExp, shiftCount;
bb98fe42 4941 uint64_t aSig;
f42c2224 4942 int64_t z;
158142c2 4943
d1eb8f2a
AD
4944 if (floatx80_invalid_encoding(a)) {
4945 float_raise(float_flag_invalid, status);
4946 return 1ULL << 63;
4947 }
158142c2
FB
4948 aSig = extractFloatx80Frac( a );
4949 aExp = extractFloatx80Exp( a );
4950 aSign = extractFloatx80Sign( a );
4951 shiftCount = aExp - 0x403E;
4952 if ( 0 <= shiftCount ) {
4953 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4954 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 4955 float_raise(float_flag_invalid, status);
158142c2
FB
4956 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4957 return LIT64( 0x7FFFFFFFFFFFFFFF );
4958 }
4959 }
bb98fe42 4960 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4961 }
4962 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4963 if (aExp | aSig) {
4964 status->float_exception_flags |= float_flag_inexact;
4965 }
158142c2
FB
4966 return 0;
4967 }
4968 z = aSig>>( - shiftCount );
bb98fe42 4969 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 4970 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4971 }
4972 if ( aSign ) z = - z;
4973 return z;
4974
4975}
4976
4977/*----------------------------------------------------------------------------
4978| Returns the result of converting the extended double-precision floating-
4979| point value `a' to the single-precision floating-point format. The
4980| conversion is performed according to the IEC/IEEE Standard for Binary
4981| Floating-Point Arithmetic.
4982*----------------------------------------------------------------------------*/
4983
e5a41ffa 4984float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2
FB
4985{
4986 flag aSign;
f4014512 4987 int32_t aExp;
bb98fe42 4988 uint64_t aSig;
158142c2 4989
d1eb8f2a
AD
4990 if (floatx80_invalid_encoding(a)) {
4991 float_raise(float_flag_invalid, status);
4992 return float32_default_nan(status);
4993 }
158142c2
FB
4994 aSig = extractFloatx80Frac( a );
4995 aExp = extractFloatx80Exp( a );
4996 aSign = extractFloatx80Sign( a );
4997 if ( aExp == 0x7FFF ) {
bb98fe42 4998 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4999 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
158142c2
FB
5000 }
5001 return packFloat32( aSign, 0xFF, 0 );
5002 }
5003 shift64RightJamming( aSig, 33, &aSig );
5004 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5005 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5006
5007}
5008
5009/*----------------------------------------------------------------------------
5010| Returns the result of converting the extended double-precision floating-
5011| point value `a' to the double-precision floating-point format. The
5012| conversion is performed according to the IEC/IEEE Standard for Binary
5013| Floating-Point Arithmetic.
5014*----------------------------------------------------------------------------*/
5015
e5a41ffa 5016float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2
FB
5017{
5018 flag aSign;
f4014512 5019 int32_t aExp;
bb98fe42 5020 uint64_t aSig, zSig;
158142c2 5021
d1eb8f2a
AD
5022 if (floatx80_invalid_encoding(a)) {
5023 float_raise(float_flag_invalid, status);
5024 return float64_default_nan(status);
5025 }
158142c2
FB
5026 aSig = extractFloatx80Frac( a );
5027 aExp = extractFloatx80Exp( a );
5028 aSign = extractFloatx80Sign( a );
5029 if ( aExp == 0x7FFF ) {
bb98fe42 5030 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 5031 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
158142c2
FB
5032 }
5033 return packFloat64( aSign, 0x7FF, 0 );
5034 }
5035 shift64RightJamming( aSig, 1, &zSig );
5036 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5037 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5038
5039}
5040
158142c2
FB
5041/*----------------------------------------------------------------------------
5042| Returns the result of converting the extended double-precision floating-
5043| point value `a' to the quadruple-precision floating-point format. The
5044| conversion is performed according to the IEC/IEEE Standard for Binary
5045| Floating-Point Arithmetic.
5046*----------------------------------------------------------------------------*/
5047
e5a41ffa 5048float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2
FB
5049{
5050 flag aSign;
0c48262d 5051 int aExp;
bb98fe42 5052 uint64_t aSig, zSig0, zSig1;
158142c2 5053
d1eb8f2a
AD
5054 if (floatx80_invalid_encoding(a)) {
5055 float_raise(float_flag_invalid, status);
5056 return float128_default_nan(status);
5057 }
158142c2
FB
5058 aSig = extractFloatx80Frac( a );
5059 aExp = extractFloatx80Exp( a );
5060 aSign = extractFloatx80Sign( a );
bb98fe42 5061 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
ff32e16e 5062 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
158142c2
FB
5063 }
5064 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5065 return packFloat128( aSign, aExp, zSig0, zSig1 );
5066
5067}
5068
158142c2
FB
5069/*----------------------------------------------------------------------------
5070| Rounds the extended double-precision floating-point value `a' to an integer,
5071| and returns the result as an extended quadruple-precision floating-point
5072| value. The operation is performed according to the IEC/IEEE Standard for
5073| Binary Floating-Point Arithmetic.
5074*----------------------------------------------------------------------------*/
5075
e5a41ffa 5076floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2
FB
5077{
5078 flag aSign;
f4014512 5079 int32_t aExp;
bb98fe42 5080 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5081 floatx80 z;
5082
d1eb8f2a
AD
5083 if (floatx80_invalid_encoding(a)) {
5084 float_raise(float_flag_invalid, status);
5085 return floatx80_default_nan(status);
5086 }
158142c2
FB
5087 aExp = extractFloatx80Exp( a );
5088 if ( 0x403E <= aExp ) {
bb98fe42 5089 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5090 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5091 }
5092 return a;
5093 }
5094 if ( aExp < 0x3FFF ) {
5095 if ( ( aExp == 0 )
bb98fe42 5096 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
158142c2
FB
5097 return a;
5098 }
a2f2d288 5099 status->float_exception_flags |= float_flag_inexact;
158142c2 5100 aSign = extractFloatx80Sign( a );
a2f2d288 5101 switch (status->float_rounding_mode) {
158142c2 5102 case float_round_nearest_even:
bb98fe42 5103 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5104 ) {
5105 return
5106 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
5107 }
5108 break;
f9288a76
PM
5109 case float_round_ties_away:
5110 if (aExp == 0x3FFE) {
5111 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
5112 }
5113 break;
158142c2
FB
5114 case float_round_down:
5115 return
5116 aSign ?
5117 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
5118 : packFloatx80( 0, 0, 0 );
5119 case float_round_up:
5120 return
5121 aSign ? packFloatx80( 1, 0, 0 )
5122 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
5123 }
5124 return packFloatx80( aSign, 0, 0 );
5125 }
5126 lastBitMask = 1;
5127 lastBitMask <<= 0x403E - aExp;
5128 roundBitsMask = lastBitMask - 1;
5129 z = a;
a2f2d288 5130 switch (status->float_rounding_mode) {
dc355b76 5131 case float_round_nearest_even:
158142c2 5132 z.low += lastBitMask>>1;
dc355b76
PM
5133 if ((z.low & roundBitsMask) == 0) {
5134 z.low &= ~lastBitMask;
5135 }
5136 break;
f9288a76
PM
5137 case float_round_ties_away:
5138 z.low += lastBitMask >> 1;
5139 break;
dc355b76
PM
5140 case float_round_to_zero:
5141 break;
5142 case float_round_up:
5143 if (!extractFloatx80Sign(z)) {
5144 z.low += roundBitsMask;
5145 }
5146 break;
5147 case float_round_down:
5148 if (extractFloatx80Sign(z)) {
158142c2
FB
5149 z.low += roundBitsMask;
5150 }
dc355b76
PM
5151 break;
5152 default:
5153 abort();
158142c2
FB
5154 }
5155 z.low &= ~ roundBitsMask;
5156 if ( z.low == 0 ) {
5157 ++z.high;
5158 z.low = LIT64( 0x8000000000000000 );
5159 }
a2f2d288
PM
5160 if (z.low != a.low) {
5161 status->float_exception_flags |= float_flag_inexact;
5162 }
158142c2
FB
5163 return z;
5164
5165}
5166
5167/*----------------------------------------------------------------------------
5168| Returns the result of adding the absolute values of the extended double-
5169| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5170| negated before being returned. `zSign' is ignored if the result is a NaN.
5171| The addition is performed according to the IEC/IEEE Standard for Binary
5172| Floating-Point Arithmetic.
5173*----------------------------------------------------------------------------*/
5174
e5a41ffa
PM
5175static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5176 float_status *status)
158142c2 5177{
f4014512 5178 int32_t aExp, bExp, zExp;
bb98fe42 5179 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5180 int32_t expDiff;
158142c2
FB
5181
5182 aSig = extractFloatx80Frac( a );
5183 aExp = extractFloatx80Exp( a );
5184 bSig = extractFloatx80Frac( b );
5185 bExp = extractFloatx80Exp( b );
5186 expDiff = aExp - bExp;
5187 if ( 0 < expDiff ) {
5188 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5189 if ((uint64_t)(aSig << 1)) {
5190 return propagateFloatx80NaN(a, b, status);
5191 }
158142c2
FB
5192 return a;
5193 }
5194 if ( bExp == 0 ) --expDiff;
5195 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5196 zExp = aExp;
5197 }
5198 else if ( expDiff < 0 ) {
5199 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5200 if ((uint64_t)(bSig << 1)) {
5201 return propagateFloatx80NaN(a, b, status);
5202 }
158142c2
FB
5203 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5204 }
5205 if ( aExp == 0 ) ++expDiff;
5206 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5207 zExp = bExp;
5208 }
5209 else {
5210 if ( aExp == 0x7FFF ) {
bb98fe42 5211 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5212 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5213 }
5214 return a;
5215 }
5216 zSig1 = 0;
5217 zSig0 = aSig + bSig;
5218 if ( aExp == 0 ) {
5219 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5220 goto roundAndPack;
5221 }
5222 zExp = aExp;
5223 goto shiftRight1;
5224 }
5225 zSig0 = aSig + bSig;
bb98fe42 5226 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5227 shiftRight1:
5228 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
5229 zSig0 |= LIT64( 0x8000000000000000 );
5230 ++zExp;
5231 roundAndPack:
a2f2d288 5232 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5233 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5234}
5235
5236/*----------------------------------------------------------------------------
5237| Returns the result of subtracting the absolute values of the extended
5238| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5239| difference is negated before being returned. `zSign' is ignored if the
5240| result is a NaN. The subtraction is performed according to the IEC/IEEE
5241| Standard for Binary Floating-Point Arithmetic.
5242*----------------------------------------------------------------------------*/
5243
e5a41ffa
PM
5244static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5245 float_status *status)
158142c2 5246{
f4014512 5247 int32_t aExp, bExp, zExp;
bb98fe42 5248 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5249 int32_t expDiff;
158142c2
FB
5250
5251 aSig = extractFloatx80Frac( a );
5252 aExp = extractFloatx80Exp( a );
5253 bSig = extractFloatx80Frac( b );
5254 bExp = extractFloatx80Exp( b );
5255 expDiff = aExp - bExp;
5256 if ( 0 < expDiff ) goto aExpBigger;
5257 if ( expDiff < 0 ) goto bExpBigger;
5258 if ( aExp == 0x7FFF ) {
bb98fe42 5259 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5260 return propagateFloatx80NaN(a, b, status);
158142c2 5261 }
ff32e16e 5262 float_raise(float_flag_invalid, status);
af39bc8c 5263 return floatx80_default_nan(status);
158142c2
FB
5264 }
5265 if ( aExp == 0 ) {
5266 aExp = 1;
5267 bExp = 1;
5268 }
5269 zSig1 = 0;
5270 if ( bSig < aSig ) goto aBigger;
5271 if ( aSig < bSig ) goto bBigger;
a2f2d288 5272 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5273 bExpBigger:
5274 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5275 if ((uint64_t)(bSig << 1)) {
5276 return propagateFloatx80NaN(a, b, status);
5277 }
158142c2
FB
5278 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
5279 }
5280 if ( aExp == 0 ) ++expDiff;
5281 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5282 bBigger:
5283 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5284 zExp = bExp;
5285 zSign ^= 1;
5286 goto normalizeRoundAndPack;
5287 aExpBigger:
5288 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5289 if ((uint64_t)(aSig << 1)) {
5290 return propagateFloatx80NaN(a, b, status);
5291 }
158142c2
FB
5292 return a;
5293 }
5294 if ( bExp == 0 ) --expDiff;
5295 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5296 aBigger:
5297 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5298 zExp = aExp;
5299 normalizeRoundAndPack:
a2f2d288 5300 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5301 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5302}
5303
5304/*----------------------------------------------------------------------------
5305| Returns the result of adding the extended double-precision floating-point
5306| values `a' and `b'. The operation is performed according to the IEC/IEEE
5307| Standard for Binary Floating-Point Arithmetic.
5308*----------------------------------------------------------------------------*/
5309
e5a41ffa 5310floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5311{
5312 flag aSign, bSign;
5313
d1eb8f2a
AD
5314 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5315 float_raise(float_flag_invalid, status);
5316 return floatx80_default_nan(status);
5317 }
158142c2
FB
5318 aSign = extractFloatx80Sign( a );
5319 bSign = extractFloatx80Sign( b );
5320 if ( aSign == bSign ) {
ff32e16e 5321 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5322 }
5323 else {
ff32e16e 5324 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5325 }
5326
5327}
5328
5329/*----------------------------------------------------------------------------
5330| Returns the result of subtracting the extended double-precision floating-
5331| point values `a' and `b'. The operation is performed according to the
5332| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5333*----------------------------------------------------------------------------*/
5334
e5a41ffa 5335floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5336{
5337 flag aSign, bSign;
5338
d1eb8f2a
AD
5339 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5340 float_raise(float_flag_invalid, status);
5341 return floatx80_default_nan(status);
5342 }
158142c2
FB
5343 aSign = extractFloatx80Sign( a );
5344 bSign = extractFloatx80Sign( b );
5345 if ( aSign == bSign ) {
ff32e16e 5346 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5347 }
5348 else {
ff32e16e 5349 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5350 }
5351
5352}
5353
5354/*----------------------------------------------------------------------------
5355| Returns the result of multiplying the extended double-precision floating-
5356| point values `a' and `b'. The operation is performed according to the
5357| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5358*----------------------------------------------------------------------------*/
5359
e5a41ffa 5360floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5361{
5362 flag aSign, bSign, zSign;
f4014512 5363 int32_t aExp, bExp, zExp;
bb98fe42 5364 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 5365
d1eb8f2a
AD
5366 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5367 float_raise(float_flag_invalid, status);
5368 return floatx80_default_nan(status);
5369 }
158142c2
FB
5370 aSig = extractFloatx80Frac( a );
5371 aExp = extractFloatx80Exp( a );
5372 aSign = extractFloatx80Sign( a );
5373 bSig = extractFloatx80Frac( b );
5374 bExp = extractFloatx80Exp( b );
5375 bSign = extractFloatx80Sign( b );
5376 zSign = aSign ^ bSign;
5377 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5378 if ( (uint64_t) ( aSig<<1 )
5379 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5380 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5381 }
5382 if ( ( bExp | bSig ) == 0 ) goto invalid;
5383 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5384 }
5385 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5386 if ((uint64_t)(bSig << 1)) {
5387 return propagateFloatx80NaN(a, b, status);
5388 }
158142c2
FB
5389 if ( ( aExp | aSig ) == 0 ) {
5390 invalid:
ff32e16e 5391 float_raise(float_flag_invalid, status);
af39bc8c 5392 return floatx80_default_nan(status);
158142c2
FB
5393 }
5394 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5395 }
5396 if ( aExp == 0 ) {
5397 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5398 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5399 }
5400 if ( bExp == 0 ) {
5401 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5402 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5403 }
5404 zExp = aExp + bExp - 0x3FFE;
5405 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 5406 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
5407 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5408 --zExp;
5409 }
a2f2d288 5410 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5411 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5412}
5413
5414/*----------------------------------------------------------------------------
5415| Returns the result of dividing the extended double-precision floating-point
5416| value `a' by the corresponding value `b'. The operation is performed
5417| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5418*----------------------------------------------------------------------------*/
5419
e5a41ffa 5420floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5421{
5422 flag aSign, bSign, zSign;
f4014512 5423 int32_t aExp, bExp, zExp;
bb98fe42
AF
5424 uint64_t aSig, bSig, zSig0, zSig1;
5425 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 5426
d1eb8f2a
AD
5427 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5428 float_raise(float_flag_invalid, status);
5429 return floatx80_default_nan(status);
5430 }
158142c2
FB
5431 aSig = extractFloatx80Frac( a );
5432 aExp = extractFloatx80Exp( a );
5433 aSign = extractFloatx80Sign( a );
5434 bSig = extractFloatx80Frac( b );
5435 bExp = extractFloatx80Exp( b );
5436 bSign = extractFloatx80Sign( b );
5437 zSign = aSign ^ bSign;
5438 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5439 if ((uint64_t)(aSig << 1)) {
5440 return propagateFloatx80NaN(a, b, status);
5441 }
158142c2 5442 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5443 if ((uint64_t)(bSig << 1)) {
5444 return propagateFloatx80NaN(a, b, status);
5445 }
158142c2
FB
5446 goto invalid;
5447 }
5448 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5449 }
5450 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5451 if ((uint64_t)(bSig << 1)) {
5452 return propagateFloatx80NaN(a, b, status);
5453 }
158142c2
FB
5454 return packFloatx80( zSign, 0, 0 );
5455 }
5456 if ( bExp == 0 ) {
5457 if ( bSig == 0 ) {
5458 if ( ( aExp | aSig ) == 0 ) {
5459 invalid:
ff32e16e 5460 float_raise(float_flag_invalid, status);
af39bc8c 5461 return floatx80_default_nan(status);
158142c2 5462 }
ff32e16e 5463 float_raise(float_flag_divbyzero, status);
158142c2
FB
5464 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5465 }
5466 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5467 }
5468 if ( aExp == 0 ) {
5469 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5470 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5471 }
5472 zExp = aExp - bExp + 0x3FFE;
5473 rem1 = 0;
5474 if ( bSig <= aSig ) {
5475 shift128Right( aSig, 0, 1, &aSig, &rem1 );
5476 ++zExp;
5477 }
5478 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
5479 mul64To128( bSig, zSig0, &term0, &term1 );
5480 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 5481 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5482 --zSig0;
5483 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
5484 }
5485 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 5486 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
5487 mul64To128( bSig, zSig1, &term1, &term2 );
5488 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 5489 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5490 --zSig1;
5491 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
5492 }
5493 zSig1 |= ( ( rem1 | rem2 ) != 0 );
5494 }
a2f2d288 5495 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5496 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5497}
5498
5499/*----------------------------------------------------------------------------
5500| Returns the remainder of the extended double-precision floating-point value
5501| `a' with respect to the corresponding value `b'. The operation is performed
5502| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5503*----------------------------------------------------------------------------*/
5504
e5a41ffa 5505floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
158142c2 5506{
ed086f3d 5507 flag aSign, zSign;
f4014512 5508 int32_t aExp, bExp, expDiff;
bb98fe42
AF
5509 uint64_t aSig0, aSig1, bSig;
5510 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 5511
d1eb8f2a
AD
5512 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5513 float_raise(float_flag_invalid, status);
5514 return floatx80_default_nan(status);
5515 }
158142c2
FB
5516 aSig0 = extractFloatx80Frac( a );
5517 aExp = extractFloatx80Exp( a );
5518 aSign = extractFloatx80Sign( a );
5519 bSig = extractFloatx80Frac( b );
5520 bExp = extractFloatx80Exp( b );
158142c2 5521 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5522 if ( (uint64_t) ( aSig0<<1 )
5523 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5524 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5525 }
5526 goto invalid;
5527 }
5528 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5529 if ((uint64_t)(bSig << 1)) {
5530 return propagateFloatx80NaN(a, b, status);
5531 }
158142c2
FB
5532 return a;
5533 }
5534 if ( bExp == 0 ) {
5535 if ( bSig == 0 ) {
5536 invalid:
ff32e16e 5537 float_raise(float_flag_invalid, status);
af39bc8c 5538 return floatx80_default_nan(status);
158142c2
FB
5539 }
5540 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5541 }
5542 if ( aExp == 0 ) {
bb98fe42 5543 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
158142c2
FB
5544 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5545 }
5546 bSig |= LIT64( 0x8000000000000000 );
5547 zSign = aSign;
5548 expDiff = aExp - bExp;
5549 aSig1 = 0;
5550 if ( expDiff < 0 ) {
5551 if ( expDiff < -1 ) return a;
5552 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5553 expDiff = 0;
5554 }
5555 q = ( bSig <= aSig0 );
5556 if ( q ) aSig0 -= bSig;
5557 expDiff -= 64;
5558 while ( 0 < expDiff ) {
5559 q = estimateDiv128To64( aSig0, aSig1, bSig );
5560 q = ( 2 < q ) ? q - 2 : 0;
5561 mul64To128( bSig, q, &term0, &term1 );
5562 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5563 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5564 expDiff -= 62;
5565 }
5566 expDiff += 64;
5567 if ( 0 < expDiff ) {
5568 q = estimateDiv128To64( aSig0, aSig1, bSig );
5569 q = ( 2 < q ) ? q - 2 : 0;
5570 q >>= 64 - expDiff;
5571 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5572 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5573 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5574 while ( le128( term0, term1, aSig0, aSig1 ) ) {
5575 ++q;
5576 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5577 }
5578 }
5579 else {
5580 term1 = 0;
5581 term0 = bSig;
5582 }
5583 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
5584 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
5585 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
5586 && ( q & 1 ) )
5587 ) {
5588 aSig0 = alternateASig0;
5589 aSig1 = alternateASig1;
5590 zSign = ! zSign;
5591 }
5592 return
5593 normalizeRoundAndPackFloatx80(
ff32e16e 5594 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
5595
5596}
5597
5598/*----------------------------------------------------------------------------
5599| Returns the square root of the extended double-precision floating-point
5600| value `a'. The operation is performed according to the IEC/IEEE Standard
5601| for Binary Floating-Point Arithmetic.
5602*----------------------------------------------------------------------------*/
5603
e5a41ffa 5604floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2
FB
5605{
5606 flag aSign;
f4014512 5607 int32_t aExp, zExp;
bb98fe42
AF
5608 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5609 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 5610
d1eb8f2a
AD
5611 if (floatx80_invalid_encoding(a)) {
5612 float_raise(float_flag_invalid, status);
5613 return floatx80_default_nan(status);
5614 }
158142c2
FB
5615 aSig0 = extractFloatx80Frac( a );
5616 aExp = extractFloatx80Exp( a );
5617 aSign = extractFloatx80Sign( a );
5618 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5619 if ((uint64_t)(aSig0 << 1)) {
5620 return propagateFloatx80NaN(a, a, status);
5621 }
158142c2
FB
5622 if ( ! aSign ) return a;
5623 goto invalid;
5624 }
5625 if ( aSign ) {
5626 if ( ( aExp | aSig0 ) == 0 ) return a;
5627 invalid:
ff32e16e 5628 float_raise(float_flag_invalid, status);
af39bc8c 5629 return floatx80_default_nan(status);
158142c2
FB
5630 }
5631 if ( aExp == 0 ) {
5632 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5633 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5634 }
5635 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5636 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5637 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5638 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5639 doubleZSig0 = zSig0<<1;
5640 mul64To128( zSig0, zSig0, &term0, &term1 );
5641 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 5642 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5643 --zSig0;
5644 doubleZSig0 -= 2;
5645 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5646 }
5647 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5648 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5649 if ( zSig1 == 0 ) zSig1 = 1;
5650 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5651 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5652 mul64To128( zSig1, zSig1, &term2, &term3 );
5653 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 5654 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5655 --zSig1;
5656 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5657 term3 |= 1;
5658 term2 |= doubleZSig0;
5659 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5660 }
5661 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5662 }
5663 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5664 zSig0 |= doubleZSig0;
a2f2d288
PM
5665 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5666 0, zExp, zSig0, zSig1, status);
158142c2
FB
5667}
5668
5669/*----------------------------------------------------------------------------
b689362d
AJ
5670| Returns 1 if the extended double-precision floating-point value `a' is equal
5671| to the corresponding value `b', and 0 otherwise. The invalid exception is
5672| raised if either operand is a NaN. Otherwise, the comparison is performed
5673| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5674*----------------------------------------------------------------------------*/
5675
e5a41ffa 5676int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5677{
5678
d1eb8f2a
AD
5679 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5680 || (extractFloatx80Exp(a) == 0x7FFF
5681 && (uint64_t) (extractFloatx80Frac(a) << 1))
5682 || (extractFloatx80Exp(b) == 0x7FFF
5683 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5684 ) {
ff32e16e 5685 float_raise(float_flag_invalid, status);
158142c2
FB
5686 return 0;
5687 }
5688 return
5689 ( a.low == b.low )
5690 && ( ( a.high == b.high )
5691 || ( ( a.low == 0 )
bb98fe42 5692 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5693 );
5694
5695}
5696
5697/*----------------------------------------------------------------------------
5698| Returns 1 if the extended double-precision floating-point value `a' is
5699| less than or equal to the corresponding value `b', and 0 otherwise. The
f5a64251
AJ
5700| invalid exception is raised if either operand is a NaN. The comparison is
5701| performed according to the IEC/IEEE Standard for Binary Floating-Point
5702| Arithmetic.
158142c2
FB
5703*----------------------------------------------------------------------------*/
5704
e5a41ffa 5705int floatx80_le(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5706{
5707 flag aSign, bSign;
5708
d1eb8f2a
AD
5709 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5710 || (extractFloatx80Exp(a) == 0x7FFF
5711 && (uint64_t) (extractFloatx80Frac(a) << 1))
5712 || (extractFloatx80Exp(b) == 0x7FFF
5713 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5714 ) {
ff32e16e 5715 float_raise(float_flag_invalid, status);
158142c2
FB
5716 return 0;
5717 }
5718 aSign = extractFloatx80Sign( a );
5719 bSign = extractFloatx80Sign( b );
5720 if ( aSign != bSign ) {
5721 return
5722 aSign
bb98fe42 5723 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5724 == 0 );
5725 }
5726 return
5727 aSign ? le128( b.high, b.low, a.high, a.low )
5728 : le128( a.high, a.low, b.high, b.low );
5729
5730}
5731
5732/*----------------------------------------------------------------------------
5733| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5734| less than the corresponding value `b', and 0 otherwise. The invalid
5735| exception is raised if either operand is a NaN. The comparison is performed
5736| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5737*----------------------------------------------------------------------------*/
5738
e5a41ffa 5739int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5740{
5741 flag aSign, bSign;
5742
d1eb8f2a
AD
5743 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5744 || (extractFloatx80Exp(a) == 0x7FFF
5745 && (uint64_t) (extractFloatx80Frac(a) << 1))
5746 || (extractFloatx80Exp(b) == 0x7FFF
5747 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5748 ) {
ff32e16e 5749 float_raise(float_flag_invalid, status);
158142c2
FB
5750 return 0;
5751 }
5752 aSign = extractFloatx80Sign( a );
5753 bSign = extractFloatx80Sign( b );
5754 if ( aSign != bSign ) {
5755 return
5756 aSign
bb98fe42 5757 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5758 != 0 );
5759 }
5760 return
5761 aSign ? lt128( b.high, b.low, a.high, a.low )
5762 : lt128( a.high, a.low, b.high, b.low );
5763
5764}
5765
67b7861d
AJ
5766/*----------------------------------------------------------------------------
5767| Returns 1 if the extended double-precision floating-point values `a' and `b'
f5a64251
AJ
5768| cannot be compared, and 0 otherwise. The invalid exception is raised if
5769| either operand is a NaN. The comparison is performed according to the
5770| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
67b7861d 5771*----------------------------------------------------------------------------*/
e5a41ffa 5772int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
67b7861d 5773{
d1eb8f2a
AD
5774 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5775 || (extractFloatx80Exp(a) == 0x7FFF
5776 && (uint64_t) (extractFloatx80Frac(a) << 1))
5777 || (extractFloatx80Exp(b) == 0x7FFF
5778 && (uint64_t) (extractFloatx80Frac(b) << 1))
67b7861d 5779 ) {
ff32e16e 5780 float_raise(float_flag_invalid, status);
67b7861d
AJ
5781 return 1;
5782 }
5783 return 0;
5784}
5785
158142c2 5786/*----------------------------------------------------------------------------
b689362d 5787| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5788| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5789| cause an exception. The comparison is performed according to the IEC/IEEE
5790| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5791*----------------------------------------------------------------------------*/
5792
e5a41ffa 5793int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5794{
5795
d1eb8f2a
AD
5796 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5797 float_raise(float_flag_invalid, status);
5798 return 0;
5799 }
158142c2 5800 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5801 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5802 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5803 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5804 ) {
af39bc8c
AM
5805 if (floatx80_is_signaling_nan(a, status)
5806 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5807 float_raise(float_flag_invalid, status);
b689362d 5808 }
158142c2
FB
5809 return 0;
5810 }
5811 return
5812 ( a.low == b.low )
5813 && ( ( a.high == b.high )
5814 || ( ( a.low == 0 )
bb98fe42 5815 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5816 );
5817
5818}
5819
5820/*----------------------------------------------------------------------------
5821| Returns 1 if the extended double-precision floating-point value `a' is less
5822| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
5823| do not cause an exception. Otherwise, the comparison is performed according
5824| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5825*----------------------------------------------------------------------------*/
5826
e5a41ffa 5827int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5828{
5829 flag aSign, bSign;
5830
d1eb8f2a
AD
5831 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5832 float_raise(float_flag_invalid, status);
5833 return 0;
5834 }
158142c2 5835 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5836 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5837 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5838 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5839 ) {
af39bc8c
AM
5840 if (floatx80_is_signaling_nan(a, status)
5841 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5842 float_raise(float_flag_invalid, status);
158142c2
FB
5843 }
5844 return 0;
5845 }
5846 aSign = extractFloatx80Sign( a );
5847 bSign = extractFloatx80Sign( b );
5848 if ( aSign != bSign ) {
5849 return
5850 aSign
bb98fe42 5851 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5852 == 0 );
5853 }
5854 return
5855 aSign ? le128( b.high, b.low, a.high, a.low )
5856 : le128( a.high, a.low, b.high, b.low );
5857
5858}
5859
5860/*----------------------------------------------------------------------------
5861| Returns 1 if the extended double-precision floating-point value `a' is less
5862| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
5863| an exception. Otherwise, the comparison is performed according to the
5864| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5865*----------------------------------------------------------------------------*/
5866
e5a41ffa 5867int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5868{
5869 flag aSign, bSign;
5870
d1eb8f2a
AD
5871 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5872 float_raise(float_flag_invalid, status);
5873 return 0;
5874 }
158142c2 5875 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5876 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5877 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5878 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5879 ) {
af39bc8c
AM
5880 if (floatx80_is_signaling_nan(a, status)
5881 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5882 float_raise(float_flag_invalid, status);
158142c2
FB
5883 }
5884 return 0;
5885 }
5886 aSign = extractFloatx80Sign( a );
5887 bSign = extractFloatx80Sign( b );
5888 if ( aSign != bSign ) {
5889 return
5890 aSign
bb98fe42 5891 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5892 != 0 );
5893 }
5894 return
5895 aSign ? lt128( b.high, b.low, a.high, a.low )
5896 : lt128( a.high, a.low, b.high, b.low );
5897
5898}
5899
67b7861d
AJ
5900/*----------------------------------------------------------------------------
5901| Returns 1 if the extended double-precision floating-point values `a' and `b'
5902| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
5903| The comparison is performed according to the IEC/IEEE Standard for Binary
5904| Floating-Point Arithmetic.
5905*----------------------------------------------------------------------------*/
e5a41ffa 5906int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
67b7861d 5907{
d1eb8f2a
AD
5908 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5909 float_raise(float_flag_invalid, status);
5910 return 1;
5911 }
67b7861d
AJ
5912 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5913 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5914 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5915 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5916 ) {
af39bc8c
AM
5917 if (floatx80_is_signaling_nan(a, status)
5918 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5919 float_raise(float_flag_invalid, status);
67b7861d
AJ
5920 }
5921 return 1;
5922 }
5923 return 0;
5924}
5925
158142c2
FB
5926/*----------------------------------------------------------------------------
5927| Returns the result of converting the quadruple-precision floating-point
5928| value `a' to the 32-bit two's complement integer format. The conversion
5929| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5930| Arithmetic---which means in particular that the conversion is rounded
5931| according to the current rounding mode. If `a' is a NaN, the largest
5932| positive integer is returned. Otherwise, if the conversion overflows, the
5933| largest integer with the same sign as `a' is returned.
5934*----------------------------------------------------------------------------*/
5935
f4014512 5936int32_t float128_to_int32(float128 a, float_status *status)
158142c2
FB
5937{
5938 flag aSign;
f4014512 5939 int32_t aExp, shiftCount;
bb98fe42 5940 uint64_t aSig0, aSig1;
158142c2
FB
5941
5942 aSig1 = extractFloat128Frac1( a );
5943 aSig0 = extractFloat128Frac0( a );
5944 aExp = extractFloat128Exp( a );
5945 aSign = extractFloat128Sign( a );
5946 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5947 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5948 aSig0 |= ( aSig1 != 0 );
5949 shiftCount = 0x4028 - aExp;
5950 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 5951 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
5952
5953}
5954
5955/*----------------------------------------------------------------------------
5956| Returns the result of converting the quadruple-precision floating-point
5957| value `a' to the 32-bit two's complement integer format. The conversion
5958| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5959| Arithmetic, except that the conversion is always rounded toward zero. If
5960| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
5961| conversion overflows, the largest integer with the same sign as `a' is
5962| returned.
5963*----------------------------------------------------------------------------*/
5964
f4014512 5965int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2
FB
5966{
5967 flag aSign;
f4014512 5968 int32_t aExp, shiftCount;
bb98fe42 5969 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 5970 int32_t z;
158142c2
FB
5971
5972 aSig1 = extractFloat128Frac1( a );
5973 aSig0 = extractFloat128Frac0( a );
5974 aExp = extractFloat128Exp( a );
5975 aSign = extractFloat128Sign( a );
5976 aSig0 |= ( aSig1 != 0 );
5977 if ( 0x401E < aExp ) {
5978 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5979 goto invalid;
5980 }
5981 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5982 if (aExp || aSig0) {
5983 status->float_exception_flags |= float_flag_inexact;
5984 }
158142c2
FB
5985 return 0;
5986 }
5987 aSig0 |= LIT64( 0x0001000000000000 );
5988 shiftCount = 0x402F - aExp;
5989 savedASig = aSig0;
5990 aSig0 >>= shiftCount;
5991 z = aSig0;
5992 if ( aSign ) z = - z;
5993 if ( ( z < 0 ) ^ aSign ) {
5994 invalid:
ff32e16e 5995 float_raise(float_flag_invalid, status);
bb98fe42 5996 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5997 }
5998 if ( ( aSig0<<shiftCount ) != savedASig ) {
a2f2d288 5999 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6000 }
6001 return z;
6002
6003}
6004
6005/*----------------------------------------------------------------------------
6006| Returns the result of converting the quadruple-precision floating-point
6007| value `a' to the 64-bit two's complement integer format. The conversion
6008| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6009| Arithmetic---which means in particular that the conversion is rounded
6010| according to the current rounding mode. If `a' is a NaN, the largest
6011| positive integer is returned. Otherwise, if the conversion overflows, the
6012| largest integer with the same sign as `a' is returned.
6013*----------------------------------------------------------------------------*/
6014
f42c2224 6015int64_t float128_to_int64(float128 a, float_status *status)
158142c2
FB
6016{
6017 flag aSign;
f4014512 6018 int32_t aExp, shiftCount;
bb98fe42 6019 uint64_t aSig0, aSig1;
158142c2
FB
6020
6021 aSig1 = extractFloat128Frac1( a );
6022 aSig0 = extractFloat128Frac0( a );
6023 aExp = extractFloat128Exp( a );
6024 aSign = extractFloat128Sign( a );
6025 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
6026 shiftCount = 0x402F - aExp;
6027 if ( shiftCount <= 0 ) {
6028 if ( 0x403E < aExp ) {
ff32e16e 6029 float_raise(float_flag_invalid, status);
158142c2
FB
6030 if ( ! aSign
6031 || ( ( aExp == 0x7FFF )
6032 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
6033 )
6034 ) {
6035 return LIT64( 0x7FFFFFFFFFFFFFFF );
6036 }
bb98fe42 6037 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
6038 }
6039 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6040 }
6041 else {
6042 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6043 }
ff32e16e 6044 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
6045
6046}
6047
6048/*----------------------------------------------------------------------------
6049| Returns the result of converting the quadruple-precision floating-point
6050| value `a' to the 64-bit two's complement integer format. The conversion
6051| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6052| Arithmetic, except that the conversion is always rounded toward zero.
6053| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
6054| the conversion overflows, the largest integer with the same sign as `a' is
6055| returned.
6056*----------------------------------------------------------------------------*/
6057
f42c2224 6058int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2
FB
6059{
6060 flag aSign;
f4014512 6061 int32_t aExp, shiftCount;
bb98fe42 6062 uint64_t aSig0, aSig1;
f42c2224 6063 int64_t z;
158142c2
FB
6064
6065 aSig1 = extractFloat128Frac1( a );
6066 aSig0 = extractFloat128Frac0( a );
6067 aExp = extractFloat128Exp( a );
6068 aSign = extractFloat128Sign( a );
6069 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
6070 shiftCount = aExp - 0x402F;
6071 if ( 0 < shiftCount ) {
6072 if ( 0x403E <= aExp ) {
6073 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
6074 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
6075 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
a2f2d288
PM
6076 if (aSig1) {
6077 status->float_exception_flags |= float_flag_inexact;
6078 }
158142c2
FB
6079 }
6080 else {
ff32e16e 6081 float_raise(float_flag_invalid, status);
158142c2
FB
6082 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
6083 return LIT64( 0x7FFFFFFFFFFFFFFF );
6084 }
6085 }
bb98fe42 6086 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
6087 }
6088 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 6089 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
a2f2d288 6090 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6091 }
6092 }
6093 else {
6094 if ( aExp < 0x3FFF ) {
6095 if ( aExp | aSig0 | aSig1 ) {
a2f2d288 6096 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6097 }
6098 return 0;
6099 }
6100 z = aSig0>>( - shiftCount );
6101 if ( aSig1
bb98fe42 6102 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
a2f2d288 6103 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6104 }
6105 }
6106 if ( aSign ) z = - z;
6107 return z;
6108
6109}
6110
6111/*----------------------------------------------------------------------------
6112| Returns the result of converting the quadruple-precision floating-point
6113| value `a' to the single-precision floating-point format. The conversion
6114| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6115| Arithmetic.
6116*----------------------------------------------------------------------------*/
6117
e5a41ffa 6118float32 float128_to_float32(float128 a, float_status *status)
158142c2
FB
6119{
6120 flag aSign;
f4014512 6121 int32_t aExp;
bb98fe42
AF
6122 uint64_t aSig0, aSig1;
6123 uint32_t zSig;
158142c2
FB
6124
6125 aSig1 = extractFloat128Frac1( a );
6126 aSig0 = extractFloat128Frac0( a );
6127 aExp = extractFloat128Exp( a );
6128 aSign = extractFloat128Sign( a );
6129 if ( aExp == 0x7FFF ) {
6130 if ( aSig0 | aSig1 ) {
ff32e16e 6131 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
6132 }
6133 return packFloat32( aSign, 0xFF, 0 );
6134 }
6135 aSig0 |= ( aSig1 != 0 );
6136 shift64RightJamming( aSig0, 18, &aSig0 );
6137 zSig = aSig0;
6138 if ( aExp || zSig ) {
6139 zSig |= 0x40000000;
6140 aExp -= 0x3F81;
6141 }
ff32e16e 6142 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
6143
6144}
6145
6146/*----------------------------------------------------------------------------
6147| Returns the result of converting the quadruple-precision floating-point
6148| value `a' to the double-precision floating-point format. The conversion
6149| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6150| Arithmetic.
6151*----------------------------------------------------------------------------*/
6152
e5a41ffa 6153float64 float128_to_float64(float128 a, float_status *status)
158142c2
FB
6154{
6155 flag aSign;
f4014512 6156 int32_t aExp;
bb98fe42 6157 uint64_t aSig0, aSig1;
158142c2
FB
6158
6159 aSig1 = extractFloat128Frac1( a );
6160 aSig0 = extractFloat128Frac0( a );
6161 aExp = extractFloat128Exp( a );
6162 aSign = extractFloat128Sign( a );
6163 if ( aExp == 0x7FFF ) {
6164 if ( aSig0 | aSig1 ) {
ff32e16e 6165 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
6166 }
6167 return packFloat64( aSign, 0x7FF, 0 );
6168 }
6169 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6170 aSig0 |= ( aSig1 != 0 );
6171 if ( aExp || aSig0 ) {
6172 aSig0 |= LIT64( 0x4000000000000000 );
6173 aExp -= 0x3C01;
6174 }
ff32e16e 6175 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
6176
6177}
6178
158142c2
FB
6179/*----------------------------------------------------------------------------
6180| Returns the result of converting the quadruple-precision floating-point
6181| value `a' to the extended double-precision floating-point format. The
6182| conversion is performed according to the IEC/IEEE Standard for Binary
6183| Floating-Point Arithmetic.
6184*----------------------------------------------------------------------------*/
6185
e5a41ffa 6186floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2
FB
6187{
6188 flag aSign;
f4014512 6189 int32_t aExp;
bb98fe42 6190 uint64_t aSig0, aSig1;
158142c2
FB
6191
6192 aSig1 = extractFloat128Frac1( a );
6193 aSig0 = extractFloat128Frac0( a );
6194 aExp = extractFloat128Exp( a );
6195 aSign = extractFloat128Sign( a );
6196 if ( aExp == 0x7FFF ) {
6197 if ( aSig0 | aSig1 ) {
ff32e16e 6198 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
158142c2
FB
6199 }
6200 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
6201 }
6202 if ( aExp == 0 ) {
6203 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6204 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6205 }
6206 else {
6207 aSig0 |= LIT64( 0x0001000000000000 );
6208 }
6209 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6210 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6211
6212}
6213
158142c2
FB
6214/*----------------------------------------------------------------------------
6215| Rounds the quadruple-precision floating-point value `a' to an integer, and
6216| returns the result as a quadruple-precision floating-point value. The
6217| operation is performed according to the IEC/IEEE Standard for Binary
6218| Floating-Point Arithmetic.
6219*----------------------------------------------------------------------------*/
6220
e5a41ffa 6221float128 float128_round_to_int(float128 a, float_status *status)
158142c2
FB
6222{
6223 flag aSign;
f4014512 6224 int32_t aExp;
bb98fe42 6225 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
6226 float128 z;
6227
6228 aExp = extractFloat128Exp( a );
6229 if ( 0x402F <= aExp ) {
6230 if ( 0x406F <= aExp ) {
6231 if ( ( aExp == 0x7FFF )
6232 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6233 ) {
ff32e16e 6234 return propagateFloat128NaN(a, a, status);
158142c2
FB
6235 }
6236 return a;
6237 }
6238 lastBitMask = 1;
6239 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6240 roundBitsMask = lastBitMask - 1;
6241 z = a;
a2f2d288 6242 switch (status->float_rounding_mode) {
dc355b76 6243 case float_round_nearest_even:
158142c2
FB
6244 if ( lastBitMask ) {
6245 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6246 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6247 }
6248 else {
bb98fe42 6249 if ( (int64_t) z.low < 0 ) {
158142c2 6250 ++z.high;
bb98fe42 6251 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
6252 }
6253 }
dc355b76 6254 break;
f9288a76
PM
6255 case float_round_ties_away:
6256 if (lastBitMask) {
6257 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6258 } else {
6259 if ((int64_t) z.low < 0) {
6260 ++z.high;
6261 }
6262 }
6263 break;
dc355b76
PM
6264 case float_round_to_zero:
6265 break;
6266 case float_round_up:
6267 if (!extractFloat128Sign(z)) {
6268 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6269 }
6270 break;
6271 case float_round_down:
6272 if (extractFloat128Sign(z)) {
6273 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 6274 }
dc355b76
PM
6275 break;
6276 default:
6277 abort();
158142c2
FB
6278 }
6279 z.low &= ~ roundBitsMask;
6280 }
6281 else {
6282 if ( aExp < 0x3FFF ) {
bb98fe42 6283 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
a2f2d288 6284 status->float_exception_flags |= float_flag_inexact;
158142c2 6285 aSign = extractFloat128Sign( a );
a2f2d288 6286 switch (status->float_rounding_mode) {
158142c2
FB
6287 case float_round_nearest_even:
6288 if ( ( aExp == 0x3FFE )
6289 && ( extractFloat128Frac0( a )
6290 | extractFloat128Frac1( a ) )
6291 ) {
6292 return packFloat128( aSign, 0x3FFF, 0, 0 );
6293 }
6294 break;
f9288a76
PM
6295 case float_round_ties_away:
6296 if (aExp == 0x3FFE) {
6297 return packFloat128(aSign, 0x3FFF, 0, 0);
6298 }
6299 break;
158142c2
FB
6300 case float_round_down:
6301 return
6302 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6303 : packFloat128( 0, 0, 0, 0 );
6304 case float_round_up:
6305 return
6306 aSign ? packFloat128( 1, 0, 0, 0 )
6307 : packFloat128( 0, 0x3FFF, 0, 0 );
6308 }
6309 return packFloat128( aSign, 0, 0, 0 );
6310 }
6311 lastBitMask = 1;
6312 lastBitMask <<= 0x402F - aExp;
6313 roundBitsMask = lastBitMask - 1;
6314 z.low = 0;
6315 z.high = a.high;
a2f2d288 6316 switch (status->float_rounding_mode) {
dc355b76 6317 case float_round_nearest_even:
158142c2
FB
6318 z.high += lastBitMask>>1;
6319 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6320 z.high &= ~ lastBitMask;
6321 }
dc355b76 6322 break;
f9288a76
PM
6323 case float_round_ties_away:
6324 z.high += lastBitMask>>1;
6325 break;
dc355b76
PM
6326 case float_round_to_zero:
6327 break;
6328 case float_round_up:
6329 if (!extractFloat128Sign(z)) {
158142c2
FB
6330 z.high |= ( a.low != 0 );
6331 z.high += roundBitsMask;
6332 }
dc355b76
PM
6333 break;
6334 case float_round_down:
6335 if (extractFloat128Sign(z)) {
6336 z.high |= (a.low != 0);
6337 z.high += roundBitsMask;
6338 }
6339 break;
6340 default:
6341 abort();
158142c2
FB
6342 }
6343 z.high &= ~ roundBitsMask;
6344 }
6345 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
a2f2d288 6346 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6347 }
6348 return z;
6349
6350}
6351
6352/*----------------------------------------------------------------------------
6353| Returns the result of adding the absolute values of the quadruple-precision
6354| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
6355| before being returned. `zSign' is ignored if the result is a NaN.
6356| The addition is performed according to the IEC/IEEE Standard for Binary
6357| Floating-Point Arithmetic.
6358*----------------------------------------------------------------------------*/
6359
e5a41ffa
PM
6360static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
6361 float_status *status)
158142c2 6362{
f4014512 6363 int32_t aExp, bExp, zExp;
bb98fe42 6364 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 6365 int32_t expDiff;
158142c2
FB
6366
6367 aSig1 = extractFloat128Frac1( a );
6368 aSig0 = extractFloat128Frac0( a );
6369 aExp = extractFloat128Exp( a );
6370 bSig1 = extractFloat128Frac1( b );
6371 bSig0 = extractFloat128Frac0( b );
6372 bExp = extractFloat128Exp( b );
6373 expDiff = aExp - bExp;
6374 if ( 0 < expDiff ) {
6375 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6376 if (aSig0 | aSig1) {
6377 return propagateFloat128NaN(a, b, status);
6378 }
158142c2
FB
6379 return a;
6380 }
6381 if ( bExp == 0 ) {
6382 --expDiff;
6383 }
6384 else {
6385 bSig0 |= LIT64( 0x0001000000000000 );
6386 }
6387 shift128ExtraRightJamming(
6388 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6389 zExp = aExp;
6390 }
6391 else if ( expDiff < 0 ) {
6392 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6393 if (bSig0 | bSig1) {
6394 return propagateFloat128NaN(a, b, status);
6395 }
158142c2
FB
6396 return packFloat128( zSign, 0x7FFF, 0, 0 );
6397 }
6398 if ( aExp == 0 ) {
6399 ++expDiff;
6400 }
6401 else {
6402 aSig0 |= LIT64( 0x0001000000000000 );
6403 }
6404 shift128ExtraRightJamming(
6405 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6406 zExp = bExp;
6407 }
6408 else {
6409 if ( aExp == 0x7FFF ) {
6410 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6411 return propagateFloat128NaN(a, b, status);
158142c2
FB
6412 }
6413 return a;
6414 }
6415 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 6416 if ( aExp == 0 ) {
a2f2d288 6417 if (status->flush_to_zero) {
e6afc87f 6418 if (zSig0 | zSig1) {
ff32e16e 6419 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
6420 }
6421 return packFloat128(zSign, 0, 0, 0);
6422 }
fe76d976
PB
6423 return packFloat128( zSign, 0, zSig0, zSig1 );
6424 }
158142c2
FB
6425 zSig2 = 0;
6426 zSig0 |= LIT64( 0x0002000000000000 );
6427 zExp = aExp;
6428 goto shiftRight1;
6429 }
6430 aSig0 |= LIT64( 0x0001000000000000 );
6431 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6432 --zExp;
6433 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
6434 ++zExp;
6435 shiftRight1:
6436 shift128ExtraRightJamming(
6437 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6438 roundAndPack:
ff32e16e 6439 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6440
6441}
6442
6443/*----------------------------------------------------------------------------
6444| Returns the result of subtracting the absolute values of the quadruple-
6445| precision floating-point values `a' and `b'. If `zSign' is 1, the
6446| difference is negated before being returned. `zSign' is ignored if the
6447| result is a NaN. The subtraction is performed according to the IEC/IEEE
6448| Standard for Binary Floating-Point Arithmetic.
6449*----------------------------------------------------------------------------*/
6450
e5a41ffa
PM
6451static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
6452 float_status *status)
158142c2 6453{
f4014512 6454 int32_t aExp, bExp, zExp;
bb98fe42 6455 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 6456 int32_t expDiff;
158142c2
FB
6457
6458 aSig1 = extractFloat128Frac1( a );
6459 aSig0 = extractFloat128Frac0( a );
6460 aExp = extractFloat128Exp( a );
6461 bSig1 = extractFloat128Frac1( b );
6462 bSig0 = extractFloat128Frac0( b );
6463 bExp = extractFloat128Exp( b );
6464 expDiff = aExp - bExp;
6465 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6466 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
6467 if ( 0 < expDiff ) goto aExpBigger;
6468 if ( expDiff < 0 ) goto bExpBigger;
6469 if ( aExp == 0x7FFF ) {
6470 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6471 return propagateFloat128NaN(a, b, status);
158142c2 6472 }
ff32e16e 6473 float_raise(float_flag_invalid, status);
af39bc8c 6474 return float128_default_nan(status);
158142c2
FB
6475 }
6476 if ( aExp == 0 ) {
6477 aExp = 1;
6478 bExp = 1;
6479 }
6480 if ( bSig0 < aSig0 ) goto aBigger;
6481 if ( aSig0 < bSig0 ) goto bBigger;
6482 if ( bSig1 < aSig1 ) goto aBigger;
6483 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
6484 return packFloat128(status->float_rounding_mode == float_round_down,
6485 0, 0, 0);
158142c2
FB
6486 bExpBigger:
6487 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6488 if (bSig0 | bSig1) {
6489 return propagateFloat128NaN(a, b, status);
6490 }
158142c2
FB
6491 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
6492 }
6493 if ( aExp == 0 ) {
6494 ++expDiff;
6495 }
6496 else {
6497 aSig0 |= LIT64( 0x4000000000000000 );
6498 }
6499 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6500 bSig0 |= LIT64( 0x4000000000000000 );
6501 bBigger:
6502 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
6503 zExp = bExp;
6504 zSign ^= 1;
6505 goto normalizeRoundAndPack;
6506 aExpBigger:
6507 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6508 if (aSig0 | aSig1) {
6509 return propagateFloat128NaN(a, b, status);
6510 }
158142c2
FB
6511 return a;
6512 }
6513 if ( bExp == 0 ) {
6514 --expDiff;
6515 }
6516 else {
6517 bSig0 |= LIT64( 0x4000000000000000 );
6518 }
6519 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
6520 aSig0 |= LIT64( 0x4000000000000000 );
6521 aBigger:
6522 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6523 zExp = aExp;
6524 normalizeRoundAndPack:
6525 --zExp;
ff32e16e
PM
6526 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
6527 status);
158142c2
FB
6528
6529}
6530
6531/*----------------------------------------------------------------------------
6532| Returns the result of adding the quadruple-precision floating-point values
6533| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
6534| for Binary Floating-Point Arithmetic.
6535*----------------------------------------------------------------------------*/
6536
e5a41ffa 6537float128 float128_add(float128 a, float128 b, float_status *status)
158142c2
FB
6538{
6539 flag aSign, bSign;
6540
6541 aSign = extractFloat128Sign( a );
6542 bSign = extractFloat128Sign( b );
6543 if ( aSign == bSign ) {
ff32e16e 6544 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6545 }
6546 else {
ff32e16e 6547 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6548 }
6549
6550}
6551
6552/*----------------------------------------------------------------------------
6553| Returns the result of subtracting the quadruple-precision floating-point
6554| values `a' and `b'. The operation is performed according to the IEC/IEEE
6555| Standard for Binary Floating-Point Arithmetic.
6556*----------------------------------------------------------------------------*/
6557
e5a41ffa 6558float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2
FB
6559{
6560 flag aSign, bSign;
6561
6562 aSign = extractFloat128Sign( a );
6563 bSign = extractFloat128Sign( b );
6564 if ( aSign == bSign ) {
ff32e16e 6565 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6566 }
6567 else {
ff32e16e 6568 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6569 }
6570
6571}
6572
6573/*----------------------------------------------------------------------------
6574| Returns the result of multiplying the quadruple-precision floating-point
6575| values `a' and `b'. The operation is performed according to the IEC/IEEE
6576| Standard for Binary Floating-Point Arithmetic.
6577*----------------------------------------------------------------------------*/
6578
e5a41ffa 6579float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2
FB
6580{
6581 flag aSign, bSign, zSign;
f4014512 6582 int32_t aExp, bExp, zExp;
bb98fe42 6583 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
6584
6585 aSig1 = extractFloat128Frac1( a );
6586 aSig0 = extractFloat128Frac0( a );
6587 aExp = extractFloat128Exp( a );
6588 aSign = extractFloat128Sign( a );
6589 bSig1 = extractFloat128Frac1( b );
6590 bSig0 = extractFloat128Frac0( b );
6591 bExp = extractFloat128Exp( b );
6592 bSign = extractFloat128Sign( b );
6593 zSign = aSign ^ bSign;
6594 if ( aExp == 0x7FFF ) {
6595 if ( ( aSig0 | aSig1 )
6596 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6597 return propagateFloat128NaN(a, b, status);
158142c2
FB
6598 }
6599 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6600 return packFloat128( zSign, 0x7FFF, 0, 0 );
6601 }
6602 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6603 if (bSig0 | bSig1) {
6604 return propagateFloat128NaN(a, b, status);
6605 }
158142c2
FB
6606 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6607 invalid:
ff32e16e 6608 float_raise(float_flag_invalid, status);
af39bc8c 6609 return float128_default_nan(status);
158142c2
FB
6610 }
6611 return packFloat128( zSign, 0x7FFF, 0, 0 );
6612 }
6613 if ( aExp == 0 ) {
6614 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6615 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6616 }
6617 if ( bExp == 0 ) {
6618 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6619 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6620 }
6621 zExp = aExp + bExp - 0x4000;
6622 aSig0 |= LIT64( 0x0001000000000000 );
6623 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6624 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6625 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6626 zSig2 |= ( zSig3 != 0 );
6627 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6628 shift128ExtraRightJamming(
6629 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6630 ++zExp;
6631 }
ff32e16e 6632 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6633
6634}
6635
6636/*----------------------------------------------------------------------------
6637| Returns the result of dividing the quadruple-precision floating-point value
6638| `a' by the corresponding value `b'. The operation is performed according to
6639| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6640*----------------------------------------------------------------------------*/
6641
e5a41ffa 6642float128 float128_div(float128 a, float128 b, float_status *status)
158142c2
FB
6643{
6644 flag aSign, bSign, zSign;
f4014512 6645 int32_t aExp, bExp, zExp;
bb98fe42
AF
6646 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6647 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6648
6649 aSig1 = extractFloat128Frac1( a );
6650 aSig0 = extractFloat128Frac0( a );
6651 aExp = extractFloat128Exp( a );
6652 aSign = extractFloat128Sign( a );
6653 bSig1 = extractFloat128Frac1( b );
6654 bSig0 = extractFloat128Frac0( b );
6655 bExp = extractFloat128Exp( b );
6656 bSign = extractFloat128Sign( b );
6657 zSign = aSign ^ bSign;
6658 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6659 if (aSig0 | aSig1) {
6660 return propagateFloat128NaN(a, b, status);
6661 }
158142c2 6662 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6663 if (bSig0 | bSig1) {
6664 return propagateFloat128NaN(a, b, status);
6665 }
158142c2
FB
6666 goto invalid;
6667 }
6668 return packFloat128( zSign, 0x7FFF, 0, 0 );
6669 }
6670 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6671 if (bSig0 | bSig1) {
6672 return propagateFloat128NaN(a, b, status);
6673 }
158142c2
FB
6674 return packFloat128( zSign, 0, 0, 0 );
6675 }
6676 if ( bExp == 0 ) {
6677 if ( ( bSig0 | bSig1 ) == 0 ) {
6678 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6679 invalid:
ff32e16e 6680 float_raise(float_flag_invalid, status);
af39bc8c 6681 return float128_default_nan(status);
158142c2 6682 }
ff32e16e 6683 float_raise(float_flag_divbyzero, status);
158142c2
FB
6684 return packFloat128( zSign, 0x7FFF, 0, 0 );
6685 }
6686 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6687 }
6688 if ( aExp == 0 ) {
6689 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6690 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6691 }
6692 zExp = aExp - bExp + 0x3FFD;
6693 shortShift128Left(
6694 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6695 shortShift128Left(
6696 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6697 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6698 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6699 ++zExp;
6700 }
6701 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6702 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6703 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 6704 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6705 --zSig0;
6706 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6707 }
6708 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6709 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6710 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6711 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6712 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6713 --zSig1;
6714 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6715 }
6716 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6717 }
6718 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 6719 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6720
6721}
6722
6723/*----------------------------------------------------------------------------
6724| Returns the remainder of the quadruple-precision floating-point value `a'
6725| with respect to the corresponding value `b'. The operation is performed
6726| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6727*----------------------------------------------------------------------------*/
6728
e5a41ffa 6729float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 6730{
ed086f3d 6731 flag aSign, zSign;
f4014512 6732 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6733 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6734 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6735 int64_t sigMean0;
158142c2
FB
6736
6737 aSig1 = extractFloat128Frac1( a );
6738 aSig0 = extractFloat128Frac0( a );
6739 aExp = extractFloat128Exp( a );
6740 aSign = extractFloat128Sign( a );
6741 bSig1 = extractFloat128Frac1( b );
6742 bSig0 = extractFloat128Frac0( b );
6743 bExp = extractFloat128Exp( b );
158142c2
FB
6744 if ( aExp == 0x7FFF ) {
6745 if ( ( aSig0 | aSig1 )
6746 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6747 return propagateFloat128NaN(a, b, status);
158142c2
FB
6748 }
6749 goto invalid;
6750 }
6751 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6752 if (bSig0 | bSig1) {
6753 return propagateFloat128NaN(a, b, status);
6754 }
158142c2
FB
6755 return a;
6756 }
6757 if ( bExp == 0 ) {
6758 if ( ( bSig0 | bSig1 ) == 0 ) {
6759 invalid:
ff32e16e 6760 float_raise(float_flag_invalid, status);
af39bc8c 6761 return float128_default_nan(status);
158142c2
FB
6762 }
6763 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6764 }
6765 if ( aExp == 0 ) {
6766 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6767 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6768 }
6769 expDiff = aExp - bExp;
6770 if ( expDiff < -1 ) return a;
6771 shortShift128Left(
6772 aSig0 | LIT64( 0x0001000000000000 ),
6773 aSig1,
6774 15 - ( expDiff < 0 ),
6775 &aSig0,
6776 &aSig1
6777 );
6778 shortShift128Left(
6779 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6780 q = le128( bSig0, bSig1, aSig0, aSig1 );
6781 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6782 expDiff -= 64;
6783 while ( 0 < expDiff ) {
6784 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6785 q = ( 4 < q ) ? q - 4 : 0;
6786 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6787 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6788 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6789 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6790 expDiff -= 61;
6791 }
6792 if ( -64 < expDiff ) {
6793 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6794 q = ( 4 < q ) ? q - 4 : 0;
6795 q >>= - expDiff;
6796 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6797 expDiff += 52;
6798 if ( expDiff < 0 ) {
6799 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6800 }
6801 else {
6802 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6803 }
6804 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6805 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6806 }
6807 else {
6808 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6809 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6810 }
6811 do {
6812 alternateASig0 = aSig0;
6813 alternateASig1 = aSig1;
6814 ++q;
6815 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 6816 } while ( 0 <= (int64_t) aSig0 );
158142c2 6817 add128(
bb98fe42 6818 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
6819 if ( ( sigMean0 < 0 )
6820 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6821 aSig0 = alternateASig0;
6822 aSig1 = alternateASig1;
6823 }
bb98fe42 6824 zSign = ( (int64_t) aSig0 < 0 );
158142c2 6825 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
6826 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6827 status);
158142c2
FB
6828}
6829
6830/*----------------------------------------------------------------------------
6831| Returns the square root of the quadruple-precision floating-point value `a'.
6832| The operation is performed according to the IEC/IEEE Standard for Binary
6833| Floating-Point Arithmetic.
6834*----------------------------------------------------------------------------*/
6835
e5a41ffa 6836float128 float128_sqrt(float128 a, float_status *status)
158142c2
FB
6837{
6838 flag aSign;
f4014512 6839 int32_t aExp, zExp;
bb98fe42
AF
6840 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6841 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6842
6843 aSig1 = extractFloat128Frac1( a );
6844 aSig0 = extractFloat128Frac0( a );
6845 aExp = extractFloat128Exp( a );
6846 aSign = extractFloat128Sign( a );
6847 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6848 if (aSig0 | aSig1) {
6849 return propagateFloat128NaN(a, a, status);
6850 }
158142c2
FB
6851 if ( ! aSign ) return a;
6852 goto invalid;
6853 }
6854 if ( aSign ) {
6855 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6856 invalid:
ff32e16e 6857 float_raise(float_flag_invalid, status);
af39bc8c 6858 return float128_default_nan(status);
158142c2
FB
6859 }
6860 if ( aExp == 0 ) {
6861 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6862 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6863 }
6864 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6865 aSig0 |= LIT64( 0x0001000000000000 );
6866 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6867 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6868 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6869 doubleZSig0 = zSig0<<1;
6870 mul64To128( zSig0, zSig0, &term0, &term1 );
6871 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6872 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6873 --zSig0;
6874 doubleZSig0 -= 2;
6875 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6876 }
6877 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6878 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6879 if ( zSig1 == 0 ) zSig1 = 1;
6880 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6881 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6882 mul64To128( zSig1, zSig1, &term2, &term3 );
6883 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6884 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6885 --zSig1;
6886 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6887 term3 |= 1;
6888 term2 |= doubleZSig0;
6889 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6890 }
6891 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6892 }
6893 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 6894 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6895
6896}
6897
6898/*----------------------------------------------------------------------------
6899| Returns 1 if the quadruple-precision floating-point value `a' is equal to
b689362d
AJ
6900| the corresponding value `b', and 0 otherwise. The invalid exception is
6901| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
6902| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6903*----------------------------------------------------------------------------*/
6904
e5a41ffa 6905int float128_eq(float128 a, float128 b, float_status *status)
158142c2
FB
6906{
6907
6908 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6909 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6910 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6911 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6912 ) {
ff32e16e 6913 float_raise(float_flag_invalid, status);
158142c2
FB
6914 return 0;
6915 }
6916 return
6917 ( a.low == b.low )
6918 && ( ( a.high == b.high )
6919 || ( ( a.low == 0 )
bb98fe42 6920 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6921 );
6922
6923}
6924
6925/*----------------------------------------------------------------------------
6926| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6927| or equal to the corresponding value `b', and 0 otherwise. The invalid
6928| exception is raised if either operand is a NaN. The comparison is performed
6929| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6930*----------------------------------------------------------------------------*/
6931
e5a41ffa 6932int float128_le(float128 a, float128 b, float_status *status)
158142c2
FB
6933{
6934 flag aSign, bSign;
6935
6936 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6937 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6938 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6939 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6940 ) {
ff32e16e 6941 float_raise(float_flag_invalid, status);
158142c2
FB
6942 return 0;
6943 }
6944 aSign = extractFloat128Sign( a );
6945 bSign = extractFloat128Sign( b );
6946 if ( aSign != bSign ) {
6947 return
6948 aSign
bb98fe42 6949 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6950 == 0 );
6951 }
6952 return
6953 aSign ? le128( b.high, b.low, a.high, a.low )
6954 : le128( a.high, a.low, b.high, b.low );
6955
6956}
6957
6958/*----------------------------------------------------------------------------
6959| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6960| the corresponding value `b', and 0 otherwise. The invalid exception is
6961| raised if either operand is a NaN. The comparison is performed according
6962| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6963*----------------------------------------------------------------------------*/
6964
e5a41ffa 6965int float128_lt(float128 a, float128 b, float_status *status)
158142c2
FB
6966{
6967 flag aSign, bSign;
6968
6969 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6970 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6971 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6972 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6973 ) {
ff32e16e 6974 float_raise(float_flag_invalid, status);
158142c2
FB
6975 return 0;
6976 }
6977 aSign = extractFloat128Sign( a );
6978 bSign = extractFloat128Sign( b );
6979 if ( aSign != bSign ) {
6980 return
6981 aSign
bb98fe42 6982 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6983 != 0 );
6984 }
6985 return
6986 aSign ? lt128( b.high, b.low, a.high, a.low )
6987 : lt128( a.high, a.low, b.high, b.low );
6988
6989}
6990
67b7861d
AJ
6991/*----------------------------------------------------------------------------
6992| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
f5a64251
AJ
6993| be compared, and 0 otherwise. The invalid exception is raised if either
6994| operand is a NaN. The comparison is performed according to the IEC/IEEE
6995| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
6996*----------------------------------------------------------------------------*/
6997
e5a41ffa 6998int float128_unordered(float128 a, float128 b, float_status *status)
67b7861d
AJ
6999{
7000 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7001 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7002 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7003 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7004 ) {
ff32e16e 7005 float_raise(float_flag_invalid, status);
67b7861d
AJ
7006 return 1;
7007 }
7008 return 0;
7009}
7010
158142c2
FB
7011/*----------------------------------------------------------------------------
7012| Returns 1 if the quadruple-precision floating-point value `a' is equal to
f5a64251
AJ
7013| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
7014| exception. The comparison is performed according to the IEC/IEEE Standard
7015| for Binary Floating-Point Arithmetic.
158142c2
FB
7016*----------------------------------------------------------------------------*/
7017
e5a41ffa 7018int float128_eq_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7019{
7020
7021 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7022 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7023 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7024 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7025 ) {
af39bc8c
AM
7026 if (float128_is_signaling_nan(a, status)
7027 || float128_is_signaling_nan(b, status)) {
ff32e16e 7028 float_raise(float_flag_invalid, status);
b689362d 7029 }
158142c2
FB
7030 return 0;
7031 }
7032 return
7033 ( a.low == b.low )
7034 && ( ( a.high == b.high )
7035 || ( ( a.low == 0 )
bb98fe42 7036 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
7037 );
7038
7039}
7040
7041/*----------------------------------------------------------------------------
7042| Returns 1 if the quadruple-precision floating-point value `a' is less than
7043| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
7044| cause an exception. Otherwise, the comparison is performed according to the
7045| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7046*----------------------------------------------------------------------------*/
7047
e5a41ffa 7048int float128_le_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7049{
7050 flag aSign, bSign;
7051
7052 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7053 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7054 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7055 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7056 ) {
af39bc8c
AM
7057 if (float128_is_signaling_nan(a, status)
7058 || float128_is_signaling_nan(b, status)) {
ff32e16e 7059 float_raise(float_flag_invalid, status);
158142c2
FB
7060 }
7061 return 0;
7062 }
7063 aSign = extractFloat128Sign( a );
7064 bSign = extractFloat128Sign( b );
7065 if ( aSign != bSign ) {
7066 return
7067 aSign
bb98fe42 7068 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7069 == 0 );
7070 }
7071 return
7072 aSign ? le128( b.high, b.low, a.high, a.low )
7073 : le128( a.high, a.low, b.high, b.low );
7074
7075}
7076
7077/*----------------------------------------------------------------------------
7078| Returns 1 if the quadruple-precision floating-point value `a' is less than
7079| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
7080| exception. Otherwise, the comparison is performed according to the IEC/IEEE
7081| Standard for Binary Floating-Point Arithmetic.
7082*----------------------------------------------------------------------------*/
7083
e5a41ffa 7084int float128_lt_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7085{
7086 flag aSign, bSign;
7087
7088 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7089 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7090 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7091 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7092 ) {
af39bc8c
AM
7093 if (float128_is_signaling_nan(a, status)
7094 || float128_is_signaling_nan(b, status)) {
ff32e16e 7095 float_raise(float_flag_invalid, status);
158142c2
FB
7096 }
7097 return 0;
7098 }
7099 aSign = extractFloat128Sign( a );
7100 bSign = extractFloat128Sign( b );
7101 if ( aSign != bSign ) {
7102 return
7103 aSign
bb98fe42 7104 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7105 != 0 );
7106 }
7107 return
7108 aSign ? lt128( b.high, b.low, a.high, a.low )
7109 : lt128( a.high, a.low, b.high, b.low );
7110
7111}
7112
67b7861d
AJ
7113/*----------------------------------------------------------------------------
7114| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
7115| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
7116| comparison is performed according to the IEC/IEEE Standard for Binary
7117| Floating-Point Arithmetic.
7118*----------------------------------------------------------------------------*/
7119
e5a41ffa 7120int float128_unordered_quiet(float128 a, float128 b, float_status *status)
67b7861d
AJ
7121{
7122 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7123 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7124 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7125 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7126 ) {
af39bc8c
AM
7127 if (float128_is_signaling_nan(a, status)
7128 || float128_is_signaling_nan(b, status)) {
ff32e16e 7129 float_raise(float_flag_invalid, status);
67b7861d
AJ
7130 }
7131 return 1;
7132 }
7133 return 0;
7134}
7135
1d6bda35 7136/* misc functions */
e5a41ffa 7137float32 uint32_to_float32(uint32_t a, float_status *status)
1d6bda35 7138{
ff32e16e 7139 return int64_to_float32(a, status);
1d6bda35
FB
7140}
7141
e5a41ffa 7142float64 uint32_to_float64(uint32_t a, float_status *status)
1d6bda35 7143{
ff32e16e 7144 return int64_to_float64(a, status);
1d6bda35
FB
7145}
7146
3a87d009 7147uint32_t float32_to_uint32(float32 a, float_status *status)
1d6bda35
FB
7148{
7149 int64_t v;
3a87d009 7150 uint32_t res;
34e1c27b 7151 int old_exc_flags = get_float_exception_flags(status);
1d6bda35 7152
ff32e16e 7153 v = float32_to_int64(a, status);
1d6bda35
FB
7154 if (v < 0) {
7155 res = 0;
1d6bda35
FB
7156 } else if (v > 0xffffffff) {
7157 res = 0xffffffff;
1d6bda35 7158 } else {
34e1c27b 7159 return v;
1d6bda35 7160 }
34e1c27b 7161 set_float_exception_flags(old_exc_flags, status);
ff32e16e 7162 float_raise(float_flag_invalid, status);
1d6bda35
FB
7163 return res;
7164}
7165
3a87d009 7166uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *status)
1d6bda35
FB
7167{
7168 int64_t v;
3a87d009 7169 uint32_t res;
34e1c27b 7170 int old_exc_flags = get_float_exception_flags(status);
1d6bda35 7171
ff32e16e 7172 v = float32_to_int64_round_to_zero(a, status);
1d6bda35
FB
7173 if (v < 0) {
7174 res = 0;
1d6bda35
FB
7175 } else if (v > 0xffffffff) {
7176 res = 0xffffffff;
1d6bda35 7177 } else {
34e1c27b 7178 return v;
1d6bda35 7179 }
34e1c27b 7180 set_float_exception_flags(old_exc_flags, status);
ff32e16e 7181 float_raise(float_flag_invalid, status);
1d6bda35
FB
7182 return res;
7183}
7184
0bb721d7 7185int16_t float32_to_int16(float32 a, float_status *status)
f581bf54
WN
7186{
7187 int32_t v;
0bb721d7 7188 int16_t res;
f581bf54
WN
7189 int old_exc_flags = get_float_exception_flags(status);
7190
ff32e16e 7191 v = float32_to_int32(a, status);
f581bf54
WN
7192 if (v < -0x8000) {
7193 res = -0x8000;
7194 } else if (v > 0x7fff) {
7195 res = 0x7fff;
7196 } else {
7197 return v;
7198 }
7199
7200 set_float_exception_flags(old_exc_flags, status);
ff32e16e 7201 float_raise(float_flag_invalid, status);
f581bf54
WN
7202 return res;
7203}
7204
0bb721d7 7205uint16_t float32_to_uint16(float32 a, float_status *status)
f581bf54
WN
7206{
7207 int32_t v;
0bb721d7 7208 uint16_t res;
f581bf54
WN
7209 int old_exc_flags = get_float_exception_flags(status);
7210
ff32e16e 7211 v = float32_to_int32(a, status);
f581bf54
WN
7212 if (v < 0) {
7213 res = 0;
7214 } else if (v > 0xffff) {
7215 res = 0xffff;
7216 } else {
7217 return v;
7218 }
7219
7220 set_float_exception_flags(old_exc_flags, status);
ff32e16e 7221 float_raise(float_flag_invalid, status);
f581bf54
WN
7222 return res;
7223}
7224
0bb721d7 7225uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *status)
cbcef455
PM
7226{
7227 int64_t v;
0bb721d7 7228 uint16_t res;
34e1c27b 7229 int old_exc_flags = get_float_exception_flags(status);
cbcef455 7230
ff32e16e 7231 v = float32_to_int64_round_to_zero(a, status);
cbcef455
PM
7232 if (v < 0) {
7233 res = 0;
cbcef455
PM
7234 } else if (v > 0xffff) {
7235 res = 0xffff;
cbcef455 7236 } else {
34e1c27b 7237 return v;
cbcef455 7238 }
34e1c27b 7239 set_float_exception_flags(old_exc_flags, status);
ff32e16e 7240 float_raise(float_flag_invalid, status);
cbcef455
PM
7241 return res;
7242}
7243
3a87d009 7244uint32_t float64_to_uint32(float64 a, float_status *status)
1d6bda35 7245{
5e7f654f 7246 uint64_t v;
3a87d009 7247 uint32_t res;
5e7f654f 7248 int old_exc_flags = get_float_exception_flags(status);
1d6bda35 7249
ff32e16e 7250 v = float64_to_uint64(a, status);
5e7f654f 7251 if (v > 0xffffffff) {
1d6bda35 7252 res = 0xffffffff;
1d6bda35 7253 } else {
5e7f654f 7254 return v;
1d6bda35 7255 }
5e7f654f 7256 set_float_exception_flags(old_exc_flags, status);
ff32e16e 7257 float_raise(float_flag_invalid, status);
1d6bda35
FB
7258 return res;
7259}
7260
3a87d009 7261uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *status)
1d6bda35 7262{
fd728f2f 7263 uint64_t v;
3a87d009 7264 uint32_t res;
fd728f2f 7265 int old_exc_flags = get_float_exception_flags(status);
1d6bda35 7266
ff32e16e 7267 v = float64_to_uint64_round_to_zero(a, status);
fd728f2f 7268 if (v > 0xffffffff) {
1d6bda35 7269 res = 0xffffffff;
1d6bda35 7270 } else {
fd728f2f 7271 return v;
1d6bda35 7272 }
fd728f2f 7273 set_float_exception_flags(old_exc_flags, status);
ff32e16e 7274 float_raise(float_flag_invalid, status);
1d6bda35
FB
7275 return res;
7276}
7277
0bb721d7 7278int16_t float64_to_int16(float64 a, float_status *status)
f581bf54
WN
7279{
7280 int64_t v;
0bb721d7 7281 int16_t res;
f581bf54
WN
7282 int old_exc_flags = get_float_exception_flags(status);
7283
ff32e16e 7284 v = float64_to_int32(a, status);
f581bf54
WN
7285 if (v < -0x8000) {
7286 res = -0x8000;
7287 } else if (v > 0x7fff) {
7288 res = 0x7fff;
7289 } else {
7290 return v;
7291 }
7292
7293 set_float_exception_flags(old_exc_flags, status);
ff32e16e 7294 float_raise(float_flag_invalid, status);
f581bf54
WN
7295 return res;
7296}
7297
0bb721d7 7298uint16_t float64_to_uint16(float64 a, float_status *status)
f581bf54
WN
7299{
7300 int64_t v;
0bb721d7 7301 uint16_t res;
f581bf54
WN
7302 int old_exc_flags = get_float_exception_flags(status);
7303
ff32e16e 7304 v = float64_to_int32(a, status);
f581bf54
WN
7305 if (v < 0) {
7306 res = 0;
7307 } else if (v > 0xffff) {
7308 res = 0xffff;
7309 } else {
7310 return v;
7311 }
7312
7313 set_float_exception_flags(old_exc_flags, status);
ff32e16e 7314 float_raise(float_flag_invalid, status);
f581bf54
WN
7315 return res;
7316}
7317
0bb721d7 7318uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *status)
cbcef455
PM
7319{
7320 int64_t v;
0bb721d7 7321 uint16_t res;
34e1c27b 7322 int old_exc_flags = get_float_exception_flags(status);
cbcef455 7323
ff32e16e 7324 v = float64_to_int64_round_to_zero(a, status);
cbcef455
PM
7325 if (v < 0) {
7326 res = 0;
cbcef455
PM
7327 } else if (v > 0xffff) {
7328 res = 0xffff;
cbcef455 7329 } else {
34e1c27b 7330 return v;
cbcef455 7331 }
34e1c27b 7332 set_float_exception_flags(old_exc_flags, status);
ff32e16e 7333 float_raise(float_flag_invalid, status);
cbcef455
PM
7334 return res;
7335}
7336
fb3ea83a
TM
7337/*----------------------------------------------------------------------------
7338| Returns the result of converting the double-precision floating-point value
7339| `a' to the 64-bit unsigned integer format. The conversion is
7340| performed according to the IEC/IEEE Standard for Binary Floating-Point
7341| Arithmetic---which means in particular that the conversion is rounded
7342| according to the current rounding mode. If `a' is a NaN, the largest
7343| positive integer is returned. If the conversion overflows, the
7344| largest unsigned integer is returned. If 'a' is negative, the value is
7345| rounded and zero is returned; negative values that do not round to zero
7346| will raise the inexact exception.
7347*----------------------------------------------------------------------------*/
75d62a58 7348
e5a41ffa 7349uint64_t float64_to_uint64(float64 a, float_status *status)
fb3ea83a
TM
7350{
7351 flag aSign;
0c48262d 7352 int aExp;
07d792d2 7353 int shiftCount;
fb3ea83a 7354 uint64_t aSig, aSigExtra;
ff32e16e 7355 a = float64_squash_input_denormal(a, status);
75d62a58 7356
fb3ea83a
TM
7357 aSig = extractFloat64Frac(a);
7358 aExp = extractFloat64Exp(a);
7359 aSign = extractFloat64Sign(a);
7360 if (aSign && (aExp > 1022)) {
ff32e16e 7361 float_raise(float_flag_invalid, status);
fb3ea83a
TM
7362 if (float64_is_any_nan(a)) {
7363 return LIT64(0xFFFFFFFFFFFFFFFF);
7364 } else {
7365 return 0;
7366 }
7367 }
7368 if (aExp) {
7369 aSig |= LIT64(0x0010000000000000);
7370 }
7371 shiftCount = 0x433 - aExp;
7372 if (shiftCount <= 0) {
7373 if (0x43E < aExp) {
ff32e16e 7374 float_raise(float_flag_invalid, status);
fb3ea83a
TM
7375 return LIT64(0xFFFFFFFFFFFFFFFF);
7376 }
7377 aSigExtra = 0;
7378 aSig <<= -shiftCount;
7379 } else {
7380 shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra);
7381 }
ff32e16e 7382 return roundAndPackUint64(aSign, aSig, aSigExtra, status);
75d62a58
JM
7383}
7384
e5a41ffa 7385uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *status)
75d62a58 7386{
a2f2d288 7387 signed char current_rounding_mode = status->float_rounding_mode;
ff32e16e
PM
7388 set_float_rounding_mode(float_round_to_zero, status);
7389 int64_t v = float64_to_uint64(a, status);
7390 set_float_rounding_mode(current_rounding_mode, status);
0a87a310 7391 return v;
75d62a58
JM
7392}
7393
1d6bda35 7394#define COMPARE(s, nan_exp) \
e5a41ffa
PM
7395static inline int float ## s ## _compare_internal(float ## s a, float ## s b,\
7396 int is_quiet, float_status *status) \
1d6bda35
FB
7397{ \
7398 flag aSign, bSign; \
bb98fe42 7399 uint ## s ## _t av, bv; \
ff32e16e
PM
7400 a = float ## s ## _squash_input_denormal(a, status); \
7401 b = float ## s ## _squash_input_denormal(b, status); \
1d6bda35
FB
7402 \
7403 if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
7404 extractFloat ## s ## Frac( a ) ) || \
7405 ( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
7406 extractFloat ## s ## Frac( b ) )) { \
7407 if (!is_quiet || \
af39bc8c
AM
7408 float ## s ## _is_signaling_nan(a, status) || \
7409 float ## s ## _is_signaling_nan(b, status)) { \
ff32e16e 7410 float_raise(float_flag_invalid, status); \
1d6bda35
FB
7411 } \
7412 return float_relation_unordered; \
7413 } \
7414 aSign = extractFloat ## s ## Sign( a ); \
7415 bSign = extractFloat ## s ## Sign( b ); \
f090c9d4 7416 av = float ## s ## _val(a); \
cd8a2533 7417 bv = float ## s ## _val(b); \
1d6bda35 7418 if ( aSign != bSign ) { \
bb98fe42 7419 if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) { \
1d6bda35
FB
7420 /* zero case */ \
7421 return float_relation_equal; \
7422 } else { \
7423 return 1 - (2 * aSign); \
7424 } \
7425 } else { \
f090c9d4 7426 if (av == bv) { \
1d6bda35
FB
7427 return float_relation_equal; \
7428 } else { \
f090c9d4 7429 return 1 - 2 * (aSign ^ ( av < bv )); \
1d6bda35
FB
7430 } \
7431 } \
7432} \
7433 \
e5a41ffa 7434int float ## s ## _compare(float ## s a, float ## s b, float_status *status) \
1d6bda35 7435{ \
ff32e16e 7436 return float ## s ## _compare_internal(a, b, 0, status); \
1d6bda35
FB
7437} \
7438 \
e5a41ffa
PM
7439int float ## s ## _compare_quiet(float ## s a, float ## s b, \
7440 float_status *status) \
1d6bda35 7441{ \
ff32e16e 7442 return float ## s ## _compare_internal(a, b, 1, status); \
1d6bda35
FB
7443}
7444
7445COMPARE(32, 0xff)
7446COMPARE(64, 0x7ff)
9ee6e8bb 7447
e5a41ffa
PM
7448static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
7449 int is_quiet, float_status *status)
f6714d36
AJ
7450{
7451 flag aSign, bSign;
7452
d1eb8f2a
AD
7453 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7454 float_raise(float_flag_invalid, status);
7455 return float_relation_unordered;
7456 }
f6714d36
AJ
7457 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7458 ( extractFloatx80Frac( a )<<1 ) ) ||
7459 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7460 ( extractFloatx80Frac( b )<<1 ) )) {
7461 if (!is_quiet ||
af39bc8c
AM
7462 floatx80_is_signaling_nan(a, status) ||
7463 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7464 float_raise(float_flag_invalid, status);
f6714d36
AJ
7465 }
7466 return float_relation_unordered;
7467 }
7468 aSign = extractFloatx80Sign( a );
7469 bSign = extractFloatx80Sign( b );
7470 if ( aSign != bSign ) {
7471
7472 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7473 ( ( a.low | b.low ) == 0 ) ) {
7474 /* zero case */
7475 return float_relation_equal;
7476 } else {
7477 return 1 - (2 * aSign);
7478 }
7479 } else {
7480 if (a.low == b.low && a.high == b.high) {
7481 return float_relation_equal;
7482 } else {
7483 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7484 }
7485 }
7486}
7487
e5a41ffa 7488int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7489{
ff32e16e 7490 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7491}
7492
e5a41ffa 7493int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
f6714d36 7494{
ff32e16e 7495 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7496}
7497
e5a41ffa
PM
7498static inline int float128_compare_internal(float128 a, float128 b,
7499 int is_quiet, float_status *status)
1f587329
BS
7500{
7501 flag aSign, bSign;
7502
7503 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7504 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7505 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7506 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7507 if (!is_quiet ||
af39bc8c
AM
7508 float128_is_signaling_nan(a, status) ||
7509 float128_is_signaling_nan(b, status)) {
ff32e16e 7510 float_raise(float_flag_invalid, status);
1f587329
BS
7511 }
7512 return float_relation_unordered;
7513 }
7514 aSign = extractFloat128Sign( a );
7515 bSign = extractFloat128Sign( b );
7516 if ( aSign != bSign ) {
7517 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7518 /* zero case */
7519 return float_relation_equal;
7520 } else {
7521 return 1 - (2 * aSign);
7522 }
7523 } else {
7524 if (a.low == b.low && a.high == b.high) {
7525 return float_relation_equal;
7526 } else {
7527 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7528 }
7529 }
7530}
7531
e5a41ffa 7532int float128_compare(float128 a, float128 b, float_status *status)
1f587329 7533{
ff32e16e 7534 return float128_compare_internal(a, b, 0, status);
1f587329
BS
7535}
7536
e5a41ffa 7537int float128_compare_quiet(float128 a, float128 b, float_status *status)
1f587329 7538{
ff32e16e 7539 return float128_compare_internal(a, b, 1, status);
1f587329
BS
7540}
7541
274f1b04
PM
7542/* min() and max() functions. These can't be implemented as
7543 * 'compare and pick one input' because that would mishandle
7544 * NaNs and +0 vs -0.
e17ab310
WN
7545 *
7546 * minnum() and maxnum() functions. These are similar to the min()
7547 * and max() functions but if one of the arguments is a QNaN and
7548 * the other is numerical then the numerical argument is returned.
7549 * minnum() and maxnum() correspond to the IEEE 754-2008 minNum()
7550 * and maxNum() operations. min() and max() are the typical min/max
7551 * semantics provided by many CPUs which predate that specification.
2d31e060
LA
7552 *
7553 * minnummag() and maxnummag() functions correspond to minNumMag()
7554 * and maxNumMag() from IEEE 754-2008.
274f1b04 7555 */
e70614ea 7556#define MINMAX(s) \
a49db98d 7557static inline float ## s float ## s ## _minmax(float ## s a, float ## s b, \
2d31e060 7558 int ismin, int isieee, \
e5a41ffa
PM
7559 int ismag, \
7560 float_status *status) \
274f1b04
PM
7561{ \
7562 flag aSign, bSign; \
2d31e060 7563 uint ## s ## _t av, bv, aav, abv; \
ff32e16e
PM
7564 a = float ## s ## _squash_input_denormal(a, status); \
7565 b = float ## s ## _squash_input_denormal(b, status); \
274f1b04
PM
7566 if (float ## s ## _is_any_nan(a) || \
7567 float ## s ## _is_any_nan(b)) { \
e17ab310 7568 if (isieee) { \
af39bc8c 7569 if (float ## s ## _is_quiet_nan(a, status) && \
e17ab310
WN
7570 !float ## s ##_is_any_nan(b)) { \
7571 return b; \
af39bc8c
AM
7572 } else if (float ## s ## _is_quiet_nan(b, status) && \
7573 !float ## s ## _is_any_nan(a)) { \
e17ab310
WN
7574 return a; \
7575 } \
7576 } \
ff32e16e 7577 return propagateFloat ## s ## NaN(a, b, status); \
274f1b04
PM
7578 } \
7579 aSign = extractFloat ## s ## Sign(a); \
7580 bSign = extractFloat ## s ## Sign(b); \
7581 av = float ## s ## _val(a); \
7582 bv = float ## s ## _val(b); \
2d31e060
LA
7583 if (ismag) { \
7584 aav = float ## s ## _abs(av); \
7585 abv = float ## s ## _abs(bv); \
7586 if (aav != abv) { \
7587 if (ismin) { \
7588 return (aav < abv) ? a : b; \
7589 } else { \
7590 return (aav < abv) ? b : a; \
7591 } \
7592 } \
7593 } \
274f1b04
PM
7594 if (aSign != bSign) { \
7595 if (ismin) { \
7596 return aSign ? a : b; \
7597 } else { \
7598 return aSign ? b : a; \
7599 } \
7600 } else { \
7601 if (ismin) { \
7602 return (aSign ^ (av < bv)) ? a : b; \
7603 } else { \
7604 return (aSign ^ (av < bv)) ? b : a; \
7605 } \
7606 } \
7607} \
7608 \
e5a41ffa
PM
7609float ## s float ## s ## _min(float ## s a, float ## s b, \
7610 float_status *status) \
274f1b04 7611{ \
ff32e16e 7612 return float ## s ## _minmax(a, b, 1, 0, 0, status); \
274f1b04
PM
7613} \
7614 \
e5a41ffa
PM
7615float ## s float ## s ## _max(float ## s a, float ## s b, \
7616 float_status *status) \
274f1b04 7617{ \
ff32e16e 7618 return float ## s ## _minmax(a, b, 0, 0, 0, status); \
e17ab310
WN
7619} \
7620 \
e5a41ffa
PM
7621float ## s float ## s ## _minnum(float ## s a, float ## s b, \
7622 float_status *status) \
e17ab310 7623{ \
ff32e16e 7624 return float ## s ## _minmax(a, b, 1, 1, 0, status); \
e17ab310
WN
7625} \
7626 \
e5a41ffa
PM
7627float ## s float ## s ## _maxnum(float ## s a, float ## s b, \
7628 float_status *status) \
e17ab310 7629{ \
ff32e16e 7630 return float ## s ## _minmax(a, b, 0, 1, 0, status); \
2d31e060
LA
7631} \
7632 \
e5a41ffa
PM
7633float ## s float ## s ## _minnummag(float ## s a, float ## s b, \
7634 float_status *status) \
2d31e060 7635{ \
ff32e16e 7636 return float ## s ## _minmax(a, b, 1, 1, 1, status); \
2d31e060
LA
7637} \
7638 \
e5a41ffa
PM
7639float ## s float ## s ## _maxnummag(float ## s a, float ## s b, \
7640 float_status *status) \
2d31e060 7641{ \
ff32e16e 7642 return float ## s ## _minmax(a, b, 0, 1, 1, status); \
274f1b04
PM
7643}
7644
e70614ea
WN
7645MINMAX(32)
7646MINMAX(64)
274f1b04
PM
7647
7648
9ee6e8bb 7649/* Multiply A by 2 raised to the power N. */
e5a41ffa 7650float32 float32_scalbn(float32 a, int n, float_status *status)
9ee6e8bb
PB
7651{
7652 flag aSign;
326b9e98 7653 int16_t aExp;
bb98fe42 7654 uint32_t aSig;
9ee6e8bb 7655
ff32e16e 7656 a = float32_squash_input_denormal(a, status);
9ee6e8bb
PB
7657 aSig = extractFloat32Frac( a );
7658 aExp = extractFloat32Exp( a );
7659 aSign = extractFloat32Sign( a );
7660
7661 if ( aExp == 0xFF ) {
326b9e98 7662 if ( aSig ) {
ff32e16e 7663 return propagateFloat32NaN(a, a, status);
326b9e98 7664 }
9ee6e8bb
PB
7665 return a;
7666 }
3c85c37f 7667 if (aExp != 0) {
69397542 7668 aSig |= 0x00800000;
3c85c37f 7669 } else if (aSig == 0) {
69397542 7670 return a;
3c85c37f
PM
7671 } else {
7672 aExp++;
7673 }
69397542 7674
326b9e98
AJ
7675 if (n > 0x200) {
7676 n = 0x200;
7677 } else if (n < -0x200) {
7678 n = -0x200;
7679 }
7680
69397542
PB
7681 aExp += n - 1;
7682 aSig <<= 7;
ff32e16e 7683 return normalizeRoundAndPackFloat32(aSign, aExp, aSig, status);
9ee6e8bb
PB
7684}
7685
e5a41ffa 7686float64 float64_scalbn(float64 a, int n, float_status *status)
9ee6e8bb
PB
7687{
7688 flag aSign;
326b9e98 7689 int16_t aExp;
bb98fe42 7690 uint64_t aSig;
9ee6e8bb 7691
ff32e16e 7692 a = float64_squash_input_denormal(a, status);
9ee6e8bb
PB
7693 aSig = extractFloat64Frac( a );
7694 aExp = extractFloat64Exp( a );
7695 aSign = extractFloat64Sign( a );
7696
7697 if ( aExp == 0x7FF ) {
326b9e98 7698 if ( aSig ) {
ff32e16e 7699 return propagateFloat64NaN(a, a, status);
326b9e98 7700 }
9ee6e8bb
PB
7701 return a;
7702 }
3c85c37f 7703 if (aExp != 0) {
69397542 7704 aSig |= LIT64( 0x0010000000000000 );
3c85c37f 7705 } else if (aSig == 0) {
69397542 7706 return a;
3c85c37f
PM
7707 } else {
7708 aExp++;
7709 }
69397542 7710
326b9e98
AJ
7711 if (n > 0x1000) {
7712 n = 0x1000;
7713 } else if (n < -0x1000) {
7714 n = -0x1000;
7715 }
7716
69397542
PB
7717 aExp += n - 1;
7718 aSig <<= 10;
ff32e16e 7719 return normalizeRoundAndPackFloat64(aSign, aExp, aSig, status);
9ee6e8bb
PB
7720}
7721
e5a41ffa 7722floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb
PB
7723{
7724 flag aSign;
326b9e98 7725 int32_t aExp;
bb98fe42 7726 uint64_t aSig;
9ee6e8bb 7727
d1eb8f2a
AD
7728 if (floatx80_invalid_encoding(a)) {
7729 float_raise(float_flag_invalid, status);
7730 return floatx80_default_nan(status);
7731 }
9ee6e8bb
PB
7732 aSig = extractFloatx80Frac( a );
7733 aExp = extractFloatx80Exp( a );
7734 aSign = extractFloatx80Sign( a );
7735
326b9e98
AJ
7736 if ( aExp == 0x7FFF ) {
7737 if ( aSig<<1 ) {
ff32e16e 7738 return propagateFloatx80NaN(a, a, status);
326b9e98 7739 }
9ee6e8bb
PB
7740 return a;
7741 }
326b9e98 7742
3c85c37f
PM
7743 if (aExp == 0) {
7744 if (aSig == 0) {
7745 return a;
7746 }
7747 aExp++;
7748 }
69397542 7749
326b9e98
AJ
7750 if (n > 0x10000) {
7751 n = 0x10000;
7752 } else if (n < -0x10000) {
7753 n = -0x10000;
7754 }
7755
9ee6e8bb 7756 aExp += n;
a2f2d288
PM
7757 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7758 aSign, aExp, aSig, 0, status);
9ee6e8bb 7759}
9ee6e8bb 7760
e5a41ffa 7761float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb
PB
7762{
7763 flag aSign;
326b9e98 7764 int32_t aExp;
bb98fe42 7765 uint64_t aSig0, aSig1;
9ee6e8bb
PB
7766
7767 aSig1 = extractFloat128Frac1( a );
7768 aSig0 = extractFloat128Frac0( a );
7769 aExp = extractFloat128Exp( a );
7770 aSign = extractFloat128Sign( a );
7771 if ( aExp == 0x7FFF ) {
326b9e98 7772 if ( aSig0 | aSig1 ) {
ff32e16e 7773 return propagateFloat128NaN(a, a, status);
326b9e98 7774 }
9ee6e8bb
PB
7775 return a;
7776 }
3c85c37f 7777 if (aExp != 0) {
69397542 7778 aSig0 |= LIT64( 0x0001000000000000 );
3c85c37f 7779 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 7780 return a;
3c85c37f
PM
7781 } else {
7782 aExp++;
7783 }
69397542 7784
326b9e98
AJ
7785 if (n > 0x10000) {
7786 n = 0x10000;
7787 } else if (n < -0x10000) {
7788 n = -0x10000;
7789 }
7790
69397542
PB
7791 aExp += n - 1;
7792 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 7793 , status);
9ee6e8bb
PB
7794
7795}