]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
target/arm: squash FZ16 behaviour for conversions
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
6fff2167 86#include "qemu/bitops.h"
6b4c305c 87#include "fpu/softfloat.h"
158142c2 88
dc355b76 89/* We only need stdlib for abort() */
dc355b76 90
158142c2
FB
91/*----------------------------------------------------------------------------
92| Primitive arithmetic functions, including multi-word arithmetic, and
93| division and square root approximations. (Can be specialized to target if
94| desired.)
95*----------------------------------------------------------------------------*/
88857aca 96#include "fpu/softfloat-macros.h"
158142c2 97
bb4d4bb3
PM
98/*----------------------------------------------------------------------------
99| Returns the fraction bits of the half-precision floating-point value `a'.
100*----------------------------------------------------------------------------*/
101
a49db98d 102static inline uint32_t extractFloat16Frac(float16 a)
bb4d4bb3
PM
103{
104 return float16_val(a) & 0x3ff;
105}
106
107/*----------------------------------------------------------------------------
108| Returns the exponent bits of the half-precision floating-point value `a'.
109*----------------------------------------------------------------------------*/
110
0c48262d 111static inline int extractFloat16Exp(float16 a)
bb4d4bb3
PM
112{
113 return (float16_val(a) >> 10) & 0x1f;
114}
115
116/*----------------------------------------------------------------------------
117| Returns the sign bit of the single-precision floating-point value `a'.
118*----------------------------------------------------------------------------*/
119
a49db98d 120static inline flag extractFloat16Sign(float16 a)
bb4d4bb3
PM
121{
122 return float16_val(a)>>15;
123}
124
d97544c9
AB
125/*----------------------------------------------------------------------------
126| Returns the fraction bits of the single-precision floating-point value `a'.
127*----------------------------------------------------------------------------*/
128
129static inline uint32_t extractFloat32Frac(float32 a)
130{
131 return float32_val(a) & 0x007FFFFF;
132}
133
134/*----------------------------------------------------------------------------
135| Returns the exponent bits of the single-precision floating-point value `a'.
136*----------------------------------------------------------------------------*/
137
138static inline int extractFloat32Exp(float32 a)
139{
140 return (float32_val(a) >> 23) & 0xFF;
141}
142
143/*----------------------------------------------------------------------------
144| Returns the sign bit of the single-precision floating-point value `a'.
145*----------------------------------------------------------------------------*/
146
147static inline flag extractFloat32Sign(float32 a)
148{
149 return float32_val(a) >> 31;
150}
151
152/*----------------------------------------------------------------------------
153| Returns the fraction bits of the double-precision floating-point value `a'.
154*----------------------------------------------------------------------------*/
155
156static inline uint64_t extractFloat64Frac(float64 a)
157{
158 return float64_val(a) & LIT64(0x000FFFFFFFFFFFFF);
159}
160
161/*----------------------------------------------------------------------------
162| Returns the exponent bits of the double-precision floating-point value `a'.
163*----------------------------------------------------------------------------*/
164
165static inline int extractFloat64Exp(float64 a)
166{
167 return (float64_val(a) >> 52) & 0x7FF;
168}
169
170/*----------------------------------------------------------------------------
171| Returns the sign bit of the double-precision floating-point value `a'.
172*----------------------------------------------------------------------------*/
173
174static inline flag extractFloat64Sign(float64 a)
175{
176 return float64_val(a) >> 63;
177}
178
a90119b5
AB
/*
 * Classify a floating point number. float_class_qnan and everything
 * after it is a NaN, so (cls >= float_class_qnan) tests for any NaN.
 * The enumerator order therefore matters: new non-NaN classes must be
 * added before float_class_qnan.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
192
193/*
194 * Structure holding all of the decomposed parts of a float. The
195 * exponent is unbiased and the fraction is normalized. All
196 * calculations are done with a 64 bit fraction and then rounded as
197 * appropriate for the final format.
198 *
199 * Thanks to the packed FloatClass a decent compiler should be able to
200 * fit the whole structure into registers and avoid using the stack
201 * for parameter passing.
202 */
203
204typedef struct {
205 uint64_t frac;
206 int32_t exp;
207 FloatClass cls;
208 bool sign;
209} FloatParts;
210
/*
 * Layout of the 64-bit decomposed fraction: the binary point sits at bit
 * 62, so the implicit (integer) bit is bit 62 and a carry out of an
 * addition lands in bit 63 (the "overflow" bit).
 */
#define DECOMPOSED_BINARY_POINT (64 - 2)
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1)
214
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
} FloatFmt;
237
/*
 * Expand the FloatFmt fields based on the size of exponent (E) and
 * fraction (F), both in bits. Intended for use inside a FloatFmt
 * designated initializer. The arguments are parenthesized so that
 * expression arguments (e.g. FLOAT_PARAMS(5, 5 + 5)) expand correctly.
 */
#define FLOAT_PARAMS(E, F)                                              \
    .exp_size       = (E),                                              \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                            \
    .exp_max        = (1 << (E)) - 1,                                   \
    .frac_size      = (F),                                              \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                    \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),          \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1),    \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1,    \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1
249
250static const FloatFmt float16_params = {
251 FLOAT_PARAMS(5, 10)
252};
253
254static const FloatFmt float32_params = {
255 FLOAT_PARAMS(8, 23)
256};
257
258static const FloatFmt float64_params = {
259 FLOAT_PARAMS(11, 52)
260};
261
6fff2167
AB
262/* Unpack a float to parts, but do not canonicalize. */
263static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
264{
265 const int sign_pos = fmt.frac_size + fmt.exp_size;
266
267 return (FloatParts) {
268 .cls = float_class_unclassified,
269 .sign = extract64(raw, sign_pos, 1),
270 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
271 .frac = extract64(raw, 0, fmt.frac_size),
272 };
273}
274
275static inline FloatParts float16_unpack_raw(float16 f)
276{
277 return unpack_raw(float16_params, f);
278}
279
280static inline FloatParts float32_unpack_raw(float32 f)
281{
282 return unpack_raw(float32_params, f);
283}
284
285static inline FloatParts float64_unpack_raw(float64 f)
286{
287 return unpack_raw(float64_params, f);
288}
289
290/* Pack a float from parts, but do not canonicalize. */
291static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
292{
293 const int sign_pos = fmt.frac_size + fmt.exp_size;
294 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
295 return deposit64(ret, sign_pos, 1, p.sign);
296}
297
298static inline float16 float16_pack_raw(FloatParts p)
299{
300 return make_float16(pack_raw(float16_params, p));
301}
302
303static inline float32 float32_pack_raw(FloatParts p)
304{
305 return make_float32(pack_raw(float32_params, p));
306}
307
308static inline float64 float64_pack_raw(FloatParts p)
309{
310 return make_float64(pack_raw(float64_params, p));
311}
312
0664335a
RH
313/*----------------------------------------------------------------------------
314| Functions and definitions to determine: (1) whether tininess for underflow
315| is detected before or after rounding by default, (2) what (if anything)
316| happens when exceptions are raised, (3) how signaling NaNs are distinguished
317| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
318| are propagated from function inputs to output. These details are target-
319| specific.
320*----------------------------------------------------------------------------*/
321#include "softfloat-specialize.h"
322
6fff2167
AB
323/* Canonicalize EXP and FRAC, setting CLS. */
324static FloatParts canonicalize(FloatParts part, const FloatFmt *parm,
325 float_status *status)
326{
327 if (part.exp == parm->exp_max) {
328 if (part.frac == 0) {
329 part.cls = float_class_inf;
330 } else {
94933df0 331 part.frac <<= parm->frac_shift;
298b468e
RH
332 part.cls = (parts_is_snan_frac(part.frac, status)
333 ? float_class_snan : float_class_qnan);
6fff2167
AB
334 }
335 } else if (part.exp == 0) {
336 if (likely(part.frac == 0)) {
337 part.cls = float_class_zero;
338 } else if (status->flush_inputs_to_zero) {
339 float_raise(float_flag_input_denormal, status);
340 part.cls = float_class_zero;
341 part.frac = 0;
342 } else {
343 int shift = clz64(part.frac) - 1;
344 part.cls = float_class_normal;
345 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
346 part.frac <<= shift;
347 }
348 } else {
349 part.cls = float_class_normal;
350 part.exp -= parm->exp_bias;
351 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
352 }
353 return part;
354}
355
356/* Round and uncanonicalize a floating-point number by parts. There
357 * are FRAC_SHIFT bits that may require rounding at the bottom of the
358 * fraction; these bits will be removed. The exponent will be biased
359 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
360 */
361
362static FloatParts round_canonical(FloatParts p, float_status *s,
363 const FloatFmt *parm)
364{
365 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
366 const uint64_t round_mask = parm->round_mask;
367 const uint64_t roundeven_mask = parm->roundeven_mask;
368 const int exp_max = parm->exp_max;
369 const int frac_shift = parm->frac_shift;
370 uint64_t frac, inc;
371 int exp, flags = 0;
372 bool overflow_norm;
373
374 frac = p.frac;
375 exp = p.exp;
376
377 switch (p.cls) {
378 case float_class_normal:
379 switch (s->float_rounding_mode) {
380 case float_round_nearest_even:
381 overflow_norm = false;
382 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
383 break;
384 case float_round_ties_away:
385 overflow_norm = false;
386 inc = frac_lsbm1;
387 break;
388 case float_round_to_zero:
389 overflow_norm = true;
390 inc = 0;
391 break;
392 case float_round_up:
393 inc = p.sign ? 0 : round_mask;
394 overflow_norm = p.sign;
395 break;
396 case float_round_down:
397 inc = p.sign ? round_mask : 0;
398 overflow_norm = !p.sign;
399 break;
400 default:
401 g_assert_not_reached();
402 }
403
404 exp += parm->exp_bias;
405 if (likely(exp > 0)) {
406 if (frac & round_mask) {
407 flags |= float_flag_inexact;
408 frac += inc;
409 if (frac & DECOMPOSED_OVERFLOW_BIT) {
410 frac >>= 1;
411 exp++;
412 }
413 }
414 frac >>= frac_shift;
415
416 if (unlikely(exp >= exp_max)) {
417 flags |= float_flag_overflow | float_flag_inexact;
418 if (overflow_norm) {
419 exp = exp_max - 1;
420 frac = -1;
421 } else {
422 p.cls = float_class_inf;
423 goto do_inf;
424 }
425 }
426 } else if (s->flush_to_zero) {
427 flags |= float_flag_output_denormal;
428 p.cls = float_class_zero;
429 goto do_zero;
430 } else {
431 bool is_tiny = (s->float_detect_tininess
432 == float_tininess_before_rounding)
433 || (exp < 0)
434 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
435
436 shift64RightJamming(frac, 1 - exp, &frac);
437 if (frac & round_mask) {
438 /* Need to recompute round-to-even. */
439 if (s->float_rounding_mode == float_round_nearest_even) {
440 inc = ((frac & roundeven_mask) != frac_lsbm1
441 ? frac_lsbm1 : 0);
442 }
443 flags |= float_flag_inexact;
444 frac += inc;
445 }
446
447 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
448 frac >>= frac_shift;
449
450 if (is_tiny && (flags & float_flag_inexact)) {
451 flags |= float_flag_underflow;
452 }
453 if (exp == 0 && frac == 0) {
454 p.cls = float_class_zero;
455 }
456 }
457 break;
458
459 case float_class_zero:
460 do_zero:
461 exp = 0;
462 frac = 0;
463 break;
464
465 case float_class_inf:
466 do_inf:
467 exp = exp_max;
468 frac = 0;
469 break;
470
471 case float_class_qnan:
472 case float_class_snan:
473 exp = exp_max;
94933df0 474 frac >>= parm->frac_shift;
6fff2167
AB
475 break;
476
477 default:
478 g_assert_not_reached();
479 }
480
481 float_raise(flags, s);
482 p.exp = exp;
483 p.frac = frac;
484 return p;
485}
486
487static FloatParts float16_unpack_canonical(float16 f, float_status *s)
488{
489 return canonicalize(float16_unpack_raw(f), &float16_params, s);
490}
491
492static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
493{
0bcfbcbe 494 return float16_pack_raw(round_canonical(p, s, &float16_params));
6fff2167
AB
495}
496
497static FloatParts float32_unpack_canonical(float32 f, float_status *s)
498{
499 return canonicalize(float32_unpack_raw(f), &float32_params, s);
500}
501
502static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
503{
0bcfbcbe 504 return float32_pack_raw(round_canonical(p, s, &float32_params));
6fff2167
AB
505}
506
507static FloatParts float64_unpack_canonical(float64 f, float_status *s)
508{
509 return canonicalize(float64_unpack_raw(f), &float64_params, s);
510}
511
512static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
513{
0bcfbcbe 514 return float64_pack_raw(round_canonical(p, s, &float64_params));
6fff2167
AB
515}
516
517/* Simple helpers for checking if what NaN we have */
518static bool is_nan(FloatClass c)
519{
520 return unlikely(c >= float_class_qnan);
521}
522static bool is_snan(FloatClass c)
523{
524 return c == float_class_snan;
525}
526static bool is_qnan(FloatClass c)
527{
528 return c == float_class_qnan;
529}
530
dbe4d53a
AB
531static FloatParts return_nan(FloatParts a, float_status *s)
532{
533 switch (a.cls) {
534 case float_class_snan:
535 s->float_exception_flags |= float_flag_invalid;
0bcfbcbe 536 a = parts_silence_nan(a, s);
dbe4d53a
AB
537 /* fall through */
538 case float_class_qnan:
539 if (s->default_nan_mode) {
f7e598e2 540 return parts_default_nan(s);
dbe4d53a
AB
541 }
542 break;
543
544 default:
545 g_assert_not_reached();
546 }
547 return a;
548}
549
6fff2167
AB
550static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
551{
552 if (is_snan(a.cls) || is_snan(b.cls)) {
553 s->float_exception_flags |= float_flag_invalid;
554 }
555
556 if (s->default_nan_mode) {
f7e598e2 557 return parts_default_nan(s);
6fff2167
AB
558 } else {
559 if (pickNaN(is_qnan(a.cls), is_snan(a.cls),
560 is_qnan(b.cls), is_snan(b.cls),
561 a.frac > b.frac ||
562 (a.frac == b.frac && a.sign < b.sign))) {
563 a = b;
564 }
0bcfbcbe
RH
565 if (is_snan(a.cls)) {
566 return parts_silence_nan(a, s);
567 }
6fff2167
AB
568 }
569 return a;
570}
571
d446830a
AB
572static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
573 bool inf_zero, float_status *s)
574{
1839189b
PM
575 int which;
576
d446830a
AB
577 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
578 s->float_exception_flags |= float_flag_invalid;
579 }
580
1839189b
PM
581 which = pickNaNMulAdd(is_qnan(a.cls), is_snan(a.cls),
582 is_qnan(b.cls), is_snan(b.cls),
583 is_qnan(c.cls), is_snan(c.cls),
584 inf_zero, s);
585
d446830a 586 if (s->default_nan_mode) {
1839189b
PM
587 /* Note that this check is after pickNaNMulAdd so that function
588 * has an opportunity to set the Invalid flag.
589 */
f7e598e2 590 which = 3;
1839189b 591 }
d446830a 592
1839189b
PM
593 switch (which) {
594 case 0:
595 break;
596 case 1:
597 a = b;
598 break;
599 case 2:
600 a = c;
601 break;
602 case 3:
f7e598e2 603 return parts_default_nan(s);
1839189b
PM
604 default:
605 g_assert_not_reached();
d446830a 606 }
1839189b 607
0bcfbcbe
RH
608 if (is_snan(a.cls)) {
609 return parts_silence_nan(a, s);
610 }
d446830a
AB
611 return a;
612}
613
6fff2167
AB
614/*
615 * Returns the result of adding or subtracting the values of the
616 * floating-point values `a' and `b'. The operation is performed
617 * according to the IEC/IEEE Standard for Binary Floating-Point
618 * Arithmetic.
619 */
620
621static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
622 float_status *s)
623{
624 bool a_sign = a.sign;
625 bool b_sign = b.sign ^ subtract;
626
627 if (a_sign != b_sign) {
628 /* Subtraction */
629
630 if (a.cls == float_class_normal && b.cls == float_class_normal) {
631 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
632 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
633 a.frac = a.frac - b.frac;
634 } else {
635 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
636 a.frac = b.frac - a.frac;
637 a.exp = b.exp;
638 a_sign ^= 1;
639 }
640
641 if (a.frac == 0) {
642 a.cls = float_class_zero;
643 a.sign = s->float_rounding_mode == float_round_down;
644 } else {
645 int shift = clz64(a.frac) - 1;
646 a.frac = a.frac << shift;
647 a.exp = a.exp - shift;
648 a.sign = a_sign;
649 }
650 return a;
651 }
652 if (is_nan(a.cls) || is_nan(b.cls)) {
653 return pick_nan(a, b, s);
654 }
655 if (a.cls == float_class_inf) {
656 if (b.cls == float_class_inf) {
657 float_raise(float_flag_invalid, s);
f7e598e2 658 return parts_default_nan(s);
6fff2167
AB
659 }
660 return a;
661 }
662 if (a.cls == float_class_zero && b.cls == float_class_zero) {
663 a.sign = s->float_rounding_mode == float_round_down;
664 return a;
665 }
666 if (a.cls == float_class_zero || b.cls == float_class_inf) {
667 b.sign = a_sign ^ 1;
668 return b;
669 }
670 if (b.cls == float_class_zero) {
671 return a;
672 }
673 } else {
674 /* Addition */
675 if (a.cls == float_class_normal && b.cls == float_class_normal) {
676 if (a.exp > b.exp) {
677 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
678 } else if (a.exp < b.exp) {
679 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
680 a.exp = b.exp;
681 }
682 a.frac += b.frac;
683 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
684 a.frac >>= 1;
685 a.exp += 1;
686 }
687 return a;
688 }
689 if (is_nan(a.cls) || is_nan(b.cls)) {
690 return pick_nan(a, b, s);
691 }
692 if (a.cls == float_class_inf || b.cls == float_class_zero) {
693 return a;
694 }
695 if (b.cls == float_class_inf || a.cls == float_class_zero) {
696 b.sign = b_sign;
697 return b;
698 }
699 }
700 g_assert_not_reached();
701}
702
703/*
704 * Returns the result of adding or subtracting the floating-point
705 * values `a' and `b'. The operation is performed according to the
706 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
707 */
708
709float16 __attribute__((flatten)) float16_add(float16 a, float16 b,
710 float_status *status)
711{
712 FloatParts pa = float16_unpack_canonical(a, status);
713 FloatParts pb = float16_unpack_canonical(b, status);
714 FloatParts pr = addsub_floats(pa, pb, false, status);
715
716 return float16_round_pack_canonical(pr, status);
717}
718
719float32 __attribute__((flatten)) float32_add(float32 a, float32 b,
720 float_status *status)
721{
722 FloatParts pa = float32_unpack_canonical(a, status);
723 FloatParts pb = float32_unpack_canonical(b, status);
724 FloatParts pr = addsub_floats(pa, pb, false, status);
725
726 return float32_round_pack_canonical(pr, status);
727}
728
729float64 __attribute__((flatten)) float64_add(float64 a, float64 b,
730 float_status *status)
731{
732 FloatParts pa = float64_unpack_canonical(a, status);
733 FloatParts pb = float64_unpack_canonical(b, status);
734 FloatParts pr = addsub_floats(pa, pb, false, status);
735
736 return float64_round_pack_canonical(pr, status);
737}
738
739float16 __attribute__((flatten)) float16_sub(float16 a, float16 b,
740 float_status *status)
741{
742 FloatParts pa = float16_unpack_canonical(a, status);
743 FloatParts pb = float16_unpack_canonical(b, status);
744 FloatParts pr = addsub_floats(pa, pb, true, status);
745
746 return float16_round_pack_canonical(pr, status);
747}
748
749float32 __attribute__((flatten)) float32_sub(float32 a, float32 b,
750 float_status *status)
751{
752 FloatParts pa = float32_unpack_canonical(a, status);
753 FloatParts pb = float32_unpack_canonical(b, status);
754 FloatParts pr = addsub_floats(pa, pb, true, status);
755
756 return float32_round_pack_canonical(pr, status);
757}
758
759float64 __attribute__((flatten)) float64_sub(float64 a, float64 b,
760 float_status *status)
761{
762 FloatParts pa = float64_unpack_canonical(a, status);
763 FloatParts pb = float64_unpack_canonical(b, status);
764 FloatParts pr = addsub_floats(pa, pb, true, status);
765
766 return float64_round_pack_canonical(pr, status);
767}
768
74d707e2
AB
769/*
770 * Returns the result of multiplying the floating-point values `a' and
771 * `b'. The operation is performed according to the IEC/IEEE Standard
772 * for Binary Floating-Point Arithmetic.
773 */
774
775static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
776{
777 bool sign = a.sign ^ b.sign;
778
779 if (a.cls == float_class_normal && b.cls == float_class_normal) {
780 uint64_t hi, lo;
781 int exp = a.exp + b.exp;
782
783 mul64To128(a.frac, b.frac, &hi, &lo);
784 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
785 if (lo & DECOMPOSED_OVERFLOW_BIT) {
786 shift64RightJamming(lo, 1, &lo);
787 exp += 1;
788 }
789
790 /* Re-use a */
791 a.exp = exp;
792 a.sign = sign;
793 a.frac = lo;
794 return a;
795 }
796 /* handle all the NaN cases */
797 if (is_nan(a.cls) || is_nan(b.cls)) {
798 return pick_nan(a, b, s);
799 }
800 /* Inf * Zero == NaN */
801 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
802 (a.cls == float_class_zero && b.cls == float_class_inf)) {
803 s->float_exception_flags |= float_flag_invalid;
f7e598e2 804 return parts_default_nan(s);
74d707e2
AB
805 }
806 /* Multiply by 0 or Inf */
807 if (a.cls == float_class_inf || a.cls == float_class_zero) {
808 a.sign = sign;
809 return a;
810 }
811 if (b.cls == float_class_inf || b.cls == float_class_zero) {
812 b.sign = sign;
813 return b;
814 }
815 g_assert_not_reached();
816}
817
818float16 __attribute__((flatten)) float16_mul(float16 a, float16 b,
819 float_status *status)
820{
821 FloatParts pa = float16_unpack_canonical(a, status);
822 FloatParts pb = float16_unpack_canonical(b, status);
823 FloatParts pr = mul_floats(pa, pb, status);
824
825 return float16_round_pack_canonical(pr, status);
826}
827
828float32 __attribute__((flatten)) float32_mul(float32 a, float32 b,
829 float_status *status)
830{
831 FloatParts pa = float32_unpack_canonical(a, status);
832 FloatParts pb = float32_unpack_canonical(b, status);
833 FloatParts pr = mul_floats(pa, pb, status);
834
835 return float32_round_pack_canonical(pr, status);
836}
837
838float64 __attribute__((flatten)) float64_mul(float64 a, float64 b,
839 float_status *status)
840{
841 FloatParts pa = float64_unpack_canonical(a, status);
842 FloatParts pb = float64_unpack_canonical(b, status);
843 FloatParts pr = mul_floats(pa, pb, status);
844
845 return float64_round_pack_canonical(pr, status);
846}
847
d446830a
AB
848/*
849 * Returns the result of multiplying the floating-point values `a' and
850 * `b' then adding 'c', with no intermediate rounding step after the
851 * multiplication. The operation is performed according to the
852 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
853 * The flags argument allows the caller to select negation of the
854 * addend, the intermediate product, or the final result. (The
855 * difference between this and having the caller do a separate
856 * negation is that negating externally will flip the sign bit on
857 * NaNs.)
858 */
859
860static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
861 int flags, float_status *s)
862{
863 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
864 ((1 << float_class_inf) | (1 << float_class_zero));
865 bool p_sign;
866 bool sign_flip = flags & float_muladd_negate_result;
867 FloatClass p_class;
868 uint64_t hi, lo;
869 int p_exp;
870
871 /* It is implementation-defined whether the cases of (0,inf,qnan)
872 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
873 * they return if they do), so we have to hand this information
874 * off to the target-specific pick-a-NaN routine.
875 */
876 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
877 return pick_nan_muladd(a, b, c, inf_zero, s);
878 }
879
880 if (inf_zero) {
881 s->float_exception_flags |= float_flag_invalid;
f7e598e2 882 return parts_default_nan(s);
d446830a
AB
883 }
884
885 if (flags & float_muladd_negate_c) {
886 c.sign ^= 1;
887 }
888
889 p_sign = a.sign ^ b.sign;
890
891 if (flags & float_muladd_negate_product) {
892 p_sign ^= 1;
893 }
894
895 if (a.cls == float_class_inf || b.cls == float_class_inf) {
896 p_class = float_class_inf;
897 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
898 p_class = float_class_zero;
899 } else {
900 p_class = float_class_normal;
901 }
902
903 if (c.cls == float_class_inf) {
904 if (p_class == float_class_inf && p_sign != c.sign) {
905 s->float_exception_flags |= float_flag_invalid;
f7e598e2 906 return parts_default_nan(s);
d446830a
AB
907 } else {
908 a.cls = float_class_inf;
909 a.sign = c.sign ^ sign_flip;
f7e598e2 910 return a;
d446830a 911 }
d446830a
AB
912 }
913
914 if (p_class == float_class_inf) {
915 a.cls = float_class_inf;
916 a.sign = p_sign ^ sign_flip;
917 return a;
918 }
919
920 if (p_class == float_class_zero) {
921 if (c.cls == float_class_zero) {
922 if (p_sign != c.sign) {
923 p_sign = s->float_rounding_mode == float_round_down;
924 }
925 c.sign = p_sign;
926 } else if (flags & float_muladd_halve_result) {
927 c.exp -= 1;
928 }
929 c.sign ^= sign_flip;
930 return c;
931 }
932
933 /* a & b should be normals now... */
934 assert(a.cls == float_class_normal &&
935 b.cls == float_class_normal);
936
937 p_exp = a.exp + b.exp;
938
939 /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
940 * result.
941 */
942 mul64To128(a.frac, b.frac, &hi, &lo);
943 /* binary point now at bit 124 */
944
945 /* check for overflow */
946 if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
947 shift128RightJamming(hi, lo, 1, &hi, &lo);
948 p_exp += 1;
949 }
950
951 /* + add/sub */
952 if (c.cls == float_class_zero) {
953 /* move binary point back to 62 */
954 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
955 } else {
956 int exp_diff = p_exp - c.exp;
957 if (p_sign == c.sign) {
958 /* Addition */
959 if (exp_diff <= 0) {
960 shift128RightJamming(hi, lo,
961 DECOMPOSED_BINARY_POINT - exp_diff,
962 &hi, &lo);
963 lo += c.frac;
964 p_exp = c.exp;
965 } else {
966 uint64_t c_hi, c_lo;
967 /* shift c to the same binary point as the product (124) */
968 c_hi = c.frac >> 2;
969 c_lo = 0;
970 shift128RightJamming(c_hi, c_lo,
971 exp_diff,
972 &c_hi, &c_lo);
973 add128(hi, lo, c_hi, c_lo, &hi, &lo);
974 /* move binary point back to 62 */
975 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
976 }
977
978 if (lo & DECOMPOSED_OVERFLOW_BIT) {
979 shift64RightJamming(lo, 1, &lo);
980 p_exp += 1;
981 }
982
983 } else {
984 /* Subtraction */
985 uint64_t c_hi, c_lo;
986 /* make C binary point match product at bit 124 */
987 c_hi = c.frac >> 2;
988 c_lo = 0;
989
990 if (exp_diff <= 0) {
991 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
992 if (exp_diff == 0
993 &&
994 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
995 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
996 } else {
997 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
998 p_sign ^= 1;
999 p_exp = c.exp;
1000 }
1001 } else {
1002 shift128RightJamming(c_hi, c_lo,
1003 exp_diff,
1004 &c_hi, &c_lo);
1005 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1006 }
1007
1008 if (hi == 0 && lo == 0) {
1009 a.cls = float_class_zero;
1010 a.sign = s->float_rounding_mode == float_round_down;
1011 a.sign ^= sign_flip;
1012 return a;
1013 } else {
1014 int shift;
1015 if (hi != 0) {
1016 shift = clz64(hi);
1017 } else {
1018 shift = clz64(lo) + 64;
1019 }
1020 /* Normalizing to a binary point of 124 is the
1021 correct adjust for the exponent. However since we're
1022 shifting, we might as well put the binary point back
1023 at 62 where we really want it. Therefore shift as
1024 if we're leaving 1 bit at the top of the word, but
1025 adjust the exponent as if we're leaving 3 bits. */
1026 shift -= 1;
1027 if (shift >= 64) {
1028 lo = lo << (shift - 64);
1029 } else {
1030 hi = (hi << shift) | (lo >> (64 - shift));
1031 lo = hi | ((lo << shift) != 0);
1032 }
1033 p_exp -= shift - 2;
1034 }
1035 }
1036 }
1037
1038 if (flags & float_muladd_halve_result) {
1039 p_exp -= 1;
1040 }
1041
1042 /* finally prepare our result */
1043 a.cls = float_class_normal;
1044 a.sign = p_sign ^ sign_flip;
1045 a.exp = p_exp;
1046 a.frac = lo;
1047
1048 return a;
1049}
1050
1051float16 __attribute__((flatten)) float16_muladd(float16 a, float16 b, float16 c,
1052 int flags, float_status *status)
1053{
1054 FloatParts pa = float16_unpack_canonical(a, status);
1055 FloatParts pb = float16_unpack_canonical(b, status);
1056 FloatParts pc = float16_unpack_canonical(c, status);
1057 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1058
1059 return float16_round_pack_canonical(pr, status);
1060}
1061
1062float32 __attribute__((flatten)) float32_muladd(float32 a, float32 b, float32 c,
1063 int flags, float_status *status)
1064{
1065 FloatParts pa = float32_unpack_canonical(a, status);
1066 FloatParts pb = float32_unpack_canonical(b, status);
1067 FloatParts pc = float32_unpack_canonical(c, status);
1068 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1069
1070 return float32_round_pack_canonical(pr, status);
1071}
1072
1073float64 __attribute__((flatten)) float64_muladd(float64 a, float64 b, float64 c,
1074 int flags, float_status *status)
1075{
1076 FloatParts pa = float64_unpack_canonical(a, status);
1077 FloatParts pb = float64_unpack_canonical(b, status);
1078 FloatParts pc = float64_unpack_canonical(c, status);
1079 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1080
1081 return float64_round_pack_canonical(pr, status);
1082}
1083
cf07323d
AB
1084/*
1085 * Returns the result of dividing the floating-point value `a' by the
1086 * corresponding value `b'. The operation is performed according to
1087 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1088 */
1089
1090static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1091{
1092 bool sign = a.sign ^ b.sign;
1093
1094 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1095 uint64_t temp_lo, temp_hi;
1096 int exp = a.exp - b.exp;
1097 if (a.frac < b.frac) {
1098 exp -= 1;
1099 shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1,
1100 &temp_hi, &temp_lo);
1101 } else {
1102 shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT,
1103 &temp_hi, &temp_lo);
1104 }
1105 /* LSB of quot is set if inexact which roundandpack will use
1106 * to set flags. Yet again we re-use a for the result */
1107 a.frac = div128To64(temp_lo, temp_hi, b.frac);
1108 a.sign = sign;
1109 a.exp = exp;
1110 return a;
1111 }
1112 /* handle all the NaN cases */
1113 if (is_nan(a.cls) || is_nan(b.cls)) {
1114 return pick_nan(a, b, s);
1115 }
1116 /* 0/0 or Inf/Inf */
1117 if (a.cls == b.cls
1118 &&
1119 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1120 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1121 return parts_default_nan(s);
cf07323d 1122 }
9cb4e398
AB
1123 /* Inf / x or 0 / x */
1124 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1125 a.sign = sign;
1126 return a;
1127 }
cf07323d
AB
1128 /* Div 0 => Inf */
1129 if (b.cls == float_class_zero) {
1130 s->float_exception_flags |= float_flag_divbyzero;
1131 a.cls = float_class_inf;
1132 a.sign = sign;
1133 return a;
1134 }
cf07323d
AB
1135 /* Div by Inf */
1136 if (b.cls == float_class_inf) {
1137 a.cls = float_class_zero;
1138 a.sign = sign;
1139 return a;
1140 }
1141 g_assert_not_reached();
1142}
1143
1144float16 float16_div(float16 a, float16 b, float_status *status)
1145{
1146 FloatParts pa = float16_unpack_canonical(a, status);
1147 FloatParts pb = float16_unpack_canonical(b, status);
1148 FloatParts pr = div_floats(pa, pb, status);
1149
1150 return float16_round_pack_canonical(pr, status);
1151}
1152
1153float32 float32_div(float32 a, float32 b, float_status *status)
1154{
1155 FloatParts pa = float32_unpack_canonical(a, status);
1156 FloatParts pb = float32_unpack_canonical(b, status);
1157 FloatParts pr = div_floats(pa, pb, status);
1158
1159 return float32_round_pack_canonical(pr, status);
1160}
1161
1162float64 float64_div(float64 a, float64 b, float_status *status)
1163{
1164 FloatParts pa = float64_unpack_canonical(a, status);
1165 FloatParts pb = float64_unpack_canonical(b, status);
1166 FloatParts pr = div_floats(pa, pb, status);
1167
1168 return float64_round_pack_canonical(pr, status);
1169}
1170
dbe4d53a
AB
1171/*
1172 * Rounds the floating-point value `a' to an integer, and returns the
1173 * result as a floating-point value. The operation is performed
1174 * according to the IEC/IEEE Standard for Binary Floating-Point
1175 * Arithmetic.
1176 */
1177
1178static FloatParts round_to_int(FloatParts a, int rounding_mode, float_status *s)
1179{
1180 if (is_nan(a.cls)) {
1181 return return_nan(a, s);
1182 }
1183
1184 switch (a.cls) {
1185 case float_class_zero:
1186 case float_class_inf:
1187 case float_class_qnan:
1188 /* already "integral" */
1189 break;
1190 case float_class_normal:
1191 if (a.exp >= DECOMPOSED_BINARY_POINT) {
1192 /* already integral */
1193 break;
1194 }
1195 if (a.exp < 0) {
1196 bool one;
1197 /* all fractional */
1198 s->float_exception_flags |= float_flag_inexact;
1199 switch (rounding_mode) {
1200 case float_round_nearest_even:
1201 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
1202 break;
1203 case float_round_ties_away:
1204 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
1205 break;
1206 case float_round_to_zero:
1207 one = false;
1208 break;
1209 case float_round_up:
1210 one = !a.sign;
1211 break;
1212 case float_round_down:
1213 one = a.sign;
1214 break;
1215 default:
1216 g_assert_not_reached();
1217 }
1218
1219 if (one) {
1220 a.frac = DECOMPOSED_IMPLICIT_BIT;
1221 a.exp = 0;
1222 } else {
1223 a.cls = float_class_zero;
1224 }
1225 } else {
1226 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
1227 uint64_t frac_lsbm1 = frac_lsb >> 1;
1228 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
1229 uint64_t rnd_mask = rnd_even_mask >> 1;
1230 uint64_t inc;
1231
1232 switch (rounding_mode) {
1233 case float_round_nearest_even:
1234 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
1235 break;
1236 case float_round_ties_away:
1237 inc = frac_lsbm1;
1238 break;
1239 case float_round_to_zero:
1240 inc = 0;
1241 break;
1242 case float_round_up:
1243 inc = a.sign ? 0 : rnd_mask;
1244 break;
1245 case float_round_down:
1246 inc = a.sign ? rnd_mask : 0;
1247 break;
1248 default:
1249 g_assert_not_reached();
1250 }
1251
1252 if (a.frac & rnd_mask) {
1253 s->float_exception_flags |= float_flag_inexact;
1254 a.frac += inc;
1255 a.frac &= ~rnd_mask;
1256 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
1257 a.frac >>= 1;
1258 a.exp++;
1259 }
1260 }
1261 }
1262 break;
1263 default:
1264 g_assert_not_reached();
1265 }
1266 return a;
1267}
1268
1269float16 float16_round_to_int(float16 a, float_status *s)
1270{
1271 FloatParts pa = float16_unpack_canonical(a, s);
1272 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1273 return float16_round_pack_canonical(pr, s);
1274}
1275
1276float32 float32_round_to_int(float32 a, float_status *s)
1277{
1278 FloatParts pa = float32_unpack_canonical(a, s);
1279 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1280 return float32_round_pack_canonical(pr, s);
1281}
1282
1283float64 float64_round_to_int(float64 a, float_status *s)
1284{
1285 FloatParts pa = float64_unpack_canonical(a, s);
1286 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1287 return float64_round_pack_canonical(pr, s);
1288}
1289
1290float64 float64_trunc_to_int(float64 a, float_status *s)
1291{
1292 FloatParts pa = float64_unpack_canonical(a, s);
1293 FloatParts pr = round_to_int(pa, float_round_to_zero, s);
1294 return float64_round_pack_canonical(pr, s);
1295}
1296
ab52f973
AB
1297/*
1298 * Returns the result of converting the floating-point value `a' to
1299 * the two's complement integer format. The conversion is performed
1300 * according to the IEC/IEEE Standard for Binary Floating-Point
1301 * Arithmetic---which means in particular that the conversion is
1302 * rounded according to the current rounding mode. If `a' is a NaN,
1303 * the largest positive integer is returned. Otherwise, if the
1304 * conversion overflows, the largest integer with the same sign as `a'
1305 * is returned.
1306*/
1307
1308static int64_t round_to_int_and_pack(FloatParts in, int rmode,
1309 int64_t min, int64_t max,
1310 float_status *s)
1311{
1312 uint64_t r;
1313 int orig_flags = get_float_exception_flags(s);
1314 FloatParts p = round_to_int(in, rmode, s);
1315
1316 switch (p.cls) {
1317 case float_class_snan:
1318 case float_class_qnan:
801bc563 1319 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1320 return max;
1321 case float_class_inf:
801bc563 1322 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1323 return p.sign ? min : max;
1324 case float_class_zero:
1325 return 0;
1326 case float_class_normal:
1327 if (p.exp < DECOMPOSED_BINARY_POINT) {
1328 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1329 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1330 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1331 } else {
1332 r = UINT64_MAX;
1333 }
1334 if (p.sign) {
33358375 1335 if (r <= -(uint64_t) min) {
ab52f973
AB
1336 return -r;
1337 } else {
1338 s->float_exception_flags = orig_flags | float_flag_invalid;
1339 return min;
1340 }
1341 } else {
33358375 1342 if (r <= max) {
ab52f973
AB
1343 return r;
1344 } else {
1345 s->float_exception_flags = orig_flags | float_flag_invalid;
1346 return max;
1347 }
1348 }
1349 default:
1350 g_assert_not_reached();
1351 }
1352}
1353
1354#define FLOAT_TO_INT(fsz, isz) \
1355int ## isz ## _t float ## fsz ## _to_int ## isz(float ## fsz a, \
1356 float_status *s) \
1357{ \
1358 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1359 return round_to_int_and_pack(p, s->float_rounding_mode, \
1360 INT ## isz ## _MIN, INT ## isz ## _MAX,\
1361 s); \
1362} \
1363 \
1364int ## isz ## _t float ## fsz ## _to_int ## isz ## _round_to_zero \
1365 (float ## fsz a, float_status *s) \
1366{ \
1367 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1368 return round_to_int_and_pack(p, float_round_to_zero, \
1369 INT ## isz ## _MIN, INT ## isz ## _MAX,\
1370 s); \
1371}
1372
1373FLOAT_TO_INT(16, 16)
1374FLOAT_TO_INT(16, 32)
1375FLOAT_TO_INT(16, 64)
1376
1377FLOAT_TO_INT(32, 16)
1378FLOAT_TO_INT(32, 32)
1379FLOAT_TO_INT(32, 64)
1380
1381FLOAT_TO_INT(64, 16)
1382FLOAT_TO_INT(64, 32)
1383FLOAT_TO_INT(64, 64)
1384
1385#undef FLOAT_TO_INT
1386
/*
 * Returns the result of converting the floating-point value `a' to
 * the unsigned integer format.  The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode.  If `a' is a NaN,
 * the largest unsigned integer is returned.  Otherwise, if the
 * conversion overflows, the largest unsigned integer is returned.  If
 * `a' is negative, the value is rounded and zero is returned;
 * negative values that do not round to zero raise the invalid
 * exception flag.
 */
1399
1400static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, uint64_t max,
1401 float_status *s)
1402{
1403 int orig_flags = get_float_exception_flags(s);
1404 FloatParts p = round_to_int(in, rmode, s);
1405
1406 switch (p.cls) {
1407 case float_class_snan:
1408 case float_class_qnan:
1409 s->float_exception_flags = orig_flags | float_flag_invalid;
1410 return max;
1411 case float_class_inf:
801bc563 1412 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1413 return p.sign ? 0 : max;
1414 case float_class_zero:
1415 return 0;
1416 case float_class_normal:
1417 {
1418 uint64_t r;
1419 if (p.sign) {
1420 s->float_exception_flags = orig_flags | float_flag_invalid;
1421 return 0;
1422 }
1423
1424 if (p.exp < DECOMPOSED_BINARY_POINT) {
1425 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1426 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1427 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1428 } else {
1429 s->float_exception_flags = orig_flags | float_flag_invalid;
1430 return max;
1431 }
1432
1433 /* For uint64 this will never trip, but if p.exp is too large
1434 * to shift a decomposed fraction we shall have exited via the
1435 * 3rd leg above.
1436 */
1437 if (r > max) {
1438 s->float_exception_flags = orig_flags | float_flag_invalid;
1439 return max;
1440 } else {
1441 return r;
1442 }
1443 }
1444 default:
1445 g_assert_not_reached();
1446 }
1447}
1448
1449#define FLOAT_TO_UINT(fsz, isz) \
1450uint ## isz ## _t float ## fsz ## _to_uint ## isz(float ## fsz a, \
1451 float_status *s) \
1452{ \
1453 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1454 return round_to_uint_and_pack(p, s->float_rounding_mode, \
1455 UINT ## isz ## _MAX, s); \
1456} \
1457 \
1458uint ## isz ## _t float ## fsz ## _to_uint ## isz ## _round_to_zero \
1459 (float ## fsz a, float_status *s) \
1460{ \
1461 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
bd49e602
RH
1462 return round_to_uint_and_pack(p, float_round_to_zero, \
1463 UINT ## isz ## _MAX, s); \
ab52f973
AB
1464}
1465
1466FLOAT_TO_UINT(16, 16)
1467FLOAT_TO_UINT(16, 32)
1468FLOAT_TO_UINT(16, 64)
1469
1470FLOAT_TO_UINT(32, 16)
1471FLOAT_TO_UINT(32, 32)
1472FLOAT_TO_UINT(32, 64)
1473
1474FLOAT_TO_UINT(64, 16)
1475FLOAT_TO_UINT(64, 32)
1476FLOAT_TO_UINT(64, 64)
1477
1478#undef FLOAT_TO_UINT
1479
c02e1fb8
AB
1480/*
1481 * Integer to float conversions
1482 *
1483 * Returns the result of converting the two's complement integer `a'
1484 * to the floating-point format. The conversion is performed according
1485 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1486 */
1487
1488static FloatParts int_to_float(int64_t a, float_status *status)
1489{
a5a5f5e2 1490 FloatParts r = {};
c02e1fb8
AB
1491 if (a == 0) {
1492 r.cls = float_class_zero;
1493 r.sign = false;
1494 } else if (a == (1ULL << 63)) {
1495 r.cls = float_class_normal;
1496 r.sign = true;
1497 r.frac = DECOMPOSED_IMPLICIT_BIT;
1498 r.exp = 63;
1499 } else {
1500 uint64_t f;
1501 if (a < 0) {
1502 f = -a;
1503 r.sign = true;
1504 } else {
1505 f = a;
1506 r.sign = false;
1507 }
1508 int shift = clz64(f) - 1;
1509 r.cls = float_class_normal;
1510 r.exp = (DECOMPOSED_BINARY_POINT - shift);
1511 r.frac = f << shift;
1512 }
1513
1514 return r;
1515}
1516
1517float16 int64_to_float16(int64_t a, float_status *status)
1518{
1519 FloatParts pa = int_to_float(a, status);
1520 return float16_round_pack_canonical(pa, status);
1521}
1522
1523float16 int32_to_float16(int32_t a, float_status *status)
1524{
1525 return int64_to_float16(a, status);
1526}
1527
1528float16 int16_to_float16(int16_t a, float_status *status)
1529{
1530 return int64_to_float16(a, status);
1531}
1532
1533float32 int64_to_float32(int64_t a, float_status *status)
1534{
1535 FloatParts pa = int_to_float(a, status);
1536 return float32_round_pack_canonical(pa, status);
1537}
1538
1539float32 int32_to_float32(int32_t a, float_status *status)
1540{
1541 return int64_to_float32(a, status);
1542}
1543
1544float32 int16_to_float32(int16_t a, float_status *status)
1545{
1546 return int64_to_float32(a, status);
1547}
1548
1549float64 int64_to_float64(int64_t a, float_status *status)
1550{
1551 FloatParts pa = int_to_float(a, status);
1552 return float64_round_pack_canonical(pa, status);
1553}
1554
1555float64 int32_to_float64(int32_t a, float_status *status)
1556{
1557 return int64_to_float64(a, status);
1558}
1559
1560float64 int16_to_float64(int16_t a, float_status *status)
1561{
1562 return int64_to_float64(a, status);
1563}
1564
1565
1566/*
1567 * Unsigned Integer to float conversions
1568 *
1569 * Returns the result of converting the unsigned integer `a' to the
1570 * floating-point format. The conversion is performed according to the
1571 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1572 */
1573
1574static FloatParts uint_to_float(uint64_t a, float_status *status)
1575{
1576 FloatParts r = { .sign = false};
1577
1578 if (a == 0) {
1579 r.cls = float_class_zero;
1580 } else {
1581 int spare_bits = clz64(a) - 1;
1582 r.cls = float_class_normal;
1583 r.exp = DECOMPOSED_BINARY_POINT - spare_bits;
1584 if (spare_bits < 0) {
1585 shift64RightJamming(a, -spare_bits, &a);
1586 r.frac = a;
1587 } else {
1588 r.frac = a << spare_bits;
1589 }
1590 }
1591
1592 return r;
1593}
1594
1595float16 uint64_to_float16(uint64_t a, float_status *status)
1596{
1597 FloatParts pa = uint_to_float(a, status);
1598 return float16_round_pack_canonical(pa, status);
1599}
1600
1601float16 uint32_to_float16(uint32_t a, float_status *status)
1602{
1603 return uint64_to_float16(a, status);
1604}
1605
1606float16 uint16_to_float16(uint16_t a, float_status *status)
1607{
1608 return uint64_to_float16(a, status);
1609}
1610
1611float32 uint64_to_float32(uint64_t a, float_status *status)
1612{
1613 FloatParts pa = uint_to_float(a, status);
1614 return float32_round_pack_canonical(pa, status);
1615}
1616
1617float32 uint32_to_float32(uint32_t a, float_status *status)
1618{
1619 return uint64_to_float32(a, status);
1620}
1621
1622float32 uint16_to_float32(uint16_t a, float_status *status)
1623{
1624 return uint64_to_float32(a, status);
1625}
1626
1627float64 uint64_to_float64(uint64_t a, float_status *status)
1628{
1629 FloatParts pa = uint_to_float(a, status);
1630 return float64_round_pack_canonical(pa, status);
1631}
1632
1633float64 uint32_to_float64(uint32_t a, float_status *status)
1634{
1635 return uint64_to_float64(a, status);
1636}
1637
1638float64 uint16_to_float64(uint16_t a, float_status *status)
1639{
1640 return uint64_to_float64(a, status);
1641}
1642
89360067
AB
/* Float Min/Max */
/* min() and max() functions. These can't be implemented as
 * 'compare and pick one input' because that would mishandle
 * NaNs and +0 vs -0.
 *
 * minnum() and maxnum() functions. These are similar to the min()
 * and max() functions but if one of the arguments is a QNaN and
 * the other is numerical then the numerical argument is returned.
 * SNaNs will get quietened before being returned.
 * minnum() and maxnum() correspond to the IEEE 754-2008 minNum()
 * and maxNum() operations. min() and max() are the typical min/max
 * semantics provided by many CPUs which predate that specification.
 *
 * minnummag() and maxnummag() functions correspond to minNumMag()
 * and maxNumMag() from IEEE 754-2008.
 */
1659static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
1660 bool ieee, bool ismag, float_status *s)
1661{
1662 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
1663 if (ieee) {
1664 /* Takes two floating-point values `a' and `b', one of
1665 * which is a NaN, and returns the appropriate NaN
1666 * result. If either `a' or `b' is a signaling NaN,
1667 * the invalid exception is raised.
1668 */
1669 if (is_snan(a.cls) || is_snan(b.cls)) {
1670 return pick_nan(a, b, s);
1671 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
1672 return b;
1673 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
1674 return a;
1675 }
1676 }
1677 return pick_nan(a, b, s);
1678 } else {
1679 int a_exp, b_exp;
89360067
AB
1680
1681 switch (a.cls) {
1682 case float_class_normal:
1683 a_exp = a.exp;
1684 break;
1685 case float_class_inf:
1686 a_exp = INT_MAX;
1687 break;
1688 case float_class_zero:
1689 a_exp = INT_MIN;
1690 break;
1691 default:
1692 g_assert_not_reached();
1693 break;
1694 }
1695 switch (b.cls) {
1696 case float_class_normal:
1697 b_exp = b.exp;
1698 break;
1699 case float_class_inf:
1700 b_exp = INT_MAX;
1701 break;
1702 case float_class_zero:
1703 b_exp = INT_MIN;
1704 break;
1705 default:
1706 g_assert_not_reached();
1707 break;
1708 }
1709
6245327a
EC
1710 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
1711 bool a_less = a_exp < b_exp;
1712 if (a_exp == b_exp) {
1713 a_less = a.frac < b.frac;
1714 }
1715 return a_less ^ ismin ? b : a;
89360067
AB
1716 }
1717
6245327a 1718 if (a.sign == b.sign) {
89360067
AB
1719 bool a_less = a_exp < b_exp;
1720 if (a_exp == b_exp) {
1721 a_less = a.frac < b.frac;
1722 }
6245327a 1723 return a.sign ^ a_less ^ ismin ? b : a;
89360067 1724 } else {
6245327a 1725 return a.sign ^ ismin ? b : a;
89360067
AB
1726 }
1727 }
1728}
1729
1730#define MINMAX(sz, name, ismin, isiee, ismag) \
1731float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
1732 float_status *s) \
1733{ \
1734 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1735 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1736 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
1737 \
1738 return float ## sz ## _round_pack_canonical(pr, s); \
1739}
1740
1741MINMAX(16, min, true, false, false)
1742MINMAX(16, minnum, true, true, false)
1743MINMAX(16, minnummag, true, true, true)
1744MINMAX(16, max, false, false, false)
1745MINMAX(16, maxnum, false, true, false)
1746MINMAX(16, maxnummag, false, true, true)
1747
1748MINMAX(32, min, true, false, false)
1749MINMAX(32, minnum, true, true, false)
1750MINMAX(32, minnummag, true, true, true)
1751MINMAX(32, max, false, false, false)
1752MINMAX(32, maxnum, false, true, false)
1753MINMAX(32, maxnummag, false, true, true)
1754
1755MINMAX(64, min, true, false, false)
1756MINMAX(64, minnum, true, true, false)
1757MINMAX(64, minnummag, true, true, true)
1758MINMAX(64, max, false, false, false)
1759MINMAX(64, maxnum, false, true, false)
1760MINMAX(64, maxnummag, false, true, true)
1761
1762#undef MINMAX
1763
0c4c9092
AB
1764/* Floating point compare */
1765static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
1766 float_status *s)
1767{
1768 if (is_nan(a.cls) || is_nan(b.cls)) {
1769 if (!is_quiet ||
1770 a.cls == float_class_snan ||
1771 b.cls == float_class_snan) {
1772 s->float_exception_flags |= float_flag_invalid;
1773 }
1774 return float_relation_unordered;
1775 }
1776
1777 if (a.cls == float_class_zero) {
1778 if (b.cls == float_class_zero) {
1779 return float_relation_equal;
1780 }
1781 return b.sign ? float_relation_greater : float_relation_less;
1782 } else if (b.cls == float_class_zero) {
1783 return a.sign ? float_relation_less : float_relation_greater;
1784 }
1785
1786 /* The only really important thing about infinity is its sign. If
1787 * both are infinities the sign marks the smallest of the two.
1788 */
1789 if (a.cls == float_class_inf) {
1790 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
1791 return float_relation_equal;
1792 }
1793 return a.sign ? float_relation_less : float_relation_greater;
1794 } else if (b.cls == float_class_inf) {
1795 return b.sign ? float_relation_greater : float_relation_less;
1796 }
1797
1798 if (a.sign != b.sign) {
1799 return a.sign ? float_relation_less : float_relation_greater;
1800 }
1801
1802 if (a.exp == b.exp) {
1803 if (a.frac == b.frac) {
1804 return float_relation_equal;
1805 }
1806 if (a.sign) {
1807 return a.frac > b.frac ?
1808 float_relation_less : float_relation_greater;
1809 } else {
1810 return a.frac > b.frac ?
1811 float_relation_greater : float_relation_less;
1812 }
1813 } else {
1814 if (a.sign) {
1815 return a.exp > b.exp ? float_relation_less : float_relation_greater;
1816 } else {
1817 return a.exp > b.exp ? float_relation_greater : float_relation_less;
1818 }
1819 }
1820}
1821
1822#define COMPARE(sz) \
1823int float ## sz ## _compare(float ## sz a, float ## sz b, \
1824 float_status *s) \
1825{ \
1826 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1827 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1828 return compare_floats(pa, pb, false, s); \
1829} \
1830int float ## sz ## _compare_quiet(float ## sz a, float ## sz b, \
1831 float_status *s) \
1832{ \
1833 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1834 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1835 return compare_floats(pa, pb, true, s); \
1836}
1837
1838COMPARE(16)
1839COMPARE(32)
1840COMPARE(64)
1841
1842#undef COMPARE
1843
0bfc9f19
AB
1844/* Multiply A by 2 raised to the power N. */
1845static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
1846{
1847 if (unlikely(is_nan(a.cls))) {
1848 return return_nan(a, s);
1849 }
1850 if (a.cls == float_class_normal) {
ce8d4082
RH
1851 /* The largest float type (even though not supported by FloatParts)
1852 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
1853 * still allows rounding to infinity, without allowing overflow
1854 * within the int32_t that backs FloatParts.exp.
1855 */
1856 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
1857 a.exp += n;
1858 }
1859 return a;
1860}
1861
1862float16 float16_scalbn(float16 a, int n, float_status *status)
1863{
1864 FloatParts pa = float16_unpack_canonical(a, status);
1865 FloatParts pr = scalbn_decomposed(pa, n, status);
1866 return float16_round_pack_canonical(pr, status);
1867}
1868
1869float32 float32_scalbn(float32 a, int n, float_status *status)
1870{
1871 FloatParts pa = float32_unpack_canonical(a, status);
1872 FloatParts pr = scalbn_decomposed(pa, n, status);
1873 return float32_round_pack_canonical(pr, status);
1874}
1875
1876float64 float64_scalbn(float64 a, int n, float_status *status)
1877{
1878 FloatParts pa = float64_unpack_canonical(a, status);
1879 FloatParts pr = scalbn_decomposed(pa, n, status);
1880 return float64_round_pack_canonical(pr, status);
1881}
1882
c13bb2da
AB
1883/*
1884 * Square Root
1885 *
1886 * The old softfloat code did an approximation step before zeroing in
1887 * on the final result. However for simpleness we just compute the
1888 * square root by iterating down from the implicit bit to enough extra
1889 * bits to ensure we get a correctly rounded result.
1890 *
1891 * This does mean however the calculation is slower than before,
1892 * especially for 64 bit floats.
1893 */
1894
1895static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
1896{
1897 uint64_t a_frac, r_frac, s_frac;
1898 int bit, last_bit;
1899
1900 if (is_nan(a.cls)) {
1901 return return_nan(a, s);
1902 }
1903 if (a.cls == float_class_zero) {
1904 return a; /* sqrt(+-0) = +-0 */
1905 }
1906 if (a.sign) {
1907 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1908 return parts_default_nan(s);
c13bb2da
AB
1909 }
1910 if (a.cls == float_class_inf) {
1911 return a; /* sqrt(+inf) = +inf */
1912 }
1913
1914 assert(a.cls == float_class_normal);
1915
1916 /* We need two overflow bits at the top. Adding room for that is a
1917 * right shift. If the exponent is odd, we can discard the low bit
1918 * by multiplying the fraction by 2; that's a left shift. Combine
1919 * those and we shift right if the exponent is even.
1920 */
1921 a_frac = a.frac;
1922 if (!(a.exp & 1)) {
1923 a_frac >>= 1;
1924 }
1925 a.exp >>= 1;
1926
1927 /* Bit-by-bit computation of sqrt. */
1928 r_frac = 0;
1929 s_frac = 0;
1930
1931 /* Iterate from implicit bit down to the 3 extra bits to compute a
1932 * properly rounded result. Remember we've inserted one more bit
1933 * at the top, so these positions are one less.
1934 */
1935 bit = DECOMPOSED_BINARY_POINT - 1;
1936 last_bit = MAX(p->frac_shift - 4, 0);
1937 do {
1938 uint64_t q = 1ULL << bit;
1939 uint64_t t_frac = s_frac + q;
1940 if (t_frac <= a_frac) {
1941 s_frac = t_frac + q;
1942 a_frac -= t_frac;
1943 r_frac += q;
1944 }
1945 a_frac <<= 1;
1946 } while (--bit >= last_bit);
1947
1948 /* Undo the right shift done above. If there is any remaining
1949 * fraction, the result is inexact. Set the sticky bit.
1950 */
1951 a.frac = (r_frac << 1) + (a_frac != 0);
1952
1953 return a;
1954}
1955
1956float16 __attribute__((flatten)) float16_sqrt(float16 a, float_status *status)
1957{
1958 FloatParts pa = float16_unpack_canonical(a, status);
1959 FloatParts pr = sqrt_float(pa, status, &float16_params);
1960 return float16_round_pack_canonical(pr, status);
1961}
1962
1963float32 __attribute__((flatten)) float32_sqrt(float32 a, float_status *status)
1964{
1965 FloatParts pa = float32_unpack_canonical(a, status);
1966 FloatParts pr = sqrt_float(pa, status, &float32_params);
1967 return float32_round_pack_canonical(pr, status);
1968}
1969
1970float64 __attribute__((flatten)) float64_sqrt(float64 a, float_status *status)
1971{
1972 FloatParts pa = float64_unpack_canonical(a, status);
1973 FloatParts pr = sqrt_float(pa, status, &float64_params);
1974 return float64_round_pack_canonical(pr, status);
1975}
1976
1977
158142c2
FB
1978/*----------------------------------------------------------------------------
1979| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
1980| and 7, and returns the properly rounded 32-bit integer corresponding to the
1981| input. If `zSign' is 1, the input is negated before being converted to an
1982| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
1983| is simply rounded to an integer, with the inexact exception raised if the
1984| input cannot be represented exactly as an integer. However, if the fixed-
1985| point input is too large, the invalid exception is raised and the largest
1986| positive or negative integer is returned.
1987*----------------------------------------------------------------------------*/
1988
f4014512 1989static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
158142c2 1990{
8f506c70 1991 int8_t roundingMode;
158142c2 1992 flag roundNearestEven;
8f506c70 1993 int8_t roundIncrement, roundBits;
760e1416 1994 int32_t z;
158142c2 1995
a2f2d288 1996 roundingMode = status->float_rounding_mode;
158142c2 1997 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
1998 switch (roundingMode) {
1999 case float_round_nearest_even:
f9288a76 2000 case float_round_ties_away:
dc355b76
PM
2001 roundIncrement = 0x40;
2002 break;
2003 case float_round_to_zero:
2004 roundIncrement = 0;
2005 break;
2006 case float_round_up:
2007 roundIncrement = zSign ? 0 : 0x7f;
2008 break;
2009 case float_round_down:
2010 roundIncrement = zSign ? 0x7f : 0;
2011 break;
2012 default:
2013 abort();
158142c2
FB
2014 }
2015 roundBits = absZ & 0x7F;
2016 absZ = ( absZ + roundIncrement )>>7;
2017 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2018 z = absZ;
2019 if ( zSign ) z = - z;
2020 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 2021 float_raise(float_flag_invalid, status);
bb98fe42 2022 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2 2023 }
a2f2d288
PM
2024 if (roundBits) {
2025 status->float_exception_flags |= float_flag_inexact;
2026 }
158142c2
FB
2027 return z;
2028
2029}
2030
2031/*----------------------------------------------------------------------------
2032| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2033| `absZ1', with binary point between bits 63 and 64 (between the input words),
2034| and returns the properly rounded 64-bit integer corresponding to the input.
2035| If `zSign' is 1, the input is negated before being converted to an integer.
2036| Ordinarily, the fixed-point input is simply rounded to an integer, with
2037| the inexact exception raised if the input cannot be represented exactly as
2038| an integer. However, if the fixed-point input is too large, the invalid
2039| exception is raised and the largest positive or negative integer is
2040| returned.
2041*----------------------------------------------------------------------------*/
2042
f42c2224 2043static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 2044 float_status *status)
158142c2 2045{
8f506c70 2046 int8_t roundingMode;
158142c2 2047 flag roundNearestEven, increment;
760e1416 2048 int64_t z;
158142c2 2049
a2f2d288 2050 roundingMode = status->float_rounding_mode;
158142c2 2051 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2052 switch (roundingMode) {
2053 case float_round_nearest_even:
f9288a76 2054 case float_round_ties_away:
dc355b76
PM
2055 increment = ((int64_t) absZ1 < 0);
2056 break;
2057 case float_round_to_zero:
2058 increment = 0;
2059 break;
2060 case float_round_up:
2061 increment = !zSign && absZ1;
2062 break;
2063 case float_round_down:
2064 increment = zSign && absZ1;
2065 break;
2066 default:
2067 abort();
158142c2
FB
2068 }
2069 if ( increment ) {
2070 ++absZ0;
2071 if ( absZ0 == 0 ) goto overflow;
bb98fe42 2072 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
2073 }
2074 z = absZ0;
2075 if ( zSign ) z = - z;
2076 if ( z && ( ( z < 0 ) ^ zSign ) ) {
2077 overflow:
ff32e16e 2078 float_raise(float_flag_invalid, status);
158142c2 2079 return
bb98fe42 2080 zSign ? (int64_t) LIT64( 0x8000000000000000 )
158142c2
FB
2081 : LIT64( 0x7FFFFFFFFFFFFFFF );
2082 }
a2f2d288
PM
2083 if (absZ1) {
2084 status->float_exception_flags |= float_flag_inexact;
2085 }
158142c2
FB
2086 return z;
2087
2088}
2089
fb3ea83a
TM
2090/*----------------------------------------------------------------------------
2091| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2092| `absZ1', with binary point between bits 63 and 64 (between the input words),
2093| and returns the properly rounded 64-bit unsigned integer corresponding to the
2094| input. Ordinarily, the fixed-point input is simply rounded to an integer,
2095| with the inexact exception raised if the input cannot be represented exactly
2096| as an integer. However, if the fixed-point input is too large, the invalid
2097| exception is raised and the largest unsigned integer is returned.
2098*----------------------------------------------------------------------------*/
2099
f42c2224 2100static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
e5a41ffa 2101 uint64_t absZ1, float_status *status)
fb3ea83a 2102{
8f506c70 2103 int8_t roundingMode;
fb3ea83a
TM
2104 flag roundNearestEven, increment;
2105
a2f2d288 2106 roundingMode = status->float_rounding_mode;
fb3ea83a 2107 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
2108 switch (roundingMode) {
2109 case float_round_nearest_even:
f9288a76 2110 case float_round_ties_away:
dc355b76
PM
2111 increment = ((int64_t)absZ1 < 0);
2112 break;
2113 case float_round_to_zero:
2114 increment = 0;
2115 break;
2116 case float_round_up:
2117 increment = !zSign && absZ1;
2118 break;
2119 case float_round_down:
2120 increment = zSign && absZ1;
2121 break;
2122 default:
2123 abort();
fb3ea83a
TM
2124 }
2125 if (increment) {
2126 ++absZ0;
2127 if (absZ0 == 0) {
ff32e16e 2128 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2129 return LIT64(0xFFFFFFFFFFFFFFFF);
2130 }
2131 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
2132 }
2133
2134 if (zSign && absZ0) {
ff32e16e 2135 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2136 return 0;
2137 }
2138
2139 if (absZ1) {
a2f2d288 2140 status->float_exception_flags |= float_flag_inexact;
fb3ea83a
TM
2141 }
2142 return absZ0;
2143}
2144
37d18660
PM
2145/*----------------------------------------------------------------------------
2146| If `a' is denormal and we are in flush-to-zero mode then set the
2147| input-denormal exception and return zero. Otherwise just return the value.
2148*----------------------------------------------------------------------------*/
e5a41ffa 2149float32 float32_squash_input_denormal(float32 a, float_status *status)
37d18660 2150{
a2f2d288 2151 if (status->flush_inputs_to_zero) {
37d18660 2152 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
ff32e16e 2153 float_raise(float_flag_input_denormal, status);
37d18660
PM
2154 return make_float32(float32_val(a) & 0x80000000);
2155 }
2156 }
2157 return a;
2158}
2159
158142c2
FB
2160/*----------------------------------------------------------------------------
2161| Normalizes the subnormal single-precision floating-point value represented
2162| by the denormalized significand `aSig'. The normalized exponent and
2163| significand are stored at the locations pointed to by `zExpPtr' and
2164| `zSigPtr', respectively.
2165*----------------------------------------------------------------------------*/
2166
static void
normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* Left shift that moves the leading set bit of aSig up to bit 23. */
    const int8_t leftShift = countLeadingZeros32(aSig) - 8;

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
2177
158142c2
FB
2178/*----------------------------------------------------------------------------
2179| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2180| and significand `zSig', and returns the proper single-precision floating-
2181| point value corresponding to the abstract input. Ordinarily, the abstract
2182| value is simply rounded and packed into the single-precision format, with
2183| the inexact exception raised if the abstract input cannot be represented
2184| exactly. However, if the abstract value is too large, the overflow and
2185| inexact exceptions are raised and an infinity or maximal finite value is
2186| returned. If the abstract value is too small, the input value is rounded to
2187| a subnormal number, and the underflow and inexact exceptions are raised if
2188| the abstract input cannot be represented exactly as a subnormal single-
2189| precision floating-point number.
2190| The input significand `zSig' has its binary point between bits 30
2191| and 29, which is 7 bits to the left of the usual location. This shifted
2192| significand must be normalized or smaller. If `zSig' is not normalized,
2193| `zExp' must be 0; in that case, the result returned is a subnormal number,
2194| and it must not require rounding. In the usual case that `zSig' is
2195| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2196| The handling of underflow and overflow follows the IEC/IEEE Standard for
2197| Binary Floating-Point Arithmetic.
2198*----------------------------------------------------------------------------*/
2199
0c48262d 2200static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2201 float_status *status)
158142c2 2202{
8f506c70 2203 int8_t roundingMode;
158142c2 2204 flag roundNearestEven;
8f506c70 2205 int8_t roundIncrement, roundBits;
158142c2
FB
2206 flag isTiny;
2207
a2f2d288 2208 roundingMode = status->float_rounding_mode;
158142c2 2209 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2210 switch (roundingMode) {
2211 case float_round_nearest_even:
f9288a76 2212 case float_round_ties_away:
dc355b76
PM
2213 roundIncrement = 0x40;
2214 break;
2215 case float_round_to_zero:
2216 roundIncrement = 0;
2217 break;
2218 case float_round_up:
2219 roundIncrement = zSign ? 0 : 0x7f;
2220 break;
2221 case float_round_down:
2222 roundIncrement = zSign ? 0x7f : 0;
2223 break;
2224 default:
2225 abort();
2226 break;
158142c2
FB
2227 }
2228 roundBits = zSig & 0x7F;
bb98fe42 2229 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
2230 if ( ( 0xFD < zExp )
2231 || ( ( zExp == 0xFD )
bb98fe42 2232 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2233 ) {
ff32e16e 2234 float_raise(float_flag_overflow | float_flag_inexact, status);
f090c9d4 2235 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
158142c2
FB
2236 }
2237 if ( zExp < 0 ) {
a2f2d288 2238 if (status->flush_to_zero) {
ff32e16e 2239 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2240 return packFloat32(zSign, 0, 0);
2241 }
158142c2 2242 isTiny =
a2f2d288
PM
2243 (status->float_detect_tininess
2244 == float_tininess_before_rounding)
158142c2
FB
2245 || ( zExp < -1 )
2246 || ( zSig + roundIncrement < 0x80000000 );
2247 shift32RightJamming( zSig, - zExp, &zSig );
2248 zExp = 0;
2249 roundBits = zSig & 0x7F;
ff32e16e
PM
2250 if (isTiny && roundBits) {
2251 float_raise(float_flag_underflow, status);
2252 }
158142c2
FB
2253 }
2254 }
a2f2d288
PM
2255 if (roundBits) {
2256 status->float_exception_flags |= float_flag_inexact;
2257 }
158142c2
FB
2258 zSig = ( zSig + roundIncrement )>>7;
2259 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2260 if ( zSig == 0 ) zExp = 0;
2261 return packFloat32( zSign, zExp, zSig );
2262
2263}
2264
2265/*----------------------------------------------------------------------------
2266| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2267| and significand `zSig', and returns the proper single-precision floating-
2268| point value corresponding to the abstract input. This routine is just like
2269| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
2270| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2271| floating-point exponent.
2272*----------------------------------------------------------------------------*/
2273
2274static float32
0c48262d 2275 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2276 float_status *status)
158142c2 2277{
8f506c70 2278 int8_t shiftCount;
158142c2
FB
2279
2280 shiftCount = countLeadingZeros32( zSig ) - 1;
ff32e16e
PM
2281 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
2282 status);
158142c2
FB
2283
2284}
2285
37d18660
PM
2286/*----------------------------------------------------------------------------
2287| If `a' is denormal and we are in flush-to-zero mode then set the
2288| input-denormal exception and return zero. Otherwise just return the value.
2289*----------------------------------------------------------------------------*/
e5a41ffa 2290float64 float64_squash_input_denormal(float64 a, float_status *status)
37d18660 2291{
a2f2d288 2292 if (status->flush_inputs_to_zero) {
37d18660 2293 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
ff32e16e 2294 float_raise(float_flag_input_denormal, status);
37d18660
PM
2295 return make_float64(float64_val(a) & (1ULL << 63));
2296 }
2297 }
2298 return a;
2299}
2300
158142c2
FB
2301/*----------------------------------------------------------------------------
2302| Normalizes the subnormal double-precision floating-point value represented
2303| by the denormalized significand `aSig'. The normalized exponent and
2304| significand are stored at the locations pointed to by `zExpPtr' and
2305| `zSigPtr', respectively.
2306*----------------------------------------------------------------------------*/
2307
static void
normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* Left shift that moves the leading set bit of aSig up to bit 52. */
    const int8_t leftShift = countLeadingZeros64(aSig) - 11;

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
2318
2319/*----------------------------------------------------------------------------
2320| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2321| double-precision floating-point value, returning the result. After being
2322| shifted into the proper positions, the three fields are simply added
2323| together to form the result. This means that any integer portion of `zSig'
2324| will be added into the exponent. Since a properly normalized significand
2325| will have an integer portion equal to 1, the `zExp' input should be 1 less
2326| than the desired result exponent whenever `zSig' is a complete, normalized
2327| significand.
2328*----------------------------------------------------------------------------*/
2329
0c48262d 2330static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
158142c2
FB
2331{
2332
f090c9d4 2333 return make_float64(
bb98fe42 2334 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
2335
2336}
2337
2338/*----------------------------------------------------------------------------
2339| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2340| and significand `zSig', and returns the proper double-precision floating-
2341| point value corresponding to the abstract input. Ordinarily, the abstract
2342| value is simply rounded and packed into the double-precision format, with
2343| the inexact exception raised if the abstract input cannot be represented
2344| exactly. However, if the abstract value is too large, the overflow and
2345| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
2346| returned. If the abstract value is too small, the input value is rounded to
2347| a subnormal number, and the underflow and inexact exceptions are raised if
2348| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
2349| precision floating-point number.
2350| The input significand `zSig' has its binary point between bits 62
2351| and 61, which is 10 bits to the left of the usual location. This shifted
2352| significand must be normalized or smaller. If `zSig' is not normalized,
2353| `zExp' must be 0; in that case, the result returned is a subnormal number,
2354| and it must not require rounding. In the usual case that `zSig' is
2355| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2356| The handling of underflow and overflow follows the IEC/IEEE Standard for
2357| Binary Floating-Point Arithmetic.
2358*----------------------------------------------------------------------------*/
2359
0c48262d 2360static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2361 float_status *status)
158142c2 2362{
8f506c70 2363 int8_t roundingMode;
158142c2 2364 flag roundNearestEven;
0c48262d 2365 int roundIncrement, roundBits;
158142c2
FB
2366 flag isTiny;
2367
a2f2d288 2368 roundingMode = status->float_rounding_mode;
158142c2 2369 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2370 switch (roundingMode) {
2371 case float_round_nearest_even:
f9288a76 2372 case float_round_ties_away:
dc355b76
PM
2373 roundIncrement = 0x200;
2374 break;
2375 case float_round_to_zero:
2376 roundIncrement = 0;
2377 break;
2378 case float_round_up:
2379 roundIncrement = zSign ? 0 : 0x3ff;
2380 break;
2381 case float_round_down:
2382 roundIncrement = zSign ? 0x3ff : 0;
2383 break;
9ee6f678
BR
2384 case float_round_to_odd:
2385 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2386 break;
dc355b76
PM
2387 default:
2388 abort();
158142c2
FB
2389 }
2390 roundBits = zSig & 0x3FF;
bb98fe42 2391 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
2392 if ( ( 0x7FD < zExp )
2393 || ( ( zExp == 0x7FD )
bb98fe42 2394 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2395 ) {
9ee6f678
BR
2396 bool overflow_to_inf = roundingMode != float_round_to_odd &&
2397 roundIncrement != 0;
ff32e16e 2398 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 2399 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
2400 }
2401 if ( zExp < 0 ) {
a2f2d288 2402 if (status->flush_to_zero) {
ff32e16e 2403 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2404 return packFloat64(zSign, 0, 0);
2405 }
158142c2 2406 isTiny =
a2f2d288
PM
2407 (status->float_detect_tininess
2408 == float_tininess_before_rounding)
158142c2
FB
2409 || ( zExp < -1 )
2410 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
2411 shift64RightJamming( zSig, - zExp, &zSig );
2412 zExp = 0;
2413 roundBits = zSig & 0x3FF;
ff32e16e
PM
2414 if (isTiny && roundBits) {
2415 float_raise(float_flag_underflow, status);
2416 }
9ee6f678
BR
2417 if (roundingMode == float_round_to_odd) {
2418 /*
2419 * For round-to-odd case, the roundIncrement depends on
2420 * zSig which just changed.
2421 */
2422 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2423 }
158142c2
FB
2424 }
2425 }
a2f2d288
PM
2426 if (roundBits) {
2427 status->float_exception_flags |= float_flag_inexact;
2428 }
158142c2
FB
2429 zSig = ( zSig + roundIncrement )>>10;
2430 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
2431 if ( zSig == 0 ) zExp = 0;
2432 return packFloat64( zSign, zExp, zSig );
2433
2434}
2435
2436/*----------------------------------------------------------------------------
2437| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2438| and significand `zSig', and returns the proper double-precision floating-
2439| point value corresponding to the abstract input. This routine is just like
2440| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
2441| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2442| floating-point exponent.
2443*----------------------------------------------------------------------------*/
2444
2445static float64
0c48262d 2446 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2447 float_status *status)
158142c2 2448{
8f506c70 2449 int8_t shiftCount;
158142c2
FB
2450
2451 shiftCount = countLeadingZeros64( zSig ) - 1;
ff32e16e
PM
2452 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
2453 status);
158142c2
FB
2454
2455}
2456
158142c2
FB
2457/*----------------------------------------------------------------------------
2458| Normalizes the subnormal extended double-precision floating-point value
2459| represented by the denormalized significand `aSig'. The normalized exponent
2460| and significand are stored at the locations pointed to by `zExpPtr' and
2461| `zSigPtr', respectively.
2462*----------------------------------------------------------------------------*/
2463
88857aca
LV
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* Left shift that moves the leading set bit of aSig up to bit 63. */
    const int8_t leftShift = countLeadingZeros64(aSig);

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
2473
2474/*----------------------------------------------------------------------------
2475| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2476| and extended significand formed by the concatenation of `zSig0' and `zSig1',
2477| and returns the proper extended double-precision floating-point value
2478| corresponding to the abstract input. Ordinarily, the abstract value is
2479| rounded and packed into the extended double-precision format, with the
2480| inexact exception raised if the abstract input cannot be represented
2481| exactly. However, if the abstract value is too large, the overflow and
2482| inexact exceptions are raised and an infinity or maximal finite value is
2483| returned. If the abstract value is too small, the input value is rounded to
2484| a subnormal number, and the underflow and inexact exceptions are raised if
2485| the abstract input cannot be represented exactly as a subnormal extended
2486| double-precision floating-point number.
2487| If `roundingPrecision' is 32 or 64, the result is rounded to the same
2488| number of bits as single or double precision, respectively. Otherwise, the
2489| result is rounded to the full precision of the extended double-precision
2490| format.
2491| The input significand must be normalized or smaller. If the input
2492| significand is not normalized, `zExp' must be 0; in that case, the result
2493| returned is a subnormal number, and it must not require rounding. The
2494| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
2495| Floating-Point Arithmetic.
2496*----------------------------------------------------------------------------*/
2497
88857aca
LV
2498floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
2499 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
2500 float_status *status)
158142c2 2501{
8f506c70 2502 int8_t roundingMode;
158142c2 2503 flag roundNearestEven, increment, isTiny;
f42c2224 2504 int64_t roundIncrement, roundMask, roundBits;
158142c2 2505
a2f2d288 2506 roundingMode = status->float_rounding_mode;
158142c2
FB
2507 roundNearestEven = ( roundingMode == float_round_nearest_even );
2508 if ( roundingPrecision == 80 ) goto precision80;
2509 if ( roundingPrecision == 64 ) {
2510 roundIncrement = LIT64( 0x0000000000000400 );
2511 roundMask = LIT64( 0x00000000000007FF );
2512 }
2513 else if ( roundingPrecision == 32 ) {
2514 roundIncrement = LIT64( 0x0000008000000000 );
2515 roundMask = LIT64( 0x000000FFFFFFFFFF );
2516 }
2517 else {
2518 goto precision80;
2519 }
2520 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
2521 switch (roundingMode) {
2522 case float_round_nearest_even:
f9288a76 2523 case float_round_ties_away:
dc355b76
PM
2524 break;
2525 case float_round_to_zero:
2526 roundIncrement = 0;
2527 break;
2528 case float_round_up:
2529 roundIncrement = zSign ? 0 : roundMask;
2530 break;
2531 case float_round_down:
2532 roundIncrement = zSign ? roundMask : 0;
2533 break;
2534 default:
2535 abort();
158142c2
FB
2536 }
2537 roundBits = zSig0 & roundMask;
bb98fe42 2538 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
2539 if ( ( 0x7FFE < zExp )
2540 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
2541 ) {
2542 goto overflow;
2543 }
2544 if ( zExp <= 0 ) {
a2f2d288 2545 if (status->flush_to_zero) {
ff32e16e 2546 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2547 return packFloatx80(zSign, 0, 0);
2548 }
158142c2 2549 isTiny =
a2f2d288
PM
2550 (status->float_detect_tininess
2551 == float_tininess_before_rounding)
158142c2
FB
2552 || ( zExp < 0 )
2553 || ( zSig0 <= zSig0 + roundIncrement );
2554 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
2555 zExp = 0;
2556 roundBits = zSig0 & roundMask;
ff32e16e
PM
2557 if (isTiny && roundBits) {
2558 float_raise(float_flag_underflow, status);
2559 }
a2f2d288
PM
2560 if (roundBits) {
2561 status->float_exception_flags |= float_flag_inexact;
2562 }
158142c2 2563 zSig0 += roundIncrement;
bb98fe42 2564 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
2565 roundIncrement = roundMask + 1;
2566 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
2567 roundMask |= roundIncrement;
2568 }
2569 zSig0 &= ~ roundMask;
2570 return packFloatx80( zSign, zExp, zSig0 );
2571 }
2572 }
a2f2d288
PM
2573 if (roundBits) {
2574 status->float_exception_flags |= float_flag_inexact;
2575 }
158142c2
FB
2576 zSig0 += roundIncrement;
2577 if ( zSig0 < roundIncrement ) {
2578 ++zExp;
2579 zSig0 = LIT64( 0x8000000000000000 );
2580 }
2581 roundIncrement = roundMask + 1;
2582 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
2583 roundMask |= roundIncrement;
2584 }
2585 zSig0 &= ~ roundMask;
2586 if ( zSig0 == 0 ) zExp = 0;
2587 return packFloatx80( zSign, zExp, zSig0 );
2588 precision80:
dc355b76
PM
2589 switch (roundingMode) {
2590 case float_round_nearest_even:
f9288a76 2591 case float_round_ties_away:
dc355b76
PM
2592 increment = ((int64_t)zSig1 < 0);
2593 break;
2594 case float_round_to_zero:
2595 increment = 0;
2596 break;
2597 case float_round_up:
2598 increment = !zSign && zSig1;
2599 break;
2600 case float_round_down:
2601 increment = zSign && zSig1;
2602 break;
2603 default:
2604 abort();
158142c2 2605 }
bb98fe42 2606 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
2607 if ( ( 0x7FFE < zExp )
2608 || ( ( zExp == 0x7FFE )
2609 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
2610 && increment
2611 )
2612 ) {
2613 roundMask = 0;
2614 overflow:
ff32e16e 2615 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
2616 if ( ( roundingMode == float_round_to_zero )
2617 || ( zSign && ( roundingMode == float_round_up ) )
2618 || ( ! zSign && ( roundingMode == float_round_down ) )
2619 ) {
2620 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
2621 }
0f605c88
LV
2622 return packFloatx80(zSign,
2623 floatx80_infinity_high,
2624 floatx80_infinity_low);
158142c2
FB
2625 }
2626 if ( zExp <= 0 ) {
2627 isTiny =
a2f2d288
PM
2628 (status->float_detect_tininess
2629 == float_tininess_before_rounding)
158142c2
FB
2630 || ( zExp < 0 )
2631 || ! increment
2632 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
2633 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
2634 zExp = 0;
ff32e16e
PM
2635 if (isTiny && zSig1) {
2636 float_raise(float_flag_underflow, status);
2637 }
a2f2d288
PM
2638 if (zSig1) {
2639 status->float_exception_flags |= float_flag_inexact;
2640 }
dc355b76
PM
2641 switch (roundingMode) {
2642 case float_round_nearest_even:
f9288a76 2643 case float_round_ties_away:
dc355b76
PM
2644 increment = ((int64_t)zSig1 < 0);
2645 break;
2646 case float_round_to_zero:
2647 increment = 0;
2648 break;
2649 case float_round_up:
2650 increment = !zSign && zSig1;
2651 break;
2652 case float_round_down:
2653 increment = zSign && zSig1;
2654 break;
2655 default:
2656 abort();
158142c2
FB
2657 }
2658 if ( increment ) {
2659 ++zSig0;
2660 zSig0 &=
bb98fe42
AF
2661 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
2662 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
2663 }
2664 return packFloatx80( zSign, zExp, zSig0 );
2665 }
2666 }
a2f2d288
PM
2667 if (zSig1) {
2668 status->float_exception_flags |= float_flag_inexact;
2669 }
158142c2
FB
2670 if ( increment ) {
2671 ++zSig0;
2672 if ( zSig0 == 0 ) {
2673 ++zExp;
2674 zSig0 = LIT64( 0x8000000000000000 );
2675 }
2676 else {
bb98fe42 2677 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
2678 }
2679 }
2680 else {
2681 if ( zSig0 == 0 ) zExp = 0;
2682 }
2683 return packFloatx80( zSign, zExp, zSig0 );
2684
2685}
2686
2687/*----------------------------------------------------------------------------
2688| Takes an abstract floating-point value having sign `zSign', exponent
2689| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
2690| and returns the proper extended double-precision floating-point value
2691| corresponding to the abstract input. This routine is just like
2692| `roundAndPackFloatx80' except that the input significand does not have to be
2693| normalized.
2694*----------------------------------------------------------------------------*/
2695
88857aca
LV
2696floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
2697 flag zSign, int32_t zExp,
2698 uint64_t zSig0, uint64_t zSig1,
2699 float_status *status)
158142c2 2700{
8f506c70 2701 int8_t shiftCount;
158142c2
FB
2702
2703 if ( zSig0 == 0 ) {
2704 zSig0 = zSig1;
2705 zSig1 = 0;
2706 zExp -= 64;
2707 }
2708 shiftCount = countLeadingZeros64( zSig0 );
2709 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
2710 zExp -= shiftCount;
ff32e16e
PM
2711 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
2712 zSig0, zSig1, status);
158142c2
FB
2713
2714}
2715
158142c2
FB
2716/*----------------------------------------------------------------------------
2717| Returns the least-significant 64 fraction bits of the quadruple-precision
2718| floating-point value `a'.
2719*----------------------------------------------------------------------------*/
2720
a49db98d 2721static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
2722{
2723
2724 return a.low;
2725
2726}
2727
2728/*----------------------------------------------------------------------------
2729| Returns the most-significant 48 fraction bits of the quadruple-precision
2730| floating-point value `a'.
2731*----------------------------------------------------------------------------*/
2732
a49db98d 2733static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
2734{
2735
2736 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
2737
2738}
2739
2740/*----------------------------------------------------------------------------
2741| Returns the exponent bits of the quadruple-precision floating-point value
2742| `a'.
2743*----------------------------------------------------------------------------*/
2744
f4014512 2745static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
2746{
2747
2748 return ( a.high>>48 ) & 0x7FFF;
2749
2750}
2751
2752/*----------------------------------------------------------------------------
2753| Returns the sign bit of the quadruple-precision floating-point value `a'.
2754*----------------------------------------------------------------------------*/
2755
a49db98d 2756static inline flag extractFloat128Sign( float128 a )
158142c2
FB
2757{
2758
2759 return a.high>>63;
2760
2761}
2762
2763/*----------------------------------------------------------------------------
2764| Normalizes the subnormal quadruple-precision floating-point value
2765| represented by the denormalized significand formed by the concatenation of
2766| `aSig0' and `aSig1'. The normalized exponent is stored at the location
2767| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
2768| significand are stored at the location pointed to by `zSig0Ptr', and the
2769| least significant 64 bits of the normalized significand are stored at the
2770| location pointed to by `zSig1Ptr'.
2771*----------------------------------------------------------------------------*/
2772
static void
normalizeFloat128Subnormal(uint64_t aSig0,
                           uint64_t aSig1,
                           int32_t *zExpPtr,
                           uint64_t *zSig0Ptr,
                           uint64_t *zSig1Ptr)
{
    int8_t shift;

    if (aSig0 != 0) {
        /* Leading bit is in the high word: a short 128-bit left shift
         * brings it up to bit 48 of that word. */
        shift = countLeadingZeros64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /* High word empty: the low word supplies the leading bit. */
    shift = countLeadingZeros64(aSig1) - 15;
    if (shift >= 0) {
        /* Everything fits in the high output word. */
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        /* The leading bit is above bit 48, so the value straddles both
         * output words. */
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
2803
2804/*----------------------------------------------------------------------------
2805| Packs the sign `zSign', the exponent `zExp', and the significand formed
2806| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
2807| floating-point value, returning the result. After being shifted into the
2808| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
2809| added together to form the most significant 64 bits of the result. This
2810| means that any integer portion of `zSig0' will be added into the exponent.
2811| Since a properly normalized significand will have an integer portion equal
2812| to 1, the `zExp' input should be 1 less than the desired result exponent
2813| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
2814| significand.
2815*----------------------------------------------------------------------------*/
2816
a49db98d 2817static inline float128
f4014512 2818 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
158142c2
FB
2819{
2820 float128 z;
2821
2822 z.low = zSig1;
bb98fe42 2823 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
158142c2
FB
2824 return z;
2825
2826}
2827
2828/*----------------------------------------------------------------------------
2829| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2830| and extended significand formed by the concatenation of `zSig0', `zSig1',
2831| and `zSig2', and returns the proper quadruple-precision floating-point value
2832| corresponding to the abstract input. Ordinarily, the abstract value is
2833| simply rounded and packed into the quadruple-precision format, with the
2834| inexact exception raised if the abstract input cannot be represented
2835| exactly. However, if the abstract value is too large, the overflow and
2836| inexact exceptions are raised and an infinity or maximal finite value is
2837| returned. If the abstract value is too small, the input value is rounded to
2838| a subnormal number, and the underflow and inexact exceptions are raised if
2839| the abstract input cannot be represented exactly as a subnormal quadruple-
2840| precision floating-point number.
2841| The input significand must be normalized or smaller. If the input
2842| significand is not normalized, `zExp' must be 0; in that case, the result
2843| returned is a subnormal number, and it must not require rounding. In the
2844| usual case that the input significand is normalized, `zExp' must be 1 less
2845| than the ``true'' floating-point exponent. The handling of underflow and
2846| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2847*----------------------------------------------------------------------------*/
2848
f4014512 2849static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
2850 uint64_t zSig0, uint64_t zSig1,
2851 uint64_t zSig2, float_status *status)
158142c2 2852{
8f506c70 2853 int8_t roundingMode;
158142c2
FB
2854 flag roundNearestEven, increment, isTiny;
2855
a2f2d288 2856 roundingMode = status->float_rounding_mode;
158142c2 2857 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2858 switch (roundingMode) {
2859 case float_round_nearest_even:
f9288a76 2860 case float_round_ties_away:
dc355b76
PM
2861 increment = ((int64_t)zSig2 < 0);
2862 break;
2863 case float_round_to_zero:
2864 increment = 0;
2865 break;
2866 case float_round_up:
2867 increment = !zSign && zSig2;
2868 break;
2869 case float_round_down:
2870 increment = zSign && zSig2;
2871 break;
9ee6f678
BR
2872 case float_round_to_odd:
2873 increment = !(zSig1 & 0x1) && zSig2;
2874 break;
dc355b76
PM
2875 default:
2876 abort();
158142c2 2877 }
bb98fe42 2878 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
2879 if ( ( 0x7FFD < zExp )
2880 || ( ( zExp == 0x7FFD )
2881 && eq128(
2882 LIT64( 0x0001FFFFFFFFFFFF ),
2883 LIT64( 0xFFFFFFFFFFFFFFFF ),
2884 zSig0,
2885 zSig1
2886 )
2887 && increment
2888 )
2889 ) {
ff32e16e 2890 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
2891 if ( ( roundingMode == float_round_to_zero )
2892 || ( zSign && ( roundingMode == float_round_up ) )
2893 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 2894 || (roundingMode == float_round_to_odd)
158142c2
FB
2895 ) {
2896 return
2897 packFloat128(
2898 zSign,
2899 0x7FFE,
2900 LIT64( 0x0000FFFFFFFFFFFF ),
2901 LIT64( 0xFFFFFFFFFFFFFFFF )
2902 );
2903 }
2904 return packFloat128( zSign, 0x7FFF, 0, 0 );
2905 }
2906 if ( zExp < 0 ) {
a2f2d288 2907 if (status->flush_to_zero) {
ff32e16e 2908 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2909 return packFloat128(zSign, 0, 0, 0);
2910 }
158142c2 2911 isTiny =
a2f2d288
PM
2912 (status->float_detect_tininess
2913 == float_tininess_before_rounding)
158142c2
FB
2914 || ( zExp < -1 )
2915 || ! increment
2916 || lt128(
2917 zSig0,
2918 zSig1,
2919 LIT64( 0x0001FFFFFFFFFFFF ),
2920 LIT64( 0xFFFFFFFFFFFFFFFF )
2921 );
2922 shift128ExtraRightJamming(
2923 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
2924 zExp = 0;
ff32e16e
PM
2925 if (isTiny && zSig2) {
2926 float_raise(float_flag_underflow, status);
2927 }
dc355b76
PM
2928 switch (roundingMode) {
2929 case float_round_nearest_even:
f9288a76 2930 case float_round_ties_away:
dc355b76
PM
2931 increment = ((int64_t)zSig2 < 0);
2932 break;
2933 case float_round_to_zero:
2934 increment = 0;
2935 break;
2936 case float_round_up:
2937 increment = !zSign && zSig2;
2938 break;
2939 case float_round_down:
2940 increment = zSign && zSig2;
2941 break;
9ee6f678
BR
2942 case float_round_to_odd:
2943 increment = !(zSig1 & 0x1) && zSig2;
2944 break;
dc355b76
PM
2945 default:
2946 abort();
158142c2
FB
2947 }
2948 }
2949 }
a2f2d288
PM
2950 if (zSig2) {
2951 status->float_exception_flags |= float_flag_inexact;
2952 }
158142c2
FB
2953 if ( increment ) {
2954 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
2955 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
2956 }
2957 else {
2958 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
2959 }
2960 return packFloat128( zSign, zExp, zSig0, zSig1 );
2961
2962}
2963
2964/*----------------------------------------------------------------------------
2965| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2966| and significand formed by the concatenation of `zSig0' and `zSig1', and
2967| returns the proper quadruple-precision floating-point value corresponding
2968| to the abstract input. This routine is just like `roundAndPackFloat128'
2969| except that the input significand has fewer bits and does not have to be
2970| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
2971| point exponent.
2972*----------------------------------------------------------------------------*/
2973
f4014512 2974static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
2975 uint64_t zSig0, uint64_t zSig1,
2976 float_status *status)
158142c2 2977{
8f506c70 2978 int8_t shiftCount;
bb98fe42 2979 uint64_t zSig2;
158142c2
FB
2980
2981 if ( zSig0 == 0 ) {
2982 zSig0 = zSig1;
2983 zSig1 = 0;
2984 zExp -= 64;
2985 }
2986 shiftCount = countLeadingZeros64( zSig0 ) - 15;
2987 if ( 0 <= shiftCount ) {
2988 zSig2 = 0;
2989 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
2990 }
2991 else {
2992 shift128ExtraRightJamming(
2993 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
2994 }
2995 zExp -= shiftCount;
ff32e16e 2996 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
2997
2998}
2999
158142c2 3000
158142c2
FB
3001/*----------------------------------------------------------------------------
3002| Returns the result of converting the 32-bit two's complement integer `a'
3003| to the extended double-precision floating-point format. The conversion
3004| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3005| Arithmetic.
3006*----------------------------------------------------------------------------*/
3007
e5a41ffa 3008floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2
FB
3009{
3010 flag zSign;
3a87d009 3011 uint32_t absA;
8f506c70 3012 int8_t shiftCount;
bb98fe42 3013 uint64_t zSig;
158142c2
FB
3014
3015 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3016 zSign = ( a < 0 );
3017 absA = zSign ? - a : a;
3018 shiftCount = countLeadingZeros32( absA ) + 32;
3019 zSig = absA;
3020 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
3021
3022}
3023
158142c2
FB
3024/*----------------------------------------------------------------------------
3025| Returns the result of converting the 32-bit two's complement integer `a' to
3026| the quadruple-precision floating-point format. The conversion is performed
3027| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3028*----------------------------------------------------------------------------*/
3029
e5a41ffa 3030float128 int32_to_float128(int32_t a, float_status *status)
158142c2
FB
3031{
3032 flag zSign;
3a87d009 3033 uint32_t absA;
8f506c70 3034 int8_t shiftCount;
bb98fe42 3035 uint64_t zSig0;
158142c2
FB
3036
3037 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3038 zSign = ( a < 0 );
3039 absA = zSign ? - a : a;
3040 shiftCount = countLeadingZeros32( absA ) + 17;
3041 zSig0 = absA;
3042 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
3043
3044}
3045
158142c2
FB
3046/*----------------------------------------------------------------------------
3047| Returns the result of converting the 64-bit two's complement integer `a'
3048| to the extended double-precision floating-point format. The conversion
3049| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3050| Arithmetic.
3051*----------------------------------------------------------------------------*/
3052
e5a41ffa 3053floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2
FB
3054{
3055 flag zSign;
182f42fd 3056 uint64_t absA;
8f506c70 3057 int8_t shiftCount;
158142c2
FB
3058
3059 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3060 zSign = ( a < 0 );
3061 absA = zSign ? - a : a;
3062 shiftCount = countLeadingZeros64( absA );
3063 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
3064
3065}
3066
158142c2
FB
3067/*----------------------------------------------------------------------------
3068| Returns the result of converting the 64-bit two's complement integer `a' to
3069| the quadruple-precision floating-point format. The conversion is performed
3070| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3071*----------------------------------------------------------------------------*/
3072
e5a41ffa 3073float128 int64_to_float128(int64_t a, float_status *status)
158142c2
FB
3074{
3075 flag zSign;
182f42fd 3076 uint64_t absA;
8f506c70 3077 int8_t shiftCount;
f4014512 3078 int32_t zExp;
bb98fe42 3079 uint64_t zSig0, zSig1;
158142c2
FB
3080
3081 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3082 zSign = ( a < 0 );
3083 absA = zSign ? - a : a;
3084 shiftCount = countLeadingZeros64( absA ) + 49;
3085 zExp = 0x406E - shiftCount;
3086 if ( 64 <= shiftCount ) {
3087 zSig1 = 0;
3088 zSig0 = absA;
3089 shiftCount -= 64;
3090 }
3091 else {
3092 zSig1 = absA;
3093 zSig0 = 0;
3094 }
3095 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3096 return packFloat128( zSign, zExp, zSig0, zSig1 );
3097
3098}
3099
6bb8e0f1
PM
3100/*----------------------------------------------------------------------------
3101| Returns the result of converting the 64-bit unsigned integer `a'
3102| to the quadruple-precision floating-point format. The conversion is performed
3103| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3104*----------------------------------------------------------------------------*/
3105
e5a41ffa 3106float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
3107{
3108 if (a == 0) {
3109 return float128_zero;
3110 }
6603d506 3111 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
3112}
3113
158142c2 3114
158142c2 3115
158142c2
FB
3116
3117/*----------------------------------------------------------------------------
3118| Returns the result of converting the single-precision floating-point value
3119| `a' to the double-precision floating-point format. The conversion is
3120| performed according to the IEC/IEEE Standard for Binary Floating-Point
3121| Arithmetic.
3122*----------------------------------------------------------------------------*/
3123
e5a41ffa 3124float64 float32_to_float64(float32 a, float_status *status)
158142c2
FB
3125{
3126 flag aSign;
0c48262d 3127 int aExp;
bb98fe42 3128 uint32_t aSig;
ff32e16e 3129 a = float32_squash_input_denormal(a, status);
158142c2
FB
3130
3131 aSig = extractFloat32Frac( a );
3132 aExp = extractFloat32Exp( a );
3133 aSign = extractFloat32Sign( a );
3134 if ( aExp == 0xFF ) {
ff32e16e
PM
3135 if (aSig) {
3136 return commonNaNToFloat64(float32ToCommonNaN(a, status), status);
3137 }
158142c2
FB
3138 return packFloat64( aSign, 0x7FF, 0 );
3139 }
3140 if ( aExp == 0 ) {
3141 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
3142 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3143 --aExp;
3144 }
bb98fe42 3145 return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
158142c2
FB
3146
3147}
3148
158142c2
FB
3149/*----------------------------------------------------------------------------
3150| Returns the result of converting the single-precision floating-point value
3151| `a' to the extended double-precision floating-point format. The conversion
3152| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3153| Arithmetic.
3154*----------------------------------------------------------------------------*/
3155
e5a41ffa 3156floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2
FB
3157{
3158 flag aSign;
0c48262d 3159 int aExp;
bb98fe42 3160 uint32_t aSig;
158142c2 3161
ff32e16e 3162 a = float32_squash_input_denormal(a, status);
158142c2
FB
3163 aSig = extractFloat32Frac( a );
3164 aExp = extractFloat32Exp( a );
3165 aSign = extractFloat32Sign( a );
3166 if ( aExp == 0xFF ) {
ff32e16e
PM
3167 if (aSig) {
3168 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
3169 }
0f605c88
LV
3170 return packFloatx80(aSign,
3171 floatx80_infinity_high,
3172 floatx80_infinity_low);
158142c2
FB
3173 }
3174 if ( aExp == 0 ) {
3175 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3176 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3177 }
3178 aSig |= 0x00800000;
bb98fe42 3179 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
3180
3181}
3182
158142c2
FB
3183/*----------------------------------------------------------------------------
3184| Returns the result of converting the single-precision floating-point value
3185| `a' to the quadruple-precision floating-point format. The conversion is
3186| performed according to the IEC/IEEE Standard for Binary Floating-Point
3187| Arithmetic.
3188*----------------------------------------------------------------------------*/
3189
e5a41ffa 3190float128 float32_to_float128(float32 a, float_status *status)
158142c2
FB
3191{
3192 flag aSign;
0c48262d 3193 int aExp;
bb98fe42 3194 uint32_t aSig;
158142c2 3195
ff32e16e 3196 a = float32_squash_input_denormal(a, status);
158142c2
FB
3197 aSig = extractFloat32Frac( a );
3198 aExp = extractFloat32Exp( a );
3199 aSign = extractFloat32Sign( a );
3200 if ( aExp == 0xFF ) {
ff32e16e
PM
3201 if (aSig) {
3202 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
3203 }
158142c2
FB
3204 return packFloat128( aSign, 0x7FFF, 0, 0 );
3205 }
3206 if ( aExp == 0 ) {
3207 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3208 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3209 --aExp;
3210 }
bb98fe42 3211 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
3212
3213}
3214
158142c2
FB
3215/*----------------------------------------------------------------------------
3216| Returns the remainder of the single-precision floating-point value `a'
3217| with respect to the corresponding value `b'. The operation is performed
3218| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3219*----------------------------------------------------------------------------*/
3220
e5a41ffa 3221float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 3222{
ed086f3d 3223 flag aSign, zSign;
0c48262d 3224 int aExp, bExp, expDiff;
bb98fe42
AF
3225 uint32_t aSig, bSig;
3226 uint32_t q;
3227 uint64_t aSig64, bSig64, q64;
3228 uint32_t alternateASig;
3229 int32_t sigMean;
ff32e16e
PM
3230 a = float32_squash_input_denormal(a, status);
3231 b = float32_squash_input_denormal(b, status);
158142c2
FB
3232
3233 aSig = extractFloat32Frac( a );
3234 aExp = extractFloat32Exp( a );
3235 aSign = extractFloat32Sign( a );
3236 bSig = extractFloat32Frac( b );
3237 bExp = extractFloat32Exp( b );
158142c2
FB
3238 if ( aExp == 0xFF ) {
3239 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 3240 return propagateFloat32NaN(a, b, status);
158142c2 3241 }
ff32e16e 3242 float_raise(float_flag_invalid, status);
af39bc8c 3243 return float32_default_nan(status);
158142c2
FB
3244 }
3245 if ( bExp == 0xFF ) {
ff32e16e
PM
3246 if (bSig) {
3247 return propagateFloat32NaN(a, b, status);
3248 }
158142c2
FB
3249 return a;
3250 }
3251 if ( bExp == 0 ) {
3252 if ( bSig == 0 ) {
ff32e16e 3253 float_raise(float_flag_invalid, status);
af39bc8c 3254 return float32_default_nan(status);
158142c2
FB
3255 }
3256 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
3257 }
3258 if ( aExp == 0 ) {
3259 if ( aSig == 0 ) return a;
3260 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3261 }
3262 expDiff = aExp - bExp;
3263 aSig |= 0x00800000;
3264 bSig |= 0x00800000;
3265 if ( expDiff < 32 ) {
3266 aSig <<= 8;
3267 bSig <<= 8;
3268 if ( expDiff < 0 ) {
3269 if ( expDiff < -1 ) return a;
3270 aSig >>= 1;
3271 }
3272 q = ( bSig <= aSig );
3273 if ( q ) aSig -= bSig;
3274 if ( 0 < expDiff ) {
bb98fe42 3275 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
3276 q >>= 32 - expDiff;
3277 bSig >>= 2;
3278 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3279 }
3280 else {
3281 aSig >>= 2;
3282 bSig >>= 2;
3283 }
3284 }
3285 else {
3286 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
3287 aSig64 = ( (uint64_t) aSig )<<40;
3288 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
3289 expDiff -= 64;
3290 while ( 0 < expDiff ) {
3291 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3292 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3293 aSig64 = - ( ( bSig * q64 )<<38 );
3294 expDiff -= 62;
3295 }
3296 expDiff += 64;
3297 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3298 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3299 q = q64>>( 64 - expDiff );
3300 bSig <<= 6;
3301 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
3302 }
3303 do {
3304 alternateASig = aSig;
3305 ++q;
3306 aSig -= bSig;
bb98fe42 3307 } while ( 0 <= (int32_t) aSig );
158142c2
FB
3308 sigMean = aSig + alternateASig;
3309 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3310 aSig = alternateASig;
3311 }
bb98fe42 3312 zSign = ( (int32_t) aSig < 0 );
158142c2 3313 if ( zSign ) aSig = - aSig;
ff32e16e 3314 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
3315}
3316
369be8f6 3317
158142c2 3318
8229c991
AJ
3319/*----------------------------------------------------------------------------
3320| Returns the binary exponential of the single-precision floating-point value
3321| `a'. The operation is performed according to the IEC/IEEE Standard for
3322| Binary Floating-Point Arithmetic.
3323|
3324| Uses the following identities:
3325|
3326| 1. -------------------------------------------------------------------------
3327|      x    x*ln(2)
3328|     2  = e
3329|
3330| 2. -------------------------------------------------------------------------
3331|                      2     3     4     5           n
3332|      x        x     x     x     x     x           x
3333|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
3334|               1!    2!    3!    4!    5!          n!
3335*----------------------------------------------------------------------------*/
3336
3337static const float64 float32_exp2_coefficients[15] =
3338{
d5138cf4
PM
3339 const_float64( 0x3ff0000000000000ll ), /* 1 */
3340 const_float64( 0x3fe0000000000000ll ), /* 2 */
3341 const_float64( 0x3fc5555555555555ll ), /* 3 */
3342 const_float64( 0x3fa5555555555555ll ), /* 4 */
3343 const_float64( 0x3f81111111111111ll ), /* 5 */
3344 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
3345 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
3346 const_float64( 0x3efa01a01a01a01all ), /* 8 */
3347 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
3348 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
3349 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
3350 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
3351 const_float64( 0x3de6124613a86d09ll ), /* 13 */
3352 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
3353 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
3354};
3355
e5a41ffa 3356float32 float32_exp2(float32 a, float_status *status)
8229c991
AJ
3357{
3358 flag aSign;
0c48262d 3359 int aExp;
bb98fe42 3360 uint32_t aSig;
8229c991
AJ
3361 float64 r, x, xn;
3362 int i;
ff32e16e 3363 a = float32_squash_input_denormal(a, status);
8229c991
AJ
3364
3365 aSig = extractFloat32Frac( a );
3366 aExp = extractFloat32Exp( a );
3367 aSign = extractFloat32Sign( a );
3368
3369 if ( aExp == 0xFF) {
ff32e16e
PM
3370 if (aSig) {
3371 return propagateFloat32NaN(a, float32_zero, status);
3372 }
8229c991
AJ
3373 return (aSign) ? float32_zero : a;
3374 }
3375 if (aExp == 0) {
3376 if (aSig == 0) return float32_one;
3377 }
3378
ff32e16e 3379 float_raise(float_flag_inexact, status);
8229c991
AJ
3380
3381 /* ******************************* */
3382 /* using float64 for approximation */
3383 /* ******************************* */
ff32e16e
PM
3384 x = float32_to_float64(a, status);
3385 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
3386
3387 xn = x;
3388 r = float64_one;
3389 for (i = 0 ; i < 15 ; i++) {
3390 float64 f;
3391
ff32e16e
PM
3392 f = float64_mul(xn, float32_exp2_coefficients[i], status);
3393 r = float64_add(r, f, status);
8229c991 3394
ff32e16e 3395 xn = float64_mul(xn, x, status);
8229c991
AJ
3396 }
3397
3398 return float64_to_float32(r, status);
3399}
3400
374dfc33
AJ
3401/*----------------------------------------------------------------------------
3402| Returns the binary log of the single-precision floating-point value `a'.
3403| The operation is performed according to the IEC/IEEE Standard for Binary
3404| Floating-Point Arithmetic.
3405*----------------------------------------------------------------------------*/
e5a41ffa 3406float32 float32_log2(float32 a, float_status *status)
374dfc33
AJ
3407{
3408 flag aSign, zSign;
0c48262d 3409 int aExp;
bb98fe42 3410 uint32_t aSig, zSig, i;
374dfc33 3411
ff32e16e 3412 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
3413 aSig = extractFloat32Frac( a );
3414 aExp = extractFloat32Exp( a );
3415 aSign = extractFloat32Sign( a );
3416
3417 if ( aExp == 0 ) {
3418 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
3419 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3420 }
3421 if ( aSign ) {
ff32e16e 3422 float_raise(float_flag_invalid, status);
af39bc8c 3423 return float32_default_nan(status);
374dfc33
AJ
3424 }
3425 if ( aExp == 0xFF ) {
ff32e16e
PM
3426 if (aSig) {
3427 return propagateFloat32NaN(a, float32_zero, status);
3428 }
374dfc33
AJ
3429 return a;
3430 }
3431
3432 aExp -= 0x7F;
3433 aSig |= 0x00800000;
3434 zSign = aExp < 0;
3435 zSig = aExp << 23;
3436
3437 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 3438 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
3439 if ( aSig & 0x01000000 ) {
3440 aSig >>= 1;
3441 zSig |= i;
3442 }
3443 }
3444
3445 if ( zSign )
3446 zSig = -zSig;
3447
ff32e16e 3448 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
3449}
3450
158142c2
FB
3451/*----------------------------------------------------------------------------
3452| Returns 1 if the single-precision floating-point value `a' is equal to
b689362d
AJ
3453| the corresponding value `b', and 0 otherwise. The invalid exception is
3454| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
3455| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3456*----------------------------------------------------------------------------*/
3457
e5a41ffa 3458int float32_eq(float32 a, float32 b, float_status *status)
158142c2 3459{
b689362d 3460 uint32_t av, bv;
ff32e16e
PM
3461 a = float32_squash_input_denormal(a, status);
3462 b = float32_squash_input_denormal(b, status);
158142c2
FB
3463
3464 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3465 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3466 ) {
ff32e16e 3467 float_raise(float_flag_invalid, status);
158142c2
FB
3468 return 0;
3469 }
b689362d
AJ
3470 av = float32_val(a);
3471 bv = float32_val(b);
3472 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
3473}
3474
3475/*----------------------------------------------------------------------------
3476| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
3477| or equal to the corresponding value `b', and 0 otherwise. The invalid
3478| exception is raised if either operand is a NaN. The comparison is performed
3479| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3480*----------------------------------------------------------------------------*/
3481
e5a41ffa 3482int float32_le(float32 a, float32 b, float_status *status)
158142c2
FB
3483{
3484 flag aSign, bSign;
bb98fe42 3485 uint32_t av, bv;
ff32e16e
PM
3486 a = float32_squash_input_denormal(a, status);
3487 b = float32_squash_input_denormal(b, status);
158142c2
FB
3488
3489 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3490 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3491 ) {
ff32e16e 3492 float_raise(float_flag_invalid, status);
158142c2
FB
3493 return 0;
3494 }
3495 aSign = extractFloat32Sign( a );
3496 bSign = extractFloat32Sign( b );
f090c9d4
PB
3497 av = float32_val(a);
3498 bv = float32_val(b);
bb98fe42 3499 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 3500 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
3501
3502}
3503
3504/*----------------------------------------------------------------------------
3505| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
3506| the corresponding value `b', and 0 otherwise. The invalid exception is
3507| raised if either operand is a NaN. The comparison is performed according
3508| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3509*----------------------------------------------------------------------------*/
3510
e5a41ffa 3511int float32_lt(float32 a, float32 b, float_status *status)
158142c2
FB
3512{
3513 flag aSign, bSign;
bb98fe42 3514 uint32_t av, bv;
ff32e16e
PM
3515 a = float32_squash_input_denormal(a, status);
3516 b = float32_squash_input_denormal(b, status);
158142c2
FB
3517
3518 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3519 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3520 ) {
ff32e16e 3521 float_raise(float_flag_invalid, status);
158142c2
FB
3522 return 0;
3523 }
3524 aSign = extractFloat32Sign( a );
3525 bSign = extractFloat32Sign( b );
f090c9d4
PB
3526 av = float32_val(a);
3527 bv = float32_val(b);
bb98fe42 3528 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 3529 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
3530
3531}
3532
67b7861d
AJ
3533/*----------------------------------------------------------------------------
3534| Returns 1 if the single-precision floating-point values `a' and `b' cannot
f5a64251
AJ
3535| be compared, and 0 otherwise. The invalid exception is raised if either
3536| operand is a NaN. The comparison is performed according to the IEC/IEEE
3537| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
3538*----------------------------------------------------------------------------*/
3539
e5a41ffa 3540int float32_unordered(float32 a, float32 b, float_status *status)
67b7861d 3541{
ff32e16e
PM
3542 a = float32_squash_input_denormal(a, status);
3543 b = float32_squash_input_denormal(b, status);
67b7861d
AJ
3544
3545 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3546 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3547 ) {
ff32e16e 3548 float_raise(float_flag_invalid, status);
67b7861d
AJ
3549 return 1;
3550 }
3551 return 0;
3552}
b689362d 3553
158142c2
FB
3554/*----------------------------------------------------------------------------
3555| Returns 1 if the single-precision floating-point value `a' is equal to
f5a64251
AJ
3556| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
3557| exception. The comparison is performed according to the IEC/IEEE Standard
3558| for Binary Floating-Point Arithmetic.
158142c2
FB
3559*----------------------------------------------------------------------------*/
3560
e5a41ffa 3561int float32_eq_quiet(float32 a, float32 b, float_status *status)
158142c2 3562{
ff32e16e
PM
3563 a = float32_squash_input_denormal(a, status);
3564 b = float32_squash_input_denormal(b, status);
158142c2
FB
3565
3566 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3567 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3568 ) {
af39bc8c
AM
3569 if (float32_is_signaling_nan(a, status)
3570 || float32_is_signaling_nan(b, status)) {
ff32e16e 3571 float_raise(float_flag_invalid, status);
b689362d 3572 }
158142c2
FB
3573 return 0;
3574 }
b689362d
AJ
3575 return ( float32_val(a) == float32_val(b) ) ||
3576 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
158142c2
FB
3577}
3578
3579/*----------------------------------------------------------------------------
3580| Returns 1 if the single-precision floating-point value `a' is less than or
3581| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
3582| cause an exception. Otherwise, the comparison is performed according to the
3583| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3584*----------------------------------------------------------------------------*/
3585
e5a41ffa 3586int float32_le_quiet(float32 a, float32 b, float_status *status)
158142c2
FB
3587{
3588 flag aSign, bSign;
bb98fe42 3589 uint32_t av, bv;
ff32e16e
PM
3590 a = float32_squash_input_denormal(a, status);
3591 b = float32_squash_input_denormal(b, status);
158142c2
FB
3592
3593 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3594 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3595 ) {
af39bc8c
AM
3596 if (float32_is_signaling_nan(a, status)
3597 || float32_is_signaling_nan(b, status)) {
ff32e16e 3598 float_raise(float_flag_invalid, status);
158142c2
FB
3599 }
3600 return 0;
3601 }
3602 aSign = extractFloat32Sign( a );
3603 bSign = extractFloat32Sign( b );
f090c9d4
PB
3604 av = float32_val(a);
3605 bv = float32_val(b);
bb98fe42 3606 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 3607 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
3608
3609}
3610
3611/*----------------------------------------------------------------------------
3612| Returns 1 if the single-precision floating-point value `a' is less than
3613| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
3614| exception. Otherwise, the comparison is performed according to the IEC/IEEE
ab52f973 3615| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3616*----------------------------------------------------------------------------*/
3617
ab52f973 3618int float32_lt_quiet(float32 a, float32 b, float_status *status)
158142c2 3619{
ab52f973
AB
3620 flag aSign, bSign;
3621 uint32_t av, bv;
3622 a = float32_squash_input_denormal(a, status);
3623 b = float32_squash_input_denormal(b, status);
158142c2 3624
ab52f973
AB
3625 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3626 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3627 ) {
3628 if (float32_is_signaling_nan(a, status)
3629 || float32_is_signaling_nan(b, status)) {
ff32e16e 3630 float_raise(float_flag_invalid, status);
158142c2 3631 }
ab52f973 3632 return 0;
158142c2 3633 }
ab52f973
AB
3634 aSign = extractFloat32Sign( a );
3635 bSign = extractFloat32Sign( b );
3636 av = float32_val(a);
3637 bv = float32_val(b);
3638 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
3639 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
3640
3641}
3642
3643/*----------------------------------------------------------------------------
ab52f973
AB
3644| Returns 1 if the single-precision floating-point values `a' and `b' cannot
3645| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
3646| comparison is performed according to the IEC/IEEE Standard for Binary
3647| Floating-Point Arithmetic.
158142c2
FB
3648*----------------------------------------------------------------------------*/
3649
ab52f973 3650int float32_unordered_quiet(float32 a, float32 b, float_status *status)
158142c2 3651{
ab52f973
AB
3652 a = float32_squash_input_denormal(a, status);
3653 b = float32_squash_input_denormal(b, status);
158142c2 3654
ab52f973
AB
3655 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3656 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3657 ) {
3658 if (float32_is_signaling_nan(a, status)
3659 || float32_is_signaling_nan(b, status)) {
3660 float_raise(float_flag_invalid, status);
158142c2 3661 }
ab52f973 3662 return 1;
158142c2 3663 }
ab52f973 3664 return 0;
158142c2
FB
3665}
3666
ab52f973 3667
158142c2
FB
3668/*----------------------------------------------------------------------------
3669| Returns the result of converting the double-precision floating-point value
3670| `a' to the single-precision floating-point format. The conversion is
3671| performed according to the IEC/IEEE Standard for Binary Floating-Point
3672| Arithmetic.
3673*----------------------------------------------------------------------------*/
3674
e5a41ffa 3675float32 float64_to_float32(float64 a, float_status *status)
158142c2
FB
3676{
3677 flag aSign;
0c48262d 3678 int aExp;
bb98fe42
AF
3679 uint64_t aSig;
3680 uint32_t zSig;
ff32e16e 3681 a = float64_squash_input_denormal(a, status);
158142c2
FB
3682
3683 aSig = extractFloat64Frac( a );
3684 aExp = extractFloat64Exp( a );
3685 aSign = extractFloat64Sign( a );
3686 if ( aExp == 0x7FF ) {
ff32e16e
PM
3687 if (aSig) {
3688 return commonNaNToFloat32(float64ToCommonNaN(a, status), status);
3689 }
158142c2
FB
3690 return packFloat32( aSign, 0xFF, 0 );
3691 }
3692 shift64RightJamming( aSig, 22, &aSig );
3693 zSig = aSig;
3694 if ( aExp || zSig ) {
3695 zSig |= 0x40000000;
3696 aExp -= 0x381;
3697 }
ff32e16e 3698 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
3699
3700}
3701
60011498
PB
3702
3703/*----------------------------------------------------------------------------
3704| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
3705| half-precision floating-point value, returning the result. After being
3706| shifted into the proper positions, the three fields are simply added
3707| together to form the result. This means that any integer portion of `zSig'
3708| will be added into the exponent. Since a properly normalized significand
3709| will have an integer portion equal to 1, the `zExp' input should be 1 less
3710| than the desired result exponent whenever `zSig' is a complete, normalized
3711| significand.
3712*----------------------------------------------------------------------------*/
0c48262d 3713static float16 packFloat16(flag zSign, int zExp, uint16_t zSig)
60011498 3714{
bb4d4bb3 3715 return make_float16(
bb98fe42 3716 (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
60011498
PB
3717}
3718
c4a1c5e7
PM
3719/*----------------------------------------------------------------------------
3720| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3721| and significand `zSig', and returns the proper half-precision floating-
3722| point value corresponding to the abstract input. Ordinarily, the abstract
3723| value is simply rounded and packed into the half-precision format, with
3724| the inexact exception raised if the abstract input cannot be represented
3725| exactly. However, if the abstract value is too large, the overflow and
3726| inexact exceptions are raised and an infinity or maximal finite value is
3727| returned. If the abstract value is too small, the input value is rounded to
3728| a subnormal number, and the underflow and inexact exceptions are raised if
3729| the abstract input cannot be represented exactly as a subnormal half-
3730| precision floating-point number.
3731| The `ieee' flag indicates whether to use IEEE standard half precision, or
3732| ARM-style "alternative representation", which omits the NaN and Inf
3733| encodings in order to raise the maximum representable exponent by one.
3734| The input significand `zSig' has its binary point between bits 22
3735| and 23, which is 13 bits to the left of the usual location. This shifted
3736| significand must be normalized or smaller. If `zSig' is not normalized,
3737| `zExp' must be 0; in that case, the result returned is a subnormal number,
3738| and it must not require rounding. In the usual case that `zSig' is
3739| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3740| Note the slightly odd position of the binary point in zSig compared with the
3741| other roundAndPackFloat functions. This should probably be fixed if we
3742| need to implement more float16 routines than just conversion.
3743| The handling of underflow and overflow follows the IEC/IEEE Standard for
3744| Binary Floating-Point Arithmetic.
3745*----------------------------------------------------------------------------*/
3746
0c48262d 3747static float16 roundAndPackFloat16(flag zSign, int zExp,
e5a41ffa
PM
3748 uint32_t zSig, flag ieee,
3749 float_status *status)
c4a1c5e7
PM
3750{
3751 int maxexp = ieee ? 29 : 30;
3752 uint32_t mask;
3753 uint32_t increment;
c4a1c5e7
PM
3754 bool rounding_bumps_exp;
3755 bool is_tiny = false;
3756
3757 /* Calculate the mask of bits of the mantissa which are not
3758 * representable in half-precision and will be lost.
3759 */
3760 if (zExp < 1) {
3761 /* Will be denormal in halfprec */
3762 mask = 0x00ffffff;
3763 if (zExp >= -11) {
3764 mask >>= 11 + zExp;
3765 }
3766 } else {
3767 /* Normal number in halfprec */
3768 mask = 0x00001fff;
3769 }
3770
a2f2d288 3771 switch (status->float_rounding_mode) {
c4a1c5e7
PM
3772 case float_round_nearest_even:
3773 increment = (mask + 1) >> 1;
3774 if ((zSig & mask) == increment) {
3775 increment = zSig & (increment << 1);
3776 }
3777 break;
f9288a76
PM
3778 case float_round_ties_away:
3779 increment = (mask + 1) >> 1;
3780 break;
c4a1c5e7
PM
3781 case float_round_up:
3782 increment = zSign ? 0 : mask;
3783 break;
3784 case float_round_down:
3785 increment = zSign ? mask : 0;
3786 break;
3787 default: /* round_to_zero */
3788 increment = 0;
3789 break;
3790 }
3791
3792 rounding_bumps_exp = (zSig + increment >= 0x01000000);
3793
3794 if (zExp > maxexp || (zExp == maxexp && rounding_bumps_exp)) {
3795 if (ieee) {
ff32e16e 3796 float_raise(float_flag_overflow | float_flag_inexact, status);
c4a1c5e7
PM
3797 return packFloat16(zSign, 0x1f, 0);
3798 } else {
ff32e16e 3799 float_raise(float_flag_invalid, status);
c4a1c5e7
PM
3800 return packFloat16(zSign, 0x1f, 0x3ff);
3801 }
3802 }
3803
3804 if (zExp < 0) {
3805 /* Note that flush-to-zero does not affect half-precision results */
3806 is_tiny =
a2f2d288 3807 (status->float_detect_tininess == float_tininess_before_rounding)
c4a1c5e7
PM
3808 || (zExp < -1)
3809 || (!rounding_bumps_exp);
3810 }
3811 if (zSig & mask) {
ff32e16e 3812 float_raise(float_flag_inexact, status);
c4a1c5e7 3813 if (is_tiny) {
ff32e16e 3814 float_raise(float_flag_underflow, status);
c4a1c5e7
PM
3815 }
3816 }
3817
3818 zSig += increment;
3819 if (rounding_bumps_exp) {
3820 zSig >>= 1;
3821 zExp++;
3822 }
3823
3824 if (zExp < -10) {
3825 return packFloat16(zSign, 0, 0);
3826 }
3827 if (zExp < 0) {
3828 zSig >>= -zExp;
3829 zExp = 0;
3830 }
3831 return packFloat16(zSign, zExp, zSig >> 13);
3832}
3833
210cbd49
AB
3834/*----------------------------------------------------------------------------
3835| If `a' is denormal and we are in flush-to-zero mode then set the
3836| input-denormal exception and return zero. Otherwise just return the value.
3837*----------------------------------------------------------------------------*/
3838float16 float16_squash_input_denormal(float16 a, float_status *status)
3839{
3840 if (status->flush_inputs_to_zero) {
3841 if (extractFloat16Exp(a) == 0 && extractFloat16Frac(a) != 0) {
3842 float_raise(float_flag_input_denormal, status);
3843 return make_float16(float16_val(a) & 0x8000);
3844 }
3845 }
3846 return a;
3847}
3848
/*
 * Normalizes the subnormal half-precision significand `aSig': it is shifted
 * left by (number of leading zeros in the 32-bit word - 21), and the shifted
 * significand and matching exponent (1 - shift) are stored through
 * `zSigPtr' and `zExpPtr'.
 */
static void normalizeFloat16Subnormal(uint32_t aSig, int *zExpPtr,
                                      uint32_t *zSigPtr)
{
    int8_t lshift = countLeadingZeros32(aSig) - 21;

    *zSigPtr = aSig << lshift;
    *zExpPtr = 1 - lshift;
}
3856
60011498
PB
3857/* Half precision floats come in two formats: standard IEEE and "ARM" format.
3858 The latter gains extra exponent range by omitting the NaN/Inf encodings. */
bb4d4bb3 3859
e5a41ffa 3860float32 float16_to_float32(float16 a, flag ieee, float_status *status)
60011498
PB
3861{
3862 flag aSign;
0c48262d 3863 int aExp;
bb98fe42 3864 uint32_t aSig;
60011498 3865
bb4d4bb3
PM
3866 aSign = extractFloat16Sign(a);
3867 aExp = extractFloat16Exp(a);
3868 aSig = extractFloat16Frac(a);
60011498
PB
3869
3870 if (aExp == 0x1f && ieee) {
3871 if (aSig) {
ff32e16e 3872 return commonNaNToFloat32(float16ToCommonNaN(a, status), status);
60011498 3873 }
4be8eeac 3874 return packFloat32(aSign, 0xff, 0);
60011498
PB
3875 }
3876 if (aExp == 0) {
60011498
PB
3877 if (aSig == 0) {
3878 return packFloat32(aSign, 0, 0);
3879 }
3880
c4a1c5e7
PM
3881 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3882 aExp--;
60011498
PB
3883 }
3884 return packFloat32( aSign, aExp + 0x70, aSig << 13);
3885}
3886
e5a41ffa 3887float16 float32_to_float16(float32 a, flag ieee, float_status *status)
60011498
PB
3888{
3889 flag aSign;
0c48262d 3890 int aExp;
bb98fe42 3891 uint32_t aSig;
38970efa 3892
ff32e16e 3893 a = float32_squash_input_denormal(a, status);
60011498
PB
3894
3895 aSig = extractFloat32Frac( a );
3896 aExp = extractFloat32Exp( a );
3897 aSign = extractFloat32Sign( a );
3898 if ( aExp == 0xFF ) {
3899 if (aSig) {
600e30d2 3900 /* Input is a NaN */
600e30d2 3901 if (!ieee) {
ff32e16e 3902 float_raise(float_flag_invalid, status);
600e30d2
PM
3903 return packFloat16(aSign, 0, 0);
3904 }
38970efa 3905 return commonNaNToFloat16(
ff32e16e 3906 float32ToCommonNaN(a, status), status);
60011498 3907 }
600e30d2
PM
3908 /* Infinity */
3909 if (!ieee) {
ff32e16e 3910 float_raise(float_flag_invalid, status);
600e30d2
PM
3911 return packFloat16(aSign, 0x1f, 0x3ff);
3912 }
3913 return packFloat16(aSign, 0x1f, 0);
60011498 3914 }
600e30d2 3915 if (aExp == 0 && aSig == 0) {
60011498
PB
3916 return packFloat16(aSign, 0, 0);
3917 }
38970efa
PM
3918 /* Decimal point between bits 22 and 23. Note that we add the 1 bit
3919 * even if the input is denormal; however this is harmless because
3920 * the largest possible single-precision denormal is still smaller
3921 * than the smallest representable half-precision denormal, and so we
3922 * will end up ignoring aSig and returning via the "always return zero"
3923 * codepath.
3924 */
60011498 3925 aSig |= 0x00800000;
c4a1c5e7 3926 aExp -= 0x71;
60011498 3927
ff32e16e 3928 return roundAndPackFloat16(aSign, aExp, aSig, ieee, status);
60011498
PB
3929}
3930
e5a41ffa 3931float64 float16_to_float64(float16 a, flag ieee, float_status *status)
14c9a07e
PM
3932{
3933 flag aSign;
0c48262d 3934 int aExp;
14c9a07e
PM
3935 uint32_t aSig;
3936
3937 aSign = extractFloat16Sign(a);
3938 aExp = extractFloat16Exp(a);
3939 aSig = extractFloat16Frac(a);
3940
3941 if (aExp == 0x1f && ieee) {
3942 if (aSig) {
3943 return commonNaNToFloat64(
ff32e16e 3944 float16ToCommonNaN(a, status), status);
14c9a07e
PM
3945 }
3946 return packFloat64(aSign, 0x7ff, 0);
3947 }
3948 if (aExp == 0) {
3949 if (aSig == 0) {
3950 return packFloat64(aSign, 0, 0);
3951 }
3952
3953 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3954 aExp--;
3955 }
3956 return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42);
3957}
3958
e5a41ffa 3959float16 float64_to_float16(float64 a, flag ieee, float_status *status)
14c9a07e
PM
3960{
3961 flag aSign;
0c48262d 3962 int aExp;
14c9a07e
PM
3963 uint64_t aSig;
3964 uint32_t zSig;
3965
ff32e16e 3966 a = float64_squash_input_denormal(a, status);
14c9a07e
PM
3967
3968 aSig = extractFloat64Frac(a);
3969 aExp = extractFloat64Exp(a);
3970 aSign = extractFloat64Sign(a);
3971 if (aExp == 0x7FF) {
3972 if (aSig) {
3973 /* Input is a NaN */
3974 if (!ieee) {
ff32e16e 3975 float_raise(float_flag_invalid, status);
14c9a07e
PM
3976 return packFloat16(aSign, 0, 0);
3977 }
3978 return commonNaNToFloat16(
ff32e16e 3979 float64ToCommonNaN(a, status), status);
14c9a07e
PM
3980 }
3981 /* Infinity */
3982 if (!ieee) {
ff32e16e 3983 float_raise(float_flag_invalid, status);
14c9a07e
PM
3984 return packFloat16(aSign, 0x1f, 0x3ff);
3985 }
3986 return packFloat16(aSign, 0x1f, 0);
3987 }
3988 shift64RightJamming(aSig, 29, &aSig);
3989 zSig = aSig;
3990 if (aExp == 0 && zSig == 0) {
3991 return packFloat16(aSign, 0, 0);
3992 }
3993 /* Decimal point between bits 22 and 23. Note that we add the 1 bit
3994 * even if the input is denormal; however this is harmless because
3995 * the largest possible single-precision denormal is still smaller
3996 * than the smallest representable half-precision denormal, and so we
3997 * will end up ignoring aSig and returning via the "always return zero"
3998 * codepath.
3999 */
4000 zSig |= 0x00800000;
4001 aExp -= 0x3F1;
4002
ff32e16e 4003 return roundAndPackFloat16(aSign, aExp, zSig, ieee, status);
14c9a07e
PM
4004}
4005
158142c2
FB
4006/*----------------------------------------------------------------------------
4007| Returns the result of converting the double-precision floating-point value
4008| `a' to the extended double-precision floating-point format. The conversion
4009| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4010| Arithmetic.
4011*----------------------------------------------------------------------------*/
4012
e5a41ffa 4013floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2
FB
4014{
4015 flag aSign;
0c48262d 4016 int aExp;
bb98fe42 4017 uint64_t aSig;
158142c2 4018
ff32e16e 4019 a = float64_squash_input_denormal(a, status);
158142c2
FB
4020 aSig = extractFloat64Frac( a );
4021 aExp = extractFloat64Exp( a );
4022 aSign = extractFloat64Sign( a );
4023 if ( aExp == 0x7FF ) {
ff32e16e
PM
4024 if (aSig) {
4025 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
4026 }
0f605c88
LV
4027 return packFloatx80(aSign,
4028 floatx80_infinity_high,
4029 floatx80_infinity_low);
158142c2
FB
4030 }
4031 if ( aExp == 0 ) {
4032 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4033 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4034 }
4035 return
4036 packFloatx80(
4037 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
4038
4039}
4040
158142c2
FB
4041/*----------------------------------------------------------------------------
4042| Returns the result of converting the double-precision floating-point value
4043| `a' to the quadruple-precision floating-point format. The conversion is
4044| performed according to the IEC/IEEE Standard for Binary Floating-Point
4045| Arithmetic.
4046*----------------------------------------------------------------------------*/
4047
e5a41ffa 4048float128 float64_to_float128(float64 a, float_status *status)
158142c2
FB
4049{
4050 flag aSign;
0c48262d 4051 int aExp;
bb98fe42 4052 uint64_t aSig, zSig0, zSig1;
158142c2 4053
ff32e16e 4054 a = float64_squash_input_denormal(a, status);
158142c2
FB
4055 aSig = extractFloat64Frac( a );
4056 aExp = extractFloat64Exp( a );
4057 aSign = extractFloat64Sign( a );
4058 if ( aExp == 0x7FF ) {
ff32e16e
PM
4059 if (aSig) {
4060 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
4061 }
158142c2
FB
4062 return packFloat128( aSign, 0x7FFF, 0, 0 );
4063 }
4064 if ( aExp == 0 ) {
4065 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4066 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4067 --aExp;
4068 }
4069 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
4070 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
4071
4072}
4073
158142c2
FB
4074
4075/*----------------------------------------------------------------------------
4076| Returns the remainder of the double-precision floating-point value `a'
4077| with respect to the corresponding value `b'. The operation is performed
4078| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4079*----------------------------------------------------------------------------*/
4080
e5a41ffa 4081float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 4082{
ed086f3d 4083 flag aSign, zSign;
0c48262d 4084 int aExp, bExp, expDiff;
bb98fe42
AF
4085 uint64_t aSig, bSig;
4086 uint64_t q, alternateASig;
4087 int64_t sigMean;
158142c2 4088
ff32e16e
PM
4089 a = float64_squash_input_denormal(a, status);
4090 b = float64_squash_input_denormal(b, status);
158142c2
FB
4091 aSig = extractFloat64Frac( a );
4092 aExp = extractFloat64Exp( a );
4093 aSign = extractFloat64Sign( a );
4094 bSig = extractFloat64Frac( b );
4095 bExp = extractFloat64Exp( b );
158142c2
FB
4096 if ( aExp == 0x7FF ) {
4097 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 4098 return propagateFloat64NaN(a, b, status);
158142c2 4099 }
ff32e16e 4100 float_raise(float_flag_invalid, status);
af39bc8c 4101 return float64_default_nan(status);
158142c2
FB
4102 }
4103 if ( bExp == 0x7FF ) {
ff32e16e
PM
4104 if (bSig) {
4105 return propagateFloat64NaN(a, b, status);
4106 }
158142c2
FB
4107 return a;
4108 }
4109 if ( bExp == 0 ) {
4110 if ( bSig == 0 ) {
ff32e16e 4111 float_raise(float_flag_invalid, status);
af39bc8c 4112 return float64_default_nan(status);
158142c2
FB
4113 }
4114 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4115 }
4116 if ( aExp == 0 ) {
4117 if ( aSig == 0 ) return a;
4118 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4119 }
4120 expDiff = aExp - bExp;
4121 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
4122 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4123 if ( expDiff < 0 ) {
4124 if ( expDiff < -1 ) return a;
4125 aSig >>= 1;
4126 }
4127 q = ( bSig <= aSig );
4128 if ( q ) aSig -= bSig;
4129 expDiff -= 64;
4130 while ( 0 < expDiff ) {
4131 q = estimateDiv128To64( aSig, 0, bSig );
4132 q = ( 2 < q ) ? q - 2 : 0;
4133 aSig = - ( ( bSig>>2 ) * q );
4134 expDiff -= 62;
4135 }
4136 expDiff += 64;
4137 if ( 0 < expDiff ) {
4138 q = estimateDiv128To64( aSig, 0, bSig );
4139 q = ( 2 < q ) ? q - 2 : 0;
4140 q >>= 64 - expDiff;
4141 bSig >>= 2;
4142 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4143 }
4144 else {
4145 aSig >>= 2;
4146 bSig >>= 2;
4147 }
4148 do {
4149 alternateASig = aSig;
4150 ++q;
4151 aSig -= bSig;
bb98fe42 4152 } while ( 0 <= (int64_t) aSig );
158142c2
FB
4153 sigMean = aSig + alternateASig;
4154 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4155 aSig = alternateASig;
4156 }
bb98fe42 4157 zSign = ( (int64_t) aSig < 0 );
158142c2 4158 if ( zSign ) aSig = - aSig;
ff32e16e 4159 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
4160
4161}
4162
374dfc33
AJ
4163/*----------------------------------------------------------------------------
4164| Returns the binary log of the double-precision floating-point value `a'.
4165| The operation is performed according to the IEC/IEEE Standard for Binary
4166| Floating-Point Arithmetic.
4167*----------------------------------------------------------------------------*/
e5a41ffa 4168float64 float64_log2(float64 a, float_status *status)
374dfc33
AJ
4169{
4170 flag aSign, zSign;
0c48262d 4171 int aExp;
bb98fe42 4172 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 4173 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
4174
4175 aSig = extractFloat64Frac( a );
4176 aExp = extractFloat64Exp( a );
4177 aSign = extractFloat64Sign( a );
4178
4179 if ( aExp == 0 ) {
4180 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
4181 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4182 }
4183 if ( aSign ) {
ff32e16e 4184 float_raise(float_flag_invalid, status);
af39bc8c 4185 return float64_default_nan(status);
374dfc33
AJ
4186 }
4187 if ( aExp == 0x7FF ) {
ff32e16e
PM
4188 if (aSig) {
4189 return propagateFloat64NaN(a, float64_zero, status);
4190 }
374dfc33
AJ
4191 return a;
4192 }
4193
4194 aExp -= 0x3FF;
4195 aSig |= LIT64( 0x0010000000000000 );
4196 zSign = aExp < 0;
bb98fe42 4197 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
4198 for (i = 1LL << 51; i > 0; i >>= 1) {
4199 mul64To128( aSig, aSig, &aSig0, &aSig1 );
4200 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
4201 if ( aSig & LIT64( 0x0020000000000000 ) ) {
4202 aSig >>= 1;
4203 zSig |= i;
4204 }
4205 }
4206
4207 if ( zSign )
4208 zSig = -zSig;
ff32e16e 4209 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
4210}
4211
158142c2
FB
4212/*----------------------------------------------------------------------------
4213| Returns 1 if the double-precision floating-point value `a' is equal to the
b689362d
AJ
4214| corresponding value `b', and 0 otherwise. The invalid exception is raised
4215| if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
4216| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4217*----------------------------------------------------------------------------*/
4218
e5a41ffa 4219int float64_eq(float64 a, float64 b, float_status *status)
158142c2 4220{
bb98fe42 4221 uint64_t av, bv;
ff32e16e
PM
4222 a = float64_squash_input_denormal(a, status);
4223 b = float64_squash_input_denormal(b, status);
158142c2
FB
4224
4225 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4226 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4227 ) {
ff32e16e 4228 float_raise(float_flag_invalid, status);
158142c2
FB
4229 return 0;
4230 }
f090c9d4 4231 av = float64_val(a);
a1b91bb4 4232 bv = float64_val(b);
bb98fe42 4233 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4234
4235}
4236
4237/*----------------------------------------------------------------------------
4238| Returns 1 if the double-precision floating-point value `a' is less than or
f5a64251
AJ
4239| equal to the corresponding value `b', and 0 otherwise. The invalid
4240| exception is raised if either operand is a NaN. The comparison is performed
4241| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4242*----------------------------------------------------------------------------*/
4243
e5a41ffa 4244int float64_le(float64 a, float64 b, float_status *status)
158142c2
FB
4245{
4246 flag aSign, bSign;
bb98fe42 4247 uint64_t av, bv;
ff32e16e
PM
4248 a = float64_squash_input_denormal(a, status);
4249 b = float64_squash_input_denormal(b, status);
158142c2
FB
4250
4251 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4252 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4253 ) {
ff32e16e 4254 float_raise(float_flag_invalid, status);
158142c2
FB
4255 return 0;
4256 }
4257 aSign = extractFloat64Sign( a );
4258 bSign = extractFloat64Sign( b );
f090c9d4 4259 av = float64_val(a);
a1b91bb4 4260 bv = float64_val(b);
bb98fe42 4261 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4262 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4263
4264}
4265
4266/*----------------------------------------------------------------------------
4267| Returns 1 if the double-precision floating-point value `a' is less than
f5a64251
AJ
4268| the corresponding value `b', and 0 otherwise. The invalid exception is
4269| raised if either operand is a NaN. The comparison is performed according
4270| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4271*----------------------------------------------------------------------------*/
4272
e5a41ffa 4273int float64_lt(float64 a, float64 b, float_status *status)
158142c2
FB
4274{
4275 flag aSign, bSign;
bb98fe42 4276 uint64_t av, bv;
158142c2 4277
ff32e16e
PM
4278 a = float64_squash_input_denormal(a, status);
4279 b = float64_squash_input_denormal(b, status);
158142c2
FB
4280 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4281 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4282 ) {
ff32e16e 4283 float_raise(float_flag_invalid, status);
158142c2
FB
4284 return 0;
4285 }
4286 aSign = extractFloat64Sign( a );
4287 bSign = extractFloat64Sign( b );
f090c9d4 4288 av = float64_val(a);
a1b91bb4 4289 bv = float64_val(b);
bb98fe42 4290 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4291 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4292
4293}
4294
67b7861d
AJ
4295/*----------------------------------------------------------------------------
4296| Returns 1 if the double-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4297| be compared, and 0 otherwise. The invalid exception is raised if either
4298| operand is a NaN. The comparison is performed according to the IEC/IEEE
4299| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4300*----------------------------------------------------------------------------*/
4301
e5a41ffa 4302int float64_unordered(float64 a, float64 b, float_status *status)
67b7861d 4303{
ff32e16e
PM
4304 a = float64_squash_input_denormal(a, status);
4305 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4306
4307 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4308 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4309 ) {
ff32e16e 4310 float_raise(float_flag_invalid, status);
67b7861d
AJ
4311 return 1;
4312 }
4313 return 0;
4314}
4315
158142c2
FB
4316/*----------------------------------------------------------------------------
4317| Returns 1 if the double-precision floating-point value `a' is equal to the
f5a64251
AJ
4318| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4319| exception.The comparison is performed according to the IEC/IEEE Standard
4320| for Binary Floating-Point Arithmetic.
158142c2
FB
4321*----------------------------------------------------------------------------*/
4322
e5a41ffa 4323int float64_eq_quiet(float64 a, float64 b, float_status *status)
158142c2 4324{
bb98fe42 4325 uint64_t av, bv;
ff32e16e
PM
4326 a = float64_squash_input_denormal(a, status);
4327 b = float64_squash_input_denormal(b, status);
158142c2
FB
4328
4329 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4330 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4331 ) {
af39bc8c
AM
4332 if (float64_is_signaling_nan(a, status)
4333 || float64_is_signaling_nan(b, status)) {
ff32e16e 4334 float_raise(float_flag_invalid, status);
b689362d 4335 }
158142c2
FB
4336 return 0;
4337 }
f090c9d4 4338 av = float64_val(a);
a1b91bb4 4339 bv = float64_val(b);
bb98fe42 4340 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4341
4342}
4343
4344/*----------------------------------------------------------------------------
4345| Returns 1 if the double-precision floating-point value `a' is less than or
4346| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4347| cause an exception. Otherwise, the comparison is performed according to the
4348| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4349*----------------------------------------------------------------------------*/
4350
e5a41ffa 4351int float64_le_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4352{
4353 flag aSign, bSign;
bb98fe42 4354 uint64_t av, bv;
ff32e16e
PM
4355 a = float64_squash_input_denormal(a, status);
4356 b = float64_squash_input_denormal(b, status);
158142c2
FB
4357
4358 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4359 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4360 ) {
af39bc8c
AM
4361 if (float64_is_signaling_nan(a, status)
4362 || float64_is_signaling_nan(b, status)) {
ff32e16e 4363 float_raise(float_flag_invalid, status);
158142c2
FB
4364 }
4365 return 0;
4366 }
4367 aSign = extractFloat64Sign( a );
4368 bSign = extractFloat64Sign( b );
f090c9d4 4369 av = float64_val(a);
a1b91bb4 4370 bv = float64_val(b);
bb98fe42 4371 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4372 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4373
4374}
4375
4376/*----------------------------------------------------------------------------
4377| Returns 1 if the double-precision floating-point value `a' is less than
4378| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4379| exception. Otherwise, the comparison is performed according to the IEC/IEEE
4380| Standard for Binary Floating-Point Arithmetic.
4381*----------------------------------------------------------------------------*/
4382
e5a41ffa 4383int float64_lt_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4384{
4385 flag aSign, bSign;
bb98fe42 4386 uint64_t av, bv;
ff32e16e
PM
4387 a = float64_squash_input_denormal(a, status);
4388 b = float64_squash_input_denormal(b, status);
158142c2
FB
4389
4390 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4391 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4392 ) {
af39bc8c
AM
4393 if (float64_is_signaling_nan(a, status)
4394 || float64_is_signaling_nan(b, status)) {
ff32e16e 4395 float_raise(float_flag_invalid, status);
158142c2
FB
4396 }
4397 return 0;
4398 }
4399 aSign = extractFloat64Sign( a );
4400 bSign = extractFloat64Sign( b );
f090c9d4 4401 av = float64_val(a);
a1b91bb4 4402 bv = float64_val(b);
bb98fe42 4403 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4404 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4405
4406}
4407
67b7861d
AJ
4408/*----------------------------------------------------------------------------
4409| Returns 1 if the double-precision floating-point values `a' and `b' cannot
4410| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4411| comparison is performed according to the IEC/IEEE Standard for Binary
4412| Floating-Point Arithmetic.
4413*----------------------------------------------------------------------------*/
4414
e5a41ffa 4415int float64_unordered_quiet(float64 a, float64 b, float_status *status)
67b7861d 4416{
ff32e16e
PM
4417 a = float64_squash_input_denormal(a, status);
4418 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4419
4420 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4421 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4422 ) {
af39bc8c
AM
4423 if (float64_is_signaling_nan(a, status)
4424 || float64_is_signaling_nan(b, status)) {
ff32e16e 4425 float_raise(float_flag_invalid, status);
67b7861d
AJ
4426 }
4427 return 1;
4428 }
4429 return 0;
4430}
4431
158142c2
FB
4432/*----------------------------------------------------------------------------
4433| Returns the result of converting the extended double-precision floating-
4434| point value `a' to the 32-bit two's complement integer format. The
4435| conversion is performed according to the IEC/IEEE Standard for Binary
4436| Floating-Point Arithmetic---which means in particular that the conversion
4437| is rounded according to the current rounding mode. If `a' is a NaN, the
4438| largest positive integer is returned. Otherwise, if the conversion
4439| overflows, the largest integer with the same sign as `a' is returned.
4440*----------------------------------------------------------------------------*/
4441
f4014512 4442int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2
FB
4443{
4444 flag aSign;
f4014512 4445 int32_t aExp, shiftCount;
bb98fe42 4446 uint64_t aSig;
158142c2 4447
d1eb8f2a
AD
4448 if (floatx80_invalid_encoding(a)) {
4449 float_raise(float_flag_invalid, status);
4450 return 1 << 31;
4451 }
158142c2
FB
4452 aSig = extractFloatx80Frac( a );
4453 aExp = extractFloatx80Exp( a );
4454 aSign = extractFloatx80Sign( a );
bb98fe42 4455 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4456 shiftCount = 0x4037 - aExp;
4457 if ( shiftCount <= 0 ) shiftCount = 1;
4458 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 4459 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
4460
4461}
4462
4463/*----------------------------------------------------------------------------
4464| Returns the result of converting the extended double-precision floating-
4465| point value `a' to the 32-bit two's complement integer format. The
4466| conversion is performed according to the IEC/IEEE Standard for Binary
4467| Floating-Point Arithmetic, except that the conversion is always rounded
4468| toward zero. If `a' is a NaN, the largest positive integer is returned.
4469| Otherwise, if the conversion overflows, the largest integer with the same
4470| sign as `a' is returned.
4471*----------------------------------------------------------------------------*/
4472
f4014512 4473int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4474{
4475 flag aSign;
f4014512 4476 int32_t aExp, shiftCount;
bb98fe42 4477 uint64_t aSig, savedASig;
b3a6a2e0 4478 int32_t z;
158142c2 4479
d1eb8f2a
AD
4480 if (floatx80_invalid_encoding(a)) {
4481 float_raise(float_flag_invalid, status);
4482 return 1 << 31;
4483 }
158142c2
FB
4484 aSig = extractFloatx80Frac( a );
4485 aExp = extractFloatx80Exp( a );
4486 aSign = extractFloatx80Sign( a );
4487 if ( 0x401E < aExp ) {
bb98fe42 4488 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4489 goto invalid;
4490 }
4491 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4492 if (aExp || aSig) {
4493 status->float_exception_flags |= float_flag_inexact;
4494 }
158142c2
FB
4495 return 0;
4496 }
4497 shiftCount = 0x403E - aExp;
4498 savedASig = aSig;
4499 aSig >>= shiftCount;
4500 z = aSig;
4501 if ( aSign ) z = - z;
4502 if ( ( z < 0 ) ^ aSign ) {
4503 invalid:
ff32e16e 4504 float_raise(float_flag_invalid, status);
bb98fe42 4505 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
4506 }
4507 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 4508 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4509 }
4510 return z;
4511
4512}
4513
4514/*----------------------------------------------------------------------------
4515| Returns the result of converting the extended double-precision floating-
4516| point value `a' to the 64-bit two's complement integer format. The
4517| conversion is performed according to the IEC/IEEE Standard for Binary
4518| Floating-Point Arithmetic---which means in particular that the conversion
4519| is rounded according to the current rounding mode. If `a' is a NaN,
4520| the largest positive integer is returned. Otherwise, if the conversion
4521| overflows, the largest integer with the same sign as `a' is returned.
4522*----------------------------------------------------------------------------*/
4523
f42c2224 4524int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2
FB
4525{
4526 flag aSign;
f4014512 4527 int32_t aExp, shiftCount;
bb98fe42 4528 uint64_t aSig, aSigExtra;
158142c2 4529
d1eb8f2a
AD
4530 if (floatx80_invalid_encoding(a)) {
4531 float_raise(float_flag_invalid, status);
4532 return 1ULL << 63;
4533 }
158142c2
FB
4534 aSig = extractFloatx80Frac( a );
4535 aExp = extractFloatx80Exp( a );
4536 aSign = extractFloatx80Sign( a );
4537 shiftCount = 0x403E - aExp;
4538 if ( shiftCount <= 0 ) {
4539 if ( shiftCount ) {
ff32e16e 4540 float_raise(float_flag_invalid, status);
0f605c88 4541 if (!aSign || floatx80_is_any_nan(a)) {
158142c2
FB
4542 return LIT64( 0x7FFFFFFFFFFFFFFF );
4543 }
bb98fe42 4544 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4545 }
4546 aSigExtra = 0;
4547 }
4548 else {
4549 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4550 }
ff32e16e 4551 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
4552
4553}
4554
4555/*----------------------------------------------------------------------------
4556| Returns the result of converting the extended double-precision floating-
4557| point value `a' to the 64-bit two's complement integer format. The
4558| conversion is performed according to the IEC/IEEE Standard for Binary
4559| Floating-Point Arithmetic, except that the conversion is always rounded
4560| toward zero. If `a' is a NaN, the largest positive integer is returned.
4561| Otherwise, if the conversion overflows, the largest integer with the same
4562| sign as `a' is returned.
4563*----------------------------------------------------------------------------*/
4564
f42c2224 4565int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4566{
4567 flag aSign;
f4014512 4568 int32_t aExp, shiftCount;
bb98fe42 4569 uint64_t aSig;
f42c2224 4570 int64_t z;
158142c2 4571
d1eb8f2a
AD
4572 if (floatx80_invalid_encoding(a)) {
4573 float_raise(float_flag_invalid, status);
4574 return 1ULL << 63;
4575 }
158142c2
FB
4576 aSig = extractFloatx80Frac( a );
4577 aExp = extractFloatx80Exp( a );
4578 aSign = extractFloatx80Sign( a );
4579 shiftCount = aExp - 0x403E;
4580 if ( 0 <= shiftCount ) {
4581 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4582 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 4583 float_raise(float_flag_invalid, status);
158142c2
FB
4584 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4585 return LIT64( 0x7FFFFFFFFFFFFFFF );
4586 }
4587 }
bb98fe42 4588 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4589 }
4590 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4591 if (aExp | aSig) {
4592 status->float_exception_flags |= float_flag_inexact;
4593 }
158142c2
FB
4594 return 0;
4595 }
4596 z = aSig>>( - shiftCount );
bb98fe42 4597 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 4598 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4599 }
4600 if ( aSign ) z = - z;
4601 return z;
4602
4603}
4604
4605/*----------------------------------------------------------------------------
4606| Returns the result of converting the extended double-precision floating-
4607| point value `a' to the single-precision floating-point format. The
4608| conversion is performed according to the IEC/IEEE Standard for Binary
4609| Floating-Point Arithmetic.
4610*----------------------------------------------------------------------------*/
4611
e5a41ffa 4612float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2
FB
4613{
4614 flag aSign;
f4014512 4615 int32_t aExp;
bb98fe42 4616 uint64_t aSig;
158142c2 4617
d1eb8f2a
AD
4618 if (floatx80_invalid_encoding(a)) {
4619 float_raise(float_flag_invalid, status);
4620 return float32_default_nan(status);
4621 }
158142c2
FB
4622 aSig = extractFloatx80Frac( a );
4623 aExp = extractFloatx80Exp( a );
4624 aSign = extractFloatx80Sign( a );
4625 if ( aExp == 0x7FFF ) {
bb98fe42 4626 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4627 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4628 }
4629 return packFloat32( aSign, 0xFF, 0 );
4630 }
4631 shift64RightJamming( aSig, 33, &aSig );
4632 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 4633 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
4634
4635}
4636
4637/*----------------------------------------------------------------------------
4638| Returns the result of converting the extended double-precision floating-
4639| point value `a' to the double-precision floating-point format. The
4640| conversion is performed according to the IEC/IEEE Standard for Binary
4641| Floating-Point Arithmetic.
4642*----------------------------------------------------------------------------*/
4643
e5a41ffa 4644float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2
FB
4645{
4646 flag aSign;
f4014512 4647 int32_t aExp;
bb98fe42 4648 uint64_t aSig, zSig;
158142c2 4649
d1eb8f2a
AD
4650 if (floatx80_invalid_encoding(a)) {
4651 float_raise(float_flag_invalid, status);
4652 return float64_default_nan(status);
4653 }
158142c2
FB
4654 aSig = extractFloatx80Frac( a );
4655 aExp = extractFloatx80Exp( a );
4656 aSign = extractFloatx80Sign( a );
4657 if ( aExp == 0x7FFF ) {
bb98fe42 4658 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4659 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4660 }
4661 return packFloat64( aSign, 0x7FF, 0 );
4662 }
4663 shift64RightJamming( aSig, 1, &zSig );
4664 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 4665 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
4666
4667}
4668
158142c2
FB
4669/*----------------------------------------------------------------------------
4670| Returns the result of converting the extended double-precision floating-
4671| point value `a' to the quadruple-precision floating-point format. The
4672| conversion is performed according to the IEC/IEEE Standard for Binary
4673| Floating-Point Arithmetic.
4674*----------------------------------------------------------------------------*/
4675
e5a41ffa 4676float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2
FB
4677{
4678 flag aSign;
0c48262d 4679 int aExp;
bb98fe42 4680 uint64_t aSig, zSig0, zSig1;
158142c2 4681
d1eb8f2a
AD
4682 if (floatx80_invalid_encoding(a)) {
4683 float_raise(float_flag_invalid, status);
4684 return float128_default_nan(status);
4685 }
158142c2
FB
4686 aSig = extractFloatx80Frac( a );
4687 aExp = extractFloatx80Exp( a );
4688 aSign = extractFloatx80Sign( a );
bb98fe42 4689 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4690 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4691 }
4692 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
4693 return packFloat128( aSign, aExp, zSig0, zSig1 );
4694
4695}
4696
0f721292
LV
4697/*----------------------------------------------------------------------------
4698| Rounds the extended double-precision floating-point value `a'
4699| to the precision provided by floatx80_rounding_precision and returns the
4700| result as an extended double-precision floating-point value.
4701| The operation is performed according to the IEC/IEEE Standard for Binary
4702| Floating-Point Arithmetic.
4703*----------------------------------------------------------------------------*/
4704
4705floatx80 floatx80_round(floatx80 a, float_status *status)
4706{
4707 return roundAndPackFloatx80(status->floatx80_rounding_precision,
4708 extractFloatx80Sign(a),
4709 extractFloatx80Exp(a),
4710 extractFloatx80Frac(a), 0, status);
4711}
4712
158142c2
FB
4713/*----------------------------------------------------------------------------
4714| Rounds the extended double-precision floating-point value `a' to an integer,
4715| and returns the result as an extended quadruple-precision floating-point
4716| value. The operation is performed according to the IEC/IEEE Standard for
4717| Binary Floating-Point Arithmetic.
4718*----------------------------------------------------------------------------*/
4719
e5a41ffa 4720floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2
FB
4721{
4722 flag aSign;
f4014512 4723 int32_t aExp;
bb98fe42 4724 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
4725 floatx80 z;
4726
d1eb8f2a
AD
4727 if (floatx80_invalid_encoding(a)) {
4728 float_raise(float_flag_invalid, status);
4729 return floatx80_default_nan(status);
4730 }
158142c2
FB
4731 aExp = extractFloatx80Exp( a );
4732 if ( 0x403E <= aExp ) {
bb98fe42 4733 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 4734 return propagateFloatx80NaN(a, a, status);
158142c2
FB
4735 }
4736 return a;
4737 }
4738 if ( aExp < 0x3FFF ) {
4739 if ( ( aExp == 0 )
bb98fe42 4740 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
158142c2
FB
4741 return a;
4742 }
a2f2d288 4743 status->float_exception_flags |= float_flag_inexact;
158142c2 4744 aSign = extractFloatx80Sign( a );
a2f2d288 4745 switch (status->float_rounding_mode) {
158142c2 4746 case float_round_nearest_even:
bb98fe42 4747 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
4748 ) {
4749 return
4750 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
4751 }
4752 break;
f9288a76
PM
4753 case float_round_ties_away:
4754 if (aExp == 0x3FFE) {
4755 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
4756 }
4757 break;
158142c2
FB
4758 case float_round_down:
4759 return
4760 aSign ?
4761 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
4762 : packFloatx80( 0, 0, 0 );
4763 case float_round_up:
4764 return
4765 aSign ? packFloatx80( 1, 0, 0 )
4766 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
4767 }
4768 return packFloatx80( aSign, 0, 0 );
4769 }
4770 lastBitMask = 1;
4771 lastBitMask <<= 0x403E - aExp;
4772 roundBitsMask = lastBitMask - 1;
4773 z = a;
a2f2d288 4774 switch (status->float_rounding_mode) {
dc355b76 4775 case float_round_nearest_even:
158142c2 4776 z.low += lastBitMask>>1;
dc355b76
PM
4777 if ((z.low & roundBitsMask) == 0) {
4778 z.low &= ~lastBitMask;
4779 }
4780 break;
f9288a76
PM
4781 case float_round_ties_away:
4782 z.low += lastBitMask >> 1;
4783 break;
dc355b76
PM
4784 case float_round_to_zero:
4785 break;
4786 case float_round_up:
4787 if (!extractFloatx80Sign(z)) {
4788 z.low += roundBitsMask;
4789 }
4790 break;
4791 case float_round_down:
4792 if (extractFloatx80Sign(z)) {
158142c2
FB
4793 z.low += roundBitsMask;
4794 }
dc355b76
PM
4795 break;
4796 default:
4797 abort();
158142c2
FB
4798 }
4799 z.low &= ~ roundBitsMask;
4800 if ( z.low == 0 ) {
4801 ++z.high;
4802 z.low = LIT64( 0x8000000000000000 );
4803 }
a2f2d288
PM
4804 if (z.low != a.low) {
4805 status->float_exception_flags |= float_flag_inexact;
4806 }
158142c2
FB
4807 return z;
4808
4809}
4810
4811/*----------------------------------------------------------------------------
4812| Returns the result of adding the absolute values of the extended double-
4813| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
4814| negated before being returned. `zSign' is ignored if the result is a NaN.
4815| The addition is performed according to the IEC/IEEE Standard for Binary
4816| Floating-Point Arithmetic.
4817*----------------------------------------------------------------------------*/
4818
e5a41ffa
PM
4819static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
4820 float_status *status)
158142c2 4821{
f4014512 4822 int32_t aExp, bExp, zExp;
bb98fe42 4823 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 4824 int32_t expDiff;
158142c2
FB
4825
4826 aSig = extractFloatx80Frac( a );
4827 aExp = extractFloatx80Exp( a );
4828 bSig = extractFloatx80Frac( b );
4829 bExp = extractFloatx80Exp( b );
4830 expDiff = aExp - bExp;
4831 if ( 0 < expDiff ) {
4832 if ( aExp == 0x7FFF ) {
ff32e16e
PM
4833 if ((uint64_t)(aSig << 1)) {
4834 return propagateFloatx80NaN(a, b, status);
4835 }
158142c2
FB
4836 return a;
4837 }
4838 if ( bExp == 0 ) --expDiff;
4839 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4840 zExp = aExp;
4841 }
4842 else if ( expDiff < 0 ) {
4843 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4844 if ((uint64_t)(bSig << 1)) {
4845 return propagateFloatx80NaN(a, b, status);
4846 }
0f605c88
LV
4847 return packFloatx80(zSign,
4848 floatx80_infinity_high,
4849 floatx80_infinity_low);
158142c2
FB
4850 }
4851 if ( aExp == 0 ) ++expDiff;
4852 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4853 zExp = bExp;
4854 }
4855 else {
4856 if ( aExp == 0x7FFF ) {
bb98fe42 4857 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 4858 return propagateFloatx80NaN(a, b, status);
158142c2
FB
4859 }
4860 return a;
4861 }
4862 zSig1 = 0;
4863 zSig0 = aSig + bSig;
4864 if ( aExp == 0 ) {
4865 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
4866 goto roundAndPack;
4867 }
4868 zExp = aExp;
4869 goto shiftRight1;
4870 }
4871 zSig0 = aSig + bSig;
bb98fe42 4872 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
4873 shiftRight1:
4874 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
4875 zSig0 |= LIT64( 0x8000000000000000 );
4876 ++zExp;
4877 roundAndPack:
a2f2d288 4878 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 4879 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
4880}
4881
4882/*----------------------------------------------------------------------------
4883| Returns the result of subtracting the absolute values of the extended
4884| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
4885| difference is negated before being returned. `zSign' is ignored if the
4886| result is a NaN. The subtraction is performed according to the IEC/IEEE
4887| Standard for Binary Floating-Point Arithmetic.
4888*----------------------------------------------------------------------------*/
4889
e5a41ffa
PM
4890static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
4891 float_status *status)
158142c2 4892{
f4014512 4893 int32_t aExp, bExp, zExp;
bb98fe42 4894 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 4895 int32_t expDiff;
158142c2
FB
4896
4897 aSig = extractFloatx80Frac( a );
4898 aExp = extractFloatx80Exp( a );
4899 bSig = extractFloatx80Frac( b );
4900 bExp = extractFloatx80Exp( b );
4901 expDiff = aExp - bExp;
4902 if ( 0 < expDiff ) goto aExpBigger;
4903 if ( expDiff < 0 ) goto bExpBigger;
4904 if ( aExp == 0x7FFF ) {
bb98fe42 4905 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 4906 return propagateFloatx80NaN(a, b, status);
158142c2 4907 }
ff32e16e 4908 float_raise(float_flag_invalid, status);
af39bc8c 4909 return floatx80_default_nan(status);
158142c2
FB
4910 }
4911 if ( aExp == 0 ) {
4912 aExp = 1;
4913 bExp = 1;
4914 }
4915 zSig1 = 0;
4916 if ( bSig < aSig ) goto aBigger;
4917 if ( aSig < bSig ) goto bBigger;
a2f2d288 4918 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
4919 bExpBigger:
4920 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4921 if ((uint64_t)(bSig << 1)) {
4922 return propagateFloatx80NaN(a, b, status);
4923 }
0f605c88
LV
4924 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
4925 floatx80_infinity_low);
158142c2
FB
4926 }
4927 if ( aExp == 0 ) ++expDiff;
4928 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4929 bBigger:
4930 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
4931 zExp = bExp;
4932 zSign ^= 1;
4933 goto normalizeRoundAndPack;
4934 aExpBigger:
4935 if ( aExp == 0x7FFF ) {
ff32e16e
PM
4936 if ((uint64_t)(aSig << 1)) {
4937 return propagateFloatx80NaN(a, b, status);
4938 }
158142c2
FB
4939 return a;
4940 }
4941 if ( bExp == 0 ) --expDiff;
4942 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4943 aBigger:
4944 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
4945 zExp = aExp;
4946 normalizeRoundAndPack:
a2f2d288 4947 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 4948 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
4949}
4950
4951/*----------------------------------------------------------------------------
4952| Returns the result of adding the extended double-precision floating-point
4953| values `a' and `b'. The operation is performed according to the IEC/IEEE
4954| Standard for Binary Floating-Point Arithmetic.
4955*----------------------------------------------------------------------------*/
4956
e5a41ffa 4957floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
4958{
4959 flag aSign, bSign;
4960
d1eb8f2a
AD
4961 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4962 float_raise(float_flag_invalid, status);
4963 return floatx80_default_nan(status);
4964 }
158142c2
FB
4965 aSign = extractFloatx80Sign( a );
4966 bSign = extractFloatx80Sign( b );
4967 if ( aSign == bSign ) {
ff32e16e 4968 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4969 }
4970 else {
ff32e16e 4971 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4972 }
4973
4974}
4975
4976/*----------------------------------------------------------------------------
4977| Returns the result of subtracting the extended double-precision floating-
4978| point values `a' and `b'. The operation is performed according to the
4979| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4980*----------------------------------------------------------------------------*/
4981
e5a41ffa 4982floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
4983{
4984 flag aSign, bSign;
4985
d1eb8f2a
AD
4986 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4987 float_raise(float_flag_invalid, status);
4988 return floatx80_default_nan(status);
4989 }
158142c2
FB
4990 aSign = extractFloatx80Sign( a );
4991 bSign = extractFloatx80Sign( b );
4992 if ( aSign == bSign ) {
ff32e16e 4993 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4994 }
4995 else {
ff32e16e 4996 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4997 }
4998
4999}
5000
5001/*----------------------------------------------------------------------------
5002| Returns the result of multiplying the extended double-precision floating-
5003| point values `a' and `b'. The operation is performed according to the
5004| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5005*----------------------------------------------------------------------------*/
5006
e5a41ffa 5007floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5008{
5009 flag aSign, bSign, zSign;
f4014512 5010 int32_t aExp, bExp, zExp;
bb98fe42 5011 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 5012
d1eb8f2a
AD
5013 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5014 float_raise(float_flag_invalid, status);
5015 return floatx80_default_nan(status);
5016 }
158142c2
FB
5017 aSig = extractFloatx80Frac( a );
5018 aExp = extractFloatx80Exp( a );
5019 aSign = extractFloatx80Sign( a );
5020 bSig = extractFloatx80Frac( b );
5021 bExp = extractFloatx80Exp( b );
5022 bSign = extractFloatx80Sign( b );
5023 zSign = aSign ^ bSign;
5024 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5025 if ( (uint64_t) ( aSig<<1 )
5026 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5027 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5028 }
5029 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
5030 return packFloatx80(zSign, floatx80_infinity_high,
5031 floatx80_infinity_low);
158142c2
FB
5032 }
5033 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5034 if ((uint64_t)(bSig << 1)) {
5035 return propagateFloatx80NaN(a, b, status);
5036 }
158142c2
FB
5037 if ( ( aExp | aSig ) == 0 ) {
5038 invalid:
ff32e16e 5039 float_raise(float_flag_invalid, status);
af39bc8c 5040 return floatx80_default_nan(status);
158142c2 5041 }
0f605c88
LV
5042 return packFloatx80(zSign, floatx80_infinity_high,
5043 floatx80_infinity_low);
158142c2
FB
5044 }
5045 if ( aExp == 0 ) {
5046 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5047 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5048 }
5049 if ( bExp == 0 ) {
5050 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5051 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5052 }
5053 zExp = aExp + bExp - 0x3FFE;
5054 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 5055 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
5056 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5057 --zExp;
5058 }
a2f2d288 5059 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5060 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5061}
5062
5063/*----------------------------------------------------------------------------
5064| Returns the result of dividing the extended double-precision floating-point
5065| value `a' by the corresponding value `b'. The operation is performed
5066| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5067*----------------------------------------------------------------------------*/
5068
e5a41ffa 5069floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5070{
5071 flag aSign, bSign, zSign;
f4014512 5072 int32_t aExp, bExp, zExp;
bb98fe42
AF
5073 uint64_t aSig, bSig, zSig0, zSig1;
5074 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 5075
d1eb8f2a
AD
5076 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5077 float_raise(float_flag_invalid, status);
5078 return floatx80_default_nan(status);
5079 }
158142c2
FB
5080 aSig = extractFloatx80Frac( a );
5081 aExp = extractFloatx80Exp( a );
5082 aSign = extractFloatx80Sign( a );
5083 bSig = extractFloatx80Frac( b );
5084 bExp = extractFloatx80Exp( b );
5085 bSign = extractFloatx80Sign( b );
5086 zSign = aSign ^ bSign;
5087 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5088 if ((uint64_t)(aSig << 1)) {
5089 return propagateFloatx80NaN(a, b, status);
5090 }
158142c2 5091 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5092 if ((uint64_t)(bSig << 1)) {
5093 return propagateFloatx80NaN(a, b, status);
5094 }
158142c2
FB
5095 goto invalid;
5096 }
0f605c88
LV
5097 return packFloatx80(zSign, floatx80_infinity_high,
5098 floatx80_infinity_low);
158142c2
FB
5099 }
5100 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5101 if ((uint64_t)(bSig << 1)) {
5102 return propagateFloatx80NaN(a, b, status);
5103 }
158142c2
FB
5104 return packFloatx80( zSign, 0, 0 );
5105 }
5106 if ( bExp == 0 ) {
5107 if ( bSig == 0 ) {
5108 if ( ( aExp | aSig ) == 0 ) {
5109 invalid:
ff32e16e 5110 float_raise(float_flag_invalid, status);
af39bc8c 5111 return floatx80_default_nan(status);
158142c2 5112 }
ff32e16e 5113 float_raise(float_flag_divbyzero, status);
0f605c88
LV
5114 return packFloatx80(zSign, floatx80_infinity_high,
5115 floatx80_infinity_low);
158142c2
FB
5116 }
5117 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5118 }
5119 if ( aExp == 0 ) {
5120 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5121 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5122 }
5123 zExp = aExp - bExp + 0x3FFE;
5124 rem1 = 0;
5125 if ( bSig <= aSig ) {
5126 shift128Right( aSig, 0, 1, &aSig, &rem1 );
5127 ++zExp;
5128 }
5129 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
5130 mul64To128( bSig, zSig0, &term0, &term1 );
5131 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 5132 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5133 --zSig0;
5134 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
5135 }
5136 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 5137 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
5138 mul64To128( bSig, zSig1, &term1, &term2 );
5139 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 5140 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5141 --zSig1;
5142 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
5143 }
5144 zSig1 |= ( ( rem1 | rem2 ) != 0 );
5145 }
a2f2d288 5146 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5147 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5148}
5149
5150/*----------------------------------------------------------------------------
5151| Returns the remainder of the extended double-precision floating-point value
5152| `a' with respect to the corresponding value `b'. The operation is performed
5153| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5154*----------------------------------------------------------------------------*/
5155
e5a41ffa 5156floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
158142c2 5157{
ed086f3d 5158 flag aSign, zSign;
f4014512 5159 int32_t aExp, bExp, expDiff;
bb98fe42
AF
5160 uint64_t aSig0, aSig1, bSig;
5161 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 5162
d1eb8f2a
AD
5163 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5164 float_raise(float_flag_invalid, status);
5165 return floatx80_default_nan(status);
5166 }
158142c2
FB
5167 aSig0 = extractFloatx80Frac( a );
5168 aExp = extractFloatx80Exp( a );
5169 aSign = extractFloatx80Sign( a );
5170 bSig = extractFloatx80Frac( b );
5171 bExp = extractFloatx80Exp( b );
158142c2 5172 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5173 if ( (uint64_t) ( aSig0<<1 )
5174 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5175 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5176 }
5177 goto invalid;
5178 }
5179 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5180 if ((uint64_t)(bSig << 1)) {
5181 return propagateFloatx80NaN(a, b, status);
5182 }
158142c2
FB
5183 return a;
5184 }
5185 if ( bExp == 0 ) {
5186 if ( bSig == 0 ) {
5187 invalid:
ff32e16e 5188 float_raise(float_flag_invalid, status);
af39bc8c 5189 return floatx80_default_nan(status);
158142c2
FB
5190 }
5191 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5192 }
5193 if ( aExp == 0 ) {
bb98fe42 5194 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
158142c2
FB
5195 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5196 }
5197 bSig |= LIT64( 0x8000000000000000 );
5198 zSign = aSign;
5199 expDiff = aExp - bExp;
5200 aSig1 = 0;
5201 if ( expDiff < 0 ) {
5202 if ( expDiff < -1 ) return a;
5203 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5204 expDiff = 0;
5205 }
5206 q = ( bSig <= aSig0 );
5207 if ( q ) aSig0 -= bSig;
5208 expDiff -= 64;
5209 while ( 0 < expDiff ) {
5210 q = estimateDiv128To64( aSig0, aSig1, bSig );
5211 q = ( 2 < q ) ? q - 2 : 0;
5212 mul64To128( bSig, q, &term0, &term1 );
5213 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5214 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5215 expDiff -= 62;
5216 }
5217 expDiff += 64;
5218 if ( 0 < expDiff ) {
5219 q = estimateDiv128To64( aSig0, aSig1, bSig );
5220 q = ( 2 < q ) ? q - 2 : 0;
5221 q >>= 64 - expDiff;
5222 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5223 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5224 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5225 while ( le128( term0, term1, aSig0, aSig1 ) ) {
5226 ++q;
5227 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5228 }
5229 }
5230 else {
5231 term1 = 0;
5232 term0 = bSig;
5233 }
5234 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
5235 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
5236 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
5237 && ( q & 1 ) )
5238 ) {
5239 aSig0 = alternateASig0;
5240 aSig1 = alternateASig1;
5241 zSign = ! zSign;
5242 }
5243 return
5244 normalizeRoundAndPackFloatx80(
ff32e16e 5245 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
5246
5247}
5248
5249/*----------------------------------------------------------------------------
5250| Returns the square root of the extended double-precision floating-point
5251| value `a'. The operation is performed according to the IEC/IEEE Standard
5252| for Binary Floating-Point Arithmetic.
5253*----------------------------------------------------------------------------*/
5254
e5a41ffa 5255floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2
FB
5256{
5257 flag aSign;
f4014512 5258 int32_t aExp, zExp;
bb98fe42
AF
5259 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5260 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 5261
d1eb8f2a
AD
5262 if (floatx80_invalid_encoding(a)) {
5263 float_raise(float_flag_invalid, status);
5264 return floatx80_default_nan(status);
5265 }
158142c2
FB
5266 aSig0 = extractFloatx80Frac( a );
5267 aExp = extractFloatx80Exp( a );
5268 aSign = extractFloatx80Sign( a );
5269 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5270 if ((uint64_t)(aSig0 << 1)) {
5271 return propagateFloatx80NaN(a, a, status);
5272 }
158142c2
FB
5273 if ( ! aSign ) return a;
5274 goto invalid;
5275 }
5276 if ( aSign ) {
5277 if ( ( aExp | aSig0 ) == 0 ) return a;
5278 invalid:
ff32e16e 5279 float_raise(float_flag_invalid, status);
af39bc8c 5280 return floatx80_default_nan(status);
158142c2
FB
5281 }
5282 if ( aExp == 0 ) {
5283 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5284 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5285 }
5286 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5287 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5288 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5289 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5290 doubleZSig0 = zSig0<<1;
5291 mul64To128( zSig0, zSig0, &term0, &term1 );
5292 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 5293 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5294 --zSig0;
5295 doubleZSig0 -= 2;
5296 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5297 }
5298 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5299 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5300 if ( zSig1 == 0 ) zSig1 = 1;
5301 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5302 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5303 mul64To128( zSig1, zSig1, &term2, &term3 );
5304 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 5305 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5306 --zSig1;
5307 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5308 term3 |= 1;
5309 term2 |= doubleZSig0;
5310 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5311 }
5312 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5313 }
5314 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5315 zSig0 |= doubleZSig0;
a2f2d288
PM
5316 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5317 0, zExp, zSig0, zSig1, status);
158142c2
FB
5318}
5319
5320/*----------------------------------------------------------------------------
b689362d
AJ
5321| Returns 1 if the extended double-precision floating-point value `a' is equal
5322| to the corresponding value `b', and 0 otherwise. The invalid exception is
5323| raised if either operand is a NaN. Otherwise, the comparison is performed
5324| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5325*----------------------------------------------------------------------------*/
5326
e5a41ffa 5327int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5328{
5329
d1eb8f2a
AD
5330 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5331 || (extractFloatx80Exp(a) == 0x7FFF
5332 && (uint64_t) (extractFloatx80Frac(a) << 1))
5333 || (extractFloatx80Exp(b) == 0x7FFF
5334 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5335 ) {
ff32e16e 5336 float_raise(float_flag_invalid, status);
158142c2
FB
5337 return 0;
5338 }
5339 return
5340 ( a.low == b.low )
5341 && ( ( a.high == b.high )
5342 || ( ( a.low == 0 )
bb98fe42 5343 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5344 );
5345
5346}
5347
5348/*----------------------------------------------------------------------------
5349| Returns 1 if the extended double-precision floating-point value `a' is
5350| less than or equal to the corresponding value `b', and 0 otherwise. The
f5a64251
AJ
5351| invalid exception is raised if either operand is a NaN. The comparison is
5352| performed according to the IEC/IEEE Standard for Binary Floating-Point
5353| Arithmetic.
158142c2
FB
5354*----------------------------------------------------------------------------*/
5355
e5a41ffa 5356int floatx80_le(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5357{
5358 flag aSign, bSign;
5359
d1eb8f2a
AD
5360 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5361 || (extractFloatx80Exp(a) == 0x7FFF
5362 && (uint64_t) (extractFloatx80Frac(a) << 1))
5363 || (extractFloatx80Exp(b) == 0x7FFF
5364 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5365 ) {
ff32e16e 5366 float_raise(float_flag_invalid, status);
158142c2
FB
5367 return 0;
5368 }
5369 aSign = extractFloatx80Sign( a );
5370 bSign = extractFloatx80Sign( b );
5371 if ( aSign != bSign ) {
5372 return
5373 aSign
bb98fe42 5374 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5375 == 0 );
5376 }
5377 return
5378 aSign ? le128( b.high, b.low, a.high, a.low )
5379 : le128( a.high, a.low, b.high, b.low );
5380
5381}
5382
5383/*----------------------------------------------------------------------------
5384| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5385| less than the corresponding value `b', and 0 otherwise. The invalid
5386| exception is raised if either operand is a NaN. The comparison is performed
5387| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5388*----------------------------------------------------------------------------*/
5389
e5a41ffa 5390int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5391{
5392 flag aSign, bSign;
5393
d1eb8f2a
AD
5394 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5395 || (extractFloatx80Exp(a) == 0x7FFF
5396 && (uint64_t) (extractFloatx80Frac(a) << 1))
5397 || (extractFloatx80Exp(b) == 0x7FFF
5398 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5399 ) {
ff32e16e 5400 float_raise(float_flag_invalid, status);
158142c2
FB
5401 return 0;
5402 }
5403 aSign = extractFloatx80Sign( a );
5404 bSign = extractFloatx80Sign( b );
5405 if ( aSign != bSign ) {
5406 return
5407 aSign
bb98fe42 5408 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5409 != 0 );
5410 }
5411 return
5412 aSign ? lt128( b.high, b.low, a.high, a.low )
5413 : lt128( a.high, a.low, b.high, b.low );
5414
5415}
5416
67b7861d
AJ
5417/*----------------------------------------------------------------------------
5418| Returns 1 if the extended double-precision floating-point values `a' and `b'
f5a64251
AJ
5419| cannot be compared, and 0 otherwise. The invalid exception is raised if
5420| either operand is a NaN. The comparison is performed according to the
5421| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
67b7861d 5422*----------------------------------------------------------------------------*/
e5a41ffa 5423int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
67b7861d 5424{
d1eb8f2a
AD
5425 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5426 || (extractFloatx80Exp(a) == 0x7FFF
5427 && (uint64_t) (extractFloatx80Frac(a) << 1))
5428 || (extractFloatx80Exp(b) == 0x7FFF
5429 && (uint64_t) (extractFloatx80Frac(b) << 1))
67b7861d 5430 ) {
ff32e16e 5431 float_raise(float_flag_invalid, status);
67b7861d
AJ
5432 return 1;
5433 }
5434 return 0;
5435}
5436
158142c2 5437/*----------------------------------------------------------------------------
b689362d 5438| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5439| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5440| cause an exception. The comparison is performed according to the IEC/IEEE
5441| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5442*----------------------------------------------------------------------------*/
5443
e5a41ffa 5444int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5445{
5446
d1eb8f2a
AD
5447 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5448 float_raise(float_flag_invalid, status);
5449 return 0;
5450 }
158142c2 5451 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5452 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5453 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5454 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5455 ) {
af39bc8c
AM
5456 if (floatx80_is_signaling_nan(a, status)
5457 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5458 float_raise(float_flag_invalid, status);
b689362d 5459 }
158142c2
FB
5460 return 0;
5461 }
5462 return
5463 ( a.low == b.low )
5464 && ( ( a.high == b.high )
5465 || ( ( a.low == 0 )
bb98fe42 5466 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5467 );
5468
5469}
5470
5471/*----------------------------------------------------------------------------
5472| Returns 1 if the extended double-precision floating-point value `a' is less
5473| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
5474| do not cause an exception. Otherwise, the comparison is performed according
5475| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5476*----------------------------------------------------------------------------*/
5477
e5a41ffa 5478int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5479{
5480 flag aSign, bSign;
5481
d1eb8f2a
AD
5482 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5483 float_raise(float_flag_invalid, status);
5484 return 0;
5485 }
158142c2 5486 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5487 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5488 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5489 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5490 ) {
af39bc8c
AM
5491 if (floatx80_is_signaling_nan(a, status)
5492 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5493 float_raise(float_flag_invalid, status);
158142c2
FB
5494 }
5495 return 0;
5496 }
5497 aSign = extractFloatx80Sign( a );
5498 bSign = extractFloatx80Sign( b );
5499 if ( aSign != bSign ) {
5500 return
5501 aSign
bb98fe42 5502 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5503 == 0 );
5504 }
5505 return
5506 aSign ? le128( b.high, b.low, a.high, a.low )
5507 : le128( a.high, a.low, b.high, b.low );
5508
5509}
5510
5511/*----------------------------------------------------------------------------
5512| Returns 1 if the extended double-precision floating-point value `a' is less
5513| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
5514| an exception. Otherwise, the comparison is performed according to the
5515| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5516*----------------------------------------------------------------------------*/
5517
e5a41ffa 5518int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5519{
5520 flag aSign, bSign;
5521
d1eb8f2a
AD
5522 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5523 float_raise(float_flag_invalid, status);
5524 return 0;
5525 }
158142c2 5526 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5527 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5528 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5529 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5530 ) {
af39bc8c
AM
5531 if (floatx80_is_signaling_nan(a, status)
5532 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5533 float_raise(float_flag_invalid, status);
158142c2
FB
5534 }
5535 return 0;
5536 }
5537 aSign = extractFloatx80Sign( a );
5538 bSign = extractFloatx80Sign( b );
5539 if ( aSign != bSign ) {
5540 return
5541 aSign
bb98fe42 5542 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5543 != 0 );
5544 }
5545 return
5546 aSign ? lt128( b.high, b.low, a.high, a.low )
5547 : lt128( a.high, a.low, b.high, b.low );
5548
5549}
5550
67b7861d
AJ
5551/*----------------------------------------------------------------------------
5552| Returns 1 if the extended double-precision floating-point values `a' and `b'
5553| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
5554| The comparison is performed according to the IEC/IEEE Standard for Binary
5555| Floating-Point Arithmetic.
5556*----------------------------------------------------------------------------*/
e5a41ffa 5557int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
67b7861d 5558{
d1eb8f2a
AD
5559 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5560 float_raise(float_flag_invalid, status);
5561 return 1;
5562 }
67b7861d
AJ
5563 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5564 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5565 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5566 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5567 ) {
af39bc8c
AM
5568 if (floatx80_is_signaling_nan(a, status)
5569 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5570 float_raise(float_flag_invalid, status);
67b7861d
AJ
5571 }
5572 return 1;
5573 }
5574 return 0;
5575}
5576
158142c2
FB
5577/*----------------------------------------------------------------------------
5578| Returns the result of converting the quadruple-precision floating-point
5579| value `a' to the 32-bit two's complement integer format. The conversion
5580| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5581| Arithmetic---which means in particular that the conversion is rounded
5582| according to the current rounding mode. If `a' is a NaN, the largest
5583| positive integer is returned. Otherwise, if the conversion overflows, the
5584| largest integer with the same sign as `a' is returned.
5585*----------------------------------------------------------------------------*/
5586
f4014512 5587int32_t float128_to_int32(float128 a, float_status *status)
158142c2
FB
5588{
5589 flag aSign;
f4014512 5590 int32_t aExp, shiftCount;
bb98fe42 5591 uint64_t aSig0, aSig1;
158142c2
FB
5592
5593 aSig1 = extractFloat128Frac1( a );
5594 aSig0 = extractFloat128Frac0( a );
5595 aExp = extractFloat128Exp( a );
5596 aSign = extractFloat128Sign( a );
5597 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5598 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5599 aSig0 |= ( aSig1 != 0 );
5600 shiftCount = 0x4028 - aExp;
5601 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 5602 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
5603
5604}
5605
5606/*----------------------------------------------------------------------------
5607| Returns the result of converting the quadruple-precision floating-point
5608| value `a' to the 32-bit two's complement integer format. The conversion
5609| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5610| Arithmetic, except that the conversion is always rounded toward zero. If
5611| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
5612| conversion overflows, the largest integer with the same sign as `a' is
5613| returned.
5614*----------------------------------------------------------------------------*/
5615
f4014512 5616int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2
FB
5617{
5618 flag aSign;
f4014512 5619 int32_t aExp, shiftCount;
bb98fe42 5620 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 5621 int32_t z;
158142c2
FB
5622
5623 aSig1 = extractFloat128Frac1( a );
5624 aSig0 = extractFloat128Frac0( a );
5625 aExp = extractFloat128Exp( a );
5626 aSign = extractFloat128Sign( a );
5627 aSig0 |= ( aSig1 != 0 );
5628 if ( 0x401E < aExp ) {
5629 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5630 goto invalid;
5631 }
5632 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5633 if (aExp || aSig0) {
5634 status->float_exception_flags |= float_flag_inexact;
5635 }
158142c2
FB
5636 return 0;
5637 }
5638 aSig0 |= LIT64( 0x0001000000000000 );
5639 shiftCount = 0x402F - aExp;
5640 savedASig = aSig0;
5641 aSig0 >>= shiftCount;
5642 z = aSig0;
5643 if ( aSign ) z = - z;
5644 if ( ( z < 0 ) ^ aSign ) {
5645 invalid:
ff32e16e 5646 float_raise(float_flag_invalid, status);
bb98fe42 5647 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5648 }
5649 if ( ( aSig0<<shiftCount ) != savedASig ) {
a2f2d288 5650 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5651 }
5652 return z;
5653
5654}
5655
5656/*----------------------------------------------------------------------------
5657| Returns the result of converting the quadruple-precision floating-point
5658| value `a' to the 64-bit two's complement integer format. The conversion
5659| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5660| Arithmetic---which means in particular that the conversion is rounded
5661| according to the current rounding mode. If `a' is a NaN, the largest
5662| positive integer is returned. Otherwise, if the conversion overflows, the
5663| largest integer with the same sign as `a' is returned.
5664*----------------------------------------------------------------------------*/
5665
f42c2224 5666int64_t float128_to_int64(float128 a, float_status *status)
158142c2
FB
5667{
5668 flag aSign;
f4014512 5669 int32_t aExp, shiftCount;
bb98fe42 5670 uint64_t aSig0, aSig1;
158142c2
FB
5671
5672 aSig1 = extractFloat128Frac1( a );
5673 aSig0 = extractFloat128Frac0( a );
5674 aExp = extractFloat128Exp( a );
5675 aSign = extractFloat128Sign( a );
5676 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5677 shiftCount = 0x402F - aExp;
5678 if ( shiftCount <= 0 ) {
5679 if ( 0x403E < aExp ) {
ff32e16e 5680 float_raise(float_flag_invalid, status);
158142c2
FB
5681 if ( ! aSign
5682 || ( ( aExp == 0x7FFF )
5683 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5684 )
5685 ) {
5686 return LIT64( 0x7FFFFFFFFFFFFFFF );
5687 }
bb98fe42 5688 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5689 }
5690 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5691 }
5692 else {
5693 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5694 }
ff32e16e 5695 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
5696
5697}
5698
5699/*----------------------------------------------------------------------------
5700| Returns the result of converting the quadruple-precision floating-point
5701| value `a' to the 64-bit two's complement integer format. The conversion
5702| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5703| Arithmetic, except that the conversion is always rounded toward zero.
5704| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
5705| the conversion overflows, the largest integer with the same sign as `a' is
5706| returned.
5707*----------------------------------------------------------------------------*/
5708
f42c2224 5709int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2
FB
5710{
5711 flag aSign;
f4014512 5712 int32_t aExp, shiftCount;
bb98fe42 5713 uint64_t aSig0, aSig1;
f42c2224 5714 int64_t z;
158142c2
FB
5715
5716 aSig1 = extractFloat128Frac1( a );
5717 aSig0 = extractFloat128Frac0( a );
5718 aExp = extractFloat128Exp( a );
5719 aSign = extractFloat128Sign( a );
5720 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5721 shiftCount = aExp - 0x402F;
5722 if ( 0 < shiftCount ) {
5723 if ( 0x403E <= aExp ) {
5724 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5725 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
5726 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
a2f2d288
PM
5727 if (aSig1) {
5728 status->float_exception_flags |= float_flag_inexact;
5729 }
158142c2
FB
5730 }
5731 else {
ff32e16e 5732 float_raise(float_flag_invalid, status);
158142c2
FB
5733 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
5734 return LIT64( 0x7FFFFFFFFFFFFFFF );
5735 }
5736 }
bb98fe42 5737 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5738 }
5739 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 5740 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
a2f2d288 5741 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5742 }
5743 }
5744 else {
5745 if ( aExp < 0x3FFF ) {
5746 if ( aExp | aSig0 | aSig1 ) {
a2f2d288 5747 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5748 }
5749 return 0;
5750 }
5751 z = aSig0>>( - shiftCount );
5752 if ( aSig1
bb98fe42 5753 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
a2f2d288 5754 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5755 }
5756 }
5757 if ( aSign ) z = - z;
5758 return z;
5759
5760}
5761
2e6d8568
BR
5762/*----------------------------------------------------------------------------
5763| Returns the result of converting the quadruple-precision floating-point value
5764| `a' to the 64-bit unsigned integer format. The conversion is
5765| performed according to the IEC/IEEE Standard for Binary Floating-Point
5766| Arithmetic---which means in particular that the conversion is rounded
5767| according to the current rounding mode. If `a' is a NaN, the largest
5768| positive integer is returned. If the conversion overflows, the
5769| largest unsigned integer is returned. If 'a' is negative, the value is
5770| rounded and zero is returned; negative values that do not round to zero
5771| will raise the inexact exception.
5772*----------------------------------------------------------------------------*/
5773
5774uint64_t float128_to_uint64(float128 a, float_status *status)
5775{
5776 flag aSign;
5777 int aExp;
5778 int shiftCount;
5779 uint64_t aSig0, aSig1;
5780
5781 aSig0 = extractFloat128Frac0(a);
5782 aSig1 = extractFloat128Frac1(a);
5783 aExp = extractFloat128Exp(a);
5784 aSign = extractFloat128Sign(a);
5785 if (aSign && (aExp > 0x3FFE)) {
5786 float_raise(float_flag_invalid, status);
5787 if (float128_is_any_nan(a)) {
5788 return LIT64(0xFFFFFFFFFFFFFFFF);
5789 } else {
5790 return 0;
5791 }
5792 }
5793 if (aExp) {
5794 aSig0 |= LIT64(0x0001000000000000);
5795 }
5796 shiftCount = 0x402F - aExp;
5797 if (shiftCount <= 0) {
5798 if (0x403E < aExp) {
5799 float_raise(float_flag_invalid, status);
5800 return LIT64(0xFFFFFFFFFFFFFFFF);
5801 }
5802 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
5803 } else {
5804 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
5805 }
5806 return roundAndPackUint64(aSign, aSig0, aSig1, status);
5807}
5808
5809uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
5810{
5811 uint64_t v;
5812 signed char current_rounding_mode = status->float_rounding_mode;
5813
5814 set_float_rounding_mode(float_round_to_zero, status);
5815 v = float128_to_uint64(a, status);
5816 set_float_rounding_mode(current_rounding_mode, status);
5817
5818 return v;
5819}
5820
158142c2
FB
5821/*----------------------------------------------------------------------------
5822| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
5823| value `a' to the 32-bit unsigned integer format. The conversion
5824| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5825| Arithmetic except that the conversion is always rounded toward zero.
5826| If `a' is a NaN, the largest positive integer is returned. Otherwise,
5827| if the conversion overflows, the largest unsigned integer is returned.
5828| If 'a' is negative, the value is rounded and zero is returned; negative
5829| values that do not round to zero will raise the inexact exception.
5830*----------------------------------------------------------------------------*/
5831
5832uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
5833{
5834 uint64_t v;
5835 uint32_t res;
5836 int old_exc_flags = get_float_exception_flags(status);
5837
5838 v = float128_to_uint64_round_to_zero(a, status);
5839 if (v > 0xffffffff) {
5840 res = 0xffffffff;
5841 } else {
5842 return v;
5843 }
5844 set_float_exception_flags(old_exc_flags, status);
5845 float_raise(float_flag_invalid, status);
5846 return res;
5847}
5848
5849/*----------------------------------------------------------------------------
5850| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
5851| value `a' to the single-precision floating-point format. The conversion
5852| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5853| Arithmetic.
5854*----------------------------------------------------------------------------*/
5855
e5a41ffa 5856float32 float128_to_float32(float128 a, float_status *status)
158142c2
FB
5857{
5858 flag aSign;
f4014512 5859 int32_t aExp;
bb98fe42
AF
5860 uint64_t aSig0, aSig1;
5861 uint32_t zSig;
158142c2
FB
5862
5863 aSig1 = extractFloat128Frac1( a );
5864 aSig0 = extractFloat128Frac0( a );
5865 aExp = extractFloat128Exp( a );
5866 aSign = extractFloat128Sign( a );
5867 if ( aExp == 0x7FFF ) {
5868 if ( aSig0 | aSig1 ) {
ff32e16e 5869 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
5870 }
5871 return packFloat32( aSign, 0xFF, 0 );
5872 }
5873 aSig0 |= ( aSig1 != 0 );
5874 shift64RightJamming( aSig0, 18, &aSig0 );
5875 zSig = aSig0;
5876 if ( aExp || zSig ) {
5877 zSig |= 0x40000000;
5878 aExp -= 0x3F81;
5879 }
ff32e16e 5880 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
5881
5882}
5883
5884/*----------------------------------------------------------------------------
5885| Returns the result of converting the quadruple-precision floating-point
5886| value `a' to the double-precision floating-point format. The conversion
5887| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5888| Arithmetic.
5889*----------------------------------------------------------------------------*/
5890
e5a41ffa 5891float64 float128_to_float64(float128 a, float_status *status)
158142c2
FB
5892{
5893 flag aSign;
f4014512 5894 int32_t aExp;
bb98fe42 5895 uint64_t aSig0, aSig1;
158142c2
FB
5896
5897 aSig1 = extractFloat128Frac1( a );
5898 aSig0 = extractFloat128Frac0( a );
5899 aExp = extractFloat128Exp( a );
5900 aSign = extractFloat128Sign( a );
5901 if ( aExp == 0x7FFF ) {
5902 if ( aSig0 | aSig1 ) {
ff32e16e 5903 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
5904 }
5905 return packFloat64( aSign, 0x7FF, 0 );
5906 }
5907 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5908 aSig0 |= ( aSig1 != 0 );
5909 if ( aExp || aSig0 ) {
5910 aSig0 |= LIT64( 0x4000000000000000 );
5911 aExp -= 0x3C01;
5912 }
ff32e16e 5913 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
5914
5915}
5916
158142c2
FB
5917/*----------------------------------------------------------------------------
5918| Returns the result of converting the quadruple-precision floating-point
5919| value `a' to the extended double-precision floating-point format. The
5920| conversion is performed according to the IEC/IEEE Standard for Binary
5921| Floating-Point Arithmetic.
5922*----------------------------------------------------------------------------*/
5923
e5a41ffa 5924floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2
FB
5925{
5926 flag aSign;
f4014512 5927 int32_t aExp;
bb98fe42 5928 uint64_t aSig0, aSig1;
158142c2
FB
5929
5930 aSig1 = extractFloat128Frac1( a );
5931 aSig0 = extractFloat128Frac0( a );
5932 aExp = extractFloat128Exp( a );
5933 aSign = extractFloat128Sign( a );
5934 if ( aExp == 0x7FFF ) {
5935 if ( aSig0 | aSig1 ) {
ff32e16e 5936 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
158142c2 5937 }
0f605c88
LV
5938 return packFloatx80(aSign, floatx80_infinity_high,
5939 floatx80_infinity_low);
158142c2
FB
5940 }
5941 if ( aExp == 0 ) {
5942 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
5943 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5944 }
5945 else {
5946 aSig0 |= LIT64( 0x0001000000000000 );
5947 }
5948 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 5949 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
5950
5951}
5952
158142c2
FB
5953/*----------------------------------------------------------------------------
5954| Rounds the quadruple-precision floating-point value `a' to an integer, and
5955| returns the result as a quadruple-precision floating-point value. The
5956| operation is performed according to the IEC/IEEE Standard for Binary
5957| Floating-Point Arithmetic.
5958*----------------------------------------------------------------------------*/
5959
e5a41ffa 5960float128 float128_round_to_int(float128 a, float_status *status)
158142c2
FB
5961{
5962 flag aSign;
f4014512 5963 int32_t aExp;
bb98fe42 5964 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5965 float128 z;
5966
5967 aExp = extractFloat128Exp( a );
5968 if ( 0x402F <= aExp ) {
5969 if ( 0x406F <= aExp ) {
5970 if ( ( aExp == 0x7FFF )
5971 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
5972 ) {
ff32e16e 5973 return propagateFloat128NaN(a, a, status);
158142c2
FB
5974 }
5975 return a;
5976 }
5977 lastBitMask = 1;
5978 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
5979 roundBitsMask = lastBitMask - 1;
5980 z = a;
a2f2d288 5981 switch (status->float_rounding_mode) {
dc355b76 5982 case float_round_nearest_even:
158142c2
FB
5983 if ( lastBitMask ) {
5984 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
5985 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
5986 }
5987 else {
bb98fe42 5988 if ( (int64_t) z.low < 0 ) {
158142c2 5989 ++z.high;
bb98fe42 5990 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
5991 }
5992 }
dc355b76 5993 break;
f9288a76
PM
5994 case float_round_ties_away:
5995 if (lastBitMask) {
5996 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
5997 } else {
5998 if ((int64_t) z.low < 0) {
5999 ++z.high;
6000 }
6001 }
6002 break;
dc355b76
PM
6003 case float_round_to_zero:
6004 break;
6005 case float_round_up:
6006 if (!extractFloat128Sign(z)) {
6007 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6008 }
6009 break;
6010 case float_round_down:
6011 if (extractFloat128Sign(z)) {
6012 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 6013 }
dc355b76
PM
6014 break;
6015 default:
6016 abort();
158142c2
FB
6017 }
6018 z.low &= ~ roundBitsMask;
6019 }
6020 else {
6021 if ( aExp < 0x3FFF ) {
bb98fe42 6022 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
a2f2d288 6023 status->float_exception_flags |= float_flag_inexact;
158142c2 6024 aSign = extractFloat128Sign( a );
a2f2d288 6025 switch (status->float_rounding_mode) {
158142c2
FB
6026 case float_round_nearest_even:
6027 if ( ( aExp == 0x3FFE )
6028 && ( extractFloat128Frac0( a )
6029 | extractFloat128Frac1( a ) )
6030 ) {
6031 return packFloat128( aSign, 0x3FFF, 0, 0 );
6032 }
6033 break;
f9288a76
PM
6034 case float_round_ties_away:
6035 if (aExp == 0x3FFE) {
6036 return packFloat128(aSign, 0x3FFF, 0, 0);
6037 }
6038 break;
158142c2
FB
6039 case float_round_down:
6040 return
6041 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6042 : packFloat128( 0, 0, 0, 0 );
6043 case float_round_up:
6044 return
6045 aSign ? packFloat128( 1, 0, 0, 0 )
6046 : packFloat128( 0, 0x3FFF, 0, 0 );
6047 }
6048 return packFloat128( aSign, 0, 0, 0 );
6049 }
6050 lastBitMask = 1;
6051 lastBitMask <<= 0x402F - aExp;
6052 roundBitsMask = lastBitMask - 1;
6053 z.low = 0;
6054 z.high = a.high;
a2f2d288 6055 switch (status->float_rounding_mode) {
dc355b76 6056 case float_round_nearest_even:
158142c2
FB
6057 z.high += lastBitMask>>1;
6058 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6059 z.high &= ~ lastBitMask;
6060 }
dc355b76 6061 break;
f9288a76
PM
6062 case float_round_ties_away:
6063 z.high += lastBitMask>>1;
6064 break;
dc355b76
PM
6065 case float_round_to_zero:
6066 break;
6067 case float_round_up:
6068 if (!extractFloat128Sign(z)) {
158142c2
FB
6069 z.high |= ( a.low != 0 );
6070 z.high += roundBitsMask;
6071 }
dc355b76
PM
6072 break;
6073 case float_round_down:
6074 if (extractFloat128Sign(z)) {
6075 z.high |= (a.low != 0);
6076 z.high += roundBitsMask;
6077 }
6078 break;
6079 default:
6080 abort();
158142c2
FB
6081 }
6082 z.high &= ~ roundBitsMask;
6083 }
6084 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
a2f2d288 6085 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6086 }
6087 return z;
6088
6089}
6090
6091/*----------------------------------------------------------------------------
6092| Returns the result of adding the absolute values of the quadruple-precision
6093| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
6094| before being returned. `zSign' is ignored if the result is a NaN.
6095| The addition is performed according to the IEC/IEEE Standard for Binary
6096| Floating-Point Arithmetic.
6097*----------------------------------------------------------------------------*/
6098
e5a41ffa
PM
6099static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
6100 float_status *status)
158142c2 6101{
f4014512 6102 int32_t aExp, bExp, zExp;
bb98fe42 6103 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 6104 int32_t expDiff;
158142c2
FB
6105
6106 aSig1 = extractFloat128Frac1( a );
6107 aSig0 = extractFloat128Frac0( a );
6108 aExp = extractFloat128Exp( a );
6109 bSig1 = extractFloat128Frac1( b );
6110 bSig0 = extractFloat128Frac0( b );
6111 bExp = extractFloat128Exp( b );
6112 expDiff = aExp - bExp;
6113 if ( 0 < expDiff ) {
6114 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6115 if (aSig0 | aSig1) {
6116 return propagateFloat128NaN(a, b, status);
6117 }
158142c2
FB
6118 return a;
6119 }
6120 if ( bExp == 0 ) {
6121 --expDiff;
6122 }
6123 else {
6124 bSig0 |= LIT64( 0x0001000000000000 );
6125 }
6126 shift128ExtraRightJamming(
6127 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6128 zExp = aExp;
6129 }
6130 else if ( expDiff < 0 ) {
6131 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6132 if (bSig0 | bSig1) {
6133 return propagateFloat128NaN(a, b, status);
6134 }
158142c2
FB
6135 return packFloat128( zSign, 0x7FFF, 0, 0 );
6136 }
6137 if ( aExp == 0 ) {
6138 ++expDiff;
6139 }
6140 else {
6141 aSig0 |= LIT64( 0x0001000000000000 );
6142 }
6143 shift128ExtraRightJamming(
6144 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6145 zExp = bExp;
6146 }
6147 else {
6148 if ( aExp == 0x7FFF ) {
6149 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6150 return propagateFloat128NaN(a, b, status);
158142c2
FB
6151 }
6152 return a;
6153 }
6154 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 6155 if ( aExp == 0 ) {
a2f2d288 6156 if (status->flush_to_zero) {
e6afc87f 6157 if (zSig0 | zSig1) {
ff32e16e 6158 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
6159 }
6160 return packFloat128(zSign, 0, 0, 0);
6161 }
fe76d976
PB
6162 return packFloat128( zSign, 0, zSig0, zSig1 );
6163 }
158142c2
FB
6164 zSig2 = 0;
6165 zSig0 |= LIT64( 0x0002000000000000 );
6166 zExp = aExp;
6167 goto shiftRight1;
6168 }
6169 aSig0 |= LIT64( 0x0001000000000000 );
6170 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6171 --zExp;
6172 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
6173 ++zExp;
6174 shiftRight1:
6175 shift128ExtraRightJamming(
6176 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6177 roundAndPack:
ff32e16e 6178 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6179
6180}
6181
6182/*----------------------------------------------------------------------------
6183| Returns the result of subtracting the absolute values of the quadruple-
6184| precision floating-point values `a' and `b'. If `zSign' is 1, the
6185| difference is negated before being returned. `zSign' is ignored if the
6186| result is a NaN. The subtraction is performed according to the IEC/IEEE
6187| Standard for Binary Floating-Point Arithmetic.
6188*----------------------------------------------------------------------------*/
6189
e5a41ffa
PM
6190static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
6191 float_status *status)
158142c2 6192{
f4014512 6193 int32_t aExp, bExp, zExp;
bb98fe42 6194 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 6195 int32_t expDiff;
158142c2
FB
6196
6197 aSig1 = extractFloat128Frac1( a );
6198 aSig0 = extractFloat128Frac0( a );
6199 aExp = extractFloat128Exp( a );
6200 bSig1 = extractFloat128Frac1( b );
6201 bSig0 = extractFloat128Frac0( b );
6202 bExp = extractFloat128Exp( b );
6203 expDiff = aExp - bExp;
6204 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6205 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
6206 if ( 0 < expDiff ) goto aExpBigger;
6207 if ( expDiff < 0 ) goto bExpBigger;
6208 if ( aExp == 0x7FFF ) {
6209 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6210 return propagateFloat128NaN(a, b, status);
158142c2 6211 }
ff32e16e 6212 float_raise(float_flag_invalid, status);
af39bc8c 6213 return float128_default_nan(status);
158142c2
FB
6214 }
6215 if ( aExp == 0 ) {
6216 aExp = 1;
6217 bExp = 1;
6218 }
6219 if ( bSig0 < aSig0 ) goto aBigger;
6220 if ( aSig0 < bSig0 ) goto bBigger;
6221 if ( bSig1 < aSig1 ) goto aBigger;
6222 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
6223 return packFloat128(status->float_rounding_mode == float_round_down,
6224 0, 0, 0);
158142c2
FB
6225 bExpBigger:
6226 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6227 if (bSig0 | bSig1) {
6228 return propagateFloat128NaN(a, b, status);
6229 }
158142c2
FB
6230 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
6231 }
6232 if ( aExp == 0 ) {
6233 ++expDiff;
6234 }
6235 else {
6236 aSig0 |= LIT64( 0x4000000000000000 );
6237 }
6238 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6239 bSig0 |= LIT64( 0x4000000000000000 );
6240 bBigger:
6241 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
6242 zExp = bExp;
6243 zSign ^= 1;
6244 goto normalizeRoundAndPack;
6245 aExpBigger:
6246 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6247 if (aSig0 | aSig1) {
6248 return propagateFloat128NaN(a, b, status);
6249 }
158142c2
FB
6250 return a;
6251 }
6252 if ( bExp == 0 ) {
6253 --expDiff;
6254 }
6255 else {
6256 bSig0 |= LIT64( 0x4000000000000000 );
6257 }
6258 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
6259 aSig0 |= LIT64( 0x4000000000000000 );
6260 aBigger:
6261 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6262 zExp = aExp;
6263 normalizeRoundAndPack:
6264 --zExp;
ff32e16e
PM
6265 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
6266 status);
158142c2
FB
6267
6268}
6269
6270/*----------------------------------------------------------------------------
6271| Returns the result of adding the quadruple-precision floating-point values
6272| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
6273| for Binary Floating-Point Arithmetic.
6274*----------------------------------------------------------------------------*/
6275
e5a41ffa 6276float128 float128_add(float128 a, float128 b, float_status *status)
158142c2
FB
6277{
6278 flag aSign, bSign;
6279
6280 aSign = extractFloat128Sign( a );
6281 bSign = extractFloat128Sign( b );
6282 if ( aSign == bSign ) {
ff32e16e 6283 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6284 }
6285 else {
ff32e16e 6286 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6287 }
6288
6289}
6290
6291/*----------------------------------------------------------------------------
6292| Returns the result of subtracting the quadruple-precision floating-point
6293| values `a' and `b'. The operation is performed according to the IEC/IEEE
6294| Standard for Binary Floating-Point Arithmetic.
6295*----------------------------------------------------------------------------*/
6296
e5a41ffa 6297float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2
FB
6298{
6299 flag aSign, bSign;
6300
6301 aSign = extractFloat128Sign( a );
6302 bSign = extractFloat128Sign( b );
6303 if ( aSign == bSign ) {
ff32e16e 6304 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6305 }
6306 else {
ff32e16e 6307 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6308 }
6309
6310}
6311
6312/*----------------------------------------------------------------------------
6313| Returns the result of multiplying the quadruple-precision floating-point
6314| values `a' and `b'. The operation is performed according to the IEC/IEEE
6315| Standard for Binary Floating-Point Arithmetic.
6316*----------------------------------------------------------------------------*/
6317
e5a41ffa 6318float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2
FB
6319{
6320 flag aSign, bSign, zSign;
f4014512 6321 int32_t aExp, bExp, zExp;
bb98fe42 6322 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
6323
6324 aSig1 = extractFloat128Frac1( a );
6325 aSig0 = extractFloat128Frac0( a );
6326 aExp = extractFloat128Exp( a );
6327 aSign = extractFloat128Sign( a );
6328 bSig1 = extractFloat128Frac1( b );
6329 bSig0 = extractFloat128Frac0( b );
6330 bExp = extractFloat128Exp( b );
6331 bSign = extractFloat128Sign( b );
6332 zSign = aSign ^ bSign;
6333 if ( aExp == 0x7FFF ) {
6334 if ( ( aSig0 | aSig1 )
6335 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6336 return propagateFloat128NaN(a, b, status);
158142c2
FB
6337 }
6338 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6339 return packFloat128( zSign, 0x7FFF, 0, 0 );
6340 }
6341 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6342 if (bSig0 | bSig1) {
6343 return propagateFloat128NaN(a, b, status);
6344 }
158142c2
FB
6345 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6346 invalid:
ff32e16e 6347 float_raise(float_flag_invalid, status);
af39bc8c 6348 return float128_default_nan(status);
158142c2
FB
6349 }
6350 return packFloat128( zSign, 0x7FFF, 0, 0 );
6351 }
6352 if ( aExp == 0 ) {
6353 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6354 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6355 }
6356 if ( bExp == 0 ) {
6357 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6358 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6359 }
6360 zExp = aExp + bExp - 0x4000;
6361 aSig0 |= LIT64( 0x0001000000000000 );
6362 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6363 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6364 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6365 zSig2 |= ( zSig3 != 0 );
6366 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6367 shift128ExtraRightJamming(
6368 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6369 ++zExp;
6370 }
ff32e16e 6371 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6372
6373}
6374
6375/*----------------------------------------------------------------------------
6376| Returns the result of dividing the quadruple-precision floating-point value
6377| `a' by the corresponding value `b'. The operation is performed according to
6378| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6379*----------------------------------------------------------------------------*/
6380
e5a41ffa 6381float128 float128_div(float128 a, float128 b, float_status *status)
158142c2
FB
6382{
6383 flag aSign, bSign, zSign;
f4014512 6384 int32_t aExp, bExp, zExp;
bb98fe42
AF
6385 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6386 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6387
6388 aSig1 = extractFloat128Frac1( a );
6389 aSig0 = extractFloat128Frac0( a );
6390 aExp = extractFloat128Exp( a );
6391 aSign = extractFloat128Sign( a );
6392 bSig1 = extractFloat128Frac1( b );
6393 bSig0 = extractFloat128Frac0( b );
6394 bExp = extractFloat128Exp( b );
6395 bSign = extractFloat128Sign( b );
6396 zSign = aSign ^ bSign;
6397 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6398 if (aSig0 | aSig1) {
6399 return propagateFloat128NaN(a, b, status);
6400 }
158142c2 6401 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6402 if (bSig0 | bSig1) {
6403 return propagateFloat128NaN(a, b, status);
6404 }
158142c2
FB
6405 goto invalid;
6406 }
6407 return packFloat128( zSign, 0x7FFF, 0, 0 );
6408 }
6409 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6410 if (bSig0 | bSig1) {
6411 return propagateFloat128NaN(a, b, status);
6412 }
158142c2
FB
6413 return packFloat128( zSign, 0, 0, 0 );
6414 }
6415 if ( bExp == 0 ) {
6416 if ( ( bSig0 | bSig1 ) == 0 ) {
6417 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6418 invalid:
ff32e16e 6419 float_raise(float_flag_invalid, status);
af39bc8c 6420 return float128_default_nan(status);
158142c2 6421 }
ff32e16e 6422 float_raise(float_flag_divbyzero, status);
158142c2
FB
6423 return packFloat128( zSign, 0x7FFF, 0, 0 );
6424 }
6425 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6426 }
6427 if ( aExp == 0 ) {
6428 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6429 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6430 }
6431 zExp = aExp - bExp + 0x3FFD;
6432 shortShift128Left(
6433 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6434 shortShift128Left(
6435 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6436 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6437 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6438 ++zExp;
6439 }
6440 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6441 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6442 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 6443 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6444 --zSig0;
6445 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6446 }
6447 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6448 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6449 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6450 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6451 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6452 --zSig1;
6453 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6454 }
6455 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6456 }
6457 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 6458 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6459
6460}
6461
6462/*----------------------------------------------------------------------------
6463| Returns the remainder of the quadruple-precision floating-point value `a'
6464| with respect to the corresponding value `b'. The operation is performed
6465| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6466*----------------------------------------------------------------------------*/
6467
e5a41ffa 6468float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 6469{
ed086f3d 6470 flag aSign, zSign;
f4014512 6471 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6472 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6473 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6474 int64_t sigMean0;
158142c2
FB
6475
6476 aSig1 = extractFloat128Frac1( a );
6477 aSig0 = extractFloat128Frac0( a );
6478 aExp = extractFloat128Exp( a );
6479 aSign = extractFloat128Sign( a );
6480 bSig1 = extractFloat128Frac1( b );
6481 bSig0 = extractFloat128Frac0( b );
6482 bExp = extractFloat128Exp( b );
158142c2
FB
6483 if ( aExp == 0x7FFF ) {
6484 if ( ( aSig0 | aSig1 )
6485 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6486 return propagateFloat128NaN(a, b, status);
158142c2
FB
6487 }
6488 goto invalid;
6489 }
6490 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6491 if (bSig0 | bSig1) {
6492 return propagateFloat128NaN(a, b, status);
6493 }
158142c2
FB
6494 return a;
6495 }
6496 if ( bExp == 0 ) {
6497 if ( ( bSig0 | bSig1 ) == 0 ) {
6498 invalid:
ff32e16e 6499 float_raise(float_flag_invalid, status);
af39bc8c 6500 return float128_default_nan(status);
158142c2
FB
6501 }
6502 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6503 }
6504 if ( aExp == 0 ) {
6505 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6506 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6507 }
6508 expDiff = aExp - bExp;
6509 if ( expDiff < -1 ) return a;
6510 shortShift128Left(
6511 aSig0 | LIT64( 0x0001000000000000 ),
6512 aSig1,
6513 15 - ( expDiff < 0 ),
6514 &aSig0,
6515 &aSig1
6516 );
6517 shortShift128Left(
6518 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6519 q = le128( bSig0, bSig1, aSig0, aSig1 );
6520 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6521 expDiff -= 64;
6522 while ( 0 < expDiff ) {
6523 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6524 q = ( 4 < q ) ? q - 4 : 0;
6525 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6526 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6527 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6528 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6529 expDiff -= 61;
6530 }
6531 if ( -64 < expDiff ) {
6532 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6533 q = ( 4 < q ) ? q - 4 : 0;
6534 q >>= - expDiff;
6535 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6536 expDiff += 52;
6537 if ( expDiff < 0 ) {
6538 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6539 }
6540 else {
6541 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6542 }
6543 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6544 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6545 }
6546 else {
6547 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6548 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6549 }
6550 do {
6551 alternateASig0 = aSig0;
6552 alternateASig1 = aSig1;
6553 ++q;
6554 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 6555 } while ( 0 <= (int64_t) aSig0 );
158142c2 6556 add128(
bb98fe42 6557 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
6558 if ( ( sigMean0 < 0 )
6559 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6560 aSig0 = alternateASig0;
6561 aSig1 = alternateASig1;
6562 }
bb98fe42 6563 zSign = ( (int64_t) aSig0 < 0 );
158142c2 6564 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
6565 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6566 status);
158142c2
FB
6567}
6568
6569/*----------------------------------------------------------------------------
6570| Returns the square root of the quadruple-precision floating-point value `a'.
6571| The operation is performed according to the IEC/IEEE Standard for Binary
6572| Floating-Point Arithmetic.
6573*----------------------------------------------------------------------------*/
6574
e5a41ffa 6575float128 float128_sqrt(float128 a, float_status *status)
158142c2
FB
6576{
6577 flag aSign;
f4014512 6578 int32_t aExp, zExp;
bb98fe42
AF
6579 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6580 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6581
6582 aSig1 = extractFloat128Frac1( a );
6583 aSig0 = extractFloat128Frac0( a );
6584 aExp = extractFloat128Exp( a );
6585 aSign = extractFloat128Sign( a );
6586 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6587 if (aSig0 | aSig1) {
6588 return propagateFloat128NaN(a, a, status);
6589 }
158142c2
FB
6590 if ( ! aSign ) return a;
6591 goto invalid;
6592 }
6593 if ( aSign ) {
6594 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6595 invalid:
ff32e16e 6596 float_raise(float_flag_invalid, status);
af39bc8c 6597 return float128_default_nan(status);
158142c2
FB
6598 }
6599 if ( aExp == 0 ) {
6600 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6601 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6602 }
6603 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6604 aSig0 |= LIT64( 0x0001000000000000 );
6605 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6606 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6607 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6608 doubleZSig0 = zSig0<<1;
6609 mul64To128( zSig0, zSig0, &term0, &term1 );
6610 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6611 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6612 --zSig0;
6613 doubleZSig0 -= 2;
6614 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6615 }
6616 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6617 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6618 if ( zSig1 == 0 ) zSig1 = 1;
6619 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6620 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6621 mul64To128( zSig1, zSig1, &term2, &term3 );
6622 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6623 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6624 --zSig1;
6625 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6626 term3 |= 1;
6627 term2 |= doubleZSig0;
6628 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6629 }
6630 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6631 }
6632 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 6633 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6634
6635}
6636
6637/*----------------------------------------------------------------------------
6638| Returns 1 if the quadruple-precision floating-point value `a' is equal to
b689362d
AJ
6639| the corresponding value `b', and 0 otherwise. The invalid exception is
6640| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
6641| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6642*----------------------------------------------------------------------------*/
6643
e5a41ffa 6644int float128_eq(float128 a, float128 b, float_status *status)
158142c2
FB
6645{
6646
6647 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6648 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6649 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6650 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6651 ) {
ff32e16e 6652 float_raise(float_flag_invalid, status);
158142c2
FB
6653 return 0;
6654 }
6655 return
6656 ( a.low == b.low )
6657 && ( ( a.high == b.high )
6658 || ( ( a.low == 0 )
bb98fe42 6659 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6660 );
6661
6662}
6663
6664/*----------------------------------------------------------------------------
6665| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6666| or equal to the corresponding value `b', and 0 otherwise. The invalid
6667| exception is raised if either operand is a NaN. The comparison is performed
6668| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6669*----------------------------------------------------------------------------*/
6670
e5a41ffa 6671int float128_le(float128 a, float128 b, float_status *status)
158142c2
FB
6672{
6673 flag aSign, bSign;
6674
6675 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6676 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6677 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6678 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6679 ) {
ff32e16e 6680 float_raise(float_flag_invalid, status);
158142c2
FB
6681 return 0;
6682 }
6683 aSign = extractFloat128Sign( a );
6684 bSign = extractFloat128Sign( b );
6685 if ( aSign != bSign ) {
6686 return
6687 aSign
bb98fe42 6688 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6689 == 0 );
6690 }
6691 return
6692 aSign ? le128( b.high, b.low, a.high, a.low )
6693 : le128( a.high, a.low, b.high, b.low );
6694
6695}
6696
6697/*----------------------------------------------------------------------------
6698| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6699| the corresponding value `b', and 0 otherwise. The invalid exception is
6700| raised if either operand is a NaN. The comparison is performed according
6701| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6702*----------------------------------------------------------------------------*/
6703
e5a41ffa 6704int float128_lt(float128 a, float128 b, float_status *status)
158142c2
FB
6705{
6706 flag aSign, bSign;
6707
6708 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6709 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6710 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6711 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6712 ) {
ff32e16e 6713 float_raise(float_flag_invalid, status);
158142c2
FB
6714 return 0;
6715 }
6716 aSign = extractFloat128Sign( a );
6717 bSign = extractFloat128Sign( b );
6718 if ( aSign != bSign ) {
6719 return
6720 aSign
bb98fe42 6721 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6722 != 0 );
6723 }
6724 return
6725 aSign ? lt128( b.high, b.low, a.high, a.low )
6726 : lt128( a.high, a.low, b.high, b.low );
6727
6728}
6729
67b7861d
AJ
6730/*----------------------------------------------------------------------------
6731| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
f5a64251
AJ
6732| be compared, and 0 otherwise. The invalid exception is raised if either
6733| operand is a NaN. The comparison is performed according to the IEC/IEEE
6734| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
6735*----------------------------------------------------------------------------*/
6736
e5a41ffa 6737int float128_unordered(float128 a, float128 b, float_status *status)
67b7861d
AJ
6738{
6739 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6740 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6741 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6742 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6743 ) {
ff32e16e 6744 float_raise(float_flag_invalid, status);
67b7861d
AJ
6745 return 1;
6746 }
6747 return 0;
6748}
6749
158142c2
FB
6750/*----------------------------------------------------------------------------
6751| Returns 1 if the quadruple-precision floating-point value `a' is equal to
f5a64251
AJ
6752| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6753| exception. The comparison is performed according to the IEC/IEEE Standard
6754| for Binary Floating-Point Arithmetic.
158142c2
FB
6755*----------------------------------------------------------------------------*/
6756
e5a41ffa 6757int float128_eq_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6758{
6759
6760 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6761 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6762 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6763 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6764 ) {
af39bc8c
AM
6765 if (float128_is_signaling_nan(a, status)
6766 || float128_is_signaling_nan(b, status)) {
ff32e16e 6767 float_raise(float_flag_invalid, status);
b689362d 6768 }
158142c2
FB
6769 return 0;
6770 }
6771 return
6772 ( a.low == b.low )
6773 && ( ( a.high == b.high )
6774 || ( ( a.low == 0 )
bb98fe42 6775 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6776 );
6777
6778}
6779
6780/*----------------------------------------------------------------------------
6781| Returns 1 if the quadruple-precision floating-point value `a' is less than
6782| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6783| cause an exception. Otherwise, the comparison is performed according to the
6784| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6785*----------------------------------------------------------------------------*/
6786
e5a41ffa 6787int float128_le_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6788{
6789 flag aSign, bSign;
6790
6791 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6792 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6793 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6794 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6795 ) {
af39bc8c
AM
6796 if (float128_is_signaling_nan(a, status)
6797 || float128_is_signaling_nan(b, status)) {
ff32e16e 6798 float_raise(float_flag_invalid, status);
158142c2
FB
6799 }
6800 return 0;
6801 }
6802 aSign = extractFloat128Sign( a );
6803 bSign = extractFloat128Sign( b );
6804 if ( aSign != bSign ) {
6805 return
6806 aSign
bb98fe42 6807 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6808 == 0 );
6809 }
6810 return
6811 aSign ? le128( b.high, b.low, a.high, a.low )
6812 : le128( a.high, a.low, b.high, b.low );
6813
6814}
6815
6816/*----------------------------------------------------------------------------
6817| Returns 1 if the quadruple-precision floating-point value `a' is less than
6818| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6819| exception. Otherwise, the comparison is performed according to the IEC/IEEE
6820| Standard for Binary Floating-Point Arithmetic.
6821*----------------------------------------------------------------------------*/
6822
e5a41ffa 6823int float128_lt_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6824{
6825 flag aSign, bSign;
6826
6827 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6828 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6829 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6830 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6831 ) {
af39bc8c
AM
6832 if (float128_is_signaling_nan(a, status)
6833 || float128_is_signaling_nan(b, status)) {
ff32e16e 6834 float_raise(float_flag_invalid, status);
158142c2
FB
6835 }
6836 return 0;
6837 }
6838 aSign = extractFloat128Sign( a );
6839 bSign = extractFloat128Sign( b );
6840 if ( aSign != bSign ) {
6841 return
6842 aSign
bb98fe42 6843 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6844 != 0 );
6845 }
6846 return
6847 aSign ? lt128( b.high, b.low, a.high, a.low )
6848 : lt128( a.high, a.low, b.high, b.low );
6849
6850}
6851
67b7861d
AJ
6852/*----------------------------------------------------------------------------
6853| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
6854| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
6855| comparison is performed according to the IEC/IEEE Standard for Binary
6856| Floating-Point Arithmetic.
6857*----------------------------------------------------------------------------*/
6858
e5a41ffa 6859int float128_unordered_quiet(float128 a, float128 b, float_status *status)
67b7861d
AJ
6860{
6861 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6862 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6863 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6864 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6865 ) {
af39bc8c
AM
6866 if (float128_is_signaling_nan(a, status)
6867 || float128_is_signaling_nan(b, status)) {
ff32e16e 6868 float_raise(float_flag_invalid, status);
67b7861d
AJ
6869 }
6870 return 1;
6871 }
6872 return 0;
6873}
6874
e5a41ffa
PM
6875static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
6876 int is_quiet, float_status *status)
f6714d36
AJ
6877{
6878 flag aSign, bSign;
6879
d1eb8f2a
AD
6880 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6881 float_raise(float_flag_invalid, status);
6882 return float_relation_unordered;
6883 }
f6714d36
AJ
6884 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6885 ( extractFloatx80Frac( a )<<1 ) ) ||
6886 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6887 ( extractFloatx80Frac( b )<<1 ) )) {
6888 if (!is_quiet ||
af39bc8c
AM
6889 floatx80_is_signaling_nan(a, status) ||
6890 floatx80_is_signaling_nan(b, status)) {
ff32e16e 6891 float_raise(float_flag_invalid, status);
f6714d36
AJ
6892 }
6893 return float_relation_unordered;
6894 }
6895 aSign = extractFloatx80Sign( a );
6896 bSign = extractFloatx80Sign( b );
6897 if ( aSign != bSign ) {
6898
6899 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6900 ( ( a.low | b.low ) == 0 ) ) {
6901 /* zero case */
6902 return float_relation_equal;
6903 } else {
6904 return 1 - (2 * aSign);
6905 }
6906 } else {
6907 if (a.low == b.low && a.high == b.high) {
6908 return float_relation_equal;
6909 } else {
6910 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6911 }
6912 }
6913}
6914
e5a41ffa 6915int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 6916{
ff32e16e 6917 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
6918}
6919
e5a41ffa 6920int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
f6714d36 6921{
ff32e16e 6922 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
6923}
6924
e5a41ffa
PM
6925static inline int float128_compare_internal(float128 a, float128 b,
6926 int is_quiet, float_status *status)
1f587329
BS
6927{
6928 flag aSign, bSign;
6929
6930 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6931 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
6932 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
6933 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
6934 if (!is_quiet ||
af39bc8c
AM
6935 float128_is_signaling_nan(a, status) ||
6936 float128_is_signaling_nan(b, status)) {
ff32e16e 6937 float_raise(float_flag_invalid, status);
1f587329
BS
6938 }
6939 return float_relation_unordered;
6940 }
6941 aSign = extractFloat128Sign( a );
6942 bSign = extractFloat128Sign( b );
6943 if ( aSign != bSign ) {
6944 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
6945 /* zero case */
6946 return float_relation_equal;
6947 } else {
6948 return 1 - (2 * aSign);
6949 }
6950 } else {
6951 if (a.low == b.low && a.high == b.high) {
6952 return float_relation_equal;
6953 } else {
6954 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6955 }
6956 }
6957}
6958
e5a41ffa 6959int float128_compare(float128 a, float128 b, float_status *status)
1f587329 6960{
ff32e16e 6961 return float128_compare_internal(a, b, 0, status);
1f587329
BS
6962}
6963
e5a41ffa 6964int float128_compare_quiet(float128 a, float128 b, float_status *status)
1f587329 6965{
ff32e16e 6966 return float128_compare_internal(a, b, 1, status);
1f587329
BS
6967}
6968
e5a41ffa 6969floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb
PB
6970{
6971 flag aSign;
326b9e98 6972 int32_t aExp;
bb98fe42 6973 uint64_t aSig;
9ee6e8bb 6974
d1eb8f2a
AD
6975 if (floatx80_invalid_encoding(a)) {
6976 float_raise(float_flag_invalid, status);
6977 return floatx80_default_nan(status);
6978 }
9ee6e8bb
PB
6979 aSig = extractFloatx80Frac( a );
6980 aExp = extractFloatx80Exp( a );
6981 aSign = extractFloatx80Sign( a );
6982
326b9e98
AJ
6983 if ( aExp == 0x7FFF ) {
6984 if ( aSig<<1 ) {
ff32e16e 6985 return propagateFloatx80NaN(a, a, status);
326b9e98 6986 }
9ee6e8bb
PB
6987 return a;
6988 }
326b9e98 6989
3c85c37f
PM
6990 if (aExp == 0) {
6991 if (aSig == 0) {
6992 return a;
6993 }
6994 aExp++;
6995 }
69397542 6996
326b9e98
AJ
6997 if (n > 0x10000) {
6998 n = 0x10000;
6999 } else if (n < -0x10000) {
7000 n = -0x10000;
7001 }
7002
9ee6e8bb 7003 aExp += n;
a2f2d288
PM
7004 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7005 aSign, aExp, aSig, 0, status);
9ee6e8bb 7006}
9ee6e8bb 7007
e5a41ffa 7008float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb
PB
7009{
7010 flag aSign;
326b9e98 7011 int32_t aExp;
bb98fe42 7012 uint64_t aSig0, aSig1;
9ee6e8bb
PB
7013
7014 aSig1 = extractFloat128Frac1( a );
7015 aSig0 = extractFloat128Frac0( a );
7016 aExp = extractFloat128Exp( a );
7017 aSign = extractFloat128Sign( a );
7018 if ( aExp == 0x7FFF ) {
326b9e98 7019 if ( aSig0 | aSig1 ) {
ff32e16e 7020 return propagateFloat128NaN(a, a, status);
326b9e98 7021 }
9ee6e8bb
PB
7022 return a;
7023 }
3c85c37f 7024 if (aExp != 0) {
69397542 7025 aSig0 |= LIT64( 0x0001000000000000 );
3c85c37f 7026 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 7027 return a;
3c85c37f
PM
7028 } else {
7029 aExp++;
7030 }
69397542 7031
326b9e98
AJ
7032 if (n > 0x10000) {
7033 n = 0x10000;
7034 } else if (n < -0x10000) {
7035 n = -0x10000;
7036 }
7037
69397542
PB
7038 aExp += n - 1;
7039 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 7040 , status);
9ee6e8bb
PB
7041
7042}