]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
Makefile: install gtk message catalogs if CONFIG_GTK=y too, not only =m
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
6fff2167 86#include "qemu/bitops.h"
6b4c305c 87#include "fpu/softfloat.h"
158142c2 88
dc355b76 89/* We only need stdlib for abort() */
dc355b76 90
158142c2
FB
91/*----------------------------------------------------------------------------
92| Primitive arithmetic functions, including multi-word arithmetic, and
93| division and square root approximations. (Can be specialized to target if
94| desired.)
95*----------------------------------------------------------------------------*/
88857aca 96#include "fpu/softfloat-macros.h"
158142c2
FB
97
98/*----------------------------------------------------------------------------
99| Functions and definitions to determine: (1) whether tininess for underflow
100| is detected before or after rounding by default, (2) what (if anything)
101| happens when exceptions are raised, (3) how signaling NaNs are distinguished
102| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
103| are propagated from function inputs to output. These details are target-
104| specific.
105*----------------------------------------------------------------------------*/
106#include "softfloat-specialize.h"
107
bb4d4bb3
PM
108/*----------------------------------------------------------------------------
109| Returns the fraction bits of the half-precision floating-point value `a'.
110*----------------------------------------------------------------------------*/
111
a49db98d 112static inline uint32_t extractFloat16Frac(float16 a)
bb4d4bb3
PM
113{
114 return float16_val(a) & 0x3ff;
115}
116
117/*----------------------------------------------------------------------------
118| Returns the exponent bits of the half-precision floating-point value `a'.
119*----------------------------------------------------------------------------*/
120
0c48262d 121static inline int extractFloat16Exp(float16 a)
bb4d4bb3
PM
122{
123 return (float16_val(a) >> 10) & 0x1f;
124}
125
126/*----------------------------------------------------------------------------
127| Returns the sign bit of the single-precision floating-point value `a'.
128*----------------------------------------------------------------------------*/
129
a49db98d 130static inline flag extractFloat16Sign(float16 a)
bb4d4bb3
PM
131{
132 return float16_val(a)>>15;
133}
134
d97544c9
AB
135/*----------------------------------------------------------------------------
136| Returns the fraction bits of the single-precision floating-point value `a'.
137*----------------------------------------------------------------------------*/
138
139static inline uint32_t extractFloat32Frac(float32 a)
140{
141 return float32_val(a) & 0x007FFFFF;
142}
143
144/*----------------------------------------------------------------------------
145| Returns the exponent bits of the single-precision floating-point value `a'.
146*----------------------------------------------------------------------------*/
147
148static inline int extractFloat32Exp(float32 a)
149{
150 return (float32_val(a) >> 23) & 0xFF;
151}
152
153/*----------------------------------------------------------------------------
154| Returns the sign bit of the single-precision floating-point value `a'.
155*----------------------------------------------------------------------------*/
156
157static inline flag extractFloat32Sign(float32 a)
158{
159 return float32_val(a) >> 31;
160}
161
162/*----------------------------------------------------------------------------
163| Returns the fraction bits of the double-precision floating-point value `a'.
164*----------------------------------------------------------------------------*/
165
166static inline uint64_t extractFloat64Frac(float64 a)
167{
168 return float64_val(a) & LIT64(0x000FFFFFFFFFFFFF);
169}
170
171/*----------------------------------------------------------------------------
172| Returns the exponent bits of the double-precision floating-point value `a'.
173*----------------------------------------------------------------------------*/
174
175static inline int extractFloat64Exp(float64 a)
176{
177 return (float64_val(a) >> 52) & 0x7FF;
178}
179
180/*----------------------------------------------------------------------------
181| Returns the sign bit of the double-precision floating-point value `a'.
182*----------------------------------------------------------------------------*/
183
184static inline flag extractFloat64Sign(float64 a)
185{
186 return float64_val(a) >> 63;
187}
188
a90119b5
AB
189/*
190 * Classify a floating point number. Everything above float_class_qnan
191 * is a NaN so cls >= float_class_qnan is any NaN.
192 */
193
194typedef enum __attribute__ ((__packed__)) {
195 float_class_unclassified,
196 float_class_zero,
197 float_class_normal,
198 float_class_inf,
199 float_class_qnan, /* all NaNs from here */
200 float_class_snan,
201 float_class_dnan,
202 float_class_msnan, /* maybe silenced */
203} FloatClass;
204
205/*
206 * Structure holding all of the decomposed parts of a float. The
207 * exponent is unbiased and the fraction is normalized. All
208 * calculations are done with a 64 bit fraction and then rounded as
209 * appropriate for the final format.
210 *
211 * Thanks to the packed FloatClass a decent compiler should be able to
212 * fit the whole structure into registers and avoid using the stack
213 * for parameter passing.
214 */
215
216typedef struct {
217 uint64_t frac;
218 int32_t exp;
219 FloatClass cls;
220 bool sign;
221} FloatParts;
222
223#define DECOMPOSED_BINARY_POINT (64 - 2)
224#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
225#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1)
226
227/* Structure holding all of the relevant parameters for a format.
228 * exp_size: the size of the exponent field
229 * exp_bias: the offset applied to the exponent field
230 * exp_max: the maximum normalised exponent
231 * frac_size: the size of the fraction field
232 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
233 * The following are computed based the size of fraction
234 * frac_lsb: least significant bit of fraction
235 * fram_lsbm1: the bit bellow the least significant bit (for rounding)
236 * round_mask/roundeven_mask: masks used for rounding
237 */
238typedef struct {
239 int exp_size;
240 int exp_bias;
241 int exp_max;
242 int frac_size;
243 int frac_shift;
244 uint64_t frac_lsb;
245 uint64_t frac_lsbm1;
246 uint64_t round_mask;
247 uint64_t roundeven_mask;
248} FloatFmt;
249
250/* Expand fields based on the size of exponent and fraction */
251#define FLOAT_PARAMS(E, F) \
252 .exp_size = E, \
253 .exp_bias = ((1 << E) - 1) >> 1, \
254 .exp_max = (1 << E) - 1, \
255 .frac_size = F, \
256 .frac_shift = DECOMPOSED_BINARY_POINT - F, \
257 .frac_lsb = 1ull << (DECOMPOSED_BINARY_POINT - F), \
258 .frac_lsbm1 = 1ull << ((DECOMPOSED_BINARY_POINT - F) - 1), \
259 .round_mask = (1ull << (DECOMPOSED_BINARY_POINT - F)) - 1, \
260 .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - F)) - 1
261
262static const FloatFmt float16_params = {
263 FLOAT_PARAMS(5, 10)
264};
265
266static const FloatFmt float32_params = {
267 FLOAT_PARAMS(8, 23)
268};
269
270static const FloatFmt float64_params = {
271 FLOAT_PARAMS(11, 52)
272};
273
6fff2167
AB
274/* Unpack a float to parts, but do not canonicalize. */
275static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
276{
277 const int sign_pos = fmt.frac_size + fmt.exp_size;
278
279 return (FloatParts) {
280 .cls = float_class_unclassified,
281 .sign = extract64(raw, sign_pos, 1),
282 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
283 .frac = extract64(raw, 0, fmt.frac_size),
284 };
285}
286
287static inline FloatParts float16_unpack_raw(float16 f)
288{
289 return unpack_raw(float16_params, f);
290}
291
292static inline FloatParts float32_unpack_raw(float32 f)
293{
294 return unpack_raw(float32_params, f);
295}
296
297static inline FloatParts float64_unpack_raw(float64 f)
298{
299 return unpack_raw(float64_params, f);
300}
301
302/* Pack a float from parts, but do not canonicalize. */
303static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
304{
305 const int sign_pos = fmt.frac_size + fmt.exp_size;
306 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
307 return deposit64(ret, sign_pos, 1, p.sign);
308}
309
310static inline float16 float16_pack_raw(FloatParts p)
311{
312 return make_float16(pack_raw(float16_params, p));
313}
314
315static inline float32 float32_pack_raw(FloatParts p)
316{
317 return make_float32(pack_raw(float32_params, p));
318}
319
320static inline float64 float64_pack_raw(FloatParts p)
321{
322 return make_float64(pack_raw(float64_params, p));
323}
324
325/* Canonicalize EXP and FRAC, setting CLS. */
326static FloatParts canonicalize(FloatParts part, const FloatFmt *parm,
327 float_status *status)
328{
329 if (part.exp == parm->exp_max) {
330 if (part.frac == 0) {
331 part.cls = float_class_inf;
332 } else {
333#ifdef NO_SIGNALING_NANS
334 part.cls = float_class_qnan;
335#else
336 int64_t msb = part.frac << (parm->frac_shift + 2);
337 if ((msb < 0) == status->snan_bit_is_one) {
338 part.cls = float_class_snan;
339 } else {
340 part.cls = float_class_qnan;
341 }
342#endif
343 }
344 } else if (part.exp == 0) {
345 if (likely(part.frac == 0)) {
346 part.cls = float_class_zero;
347 } else if (status->flush_inputs_to_zero) {
348 float_raise(float_flag_input_denormal, status);
349 part.cls = float_class_zero;
350 part.frac = 0;
351 } else {
352 int shift = clz64(part.frac) - 1;
353 part.cls = float_class_normal;
354 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
355 part.frac <<= shift;
356 }
357 } else {
358 part.cls = float_class_normal;
359 part.exp -= parm->exp_bias;
360 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
361 }
362 return part;
363}
364
365/* Round and uncanonicalize a floating-point number by parts. There
366 * are FRAC_SHIFT bits that may require rounding at the bottom of the
367 * fraction; these bits will be removed. The exponent will be biased
368 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
369 */
370
371static FloatParts round_canonical(FloatParts p, float_status *s,
372 const FloatFmt *parm)
373{
374 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
375 const uint64_t round_mask = parm->round_mask;
376 const uint64_t roundeven_mask = parm->roundeven_mask;
377 const int exp_max = parm->exp_max;
378 const int frac_shift = parm->frac_shift;
379 uint64_t frac, inc;
380 int exp, flags = 0;
381 bool overflow_norm;
382
383 frac = p.frac;
384 exp = p.exp;
385
386 switch (p.cls) {
387 case float_class_normal:
388 switch (s->float_rounding_mode) {
389 case float_round_nearest_even:
390 overflow_norm = false;
391 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
392 break;
393 case float_round_ties_away:
394 overflow_norm = false;
395 inc = frac_lsbm1;
396 break;
397 case float_round_to_zero:
398 overflow_norm = true;
399 inc = 0;
400 break;
401 case float_round_up:
402 inc = p.sign ? 0 : round_mask;
403 overflow_norm = p.sign;
404 break;
405 case float_round_down:
406 inc = p.sign ? round_mask : 0;
407 overflow_norm = !p.sign;
408 break;
409 default:
410 g_assert_not_reached();
411 }
412
413 exp += parm->exp_bias;
414 if (likely(exp > 0)) {
415 if (frac & round_mask) {
416 flags |= float_flag_inexact;
417 frac += inc;
418 if (frac & DECOMPOSED_OVERFLOW_BIT) {
419 frac >>= 1;
420 exp++;
421 }
422 }
423 frac >>= frac_shift;
424
425 if (unlikely(exp >= exp_max)) {
426 flags |= float_flag_overflow | float_flag_inexact;
427 if (overflow_norm) {
428 exp = exp_max - 1;
429 frac = -1;
430 } else {
431 p.cls = float_class_inf;
432 goto do_inf;
433 }
434 }
435 } else if (s->flush_to_zero) {
436 flags |= float_flag_output_denormal;
437 p.cls = float_class_zero;
438 goto do_zero;
439 } else {
440 bool is_tiny = (s->float_detect_tininess
441 == float_tininess_before_rounding)
442 || (exp < 0)
443 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
444
445 shift64RightJamming(frac, 1 - exp, &frac);
446 if (frac & round_mask) {
447 /* Need to recompute round-to-even. */
448 if (s->float_rounding_mode == float_round_nearest_even) {
449 inc = ((frac & roundeven_mask) != frac_lsbm1
450 ? frac_lsbm1 : 0);
451 }
452 flags |= float_flag_inexact;
453 frac += inc;
454 }
455
456 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
457 frac >>= frac_shift;
458
459 if (is_tiny && (flags & float_flag_inexact)) {
460 flags |= float_flag_underflow;
461 }
462 if (exp == 0 && frac == 0) {
463 p.cls = float_class_zero;
464 }
465 }
466 break;
467
468 case float_class_zero:
469 do_zero:
470 exp = 0;
471 frac = 0;
472 break;
473
474 case float_class_inf:
475 do_inf:
476 exp = exp_max;
477 frac = 0;
478 break;
479
480 case float_class_qnan:
481 case float_class_snan:
482 exp = exp_max;
483 break;
484
485 default:
486 g_assert_not_reached();
487 }
488
489 float_raise(flags, s);
490 p.exp = exp;
491 p.frac = frac;
492 return p;
493}
494
495static FloatParts float16_unpack_canonical(float16 f, float_status *s)
496{
497 return canonicalize(float16_unpack_raw(f), &float16_params, s);
498}
499
500static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
501{
502 switch (p.cls) {
503 case float_class_dnan:
504 return float16_default_nan(s);
505 case float_class_msnan:
506 return float16_maybe_silence_nan(float16_pack_raw(p), s);
507 default:
508 p = round_canonical(p, s, &float16_params);
509 return float16_pack_raw(p);
510 }
511}
512
513static FloatParts float32_unpack_canonical(float32 f, float_status *s)
514{
515 return canonicalize(float32_unpack_raw(f), &float32_params, s);
516}
517
518static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
519{
520 switch (p.cls) {
521 case float_class_dnan:
522 return float32_default_nan(s);
523 case float_class_msnan:
524 return float32_maybe_silence_nan(float32_pack_raw(p), s);
525 default:
526 p = round_canonical(p, s, &float32_params);
527 return float32_pack_raw(p);
528 }
529}
530
531static FloatParts float64_unpack_canonical(float64 f, float_status *s)
532{
533 return canonicalize(float64_unpack_raw(f), &float64_params, s);
534}
535
536static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
537{
538 switch (p.cls) {
539 case float_class_dnan:
540 return float64_default_nan(s);
541 case float_class_msnan:
542 return float64_maybe_silence_nan(float64_pack_raw(p), s);
543 default:
544 p = round_canonical(p, s, &float64_params);
545 return float64_pack_raw(p);
546 }
547}
548
549/* Simple helpers for checking if what NaN we have */
550static bool is_nan(FloatClass c)
551{
552 return unlikely(c >= float_class_qnan);
553}
554static bool is_snan(FloatClass c)
555{
556 return c == float_class_snan;
557}
558static bool is_qnan(FloatClass c)
559{
560 return c == float_class_qnan;
561}
562
dbe4d53a
AB
563static FloatParts return_nan(FloatParts a, float_status *s)
564{
565 switch (a.cls) {
566 case float_class_snan:
567 s->float_exception_flags |= float_flag_invalid;
568 a.cls = float_class_msnan;
569 /* fall through */
570 case float_class_qnan:
571 if (s->default_nan_mode) {
572 a.cls = float_class_dnan;
573 }
574 break;
575
576 default:
577 g_assert_not_reached();
578 }
579 return a;
580}
581
6fff2167
AB
582static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
583{
584 if (is_snan(a.cls) || is_snan(b.cls)) {
585 s->float_exception_flags |= float_flag_invalid;
586 }
587
588 if (s->default_nan_mode) {
589 a.cls = float_class_dnan;
590 } else {
591 if (pickNaN(is_qnan(a.cls), is_snan(a.cls),
592 is_qnan(b.cls), is_snan(b.cls),
593 a.frac > b.frac ||
594 (a.frac == b.frac && a.sign < b.sign))) {
595 a = b;
596 }
597 a.cls = float_class_msnan;
598 }
599 return a;
600}
601
d446830a
AB
602static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
603 bool inf_zero, float_status *s)
604{
605 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
606 s->float_exception_flags |= float_flag_invalid;
607 }
608
609 if (s->default_nan_mode) {
610 a.cls = float_class_dnan;
611 } else {
612 switch (pickNaNMulAdd(is_qnan(a.cls), is_snan(a.cls),
613 is_qnan(b.cls), is_snan(b.cls),
614 is_qnan(c.cls), is_snan(c.cls),
615 inf_zero, s)) {
616 case 0:
617 break;
618 case 1:
619 a = b;
620 break;
621 case 2:
622 a = c;
623 break;
624 case 3:
625 a.cls = float_class_dnan;
626 return a;
627 default:
628 g_assert_not_reached();
629 }
630
631 a.cls = float_class_msnan;
632 }
633 return a;
634}
635
6fff2167
AB
636/*
637 * Returns the result of adding or subtracting the values of the
638 * floating-point values `a' and `b'. The operation is performed
639 * according to the IEC/IEEE Standard for Binary Floating-Point
640 * Arithmetic.
641 */
642
643static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
644 float_status *s)
645{
646 bool a_sign = a.sign;
647 bool b_sign = b.sign ^ subtract;
648
649 if (a_sign != b_sign) {
650 /* Subtraction */
651
652 if (a.cls == float_class_normal && b.cls == float_class_normal) {
653 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
654 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
655 a.frac = a.frac - b.frac;
656 } else {
657 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
658 a.frac = b.frac - a.frac;
659 a.exp = b.exp;
660 a_sign ^= 1;
661 }
662
663 if (a.frac == 0) {
664 a.cls = float_class_zero;
665 a.sign = s->float_rounding_mode == float_round_down;
666 } else {
667 int shift = clz64(a.frac) - 1;
668 a.frac = a.frac << shift;
669 a.exp = a.exp - shift;
670 a.sign = a_sign;
671 }
672 return a;
673 }
674 if (is_nan(a.cls) || is_nan(b.cls)) {
675 return pick_nan(a, b, s);
676 }
677 if (a.cls == float_class_inf) {
678 if (b.cls == float_class_inf) {
679 float_raise(float_flag_invalid, s);
680 a.cls = float_class_dnan;
681 }
682 return a;
683 }
684 if (a.cls == float_class_zero && b.cls == float_class_zero) {
685 a.sign = s->float_rounding_mode == float_round_down;
686 return a;
687 }
688 if (a.cls == float_class_zero || b.cls == float_class_inf) {
689 b.sign = a_sign ^ 1;
690 return b;
691 }
692 if (b.cls == float_class_zero) {
693 return a;
694 }
695 } else {
696 /* Addition */
697 if (a.cls == float_class_normal && b.cls == float_class_normal) {
698 if (a.exp > b.exp) {
699 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
700 } else if (a.exp < b.exp) {
701 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
702 a.exp = b.exp;
703 }
704 a.frac += b.frac;
705 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
706 a.frac >>= 1;
707 a.exp += 1;
708 }
709 return a;
710 }
711 if (is_nan(a.cls) || is_nan(b.cls)) {
712 return pick_nan(a, b, s);
713 }
714 if (a.cls == float_class_inf || b.cls == float_class_zero) {
715 return a;
716 }
717 if (b.cls == float_class_inf || a.cls == float_class_zero) {
718 b.sign = b_sign;
719 return b;
720 }
721 }
722 g_assert_not_reached();
723}
724
725/*
726 * Returns the result of adding or subtracting the floating-point
727 * values `a' and `b'. The operation is performed according to the
728 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
729 */
730
731float16 __attribute__((flatten)) float16_add(float16 a, float16 b,
732 float_status *status)
733{
734 FloatParts pa = float16_unpack_canonical(a, status);
735 FloatParts pb = float16_unpack_canonical(b, status);
736 FloatParts pr = addsub_floats(pa, pb, false, status);
737
738 return float16_round_pack_canonical(pr, status);
739}
740
741float32 __attribute__((flatten)) float32_add(float32 a, float32 b,
742 float_status *status)
743{
744 FloatParts pa = float32_unpack_canonical(a, status);
745 FloatParts pb = float32_unpack_canonical(b, status);
746 FloatParts pr = addsub_floats(pa, pb, false, status);
747
748 return float32_round_pack_canonical(pr, status);
749}
750
751float64 __attribute__((flatten)) float64_add(float64 a, float64 b,
752 float_status *status)
753{
754 FloatParts pa = float64_unpack_canonical(a, status);
755 FloatParts pb = float64_unpack_canonical(b, status);
756 FloatParts pr = addsub_floats(pa, pb, false, status);
757
758 return float64_round_pack_canonical(pr, status);
759}
760
761float16 __attribute__((flatten)) float16_sub(float16 a, float16 b,
762 float_status *status)
763{
764 FloatParts pa = float16_unpack_canonical(a, status);
765 FloatParts pb = float16_unpack_canonical(b, status);
766 FloatParts pr = addsub_floats(pa, pb, true, status);
767
768 return float16_round_pack_canonical(pr, status);
769}
770
771float32 __attribute__((flatten)) float32_sub(float32 a, float32 b,
772 float_status *status)
773{
774 FloatParts pa = float32_unpack_canonical(a, status);
775 FloatParts pb = float32_unpack_canonical(b, status);
776 FloatParts pr = addsub_floats(pa, pb, true, status);
777
778 return float32_round_pack_canonical(pr, status);
779}
780
781float64 __attribute__((flatten)) float64_sub(float64 a, float64 b,
782 float_status *status)
783{
784 FloatParts pa = float64_unpack_canonical(a, status);
785 FloatParts pb = float64_unpack_canonical(b, status);
786 FloatParts pr = addsub_floats(pa, pb, true, status);
787
788 return float64_round_pack_canonical(pr, status);
789}
790
74d707e2
AB
791/*
792 * Returns the result of multiplying the floating-point values `a' and
793 * `b'. The operation is performed according to the IEC/IEEE Standard
794 * for Binary Floating-Point Arithmetic.
795 */
796
797static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
798{
799 bool sign = a.sign ^ b.sign;
800
801 if (a.cls == float_class_normal && b.cls == float_class_normal) {
802 uint64_t hi, lo;
803 int exp = a.exp + b.exp;
804
805 mul64To128(a.frac, b.frac, &hi, &lo);
806 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
807 if (lo & DECOMPOSED_OVERFLOW_BIT) {
808 shift64RightJamming(lo, 1, &lo);
809 exp += 1;
810 }
811
812 /* Re-use a */
813 a.exp = exp;
814 a.sign = sign;
815 a.frac = lo;
816 return a;
817 }
818 /* handle all the NaN cases */
819 if (is_nan(a.cls) || is_nan(b.cls)) {
820 return pick_nan(a, b, s);
821 }
822 /* Inf * Zero == NaN */
823 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
824 (a.cls == float_class_zero && b.cls == float_class_inf)) {
825 s->float_exception_flags |= float_flag_invalid;
826 a.cls = float_class_dnan;
827 a.sign = sign;
828 return a;
829 }
830 /* Multiply by 0 or Inf */
831 if (a.cls == float_class_inf || a.cls == float_class_zero) {
832 a.sign = sign;
833 return a;
834 }
835 if (b.cls == float_class_inf || b.cls == float_class_zero) {
836 b.sign = sign;
837 return b;
838 }
839 g_assert_not_reached();
840}
841
842float16 __attribute__((flatten)) float16_mul(float16 a, float16 b,
843 float_status *status)
844{
845 FloatParts pa = float16_unpack_canonical(a, status);
846 FloatParts pb = float16_unpack_canonical(b, status);
847 FloatParts pr = mul_floats(pa, pb, status);
848
849 return float16_round_pack_canonical(pr, status);
850}
851
852float32 __attribute__((flatten)) float32_mul(float32 a, float32 b,
853 float_status *status)
854{
855 FloatParts pa = float32_unpack_canonical(a, status);
856 FloatParts pb = float32_unpack_canonical(b, status);
857 FloatParts pr = mul_floats(pa, pb, status);
858
859 return float32_round_pack_canonical(pr, status);
860}
861
862float64 __attribute__((flatten)) float64_mul(float64 a, float64 b,
863 float_status *status)
864{
865 FloatParts pa = float64_unpack_canonical(a, status);
866 FloatParts pb = float64_unpack_canonical(b, status);
867 FloatParts pr = mul_floats(pa, pb, status);
868
869 return float64_round_pack_canonical(pr, status);
870}
871
d446830a
AB
872/*
873 * Returns the result of multiplying the floating-point values `a' and
874 * `b' then adding 'c', with no intermediate rounding step after the
875 * multiplication. The operation is performed according to the
876 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
877 * The flags argument allows the caller to select negation of the
878 * addend, the intermediate product, or the final result. (The
879 * difference between this and having the caller do a separate
880 * negation is that negating externally will flip the sign bit on
881 * NaNs.)
882 */
883
884static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
885 int flags, float_status *s)
886{
887 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
888 ((1 << float_class_inf) | (1 << float_class_zero));
889 bool p_sign;
890 bool sign_flip = flags & float_muladd_negate_result;
891 FloatClass p_class;
892 uint64_t hi, lo;
893 int p_exp;
894
895 /* It is implementation-defined whether the cases of (0,inf,qnan)
896 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
897 * they return if they do), so we have to hand this information
898 * off to the target-specific pick-a-NaN routine.
899 */
900 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
901 return pick_nan_muladd(a, b, c, inf_zero, s);
902 }
903
904 if (inf_zero) {
905 s->float_exception_flags |= float_flag_invalid;
906 a.cls = float_class_dnan;
907 return a;
908 }
909
910 if (flags & float_muladd_negate_c) {
911 c.sign ^= 1;
912 }
913
914 p_sign = a.sign ^ b.sign;
915
916 if (flags & float_muladd_negate_product) {
917 p_sign ^= 1;
918 }
919
920 if (a.cls == float_class_inf || b.cls == float_class_inf) {
921 p_class = float_class_inf;
922 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
923 p_class = float_class_zero;
924 } else {
925 p_class = float_class_normal;
926 }
927
928 if (c.cls == float_class_inf) {
929 if (p_class == float_class_inf && p_sign != c.sign) {
930 s->float_exception_flags |= float_flag_invalid;
931 a.cls = float_class_dnan;
932 } else {
933 a.cls = float_class_inf;
934 a.sign = c.sign ^ sign_flip;
935 }
936 return a;
937 }
938
939 if (p_class == float_class_inf) {
940 a.cls = float_class_inf;
941 a.sign = p_sign ^ sign_flip;
942 return a;
943 }
944
945 if (p_class == float_class_zero) {
946 if (c.cls == float_class_zero) {
947 if (p_sign != c.sign) {
948 p_sign = s->float_rounding_mode == float_round_down;
949 }
950 c.sign = p_sign;
951 } else if (flags & float_muladd_halve_result) {
952 c.exp -= 1;
953 }
954 c.sign ^= sign_flip;
955 return c;
956 }
957
958 /* a & b should be normals now... */
959 assert(a.cls == float_class_normal &&
960 b.cls == float_class_normal);
961
962 p_exp = a.exp + b.exp;
963
964 /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
965 * result.
966 */
967 mul64To128(a.frac, b.frac, &hi, &lo);
968 /* binary point now at bit 124 */
969
970 /* check for overflow */
971 if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
972 shift128RightJamming(hi, lo, 1, &hi, &lo);
973 p_exp += 1;
974 }
975
976 /* + add/sub */
977 if (c.cls == float_class_zero) {
978 /* move binary point back to 62 */
979 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
980 } else {
981 int exp_diff = p_exp - c.exp;
982 if (p_sign == c.sign) {
983 /* Addition */
984 if (exp_diff <= 0) {
985 shift128RightJamming(hi, lo,
986 DECOMPOSED_BINARY_POINT - exp_diff,
987 &hi, &lo);
988 lo += c.frac;
989 p_exp = c.exp;
990 } else {
991 uint64_t c_hi, c_lo;
992 /* shift c to the same binary point as the product (124) */
993 c_hi = c.frac >> 2;
994 c_lo = 0;
995 shift128RightJamming(c_hi, c_lo,
996 exp_diff,
997 &c_hi, &c_lo);
998 add128(hi, lo, c_hi, c_lo, &hi, &lo);
999 /* move binary point back to 62 */
1000 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1001 }
1002
1003 if (lo & DECOMPOSED_OVERFLOW_BIT) {
1004 shift64RightJamming(lo, 1, &lo);
1005 p_exp += 1;
1006 }
1007
1008 } else {
1009 /* Subtraction */
1010 uint64_t c_hi, c_lo;
1011 /* make C binary point match product at bit 124 */
1012 c_hi = c.frac >> 2;
1013 c_lo = 0;
1014
1015 if (exp_diff <= 0) {
1016 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1017 if (exp_diff == 0
1018 &&
1019 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1020 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1021 } else {
1022 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1023 p_sign ^= 1;
1024 p_exp = c.exp;
1025 }
1026 } else {
1027 shift128RightJamming(c_hi, c_lo,
1028 exp_diff,
1029 &c_hi, &c_lo);
1030 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1031 }
1032
1033 if (hi == 0 && lo == 0) {
1034 a.cls = float_class_zero;
1035 a.sign = s->float_rounding_mode == float_round_down;
1036 a.sign ^= sign_flip;
1037 return a;
1038 } else {
1039 int shift;
1040 if (hi != 0) {
1041 shift = clz64(hi);
1042 } else {
1043 shift = clz64(lo) + 64;
1044 }
1045 /* Normalizing to a binary point of 124 is the
1046 correct adjust for the exponent. However since we're
1047 shifting, we might as well put the binary point back
1048 at 62 where we really want it. Therefore shift as
1049 if we're leaving 1 bit at the top of the word, but
1050 adjust the exponent as if we're leaving 3 bits. */
1051 shift -= 1;
1052 if (shift >= 64) {
1053 lo = lo << (shift - 64);
1054 } else {
1055 hi = (hi << shift) | (lo >> (64 - shift));
1056 lo = hi | ((lo << shift) != 0);
1057 }
1058 p_exp -= shift - 2;
1059 }
1060 }
1061 }
1062
1063 if (flags & float_muladd_halve_result) {
1064 p_exp -= 1;
1065 }
1066
1067 /* finally prepare our result */
1068 a.cls = float_class_normal;
1069 a.sign = p_sign ^ sign_flip;
1070 a.exp = p_exp;
1071 a.frac = lo;
1072
1073 return a;
1074}
1075
1076float16 __attribute__((flatten)) float16_muladd(float16 a, float16 b, float16 c,
1077 int flags, float_status *status)
1078{
1079 FloatParts pa = float16_unpack_canonical(a, status);
1080 FloatParts pb = float16_unpack_canonical(b, status);
1081 FloatParts pc = float16_unpack_canonical(c, status);
1082 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1083
1084 return float16_round_pack_canonical(pr, status);
1085}
1086
1087float32 __attribute__((flatten)) float32_muladd(float32 a, float32 b, float32 c,
1088 int flags, float_status *status)
1089{
1090 FloatParts pa = float32_unpack_canonical(a, status);
1091 FloatParts pb = float32_unpack_canonical(b, status);
1092 FloatParts pc = float32_unpack_canonical(c, status);
1093 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1094
1095 return float32_round_pack_canonical(pr, status);
1096}
1097
1098float64 __attribute__((flatten)) float64_muladd(float64 a, float64 b, float64 c,
1099 int flags, float_status *status)
1100{
1101 FloatParts pa = float64_unpack_canonical(a, status);
1102 FloatParts pb = float64_unpack_canonical(b, status);
1103 FloatParts pc = float64_unpack_canonical(c, status);
1104 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1105
1106 return float64_round_pack_canonical(pr, status);
1107}
1108
cf07323d
AB
1109/*
1110 * Returns the result of dividing the floating-point value `a' by the
1111 * corresponding value `b'. The operation is performed according to
1112 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1113 */
1114
1115static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1116{
1117 bool sign = a.sign ^ b.sign;
1118
1119 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1120 uint64_t temp_lo, temp_hi;
1121 int exp = a.exp - b.exp;
1122 if (a.frac < b.frac) {
1123 exp -= 1;
1124 shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1,
1125 &temp_hi, &temp_lo);
1126 } else {
1127 shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT,
1128 &temp_hi, &temp_lo);
1129 }
1130 /* LSB of quot is set if inexact which roundandpack will use
1131 * to set flags. Yet again we re-use a for the result */
1132 a.frac = div128To64(temp_lo, temp_hi, b.frac);
1133 a.sign = sign;
1134 a.exp = exp;
1135 return a;
1136 }
1137 /* handle all the NaN cases */
1138 if (is_nan(a.cls) || is_nan(b.cls)) {
1139 return pick_nan(a, b, s);
1140 }
1141 /* 0/0 or Inf/Inf */
1142 if (a.cls == b.cls
1143 &&
1144 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1145 s->float_exception_flags |= float_flag_invalid;
1146 a.cls = float_class_dnan;
1147 return a;
1148 }
1149 /* Div 0 => Inf */
1150 if (b.cls == float_class_zero) {
1151 s->float_exception_flags |= float_flag_divbyzero;
1152 a.cls = float_class_inf;
1153 a.sign = sign;
1154 return a;
1155 }
1156 /* Inf / x or 0 / x */
1157 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1158 a.sign = sign;
1159 return a;
1160 }
1161 /* Div by Inf */
1162 if (b.cls == float_class_inf) {
1163 a.cls = float_class_zero;
1164 a.sign = sign;
1165 return a;
1166 }
1167 g_assert_not_reached();
1168}
1169
1170float16 float16_div(float16 a, float16 b, float_status *status)
1171{
1172 FloatParts pa = float16_unpack_canonical(a, status);
1173 FloatParts pb = float16_unpack_canonical(b, status);
1174 FloatParts pr = div_floats(pa, pb, status);
1175
1176 return float16_round_pack_canonical(pr, status);
1177}
1178
1179float32 float32_div(float32 a, float32 b, float_status *status)
1180{
1181 FloatParts pa = float32_unpack_canonical(a, status);
1182 FloatParts pb = float32_unpack_canonical(b, status);
1183 FloatParts pr = div_floats(pa, pb, status);
1184
1185 return float32_round_pack_canonical(pr, status);
1186}
1187
1188float64 float64_div(float64 a, float64 b, float_status *status)
1189{
1190 FloatParts pa = float64_unpack_canonical(a, status);
1191 FloatParts pb = float64_unpack_canonical(b, status);
1192 FloatParts pr = div_floats(pa, pb, status);
1193
1194 return float64_round_pack_canonical(pr, status);
1195}
1196
dbe4d53a
AB
1197/*
1198 * Rounds the floating-point value `a' to an integer, and returns the
1199 * result as a floating-point value. The operation is performed
1200 * according to the IEC/IEEE Standard for Binary Floating-Point
1201 * Arithmetic.
1202 */
1203
1204static FloatParts round_to_int(FloatParts a, int rounding_mode, float_status *s)
1205{
1206 if (is_nan(a.cls)) {
1207 return return_nan(a, s);
1208 }
1209
1210 switch (a.cls) {
1211 case float_class_zero:
1212 case float_class_inf:
1213 case float_class_qnan:
1214 /* already "integral" */
1215 break;
1216 case float_class_normal:
1217 if (a.exp >= DECOMPOSED_BINARY_POINT) {
1218 /* already integral */
1219 break;
1220 }
1221 if (a.exp < 0) {
1222 bool one;
1223 /* all fractional */
1224 s->float_exception_flags |= float_flag_inexact;
1225 switch (rounding_mode) {
1226 case float_round_nearest_even:
1227 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
1228 break;
1229 case float_round_ties_away:
1230 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
1231 break;
1232 case float_round_to_zero:
1233 one = false;
1234 break;
1235 case float_round_up:
1236 one = !a.sign;
1237 break;
1238 case float_round_down:
1239 one = a.sign;
1240 break;
1241 default:
1242 g_assert_not_reached();
1243 }
1244
1245 if (one) {
1246 a.frac = DECOMPOSED_IMPLICIT_BIT;
1247 a.exp = 0;
1248 } else {
1249 a.cls = float_class_zero;
1250 }
1251 } else {
1252 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
1253 uint64_t frac_lsbm1 = frac_lsb >> 1;
1254 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
1255 uint64_t rnd_mask = rnd_even_mask >> 1;
1256 uint64_t inc;
1257
1258 switch (rounding_mode) {
1259 case float_round_nearest_even:
1260 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
1261 break;
1262 case float_round_ties_away:
1263 inc = frac_lsbm1;
1264 break;
1265 case float_round_to_zero:
1266 inc = 0;
1267 break;
1268 case float_round_up:
1269 inc = a.sign ? 0 : rnd_mask;
1270 break;
1271 case float_round_down:
1272 inc = a.sign ? rnd_mask : 0;
1273 break;
1274 default:
1275 g_assert_not_reached();
1276 }
1277
1278 if (a.frac & rnd_mask) {
1279 s->float_exception_flags |= float_flag_inexact;
1280 a.frac += inc;
1281 a.frac &= ~rnd_mask;
1282 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
1283 a.frac >>= 1;
1284 a.exp++;
1285 }
1286 }
1287 }
1288 break;
1289 default:
1290 g_assert_not_reached();
1291 }
1292 return a;
1293}
1294
1295float16 float16_round_to_int(float16 a, float_status *s)
1296{
1297 FloatParts pa = float16_unpack_canonical(a, s);
1298 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1299 return float16_round_pack_canonical(pr, s);
1300}
1301
1302float32 float32_round_to_int(float32 a, float_status *s)
1303{
1304 FloatParts pa = float32_unpack_canonical(a, s);
1305 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1306 return float32_round_pack_canonical(pr, s);
1307}
1308
1309float64 float64_round_to_int(float64 a, float_status *s)
1310{
1311 FloatParts pa = float64_unpack_canonical(a, s);
1312 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1313 return float64_round_pack_canonical(pr, s);
1314}
1315
1316float64 float64_trunc_to_int(float64 a, float_status *s)
1317{
1318 FloatParts pa = float64_unpack_canonical(a, s);
1319 FloatParts pr = round_to_int(pa, float_round_to_zero, s);
1320 return float64_round_pack_canonical(pr, s);
1321}
1322
ab52f973
AB
1323/*
1324 * Returns the result of converting the floating-point value `a' to
1325 * the two's complement integer format. The conversion is performed
1326 * according to the IEC/IEEE Standard for Binary Floating-Point
1327 * Arithmetic---which means in particular that the conversion is
1328 * rounded according to the current rounding mode. If `a' is a NaN,
1329 * the largest positive integer is returned. Otherwise, if the
1330 * conversion overflows, the largest integer with the same sign as `a'
1331 * is returned.
1332*/
1333
1334static int64_t round_to_int_and_pack(FloatParts in, int rmode,
1335 int64_t min, int64_t max,
1336 float_status *s)
1337{
1338 uint64_t r;
1339 int orig_flags = get_float_exception_flags(s);
1340 FloatParts p = round_to_int(in, rmode, s);
1341
1342 switch (p.cls) {
1343 case float_class_snan:
1344 case float_class_qnan:
cffad426
SR
1345 case float_class_dnan:
1346 case float_class_msnan:
801bc563 1347 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1348 return max;
1349 case float_class_inf:
801bc563 1350 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1351 return p.sign ? min : max;
1352 case float_class_zero:
1353 return 0;
1354 case float_class_normal:
1355 if (p.exp < DECOMPOSED_BINARY_POINT) {
1356 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1357 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1358 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1359 } else {
1360 r = UINT64_MAX;
1361 }
1362 if (p.sign) {
1363 if (r < -(uint64_t) min) {
1364 return -r;
1365 } else {
1366 s->float_exception_flags = orig_flags | float_flag_invalid;
1367 return min;
1368 }
1369 } else {
1370 if (r < max) {
1371 return r;
1372 } else {
1373 s->float_exception_flags = orig_flags | float_flag_invalid;
1374 return max;
1375 }
1376 }
1377 default:
1378 g_assert_not_reached();
1379 }
1380}
1381
1382#define FLOAT_TO_INT(fsz, isz) \
1383int ## isz ## _t float ## fsz ## _to_int ## isz(float ## fsz a, \
1384 float_status *s) \
1385{ \
1386 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1387 return round_to_int_and_pack(p, s->float_rounding_mode, \
1388 INT ## isz ## _MIN, INT ## isz ## _MAX,\
1389 s); \
1390} \
1391 \
1392int ## isz ## _t float ## fsz ## _to_int ## isz ## _round_to_zero \
1393 (float ## fsz a, float_status *s) \
1394{ \
1395 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1396 return round_to_int_and_pack(p, float_round_to_zero, \
1397 INT ## isz ## _MIN, INT ## isz ## _MAX,\
1398 s); \
1399}
1400
1401FLOAT_TO_INT(16, 16)
1402FLOAT_TO_INT(16, 32)
1403FLOAT_TO_INT(16, 64)
1404
1405FLOAT_TO_INT(32, 16)
1406FLOAT_TO_INT(32, 32)
1407FLOAT_TO_INT(32, 64)
1408
1409FLOAT_TO_INT(64, 16)
1410FLOAT_TO_INT(64, 32)
1411FLOAT_TO_INT(64, 64)
1412
1413#undef FLOAT_TO_INT
1414
1415/*
1416 * Returns the result of converting the floating-point value `a' to
1417 * the unsigned integer format. The conversion is performed according
1418 * to the IEC/IEEE Standard for Binary Floating-Point
1419 * Arithmetic---which means in particular that the conversion is
1420 * rounded according to the current rounding mode. If `a' is a NaN,
1421 * the largest unsigned integer is returned. Otherwise, if the
1422 * conversion overflows, the largest unsigned integer is returned. If
1423 * the 'a' is negative, the result is rounded and zero is returned;
1424 * values that do not round to zero will raise the inexact exception
1425 * flag.
1426 */
1427
1428static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, uint64_t max,
1429 float_status *s)
1430{
1431 int orig_flags = get_float_exception_flags(s);
1432 FloatParts p = round_to_int(in, rmode, s);
1433
1434 switch (p.cls) {
1435 case float_class_snan:
1436 case float_class_qnan:
cffad426
SR
1437 case float_class_dnan:
1438 case float_class_msnan:
ab52f973
AB
1439 s->float_exception_flags = orig_flags | float_flag_invalid;
1440 return max;
1441 case float_class_inf:
801bc563 1442 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1443 return p.sign ? 0 : max;
1444 case float_class_zero:
1445 return 0;
1446 case float_class_normal:
1447 {
1448 uint64_t r;
1449 if (p.sign) {
1450 s->float_exception_flags = orig_flags | float_flag_invalid;
1451 return 0;
1452 }
1453
1454 if (p.exp < DECOMPOSED_BINARY_POINT) {
1455 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1456 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1457 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1458 } else {
1459 s->float_exception_flags = orig_flags | float_flag_invalid;
1460 return max;
1461 }
1462
1463 /* For uint64 this will never trip, but if p.exp is too large
1464 * to shift a decomposed fraction we shall have exited via the
1465 * 3rd leg above.
1466 */
1467 if (r > max) {
1468 s->float_exception_flags = orig_flags | float_flag_invalid;
1469 return max;
1470 } else {
1471 return r;
1472 }
1473 }
1474 default:
1475 g_assert_not_reached();
1476 }
1477}
1478
1479#define FLOAT_TO_UINT(fsz, isz) \
1480uint ## isz ## _t float ## fsz ## _to_uint ## isz(float ## fsz a, \
1481 float_status *s) \
1482{ \
1483 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1484 return round_to_uint_and_pack(p, s->float_rounding_mode, \
1485 UINT ## isz ## _MAX, s); \
1486} \
1487 \
1488uint ## isz ## _t float ## fsz ## _to_uint ## isz ## _round_to_zero \
1489 (float ## fsz a, float_status *s) \
1490{ \
1491 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
bd49e602
RH
1492 return round_to_uint_and_pack(p, float_round_to_zero, \
1493 UINT ## isz ## _MAX, s); \
ab52f973
AB
1494}
1495
1496FLOAT_TO_UINT(16, 16)
1497FLOAT_TO_UINT(16, 32)
1498FLOAT_TO_UINT(16, 64)
1499
1500FLOAT_TO_UINT(32, 16)
1501FLOAT_TO_UINT(32, 32)
1502FLOAT_TO_UINT(32, 64)
1503
1504FLOAT_TO_UINT(64, 16)
1505FLOAT_TO_UINT(64, 32)
1506FLOAT_TO_UINT(64, 64)
1507
1508#undef FLOAT_TO_UINT
1509
c02e1fb8
AB
1510/*
1511 * Integer to float conversions
1512 *
1513 * Returns the result of converting the two's complement integer `a'
1514 * to the floating-point format. The conversion is performed according
1515 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1516 */
1517
1518static FloatParts int_to_float(int64_t a, float_status *status)
1519{
1520 FloatParts r;
1521 if (a == 0) {
1522 r.cls = float_class_zero;
1523 r.sign = false;
1524 } else if (a == (1ULL << 63)) {
1525 r.cls = float_class_normal;
1526 r.sign = true;
1527 r.frac = DECOMPOSED_IMPLICIT_BIT;
1528 r.exp = 63;
1529 } else {
1530 uint64_t f;
1531 if (a < 0) {
1532 f = -a;
1533 r.sign = true;
1534 } else {
1535 f = a;
1536 r.sign = false;
1537 }
1538 int shift = clz64(f) - 1;
1539 r.cls = float_class_normal;
1540 r.exp = (DECOMPOSED_BINARY_POINT - shift);
1541 r.frac = f << shift;
1542 }
1543
1544 return r;
1545}
1546
1547float16 int64_to_float16(int64_t a, float_status *status)
1548{
1549 FloatParts pa = int_to_float(a, status);
1550 return float16_round_pack_canonical(pa, status);
1551}
1552
1553float16 int32_to_float16(int32_t a, float_status *status)
1554{
1555 return int64_to_float16(a, status);
1556}
1557
1558float16 int16_to_float16(int16_t a, float_status *status)
1559{
1560 return int64_to_float16(a, status);
1561}
1562
1563float32 int64_to_float32(int64_t a, float_status *status)
1564{
1565 FloatParts pa = int_to_float(a, status);
1566 return float32_round_pack_canonical(pa, status);
1567}
1568
1569float32 int32_to_float32(int32_t a, float_status *status)
1570{
1571 return int64_to_float32(a, status);
1572}
1573
1574float32 int16_to_float32(int16_t a, float_status *status)
1575{
1576 return int64_to_float32(a, status);
1577}
1578
1579float64 int64_to_float64(int64_t a, float_status *status)
1580{
1581 FloatParts pa = int_to_float(a, status);
1582 return float64_round_pack_canonical(pa, status);
1583}
1584
1585float64 int32_to_float64(int32_t a, float_status *status)
1586{
1587 return int64_to_float64(a, status);
1588}
1589
1590float64 int16_to_float64(int16_t a, float_status *status)
1591{
1592 return int64_to_float64(a, status);
1593}
1594
1595
1596/*
1597 * Unsigned Integer to float conversions
1598 *
1599 * Returns the result of converting the unsigned integer `a' to the
1600 * floating-point format. The conversion is performed according to the
1601 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1602 */
1603
1604static FloatParts uint_to_float(uint64_t a, float_status *status)
1605{
1606 FloatParts r = { .sign = false};
1607
1608 if (a == 0) {
1609 r.cls = float_class_zero;
1610 } else {
1611 int spare_bits = clz64(a) - 1;
1612 r.cls = float_class_normal;
1613 r.exp = DECOMPOSED_BINARY_POINT - spare_bits;
1614 if (spare_bits < 0) {
1615 shift64RightJamming(a, -spare_bits, &a);
1616 r.frac = a;
1617 } else {
1618 r.frac = a << spare_bits;
1619 }
1620 }
1621
1622 return r;
1623}
1624
1625float16 uint64_to_float16(uint64_t a, float_status *status)
1626{
1627 FloatParts pa = uint_to_float(a, status);
1628 return float16_round_pack_canonical(pa, status);
1629}
1630
1631float16 uint32_to_float16(uint32_t a, float_status *status)
1632{
1633 return uint64_to_float16(a, status);
1634}
1635
1636float16 uint16_to_float16(uint16_t a, float_status *status)
1637{
1638 return uint64_to_float16(a, status);
1639}
1640
1641float32 uint64_to_float32(uint64_t a, float_status *status)
1642{
1643 FloatParts pa = uint_to_float(a, status);
1644 return float32_round_pack_canonical(pa, status);
1645}
1646
1647float32 uint32_to_float32(uint32_t a, float_status *status)
1648{
1649 return uint64_to_float32(a, status);
1650}
1651
1652float32 uint16_to_float32(uint16_t a, float_status *status)
1653{
1654 return uint64_to_float32(a, status);
1655}
1656
1657float64 uint64_to_float64(uint64_t a, float_status *status)
1658{
1659 FloatParts pa = uint_to_float(a, status);
1660 return float64_round_pack_canonical(pa, status);
1661}
1662
1663float64 uint32_to_float64(uint32_t a, float_status *status)
1664{
1665 return uint64_to_float64(a, status);
1666}
1667
1668float64 uint16_to_float64(uint16_t a, float_status *status)
1669{
1670 return uint64_to_float64(a, status);
1671}
1672
89360067
AB
1673/* Float Min/Max */
1674/* min() and max() functions. These can't be implemented as
1675 * 'compare and pick one input' because that would mishandle
1676 * NaNs and +0 vs -0.
1677 *
1678 * minnum() and maxnum() functions. These are similar to the min()
1679 * and max() functions but if one of the arguments is a QNaN and
1680 * the other is numerical then the numerical argument is returned.
1681 * SNaNs will get quietened before being returned.
1682 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
1683 * and maxNum() operations. min() and max() are the typical min/max
1684 * semantics provided by many CPUs which predate that specification.
1685 *
1686 * minnummag() and maxnummag() functions correspond to minNumMag()
1687 * and minNumMag() from the IEEE-754 2008.
1688 */
1689static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
1690 bool ieee, bool ismag, float_status *s)
1691{
1692 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
1693 if (ieee) {
1694 /* Takes two floating-point values `a' and `b', one of
1695 * which is a NaN, and returns the appropriate NaN
1696 * result. If either `a' or `b' is a signaling NaN,
1697 * the invalid exception is raised.
1698 */
1699 if (is_snan(a.cls) || is_snan(b.cls)) {
1700 return pick_nan(a, b, s);
1701 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
1702 return b;
1703 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
1704 return a;
1705 }
1706 }
1707 return pick_nan(a, b, s);
1708 } else {
1709 int a_exp, b_exp;
89360067
AB
1710
1711 switch (a.cls) {
1712 case float_class_normal:
1713 a_exp = a.exp;
1714 break;
1715 case float_class_inf:
1716 a_exp = INT_MAX;
1717 break;
1718 case float_class_zero:
1719 a_exp = INT_MIN;
1720 break;
1721 default:
1722 g_assert_not_reached();
1723 break;
1724 }
1725 switch (b.cls) {
1726 case float_class_normal:
1727 b_exp = b.exp;
1728 break;
1729 case float_class_inf:
1730 b_exp = INT_MAX;
1731 break;
1732 case float_class_zero:
1733 b_exp = INT_MIN;
1734 break;
1735 default:
1736 g_assert_not_reached();
1737 break;
1738 }
1739
6245327a
EC
1740 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
1741 bool a_less = a_exp < b_exp;
1742 if (a_exp == b_exp) {
1743 a_less = a.frac < b.frac;
1744 }
1745 return a_less ^ ismin ? b : a;
89360067
AB
1746 }
1747
6245327a 1748 if (a.sign == b.sign) {
89360067
AB
1749 bool a_less = a_exp < b_exp;
1750 if (a_exp == b_exp) {
1751 a_less = a.frac < b.frac;
1752 }
6245327a 1753 return a.sign ^ a_less ^ ismin ? b : a;
89360067 1754 } else {
6245327a 1755 return a.sign ^ ismin ? b : a;
89360067
AB
1756 }
1757 }
1758}
1759
1760#define MINMAX(sz, name, ismin, isiee, ismag) \
1761float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
1762 float_status *s) \
1763{ \
1764 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1765 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1766 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
1767 \
1768 return float ## sz ## _round_pack_canonical(pr, s); \
1769}
1770
1771MINMAX(16, min, true, false, false)
1772MINMAX(16, minnum, true, true, false)
1773MINMAX(16, minnummag, true, true, true)
1774MINMAX(16, max, false, false, false)
1775MINMAX(16, maxnum, false, true, false)
1776MINMAX(16, maxnummag, false, true, true)
1777
1778MINMAX(32, min, true, false, false)
1779MINMAX(32, minnum, true, true, false)
1780MINMAX(32, minnummag, true, true, true)
1781MINMAX(32, max, false, false, false)
1782MINMAX(32, maxnum, false, true, false)
1783MINMAX(32, maxnummag, false, true, true)
1784
1785MINMAX(64, min, true, false, false)
1786MINMAX(64, minnum, true, true, false)
1787MINMAX(64, minnummag, true, true, true)
1788MINMAX(64, max, false, false, false)
1789MINMAX(64, maxnum, false, true, false)
1790MINMAX(64, maxnummag, false, true, true)
1791
1792#undef MINMAX
1793
0c4c9092
AB
1794/* Floating point compare */
1795static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
1796 float_status *s)
1797{
1798 if (is_nan(a.cls) || is_nan(b.cls)) {
1799 if (!is_quiet ||
1800 a.cls == float_class_snan ||
1801 b.cls == float_class_snan) {
1802 s->float_exception_flags |= float_flag_invalid;
1803 }
1804 return float_relation_unordered;
1805 }
1806
1807 if (a.cls == float_class_zero) {
1808 if (b.cls == float_class_zero) {
1809 return float_relation_equal;
1810 }
1811 return b.sign ? float_relation_greater : float_relation_less;
1812 } else if (b.cls == float_class_zero) {
1813 return a.sign ? float_relation_less : float_relation_greater;
1814 }
1815
1816 /* The only really important thing about infinity is its sign. If
1817 * both are infinities the sign marks the smallest of the two.
1818 */
1819 if (a.cls == float_class_inf) {
1820 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
1821 return float_relation_equal;
1822 }
1823 return a.sign ? float_relation_less : float_relation_greater;
1824 } else if (b.cls == float_class_inf) {
1825 return b.sign ? float_relation_greater : float_relation_less;
1826 }
1827
1828 if (a.sign != b.sign) {
1829 return a.sign ? float_relation_less : float_relation_greater;
1830 }
1831
1832 if (a.exp == b.exp) {
1833 if (a.frac == b.frac) {
1834 return float_relation_equal;
1835 }
1836 if (a.sign) {
1837 return a.frac > b.frac ?
1838 float_relation_less : float_relation_greater;
1839 } else {
1840 return a.frac > b.frac ?
1841 float_relation_greater : float_relation_less;
1842 }
1843 } else {
1844 if (a.sign) {
1845 return a.exp > b.exp ? float_relation_less : float_relation_greater;
1846 } else {
1847 return a.exp > b.exp ? float_relation_greater : float_relation_less;
1848 }
1849 }
1850}
1851
1852#define COMPARE(sz) \
1853int float ## sz ## _compare(float ## sz a, float ## sz b, \
1854 float_status *s) \
1855{ \
1856 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1857 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1858 return compare_floats(pa, pb, false, s); \
1859} \
1860int float ## sz ## _compare_quiet(float ## sz a, float ## sz b, \
1861 float_status *s) \
1862{ \
1863 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1864 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1865 return compare_floats(pa, pb, true, s); \
1866}
1867
1868COMPARE(16)
1869COMPARE(32)
1870COMPARE(64)
1871
1872#undef COMPARE
1873
0bfc9f19
AB
1874/* Multiply A by 2 raised to the power N. */
1875static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
1876{
1877 if (unlikely(is_nan(a.cls))) {
1878 return return_nan(a, s);
1879 }
1880 if (a.cls == float_class_normal) {
1881 a.exp += n;
1882 }
1883 return a;
1884}
1885
1886float16 float16_scalbn(float16 a, int n, float_status *status)
1887{
1888 FloatParts pa = float16_unpack_canonical(a, status);
1889 FloatParts pr = scalbn_decomposed(pa, n, status);
1890 return float16_round_pack_canonical(pr, status);
1891}
1892
1893float32 float32_scalbn(float32 a, int n, float_status *status)
1894{
1895 FloatParts pa = float32_unpack_canonical(a, status);
1896 FloatParts pr = scalbn_decomposed(pa, n, status);
1897 return float32_round_pack_canonical(pr, status);
1898}
1899
1900float64 float64_scalbn(float64 a, int n, float_status *status)
1901{
1902 FloatParts pa = float64_unpack_canonical(a, status);
1903 FloatParts pr = scalbn_decomposed(pa, n, status);
1904 return float64_round_pack_canonical(pr, status);
1905}
1906
c13bb2da
AB
1907/*
1908 * Square Root
1909 *
1910 * The old softfloat code did an approximation step before zeroing in
1911 * on the final result. However for simpleness we just compute the
1912 * square root by iterating down from the implicit bit to enough extra
1913 * bits to ensure we get a correctly rounded result.
1914 *
1915 * This does mean however the calculation is slower than before,
1916 * especially for 64 bit floats.
1917 */
1918
1919static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
1920{
1921 uint64_t a_frac, r_frac, s_frac;
1922 int bit, last_bit;
1923
1924 if (is_nan(a.cls)) {
1925 return return_nan(a, s);
1926 }
1927 if (a.cls == float_class_zero) {
1928 return a; /* sqrt(+-0) = +-0 */
1929 }
1930 if (a.sign) {
1931 s->float_exception_flags |= float_flag_invalid;
1932 a.cls = float_class_dnan;
1933 return a;
1934 }
1935 if (a.cls == float_class_inf) {
1936 return a; /* sqrt(+inf) = +inf */
1937 }
1938
1939 assert(a.cls == float_class_normal);
1940
1941 /* We need two overflow bits at the top. Adding room for that is a
1942 * right shift. If the exponent is odd, we can discard the low bit
1943 * by multiplying the fraction by 2; that's a left shift. Combine
1944 * those and we shift right if the exponent is even.
1945 */
1946 a_frac = a.frac;
1947 if (!(a.exp & 1)) {
1948 a_frac >>= 1;
1949 }
1950 a.exp >>= 1;
1951
1952 /* Bit-by-bit computation of sqrt. */
1953 r_frac = 0;
1954 s_frac = 0;
1955
1956 /* Iterate from implicit bit down to the 3 extra bits to compute a
1957 * properly rounded result. Remember we've inserted one more bit
1958 * at the top, so these positions are one less.
1959 */
1960 bit = DECOMPOSED_BINARY_POINT - 1;
1961 last_bit = MAX(p->frac_shift - 4, 0);
1962 do {
1963 uint64_t q = 1ULL << bit;
1964 uint64_t t_frac = s_frac + q;
1965 if (t_frac <= a_frac) {
1966 s_frac = t_frac + q;
1967 a_frac -= t_frac;
1968 r_frac += q;
1969 }
1970 a_frac <<= 1;
1971 } while (--bit >= last_bit);
1972
1973 /* Undo the right shift done above. If there is any remaining
1974 * fraction, the result is inexact. Set the sticky bit.
1975 */
1976 a.frac = (r_frac << 1) + (a_frac != 0);
1977
1978 return a;
1979}
1980
1981float16 __attribute__((flatten)) float16_sqrt(float16 a, float_status *status)
1982{
1983 FloatParts pa = float16_unpack_canonical(a, status);
1984 FloatParts pr = sqrt_float(pa, status, &float16_params);
1985 return float16_round_pack_canonical(pr, status);
1986}
1987
1988float32 __attribute__((flatten)) float32_sqrt(float32 a, float_status *status)
1989{
1990 FloatParts pa = float32_unpack_canonical(a, status);
1991 FloatParts pr = sqrt_float(pa, status, &float32_params);
1992 return float32_round_pack_canonical(pr, status);
1993}
1994
1995float64 __attribute__((flatten)) float64_sqrt(float64 a, float_status *status)
1996{
1997 FloatParts pa = float64_unpack_canonical(a, status);
1998 FloatParts pr = sqrt_float(pa, status, &float64_params);
1999 return float64_round_pack_canonical(pr, status);
2000}
2001
2002
158142c2
FB
2003/*----------------------------------------------------------------------------
2004| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
2005| and 7, and returns the properly rounded 32-bit integer corresponding to the
2006| input. If `zSign' is 1, the input is negated before being converted to an
2007| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
2008| is simply rounded to an integer, with the inexact exception raised if the
2009| input cannot be represented exactly as an integer. However, if the fixed-
2010| point input is too large, the invalid exception is raised and the largest
2011| positive or negative integer is returned.
2012*----------------------------------------------------------------------------*/
2013
f4014512 2014static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
158142c2 2015{
8f506c70 2016 int8_t roundingMode;
158142c2 2017 flag roundNearestEven;
8f506c70 2018 int8_t roundIncrement, roundBits;
760e1416 2019 int32_t z;
158142c2 2020
a2f2d288 2021 roundingMode = status->float_rounding_mode;
158142c2 2022 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2023 switch (roundingMode) {
2024 case float_round_nearest_even:
f9288a76 2025 case float_round_ties_away:
dc355b76
PM
2026 roundIncrement = 0x40;
2027 break;
2028 case float_round_to_zero:
2029 roundIncrement = 0;
2030 break;
2031 case float_round_up:
2032 roundIncrement = zSign ? 0 : 0x7f;
2033 break;
2034 case float_round_down:
2035 roundIncrement = zSign ? 0x7f : 0;
2036 break;
2037 default:
2038 abort();
158142c2
FB
2039 }
2040 roundBits = absZ & 0x7F;
2041 absZ = ( absZ + roundIncrement )>>7;
2042 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2043 z = absZ;
2044 if ( zSign ) z = - z;
2045 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 2046 float_raise(float_flag_invalid, status);
bb98fe42 2047 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2 2048 }
a2f2d288
PM
2049 if (roundBits) {
2050 status->float_exception_flags |= float_flag_inexact;
2051 }
158142c2
FB
2052 return z;
2053
2054}
2055
2056/*----------------------------------------------------------------------------
2057| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2058| `absZ1', with binary point between bits 63 and 64 (between the input words),
2059| and returns the properly rounded 64-bit integer corresponding to the input.
2060| If `zSign' is 1, the input is negated before being converted to an integer.
2061| Ordinarily, the fixed-point input is simply rounded to an integer, with
2062| the inexact exception raised if the input cannot be represented exactly as
2063| an integer. However, if the fixed-point input is too large, the invalid
2064| exception is raised and the largest positive or negative integer is
2065| returned.
2066*----------------------------------------------------------------------------*/
2067
f42c2224 2068static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 2069 float_status *status)
158142c2 2070{
8f506c70 2071 int8_t roundingMode;
158142c2 2072 flag roundNearestEven, increment;
760e1416 2073 int64_t z;
158142c2 2074
a2f2d288 2075 roundingMode = status->float_rounding_mode;
158142c2 2076 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2077 switch (roundingMode) {
2078 case float_round_nearest_even:
f9288a76 2079 case float_round_ties_away:
dc355b76
PM
2080 increment = ((int64_t) absZ1 < 0);
2081 break;
2082 case float_round_to_zero:
2083 increment = 0;
2084 break;
2085 case float_round_up:
2086 increment = !zSign && absZ1;
2087 break;
2088 case float_round_down:
2089 increment = zSign && absZ1;
2090 break;
2091 default:
2092 abort();
158142c2
FB
2093 }
2094 if ( increment ) {
2095 ++absZ0;
2096 if ( absZ0 == 0 ) goto overflow;
bb98fe42 2097 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
2098 }
2099 z = absZ0;
2100 if ( zSign ) z = - z;
2101 if ( z && ( ( z < 0 ) ^ zSign ) ) {
2102 overflow:
ff32e16e 2103 float_raise(float_flag_invalid, status);
158142c2 2104 return
bb98fe42 2105 zSign ? (int64_t) LIT64( 0x8000000000000000 )
158142c2
FB
2106 : LIT64( 0x7FFFFFFFFFFFFFFF );
2107 }
a2f2d288
PM
2108 if (absZ1) {
2109 status->float_exception_flags |= float_flag_inexact;
2110 }
158142c2
FB
2111 return z;
2112
2113}
2114
fb3ea83a
TM
2115/*----------------------------------------------------------------------------
2116| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2117| `absZ1', with binary point between bits 63 and 64 (between the input words),
2118| and returns the properly rounded 64-bit unsigned integer corresponding to the
2119| input. Ordinarily, the fixed-point input is simply rounded to an integer,
2120| with the inexact exception raised if the input cannot be represented exactly
2121| as an integer. However, if the fixed-point input is too large, the invalid
2122| exception is raised and the largest unsigned integer is returned.
2123*----------------------------------------------------------------------------*/
2124
f42c2224 2125static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
e5a41ffa 2126 uint64_t absZ1, float_status *status)
fb3ea83a 2127{
8f506c70 2128 int8_t roundingMode;
fb3ea83a
TM
2129 flag roundNearestEven, increment;
2130
a2f2d288 2131 roundingMode = status->float_rounding_mode;
fb3ea83a 2132 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
2133 switch (roundingMode) {
2134 case float_round_nearest_even:
f9288a76 2135 case float_round_ties_away:
dc355b76
PM
2136 increment = ((int64_t)absZ1 < 0);
2137 break;
2138 case float_round_to_zero:
2139 increment = 0;
2140 break;
2141 case float_round_up:
2142 increment = !zSign && absZ1;
2143 break;
2144 case float_round_down:
2145 increment = zSign && absZ1;
2146 break;
2147 default:
2148 abort();
fb3ea83a
TM
2149 }
2150 if (increment) {
2151 ++absZ0;
2152 if (absZ0 == 0) {
ff32e16e 2153 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2154 return LIT64(0xFFFFFFFFFFFFFFFF);
2155 }
2156 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
2157 }
2158
2159 if (zSign && absZ0) {
ff32e16e 2160 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2161 return 0;
2162 }
2163
2164 if (absZ1) {
a2f2d288 2165 status->float_exception_flags |= float_flag_inexact;
fb3ea83a
TM
2166 }
2167 return absZ0;
2168}
2169
37d18660
PM
2170/*----------------------------------------------------------------------------
2171| If `a' is denormal and we are in flush-to-zero mode then set the
2172| input-denormal exception and return zero. Otherwise just return the value.
2173*----------------------------------------------------------------------------*/
e5a41ffa 2174float32 float32_squash_input_denormal(float32 a, float_status *status)
37d18660 2175{
a2f2d288 2176 if (status->flush_inputs_to_zero) {
37d18660 2177 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
ff32e16e 2178 float_raise(float_flag_input_denormal, status);
37d18660
PM
2179 return make_float32(float32_val(a) & 0x80000000);
2180 }
2181 }
2182 return a;
2183}
2184
158142c2
FB
2185/*----------------------------------------------------------------------------
2186| Normalizes the subnormal single-precision floating-point value represented
2187| by the denormalized significand `aSig'. The normalized exponent and
2188| significand are stored at the locations pointed to by `zExpPtr' and
2189| `zSigPtr', respectively.
2190*----------------------------------------------------------------------------*/
2191
2192static void
0c48262d 2193 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
158142c2 2194{
8f506c70 2195 int8_t shiftCount;
158142c2
FB
2196
2197 shiftCount = countLeadingZeros32( aSig ) - 8;
2198 *zSigPtr = aSig<<shiftCount;
2199 *zExpPtr = 1 - shiftCount;
2200
2201}
2202
158142c2
FB
2203/*----------------------------------------------------------------------------
2204| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2205| and significand `zSig', and returns the proper single-precision floating-
2206| point value corresponding to the abstract input. Ordinarily, the abstract
2207| value is simply rounded and packed into the single-precision format, with
2208| the inexact exception raised if the abstract input cannot be represented
2209| exactly. However, if the abstract value is too large, the overflow and
2210| inexact exceptions are raised and an infinity or maximal finite value is
2211| returned. If the abstract value is too small, the input value is rounded to
2212| a subnormal number, and the underflow and inexact exceptions are raised if
2213| the abstract input cannot be represented exactly as a subnormal single-
2214| precision floating-point number.
2215| The input significand `zSig' has its binary point between bits 30
2216| and 29, which is 7 bits to the left of the usual location. This shifted
2217| significand must be normalized or smaller. If `zSig' is not normalized,
2218| `zExp' must be 0; in that case, the result returned is a subnormal number,
2219| and it must not require rounding. In the usual case that `zSig' is
2220| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2221| The handling of underflow and overflow follows the IEC/IEEE Standard for
2222| Binary Floating-Point Arithmetic.
2223*----------------------------------------------------------------------------*/
2224
0c48262d 2225static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2226 float_status *status)
158142c2 2227{
8f506c70 2228 int8_t roundingMode;
158142c2 2229 flag roundNearestEven;
8f506c70 2230 int8_t roundIncrement, roundBits;
158142c2
FB
2231 flag isTiny;
2232
a2f2d288 2233 roundingMode = status->float_rounding_mode;
158142c2 2234 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2235 switch (roundingMode) {
2236 case float_round_nearest_even:
f9288a76 2237 case float_round_ties_away:
dc355b76
PM
2238 roundIncrement = 0x40;
2239 break;
2240 case float_round_to_zero:
2241 roundIncrement = 0;
2242 break;
2243 case float_round_up:
2244 roundIncrement = zSign ? 0 : 0x7f;
2245 break;
2246 case float_round_down:
2247 roundIncrement = zSign ? 0x7f : 0;
2248 break;
2249 default:
2250 abort();
2251 break;
158142c2
FB
2252 }
2253 roundBits = zSig & 0x7F;
bb98fe42 2254 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
2255 if ( ( 0xFD < zExp )
2256 || ( ( zExp == 0xFD )
bb98fe42 2257 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2258 ) {
ff32e16e 2259 float_raise(float_flag_overflow | float_flag_inexact, status);
f090c9d4 2260 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
158142c2
FB
2261 }
2262 if ( zExp < 0 ) {
a2f2d288 2263 if (status->flush_to_zero) {
ff32e16e 2264 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2265 return packFloat32(zSign, 0, 0);
2266 }
158142c2 2267 isTiny =
a2f2d288
PM
2268 (status->float_detect_tininess
2269 == float_tininess_before_rounding)
158142c2
FB
2270 || ( zExp < -1 )
2271 || ( zSig + roundIncrement < 0x80000000 );
2272 shift32RightJamming( zSig, - zExp, &zSig );
2273 zExp = 0;
2274 roundBits = zSig & 0x7F;
ff32e16e
PM
2275 if (isTiny && roundBits) {
2276 float_raise(float_flag_underflow, status);
2277 }
158142c2
FB
2278 }
2279 }
a2f2d288
PM
2280 if (roundBits) {
2281 status->float_exception_flags |= float_flag_inexact;
2282 }
158142c2
FB
2283 zSig = ( zSig + roundIncrement )>>7;
2284 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2285 if ( zSig == 0 ) zExp = 0;
2286 return packFloat32( zSign, zExp, zSig );
2287
2288}
2289
2290/*----------------------------------------------------------------------------
2291| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2292| and significand `zSig', and returns the proper single-precision floating-
2293| point value corresponding to the abstract input. This routine is just like
2294| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
2295| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2296| floating-point exponent.
2297*----------------------------------------------------------------------------*/
2298
2299static float32
0c48262d 2300 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2301 float_status *status)
158142c2 2302{
8f506c70 2303 int8_t shiftCount;
158142c2
FB
2304
2305 shiftCount = countLeadingZeros32( zSig ) - 1;
ff32e16e
PM
2306 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
2307 status);
158142c2
FB
2308
2309}
2310
37d18660
PM
2311/*----------------------------------------------------------------------------
2312| If `a' is denormal and we are in flush-to-zero mode then set the
2313| input-denormal exception and return zero. Otherwise just return the value.
2314*----------------------------------------------------------------------------*/
e5a41ffa 2315float64 float64_squash_input_denormal(float64 a, float_status *status)
37d18660 2316{
a2f2d288 2317 if (status->flush_inputs_to_zero) {
37d18660 2318 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
ff32e16e 2319 float_raise(float_flag_input_denormal, status);
37d18660
PM
2320 return make_float64(float64_val(a) & (1ULL << 63));
2321 }
2322 }
2323 return a;
2324}
2325
158142c2
FB
2326/*----------------------------------------------------------------------------
2327| Normalizes the subnormal double-precision floating-point value represented
2328| by the denormalized significand `aSig'. The normalized exponent and
2329| significand are stored at the locations pointed to by `zExpPtr' and
2330| `zSigPtr', respectively.
2331*----------------------------------------------------------------------------*/
2332
2333static void
0c48262d 2334 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
158142c2 2335{
8f506c70 2336 int8_t shiftCount;
158142c2
FB
2337
2338 shiftCount = countLeadingZeros64( aSig ) - 11;
2339 *zSigPtr = aSig<<shiftCount;
2340 *zExpPtr = 1 - shiftCount;
2341
2342}
2343
2344/*----------------------------------------------------------------------------
2345| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2346| double-precision floating-point value, returning the result. After being
2347| shifted into the proper positions, the three fields are simply added
2348| together to form the result. This means that any integer portion of `zSig'
2349| will be added into the exponent. Since a properly normalized significand
2350| will have an integer portion equal to 1, the `zExp' input should be 1 less
2351| than the desired result exponent whenever `zSig' is a complete, normalized
2352| significand.
2353*----------------------------------------------------------------------------*/
2354
0c48262d 2355static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
158142c2
FB
2356{
2357
f090c9d4 2358 return make_float64(
bb98fe42 2359 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
2360
2361}
2362
2363/*----------------------------------------------------------------------------
2364| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2365| and significand `zSig', and returns the proper double-precision floating-
2366| point value corresponding to the abstract input. Ordinarily, the abstract
2367| value is simply rounded and packed into the double-precision format, with
2368| the inexact exception raised if the abstract input cannot be represented
2369| exactly. However, if the abstract value is too large, the overflow and
2370| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
2371| returned. If the abstract value is too small, the input value is rounded to
2372| a subnormal number, and the underflow and inexact exceptions are raised if
2373| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
2374| precision floating-point number.
2375| The input significand `zSig' has its binary point between bits 62
2376| and 61, which is 10 bits to the left of the usual location. This shifted
2377| significand must be normalized or smaller. If `zSig' is not normalized,
2378| `zExp' must be 0; in that case, the result returned is a subnormal number,
2379| and it must not require rounding. In the usual case that `zSig' is
2380| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2381| The handling of underflow and overflow follows the IEC/IEEE Standard for
2382| Binary Floating-Point Arithmetic.
2383*----------------------------------------------------------------------------*/
2384
0c48262d 2385static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2386 float_status *status)
158142c2 2387{
8f506c70 2388 int8_t roundingMode;
158142c2 2389 flag roundNearestEven;
0c48262d 2390 int roundIncrement, roundBits;
158142c2
FB
2391 flag isTiny;
2392
a2f2d288 2393 roundingMode = status->float_rounding_mode;
158142c2 2394 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2395 switch (roundingMode) {
2396 case float_round_nearest_even:
f9288a76 2397 case float_round_ties_away:
dc355b76
PM
2398 roundIncrement = 0x200;
2399 break;
2400 case float_round_to_zero:
2401 roundIncrement = 0;
2402 break;
2403 case float_round_up:
2404 roundIncrement = zSign ? 0 : 0x3ff;
2405 break;
2406 case float_round_down:
2407 roundIncrement = zSign ? 0x3ff : 0;
2408 break;
9ee6f678
BR
2409 case float_round_to_odd:
2410 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2411 break;
dc355b76
PM
2412 default:
2413 abort();
158142c2
FB
2414 }
2415 roundBits = zSig & 0x3FF;
bb98fe42 2416 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
2417 if ( ( 0x7FD < zExp )
2418 || ( ( zExp == 0x7FD )
bb98fe42 2419 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2420 ) {
9ee6f678
BR
2421 bool overflow_to_inf = roundingMode != float_round_to_odd &&
2422 roundIncrement != 0;
ff32e16e 2423 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 2424 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
2425 }
2426 if ( zExp < 0 ) {
a2f2d288 2427 if (status->flush_to_zero) {
ff32e16e 2428 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2429 return packFloat64(zSign, 0, 0);
2430 }
158142c2 2431 isTiny =
a2f2d288
PM
2432 (status->float_detect_tininess
2433 == float_tininess_before_rounding)
158142c2
FB
2434 || ( zExp < -1 )
2435 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
2436 shift64RightJamming( zSig, - zExp, &zSig );
2437 zExp = 0;
2438 roundBits = zSig & 0x3FF;
ff32e16e
PM
2439 if (isTiny && roundBits) {
2440 float_raise(float_flag_underflow, status);
2441 }
9ee6f678
BR
2442 if (roundingMode == float_round_to_odd) {
2443 /*
2444 * For round-to-odd case, the roundIncrement depends on
2445 * zSig which just changed.
2446 */
2447 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2448 }
158142c2
FB
2449 }
2450 }
a2f2d288
PM
2451 if (roundBits) {
2452 status->float_exception_flags |= float_flag_inexact;
2453 }
158142c2
FB
2454 zSig = ( zSig + roundIncrement )>>10;
2455 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
2456 if ( zSig == 0 ) zExp = 0;
2457 return packFloat64( zSign, zExp, zSig );
2458
2459}
2460
2461/*----------------------------------------------------------------------------
2462| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2463| and significand `zSig', and returns the proper double-precision floating-
2464| point value corresponding to the abstract input. This routine is just like
2465| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
2466| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2467| floating-point exponent.
2468*----------------------------------------------------------------------------*/
2469
2470static float64
0c48262d 2471 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2472 float_status *status)
158142c2 2473{
8f506c70 2474 int8_t shiftCount;
158142c2
FB
2475
2476 shiftCount = countLeadingZeros64( zSig ) - 1;
ff32e16e
PM
2477 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
2478 status);
158142c2
FB
2479
2480}
2481
158142c2
FB
2482/*----------------------------------------------------------------------------
2483| Normalizes the subnormal extended double-precision floating-point value
2484| represented by the denormalized significand `aSig'. The normalized exponent
2485| and significand are stored at the locations pointed to by `zExpPtr' and
2486| `zSigPtr', respectively.
2487*----------------------------------------------------------------------------*/
2488
88857aca
LV
2489void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
2490 uint64_t *zSigPtr)
158142c2 2491{
8f506c70 2492 int8_t shiftCount;
158142c2
FB
2493
2494 shiftCount = countLeadingZeros64( aSig );
2495 *zSigPtr = aSig<<shiftCount;
2496 *zExpPtr = 1 - shiftCount;
158142c2
FB
2497}
2498
2499/*----------------------------------------------------------------------------
2500| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2501| and extended significand formed by the concatenation of `zSig0' and `zSig1',
2502| and returns the proper extended double-precision floating-point value
2503| corresponding to the abstract input. Ordinarily, the abstract value is
2504| rounded and packed into the extended double-precision format, with the
2505| inexact exception raised if the abstract input cannot be represented
2506| exactly. However, if the abstract value is too large, the overflow and
2507| inexact exceptions are raised and an infinity or maximal finite value is
2508| returned. If the abstract value is too small, the input value is rounded to
2509| a subnormal number, and the underflow and inexact exceptions are raised if
2510| the abstract input cannot be represented exactly as a subnormal extended
2511| double-precision floating-point number.
2512| If `roundingPrecision' is 32 or 64, the result is rounded to the same
2513| number of bits as single or double precision, respectively. Otherwise, the
2514| result is rounded to the full precision of the extended double-precision
2515| format.
2516| The input significand must be normalized or smaller. If the input
2517| significand is not normalized, `zExp' must be 0; in that case, the result
2518| returned is a subnormal number, and it must not require rounding. The
2519| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
2520| Floating-Point Arithmetic.
2521*----------------------------------------------------------------------------*/
2522
88857aca
LV
2523floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
2524 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
2525 float_status *status)
158142c2 2526{
8f506c70 2527 int8_t roundingMode;
158142c2 2528 flag roundNearestEven, increment, isTiny;
f42c2224 2529 int64_t roundIncrement, roundMask, roundBits;
158142c2 2530
a2f2d288 2531 roundingMode = status->float_rounding_mode;
158142c2
FB
2532 roundNearestEven = ( roundingMode == float_round_nearest_even );
2533 if ( roundingPrecision == 80 ) goto precision80;
2534 if ( roundingPrecision == 64 ) {
2535 roundIncrement = LIT64( 0x0000000000000400 );
2536 roundMask = LIT64( 0x00000000000007FF );
2537 }
2538 else if ( roundingPrecision == 32 ) {
2539 roundIncrement = LIT64( 0x0000008000000000 );
2540 roundMask = LIT64( 0x000000FFFFFFFFFF );
2541 }
2542 else {
2543 goto precision80;
2544 }
2545 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
2546 switch (roundingMode) {
2547 case float_round_nearest_even:
f9288a76 2548 case float_round_ties_away:
dc355b76
PM
2549 break;
2550 case float_round_to_zero:
2551 roundIncrement = 0;
2552 break;
2553 case float_round_up:
2554 roundIncrement = zSign ? 0 : roundMask;
2555 break;
2556 case float_round_down:
2557 roundIncrement = zSign ? roundMask : 0;
2558 break;
2559 default:
2560 abort();
158142c2
FB
2561 }
2562 roundBits = zSig0 & roundMask;
bb98fe42 2563 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
2564 if ( ( 0x7FFE < zExp )
2565 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
2566 ) {
2567 goto overflow;
2568 }
2569 if ( zExp <= 0 ) {
a2f2d288 2570 if (status->flush_to_zero) {
ff32e16e 2571 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2572 return packFloatx80(zSign, 0, 0);
2573 }
158142c2 2574 isTiny =
a2f2d288
PM
2575 (status->float_detect_tininess
2576 == float_tininess_before_rounding)
158142c2
FB
2577 || ( zExp < 0 )
2578 || ( zSig0 <= zSig0 + roundIncrement );
2579 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
2580 zExp = 0;
2581 roundBits = zSig0 & roundMask;
ff32e16e
PM
2582 if (isTiny && roundBits) {
2583 float_raise(float_flag_underflow, status);
2584 }
a2f2d288
PM
2585 if (roundBits) {
2586 status->float_exception_flags |= float_flag_inexact;
2587 }
158142c2 2588 zSig0 += roundIncrement;
bb98fe42 2589 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
2590 roundIncrement = roundMask + 1;
2591 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
2592 roundMask |= roundIncrement;
2593 }
2594 zSig0 &= ~ roundMask;
2595 return packFloatx80( zSign, zExp, zSig0 );
2596 }
2597 }
a2f2d288
PM
2598 if (roundBits) {
2599 status->float_exception_flags |= float_flag_inexact;
2600 }
158142c2
FB
2601 zSig0 += roundIncrement;
2602 if ( zSig0 < roundIncrement ) {
2603 ++zExp;
2604 zSig0 = LIT64( 0x8000000000000000 );
2605 }
2606 roundIncrement = roundMask + 1;
2607 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
2608 roundMask |= roundIncrement;
2609 }
2610 zSig0 &= ~ roundMask;
2611 if ( zSig0 == 0 ) zExp = 0;
2612 return packFloatx80( zSign, zExp, zSig0 );
2613 precision80:
dc355b76
PM
2614 switch (roundingMode) {
2615 case float_round_nearest_even:
f9288a76 2616 case float_round_ties_away:
dc355b76
PM
2617 increment = ((int64_t)zSig1 < 0);
2618 break;
2619 case float_round_to_zero:
2620 increment = 0;
2621 break;
2622 case float_round_up:
2623 increment = !zSign && zSig1;
2624 break;
2625 case float_round_down:
2626 increment = zSign && zSig1;
2627 break;
2628 default:
2629 abort();
158142c2 2630 }
bb98fe42 2631 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
2632 if ( ( 0x7FFE < zExp )
2633 || ( ( zExp == 0x7FFE )
2634 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
2635 && increment
2636 )
2637 ) {
2638 roundMask = 0;
2639 overflow:
ff32e16e 2640 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
2641 if ( ( roundingMode == float_round_to_zero )
2642 || ( zSign && ( roundingMode == float_round_up ) )
2643 || ( ! zSign && ( roundingMode == float_round_down ) )
2644 ) {
2645 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
2646 }
0f605c88
LV
2647 return packFloatx80(zSign,
2648 floatx80_infinity_high,
2649 floatx80_infinity_low);
158142c2
FB
2650 }
2651 if ( zExp <= 0 ) {
2652 isTiny =
a2f2d288
PM
2653 (status->float_detect_tininess
2654 == float_tininess_before_rounding)
158142c2
FB
2655 || ( zExp < 0 )
2656 || ! increment
2657 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
2658 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
2659 zExp = 0;
ff32e16e
PM
2660 if (isTiny && zSig1) {
2661 float_raise(float_flag_underflow, status);
2662 }
a2f2d288
PM
2663 if (zSig1) {
2664 status->float_exception_flags |= float_flag_inexact;
2665 }
dc355b76
PM
2666 switch (roundingMode) {
2667 case float_round_nearest_even:
f9288a76 2668 case float_round_ties_away:
dc355b76
PM
2669 increment = ((int64_t)zSig1 < 0);
2670 break;
2671 case float_round_to_zero:
2672 increment = 0;
2673 break;
2674 case float_round_up:
2675 increment = !zSign && zSig1;
2676 break;
2677 case float_round_down:
2678 increment = zSign && zSig1;
2679 break;
2680 default:
2681 abort();
158142c2
FB
2682 }
2683 if ( increment ) {
2684 ++zSig0;
2685 zSig0 &=
bb98fe42
AF
2686 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
2687 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
2688 }
2689 return packFloatx80( zSign, zExp, zSig0 );
2690 }
2691 }
a2f2d288
PM
2692 if (zSig1) {
2693 status->float_exception_flags |= float_flag_inexact;
2694 }
158142c2
FB
2695 if ( increment ) {
2696 ++zSig0;
2697 if ( zSig0 == 0 ) {
2698 ++zExp;
2699 zSig0 = LIT64( 0x8000000000000000 );
2700 }
2701 else {
bb98fe42 2702 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
2703 }
2704 }
2705 else {
2706 if ( zSig0 == 0 ) zExp = 0;
2707 }
2708 return packFloatx80( zSign, zExp, zSig0 );
2709
2710}
2711
2712/*----------------------------------------------------------------------------
2713| Takes an abstract floating-point value having sign `zSign', exponent
2714| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
2715| and returns the proper extended double-precision floating-point value
2716| corresponding to the abstract input. This routine is just like
2717| `roundAndPackFloatx80' except that the input significand does not have to be
2718| normalized.
2719*----------------------------------------------------------------------------*/
2720
88857aca
LV
2721floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
2722 flag zSign, int32_t zExp,
2723 uint64_t zSig0, uint64_t zSig1,
2724 float_status *status)
158142c2 2725{
8f506c70 2726 int8_t shiftCount;
158142c2
FB
2727
2728 if ( zSig0 == 0 ) {
2729 zSig0 = zSig1;
2730 zSig1 = 0;
2731 zExp -= 64;
2732 }
2733 shiftCount = countLeadingZeros64( zSig0 );
2734 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
2735 zExp -= shiftCount;
ff32e16e
PM
2736 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
2737 zSig0, zSig1, status);
158142c2
FB
2738
2739}
2740
158142c2
FB
2741/*----------------------------------------------------------------------------
2742| Returns the least-significant 64 fraction bits of the quadruple-precision
2743| floating-point value `a'.
2744*----------------------------------------------------------------------------*/
2745
a49db98d 2746static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
2747{
2748
2749 return a.low;
2750
2751}
2752
2753/*----------------------------------------------------------------------------
2754| Returns the most-significant 48 fraction bits of the quadruple-precision
2755| floating-point value `a'.
2756*----------------------------------------------------------------------------*/
2757
a49db98d 2758static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
2759{
2760
2761 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
2762
2763}
2764
2765/*----------------------------------------------------------------------------
2766| Returns the exponent bits of the quadruple-precision floating-point value
2767| `a'.
2768*----------------------------------------------------------------------------*/
2769
f4014512 2770static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
2771{
2772
2773 return ( a.high>>48 ) & 0x7FFF;
2774
2775}
2776
2777/*----------------------------------------------------------------------------
2778| Returns the sign bit of the quadruple-precision floating-point value `a'.
2779*----------------------------------------------------------------------------*/
2780
a49db98d 2781static inline flag extractFloat128Sign( float128 a )
158142c2
FB
2782{
2783
2784 return a.high>>63;
2785
2786}
2787
2788/*----------------------------------------------------------------------------
2789| Normalizes the subnormal quadruple-precision floating-point value
2790| represented by the denormalized significand formed by the concatenation of
2791| `aSig0' and `aSig1'. The normalized exponent is stored at the location
2792| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
2793| significand are stored at the location pointed to by `zSig0Ptr', and the
2794| least significant 64 bits of the normalized significand are stored at the
2795| location pointed to by `zSig1Ptr'.
2796*----------------------------------------------------------------------------*/
2797
2798static void
2799 normalizeFloat128Subnormal(
bb98fe42
AF
2800 uint64_t aSig0,
2801 uint64_t aSig1,
f4014512 2802 int32_t *zExpPtr,
bb98fe42
AF
2803 uint64_t *zSig0Ptr,
2804 uint64_t *zSig1Ptr
158142c2
FB
2805 )
2806{
8f506c70 2807 int8_t shiftCount;
158142c2
FB
2808
2809 if ( aSig0 == 0 ) {
2810 shiftCount = countLeadingZeros64( aSig1 ) - 15;
2811 if ( shiftCount < 0 ) {
2812 *zSig0Ptr = aSig1>>( - shiftCount );
2813 *zSig1Ptr = aSig1<<( shiftCount & 63 );
2814 }
2815 else {
2816 *zSig0Ptr = aSig1<<shiftCount;
2817 *zSig1Ptr = 0;
2818 }
2819 *zExpPtr = - shiftCount - 63;
2820 }
2821 else {
2822 shiftCount = countLeadingZeros64( aSig0 ) - 15;
2823 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
2824 *zExpPtr = 1 - shiftCount;
2825 }
2826
2827}
2828
2829/*----------------------------------------------------------------------------
2830| Packs the sign `zSign', the exponent `zExp', and the significand formed
2831| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
2832| floating-point value, returning the result. After being shifted into the
2833| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
2834| added together to form the most significant 32 bits of the result. This
2835| means that any integer portion of `zSig0' will be added into the exponent.
2836| Since a properly normalized significand will have an integer portion equal
2837| to 1, the `zExp' input should be 1 less than the desired result exponent
2838| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
2839| significand.
2840*----------------------------------------------------------------------------*/
2841
a49db98d 2842static inline float128
f4014512 2843 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
158142c2
FB
2844{
2845 float128 z;
2846
2847 z.low = zSig1;
bb98fe42 2848 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
158142c2
FB
2849 return z;
2850
2851}
2852
2853/*----------------------------------------------------------------------------
2854| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2855| and extended significand formed by the concatenation of `zSig0', `zSig1',
2856| and `zSig2', and returns the proper quadruple-precision floating-point value
2857| corresponding to the abstract input. Ordinarily, the abstract value is
2858| simply rounded and packed into the quadruple-precision format, with the
2859| inexact exception raised if the abstract input cannot be represented
2860| exactly. However, if the abstract value is too large, the overflow and
2861| inexact exceptions are raised and an infinity or maximal finite value is
2862| returned. If the abstract value is too small, the input value is rounded to
2863| a subnormal number, and the underflow and inexact exceptions are raised if
2864| the abstract input cannot be represented exactly as a subnormal quadruple-
2865| precision floating-point number.
2866| The input significand must be normalized or smaller. If the input
2867| significand is not normalized, `zExp' must be 0; in that case, the result
2868| returned is a subnormal number, and it must not require rounding. In the
2869| usual case that the input significand is normalized, `zExp' must be 1 less
2870| than the ``true'' floating-point exponent. The handling of underflow and
2871| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2872*----------------------------------------------------------------------------*/
2873
f4014512 2874static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
2875 uint64_t zSig0, uint64_t zSig1,
2876 uint64_t zSig2, float_status *status)
158142c2 2877{
8f506c70 2878 int8_t roundingMode;
158142c2
FB
2879 flag roundNearestEven, increment, isTiny;
2880
a2f2d288 2881 roundingMode = status->float_rounding_mode;
158142c2 2882 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2883 switch (roundingMode) {
2884 case float_round_nearest_even:
f9288a76 2885 case float_round_ties_away:
dc355b76
PM
2886 increment = ((int64_t)zSig2 < 0);
2887 break;
2888 case float_round_to_zero:
2889 increment = 0;
2890 break;
2891 case float_round_up:
2892 increment = !zSign && zSig2;
2893 break;
2894 case float_round_down:
2895 increment = zSign && zSig2;
2896 break;
9ee6f678
BR
2897 case float_round_to_odd:
2898 increment = !(zSig1 & 0x1) && zSig2;
2899 break;
dc355b76
PM
2900 default:
2901 abort();
158142c2 2902 }
bb98fe42 2903 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
2904 if ( ( 0x7FFD < zExp )
2905 || ( ( zExp == 0x7FFD )
2906 && eq128(
2907 LIT64( 0x0001FFFFFFFFFFFF ),
2908 LIT64( 0xFFFFFFFFFFFFFFFF ),
2909 zSig0,
2910 zSig1
2911 )
2912 && increment
2913 )
2914 ) {
ff32e16e 2915 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
2916 if ( ( roundingMode == float_round_to_zero )
2917 || ( zSign && ( roundingMode == float_round_up ) )
2918 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 2919 || (roundingMode == float_round_to_odd)
158142c2
FB
2920 ) {
2921 return
2922 packFloat128(
2923 zSign,
2924 0x7FFE,
2925 LIT64( 0x0000FFFFFFFFFFFF ),
2926 LIT64( 0xFFFFFFFFFFFFFFFF )
2927 );
2928 }
2929 return packFloat128( zSign, 0x7FFF, 0, 0 );
2930 }
2931 if ( zExp < 0 ) {
a2f2d288 2932 if (status->flush_to_zero) {
ff32e16e 2933 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2934 return packFloat128(zSign, 0, 0, 0);
2935 }
158142c2 2936 isTiny =
a2f2d288
PM
2937 (status->float_detect_tininess
2938 == float_tininess_before_rounding)
158142c2
FB
2939 || ( zExp < -1 )
2940 || ! increment
2941 || lt128(
2942 zSig0,
2943 zSig1,
2944 LIT64( 0x0001FFFFFFFFFFFF ),
2945 LIT64( 0xFFFFFFFFFFFFFFFF )
2946 );
2947 shift128ExtraRightJamming(
2948 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
2949 zExp = 0;
ff32e16e
PM
2950 if (isTiny && zSig2) {
2951 float_raise(float_flag_underflow, status);
2952 }
dc355b76
PM
2953 switch (roundingMode) {
2954 case float_round_nearest_even:
f9288a76 2955 case float_round_ties_away:
dc355b76
PM
2956 increment = ((int64_t)zSig2 < 0);
2957 break;
2958 case float_round_to_zero:
2959 increment = 0;
2960 break;
2961 case float_round_up:
2962 increment = !zSign && zSig2;
2963 break;
2964 case float_round_down:
2965 increment = zSign && zSig2;
2966 break;
9ee6f678
BR
2967 case float_round_to_odd:
2968 increment = !(zSig1 & 0x1) && zSig2;
2969 break;
dc355b76
PM
2970 default:
2971 abort();
158142c2
FB
2972 }
2973 }
2974 }
a2f2d288
PM
2975 if (zSig2) {
2976 status->float_exception_flags |= float_flag_inexact;
2977 }
158142c2
FB
2978 if ( increment ) {
2979 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
2980 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
2981 }
2982 else {
2983 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
2984 }
2985 return packFloat128( zSign, zExp, zSig0, zSig1 );
2986
2987}
2988
2989/*----------------------------------------------------------------------------
2990| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2991| and significand formed by the concatenation of `zSig0' and `zSig1', and
2992| returns the proper quadruple-precision floating-point value corresponding
2993| to the abstract input. This routine is just like `roundAndPackFloat128'
2994| except that the input significand has fewer bits and does not have to be
2995| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
2996| point exponent.
2997*----------------------------------------------------------------------------*/
2998
f4014512 2999static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
3000 uint64_t zSig0, uint64_t zSig1,
3001 float_status *status)
158142c2 3002{
8f506c70 3003 int8_t shiftCount;
bb98fe42 3004 uint64_t zSig2;
158142c2
FB
3005
3006 if ( zSig0 == 0 ) {
3007 zSig0 = zSig1;
3008 zSig1 = 0;
3009 zExp -= 64;
3010 }
3011 shiftCount = countLeadingZeros64( zSig0 ) - 15;
3012 if ( 0 <= shiftCount ) {
3013 zSig2 = 0;
3014 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3015 }
3016 else {
3017 shift128ExtraRightJamming(
3018 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
3019 }
3020 zExp -= shiftCount;
ff32e16e 3021 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
3022
3023}
3024
158142c2 3025
158142c2
FB
3026/*----------------------------------------------------------------------------
3027| Returns the result of converting the 32-bit two's complement integer `a'
3028| to the extended double-precision floating-point format. The conversion
3029| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3030| Arithmetic.
3031*----------------------------------------------------------------------------*/
3032
e5a41ffa 3033floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2
FB
3034{
3035 flag zSign;
3a87d009 3036 uint32_t absA;
8f506c70 3037 int8_t shiftCount;
bb98fe42 3038 uint64_t zSig;
158142c2
FB
3039
3040 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3041 zSign = ( a < 0 );
3042 absA = zSign ? - a : a;
3043 shiftCount = countLeadingZeros32( absA ) + 32;
3044 zSig = absA;
3045 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
3046
3047}
3048
158142c2
FB
3049/*----------------------------------------------------------------------------
3050| Returns the result of converting the 32-bit two's complement integer `a' to
3051| the quadruple-precision floating-point format. The conversion is performed
3052| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3053*----------------------------------------------------------------------------*/
3054
e5a41ffa 3055float128 int32_to_float128(int32_t a, float_status *status)
158142c2
FB
3056{
3057 flag zSign;
3a87d009 3058 uint32_t absA;
8f506c70 3059 int8_t shiftCount;
bb98fe42 3060 uint64_t zSig0;
158142c2
FB
3061
3062 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3063 zSign = ( a < 0 );
3064 absA = zSign ? - a : a;
3065 shiftCount = countLeadingZeros32( absA ) + 17;
3066 zSig0 = absA;
3067 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
3068
3069}
3070
158142c2
FB
3071/*----------------------------------------------------------------------------
3072| Returns the result of converting the 64-bit two's complement integer `a'
3073| to the extended double-precision floating-point format. The conversion
3074| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3075| Arithmetic.
3076*----------------------------------------------------------------------------*/
3077
e5a41ffa 3078floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2
FB
3079{
3080 flag zSign;
182f42fd 3081 uint64_t absA;
8f506c70 3082 int8_t shiftCount;
158142c2
FB
3083
3084 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3085 zSign = ( a < 0 );
3086 absA = zSign ? - a : a;
3087 shiftCount = countLeadingZeros64( absA );
3088 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
3089
3090}
3091
158142c2
FB
3092/*----------------------------------------------------------------------------
3093| Returns the result of converting the 64-bit two's complement integer `a' to
3094| the quadruple-precision floating-point format. The conversion is performed
3095| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3096*----------------------------------------------------------------------------*/
3097
e5a41ffa 3098float128 int64_to_float128(int64_t a, float_status *status)
158142c2
FB
3099{
3100 flag zSign;
182f42fd 3101 uint64_t absA;
8f506c70 3102 int8_t shiftCount;
f4014512 3103 int32_t zExp;
bb98fe42 3104 uint64_t zSig0, zSig1;
158142c2
FB
3105
3106 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3107 zSign = ( a < 0 );
3108 absA = zSign ? - a : a;
3109 shiftCount = countLeadingZeros64( absA ) + 49;
3110 zExp = 0x406E - shiftCount;
3111 if ( 64 <= shiftCount ) {
3112 zSig1 = 0;
3113 zSig0 = absA;
3114 shiftCount -= 64;
3115 }
3116 else {
3117 zSig1 = absA;
3118 zSig0 = 0;
3119 }
3120 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3121 return packFloat128( zSign, zExp, zSig0, zSig1 );
3122
3123}
3124
6bb8e0f1
PM
3125/*----------------------------------------------------------------------------
3126| Returns the result of converting the 64-bit unsigned integer `a'
3127| to the quadruple-precision floating-point format. The conversion is performed
3128| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3129*----------------------------------------------------------------------------*/
3130
e5a41ffa 3131float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
3132{
3133 if (a == 0) {
3134 return float128_zero;
3135 }
ff32e16e 3136 return normalizeRoundAndPackFloat128(0, 0x406E, a, 0, status);
1e397ead
RH
3137}
3138
158142c2 3139
158142c2 3140
158142c2
FB
3141
3142/*----------------------------------------------------------------------------
3143| Returns the result of converting the single-precision floating-point value
3144| `a' to the double-precision floating-point format. The conversion is
3145| performed according to the IEC/IEEE Standard for Binary Floating-Point
3146| Arithmetic.
3147*----------------------------------------------------------------------------*/
3148
e5a41ffa 3149float64 float32_to_float64(float32 a, float_status *status)
158142c2
FB
3150{
3151 flag aSign;
0c48262d 3152 int aExp;
bb98fe42 3153 uint32_t aSig;
ff32e16e 3154 a = float32_squash_input_denormal(a, status);
158142c2
FB
3155
3156 aSig = extractFloat32Frac( a );
3157 aExp = extractFloat32Exp( a );
3158 aSign = extractFloat32Sign( a );
3159 if ( aExp == 0xFF ) {
ff32e16e
PM
3160 if (aSig) {
3161 return commonNaNToFloat64(float32ToCommonNaN(a, status), status);
3162 }
158142c2
FB
3163 return packFloat64( aSign, 0x7FF, 0 );
3164 }
3165 if ( aExp == 0 ) {
3166 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
3167 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3168 --aExp;
3169 }
bb98fe42 3170 return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
158142c2
FB
3171
3172}
3173
158142c2
FB
3174/*----------------------------------------------------------------------------
3175| Returns the result of converting the single-precision floating-point value
3176| `a' to the extended double-precision floating-point format. The conversion
3177| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3178| Arithmetic.
3179*----------------------------------------------------------------------------*/
3180
e5a41ffa 3181floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2
FB
3182{
3183 flag aSign;
0c48262d 3184 int aExp;
bb98fe42 3185 uint32_t aSig;
158142c2 3186
ff32e16e 3187 a = float32_squash_input_denormal(a, status);
158142c2
FB
3188 aSig = extractFloat32Frac( a );
3189 aExp = extractFloat32Exp( a );
3190 aSign = extractFloat32Sign( a );
3191 if ( aExp == 0xFF ) {
ff32e16e
PM
3192 if (aSig) {
3193 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
3194 }
0f605c88
LV
3195 return packFloatx80(aSign,
3196 floatx80_infinity_high,
3197 floatx80_infinity_low);
158142c2
FB
3198 }
3199 if ( aExp == 0 ) {
3200 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3201 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3202 }
3203 aSig |= 0x00800000;
bb98fe42 3204 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
3205
3206}
3207
158142c2
FB
3208/*----------------------------------------------------------------------------
3209| Returns the result of converting the single-precision floating-point value
3210| `a' to the double-precision floating-point format. The conversion is
3211| performed according to the IEC/IEEE Standard for Binary Floating-Point
3212| Arithmetic.
3213*----------------------------------------------------------------------------*/
3214
e5a41ffa 3215float128 float32_to_float128(float32 a, float_status *status)
158142c2
FB
3216{
3217 flag aSign;
0c48262d 3218 int aExp;
bb98fe42 3219 uint32_t aSig;
158142c2 3220
ff32e16e 3221 a = float32_squash_input_denormal(a, status);
158142c2
FB
3222 aSig = extractFloat32Frac( a );
3223 aExp = extractFloat32Exp( a );
3224 aSign = extractFloat32Sign( a );
3225 if ( aExp == 0xFF ) {
ff32e16e
PM
3226 if (aSig) {
3227 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
3228 }
158142c2
FB
3229 return packFloat128( aSign, 0x7FFF, 0, 0 );
3230 }
3231 if ( aExp == 0 ) {
3232 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3233 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3234 --aExp;
3235 }
bb98fe42 3236 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
3237
3238}
3239
158142c2
FB
3240/*----------------------------------------------------------------------------
3241| Returns the remainder of the single-precision floating-point value `a'
3242| with respect to the corresponding value `b'. The operation is performed
3243| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3244*----------------------------------------------------------------------------*/
3245
e5a41ffa 3246float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 3247{
ed086f3d 3248 flag aSign, zSign;
0c48262d 3249 int aExp, bExp, expDiff;
bb98fe42
AF
3250 uint32_t aSig, bSig;
3251 uint32_t q;
3252 uint64_t aSig64, bSig64, q64;
3253 uint32_t alternateASig;
3254 int32_t sigMean;
ff32e16e
PM
3255 a = float32_squash_input_denormal(a, status);
3256 b = float32_squash_input_denormal(b, status);
158142c2
FB
3257
3258 aSig = extractFloat32Frac( a );
3259 aExp = extractFloat32Exp( a );
3260 aSign = extractFloat32Sign( a );
3261 bSig = extractFloat32Frac( b );
3262 bExp = extractFloat32Exp( b );
158142c2
FB
3263 if ( aExp == 0xFF ) {
3264 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 3265 return propagateFloat32NaN(a, b, status);
158142c2 3266 }
ff32e16e 3267 float_raise(float_flag_invalid, status);
af39bc8c 3268 return float32_default_nan(status);
158142c2
FB
3269 }
3270 if ( bExp == 0xFF ) {
ff32e16e
PM
3271 if (bSig) {
3272 return propagateFloat32NaN(a, b, status);
3273 }
158142c2
FB
3274 return a;
3275 }
3276 if ( bExp == 0 ) {
3277 if ( bSig == 0 ) {
ff32e16e 3278 float_raise(float_flag_invalid, status);
af39bc8c 3279 return float32_default_nan(status);
158142c2
FB
3280 }
3281 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
3282 }
3283 if ( aExp == 0 ) {
3284 if ( aSig == 0 ) return a;
3285 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3286 }
3287 expDiff = aExp - bExp;
3288 aSig |= 0x00800000;
3289 bSig |= 0x00800000;
3290 if ( expDiff < 32 ) {
3291 aSig <<= 8;
3292 bSig <<= 8;
3293 if ( expDiff < 0 ) {
3294 if ( expDiff < -1 ) return a;
3295 aSig >>= 1;
3296 }
3297 q = ( bSig <= aSig );
3298 if ( q ) aSig -= bSig;
3299 if ( 0 < expDiff ) {
bb98fe42 3300 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
3301 q >>= 32 - expDiff;
3302 bSig >>= 2;
3303 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3304 }
3305 else {
3306 aSig >>= 2;
3307 bSig >>= 2;
3308 }
3309 }
3310 else {
3311 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
3312 aSig64 = ( (uint64_t) aSig )<<40;
3313 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
3314 expDiff -= 64;
3315 while ( 0 < expDiff ) {
3316 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3317 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3318 aSig64 = - ( ( bSig * q64 )<<38 );
3319 expDiff -= 62;
3320 }
3321 expDiff += 64;
3322 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3323 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3324 q = q64>>( 64 - expDiff );
3325 bSig <<= 6;
3326 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
3327 }
3328 do {
3329 alternateASig = aSig;
3330 ++q;
3331 aSig -= bSig;
bb98fe42 3332 } while ( 0 <= (int32_t) aSig );
158142c2
FB
3333 sigMean = aSig + alternateASig;
3334 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3335 aSig = alternateASig;
3336 }
bb98fe42 3337 zSign = ( (int32_t) aSig < 0 );
158142c2 3338 if ( zSign ) aSig = - aSig;
ff32e16e 3339 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
3340}
3341
369be8f6 3342
158142c2 3343
8229c991
AJ
3344/*----------------------------------------------------------------------------
3345| Returns the binary exponential of the single-precision floating-point value
3346| `a'. The operation is performed according to the IEC/IEEE Standard for
3347| Binary Floating-Point Arithmetic.
3348|
3349| Uses the following identities:
3350|
3351| 1. -------------------------------------------------------------------------
3352| x x*ln(2)
3353| 2 = e
3354|
3355| 2. -------------------------------------------------------------------------
3356| 2 3 4 5 n
3357| x x x x x x x
3358| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
3359| 1! 2! 3! 4! 5! n!
3360*----------------------------------------------------------------------------*/
3361
3362static const float64 float32_exp2_coefficients[15] =
3363{
d5138cf4
PM
3364 const_float64( 0x3ff0000000000000ll ), /* 1 */
3365 const_float64( 0x3fe0000000000000ll ), /* 2 */
3366 const_float64( 0x3fc5555555555555ll ), /* 3 */
3367 const_float64( 0x3fa5555555555555ll ), /* 4 */
3368 const_float64( 0x3f81111111111111ll ), /* 5 */
3369 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
3370 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
3371 const_float64( 0x3efa01a01a01a01all ), /* 8 */
3372 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
3373 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
3374 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
3375 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
3376 const_float64( 0x3de6124613a86d09ll ), /* 13 */
3377 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
3378 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
3379};
3380
e5a41ffa 3381float32 float32_exp2(float32 a, float_status *status)
8229c991
AJ
3382{
3383 flag aSign;
0c48262d 3384 int aExp;
bb98fe42 3385 uint32_t aSig;
8229c991
AJ
3386 float64 r, x, xn;
3387 int i;
ff32e16e 3388 a = float32_squash_input_denormal(a, status);
8229c991
AJ
3389
3390 aSig = extractFloat32Frac( a );
3391 aExp = extractFloat32Exp( a );
3392 aSign = extractFloat32Sign( a );
3393
3394 if ( aExp == 0xFF) {
ff32e16e
PM
3395 if (aSig) {
3396 return propagateFloat32NaN(a, float32_zero, status);
3397 }
8229c991
AJ
3398 return (aSign) ? float32_zero : a;
3399 }
3400 if (aExp == 0) {
3401 if (aSig == 0) return float32_one;
3402 }
3403
ff32e16e 3404 float_raise(float_flag_inexact, status);
8229c991
AJ
3405
3406 /* ******************************* */
3407 /* using float64 for approximation */
3408 /* ******************************* */
ff32e16e
PM
3409 x = float32_to_float64(a, status);
3410 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
3411
3412 xn = x;
3413 r = float64_one;
3414 for (i = 0 ; i < 15 ; i++) {
3415 float64 f;
3416
ff32e16e
PM
3417 f = float64_mul(xn, float32_exp2_coefficients[i], status);
3418 r = float64_add(r, f, status);
8229c991 3419
ff32e16e 3420 xn = float64_mul(xn, x, status);
8229c991
AJ
3421 }
3422
3423 return float64_to_float32(r, status);
3424}
3425
374dfc33
AJ
3426/*----------------------------------------------------------------------------
3427| Returns the binary log of the single-precision floating-point value `a'.
3428| The operation is performed according to the IEC/IEEE Standard for Binary
3429| Floating-Point Arithmetic.
3430*----------------------------------------------------------------------------*/
e5a41ffa 3431float32 float32_log2(float32 a, float_status *status)
374dfc33
AJ
3432{
3433 flag aSign, zSign;
0c48262d 3434 int aExp;
bb98fe42 3435 uint32_t aSig, zSig, i;
374dfc33 3436
ff32e16e 3437 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
3438 aSig = extractFloat32Frac( a );
3439 aExp = extractFloat32Exp( a );
3440 aSign = extractFloat32Sign( a );
3441
3442 if ( aExp == 0 ) {
3443 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
3444 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3445 }
3446 if ( aSign ) {
ff32e16e 3447 float_raise(float_flag_invalid, status);
af39bc8c 3448 return float32_default_nan(status);
374dfc33
AJ
3449 }
3450 if ( aExp == 0xFF ) {
ff32e16e
PM
3451 if (aSig) {
3452 return propagateFloat32NaN(a, float32_zero, status);
3453 }
374dfc33
AJ
3454 return a;
3455 }
3456
3457 aExp -= 0x7F;
3458 aSig |= 0x00800000;
3459 zSign = aExp < 0;
3460 zSig = aExp << 23;
3461
3462 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 3463 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
3464 if ( aSig & 0x01000000 ) {
3465 aSig >>= 1;
3466 zSig |= i;
3467 }
3468 }
3469
3470 if ( zSign )
3471 zSig = -zSig;
3472
ff32e16e 3473 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
3474}
3475
158142c2
FB
3476/*----------------------------------------------------------------------------
3477| Returns 1 if the single-precision floating-point value `a' is equal to
b689362d
AJ
3478| the corresponding value `b', and 0 otherwise. The invalid exception is
3479| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
3480| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3481*----------------------------------------------------------------------------*/
3482
e5a41ffa 3483int float32_eq(float32 a, float32 b, float_status *status)
158142c2 3484{
b689362d 3485 uint32_t av, bv;
ff32e16e
PM
3486 a = float32_squash_input_denormal(a, status);
3487 b = float32_squash_input_denormal(b, status);
158142c2
FB
3488
3489 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3490 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3491 ) {
ff32e16e 3492 float_raise(float_flag_invalid, status);
158142c2
FB
3493 return 0;
3494 }
b689362d
AJ
3495 av = float32_val(a);
3496 bv = float32_val(b);
3497 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
3498}
3499
3500/*----------------------------------------------------------------------------
3501| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
3502| or equal to the corresponding value `b', and 0 otherwise. The invalid
3503| exception is raised if either operand is a NaN. The comparison is performed
3504| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3505*----------------------------------------------------------------------------*/
3506
e5a41ffa 3507int float32_le(float32 a, float32 b, float_status *status)
158142c2
FB
3508{
3509 flag aSign, bSign;
bb98fe42 3510 uint32_t av, bv;
ff32e16e
PM
3511 a = float32_squash_input_denormal(a, status);
3512 b = float32_squash_input_denormal(b, status);
158142c2
FB
3513
3514 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3515 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3516 ) {
ff32e16e 3517 float_raise(float_flag_invalid, status);
158142c2
FB
3518 return 0;
3519 }
3520 aSign = extractFloat32Sign( a );
3521 bSign = extractFloat32Sign( b );
f090c9d4
PB
3522 av = float32_val(a);
3523 bv = float32_val(b);
bb98fe42 3524 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 3525 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
3526
3527}
3528
3529/*----------------------------------------------------------------------------
3530| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
3531| the corresponding value `b', and 0 otherwise. The invalid exception is
3532| raised if either operand is a NaN. The comparison is performed according
3533| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3534*----------------------------------------------------------------------------*/
3535
e5a41ffa 3536int float32_lt(float32 a, float32 b, float_status *status)
158142c2
FB
3537{
3538 flag aSign, bSign;
bb98fe42 3539 uint32_t av, bv;
ff32e16e
PM
3540 a = float32_squash_input_denormal(a, status);
3541 b = float32_squash_input_denormal(b, status);
158142c2
FB
3542
3543 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3544 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3545 ) {
ff32e16e 3546 float_raise(float_flag_invalid, status);
158142c2
FB
3547 return 0;
3548 }
3549 aSign = extractFloat32Sign( a );
3550 bSign = extractFloat32Sign( b );
f090c9d4
PB
3551 av = float32_val(a);
3552 bv = float32_val(b);
bb98fe42 3553 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 3554 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
3555
3556}
3557
67b7861d
AJ
3558/*----------------------------------------------------------------------------
3559| Returns 1 if the single-precision floating-point values `a' and `b' cannot
f5a64251
AJ
3560| be compared, and 0 otherwise. The invalid exception is raised if either
3561| operand is a NaN. The comparison is performed according to the IEC/IEEE
3562| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
3563*----------------------------------------------------------------------------*/
3564
e5a41ffa 3565int float32_unordered(float32 a, float32 b, float_status *status)
67b7861d 3566{
ff32e16e
PM
3567 a = float32_squash_input_denormal(a, status);
3568 b = float32_squash_input_denormal(b, status);
67b7861d
AJ
3569
3570 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3571 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3572 ) {
ff32e16e 3573 float_raise(float_flag_invalid, status);
67b7861d
AJ
3574 return 1;
3575 }
3576 return 0;
3577}
b689362d 3578
158142c2
FB
3579/*----------------------------------------------------------------------------
3580| Returns 1 if the single-precision floating-point value `a' is equal to
f5a64251
AJ
3581| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
3582| exception. The comparison is performed according to the IEC/IEEE Standard
3583| for Binary Floating-Point Arithmetic.
158142c2
FB
3584*----------------------------------------------------------------------------*/
3585
e5a41ffa 3586int float32_eq_quiet(float32 a, float32 b, float_status *status)
158142c2 3587{
ff32e16e
PM
3588 a = float32_squash_input_denormal(a, status);
3589 b = float32_squash_input_denormal(b, status);
158142c2
FB
3590
3591 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3592 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3593 ) {
af39bc8c
AM
3594 if (float32_is_signaling_nan(a, status)
3595 || float32_is_signaling_nan(b, status)) {
ff32e16e 3596 float_raise(float_flag_invalid, status);
b689362d 3597 }
158142c2
FB
3598 return 0;
3599 }
b689362d
AJ
3600 return ( float32_val(a) == float32_val(b) ) ||
3601 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
158142c2
FB
3602}
3603
3604/*----------------------------------------------------------------------------
3605| Returns 1 if the single-precision floating-point value `a' is less than or
3606| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
3607| cause an exception. Otherwise, the comparison is performed according to the
3608| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3609*----------------------------------------------------------------------------*/
3610
e5a41ffa 3611int float32_le_quiet(float32 a, float32 b, float_status *status)
158142c2
FB
3612{
3613 flag aSign, bSign;
bb98fe42 3614 uint32_t av, bv;
ff32e16e
PM
3615 a = float32_squash_input_denormal(a, status);
3616 b = float32_squash_input_denormal(b, status);
158142c2
FB
3617
3618 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3619 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3620 ) {
af39bc8c
AM
3621 if (float32_is_signaling_nan(a, status)
3622 || float32_is_signaling_nan(b, status)) {
ff32e16e 3623 float_raise(float_flag_invalid, status);
158142c2
FB
3624 }
3625 return 0;
3626 }
3627 aSign = extractFloat32Sign( a );
3628 bSign = extractFloat32Sign( b );
f090c9d4
PB
3629 av = float32_val(a);
3630 bv = float32_val(b);
bb98fe42 3631 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 3632 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
3633
3634}
3635
3636/*----------------------------------------------------------------------------
3637| Returns 1 if the single-precision floating-point value `a' is less than
3638| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
3639| exception. Otherwise, the comparison is performed according to the IEC/IEEE
ab52f973 3640| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3641*----------------------------------------------------------------------------*/
3642
ab52f973 3643int float32_lt_quiet(float32 a, float32 b, float_status *status)
158142c2 3644{
ab52f973
AB
3645 flag aSign, bSign;
3646 uint32_t av, bv;
3647 a = float32_squash_input_denormal(a, status);
3648 b = float32_squash_input_denormal(b, status);
158142c2 3649
ab52f973
AB
3650 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3651 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3652 ) {
3653 if (float32_is_signaling_nan(a, status)
3654 || float32_is_signaling_nan(b, status)) {
ff32e16e 3655 float_raise(float_flag_invalid, status);
158142c2 3656 }
ab52f973 3657 return 0;
158142c2 3658 }
ab52f973
AB
3659 aSign = extractFloat32Sign( a );
3660 bSign = extractFloat32Sign( b );
3661 av = float32_val(a);
3662 bv = float32_val(b);
3663 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
3664 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
3665
3666}
3667
3668/*----------------------------------------------------------------------------
ab52f973
AB
3669| Returns 1 if the single-precision floating-point values `a' and `b' cannot
3670| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
3671| comparison is performed according to the IEC/IEEE Standard for Binary
3672| Floating-Point Arithmetic.
158142c2
FB
3673*----------------------------------------------------------------------------*/
3674
ab52f973 3675int float32_unordered_quiet(float32 a, float32 b, float_status *status)
158142c2 3676{
ab52f973
AB
3677 a = float32_squash_input_denormal(a, status);
3678 b = float32_squash_input_denormal(b, status);
158142c2 3679
ab52f973
AB
3680 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3681 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3682 ) {
3683 if (float32_is_signaling_nan(a, status)
3684 || float32_is_signaling_nan(b, status)) {
3685 float_raise(float_flag_invalid, status);
158142c2 3686 }
ab52f973 3687 return 1;
158142c2 3688 }
ab52f973 3689 return 0;
158142c2
FB
3690}
3691
ab52f973 3692
158142c2
FB
3693/*----------------------------------------------------------------------------
3694| Returns the result of converting the double-precision floating-point value
3695| `a' to the single-precision floating-point format. The conversion is
3696| performed according to the IEC/IEEE Standard for Binary Floating-Point
3697| Arithmetic.
3698*----------------------------------------------------------------------------*/
3699
e5a41ffa 3700float32 float64_to_float32(float64 a, float_status *status)
158142c2
FB
3701{
3702 flag aSign;
0c48262d 3703 int aExp;
bb98fe42
AF
3704 uint64_t aSig;
3705 uint32_t zSig;
ff32e16e 3706 a = float64_squash_input_denormal(a, status);
158142c2
FB
3707
3708 aSig = extractFloat64Frac( a );
3709 aExp = extractFloat64Exp( a );
3710 aSign = extractFloat64Sign( a );
3711 if ( aExp == 0x7FF ) {
ff32e16e
PM
3712 if (aSig) {
3713 return commonNaNToFloat32(float64ToCommonNaN(a, status), status);
3714 }
158142c2
FB
3715 return packFloat32( aSign, 0xFF, 0 );
3716 }
3717 shift64RightJamming( aSig, 22, &aSig );
3718 zSig = aSig;
3719 if ( aExp || zSig ) {
3720 zSig |= 0x40000000;
3721 aExp -= 0x381;
3722 }
ff32e16e 3723 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
3724
3725}
3726
60011498
PB
3727
3728/*----------------------------------------------------------------------------
3729| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
3730| half-precision floating-point value, returning the result. After being
3731| shifted into the proper positions, the three fields are simply added
3732| together to form the result. This means that any integer portion of `zSig'
3733| will be added into the exponent. Since a properly normalized significand
3734| will have an integer portion equal to 1, the `zExp' input should be 1 less
3735| than the desired result exponent whenever `zSig' is a complete, normalized
3736| significand.
3737*----------------------------------------------------------------------------*/
0c48262d 3738static float16 packFloat16(flag zSign, int zExp, uint16_t zSig)
60011498 3739{
bb4d4bb3 3740 return make_float16(
bb98fe42 3741 (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
60011498
PB
3742}
3743
c4a1c5e7
PM
3744/*----------------------------------------------------------------------------
3745| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3746| and significand `zSig', and returns the proper half-precision floating-
3747| point value corresponding to the abstract input. Ordinarily, the abstract
3748| value is simply rounded and packed into the half-precision format, with
3749| the inexact exception raised if the abstract input cannot be represented
3750| exactly. However, if the abstract value is too large, the overflow and
3751| inexact exceptions are raised and an infinity or maximal finite value is
3752| returned. If the abstract value is too small, the input value is rounded to
3753| a subnormal number, and the underflow and inexact exceptions are raised if
3754| the abstract input cannot be represented exactly as a subnormal half-
3755| precision floating-point number.
3756| The `ieee' flag indicates whether to use IEEE standard half precision, or
3757| ARM-style "alternative representation", which omits the NaN and Inf
3758| encodings in order to raise the maximum representable exponent by one.
3759| The input significand `zSig' has its binary point between bits 22
3760| and 23, which is 13 bits to the left of the usual location. This shifted
3761| significand must be normalized or smaller. If `zSig' is not normalized,
3762| `zExp' must be 0; in that case, the result returned is a subnormal number,
3763| and it must not require rounding. In the usual case that `zSig' is
3764| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3765| Note the slightly odd position of the binary point in zSig compared with the
3766| other roundAndPackFloat functions. This should probably be fixed if we
3767| need to implement more float16 routines than just conversion.
3768| The handling of underflow and overflow follows the IEC/IEEE Standard for
3769| Binary Floating-Point Arithmetic.
3770*----------------------------------------------------------------------------*/
3771
0c48262d 3772static float16 roundAndPackFloat16(flag zSign, int zExp,
e5a41ffa
PM
3773 uint32_t zSig, flag ieee,
3774 float_status *status)
c4a1c5e7
PM
3775{
3776 int maxexp = ieee ? 29 : 30;
3777 uint32_t mask;
3778 uint32_t increment;
c4a1c5e7
PM
3779 bool rounding_bumps_exp;
3780 bool is_tiny = false;
3781
3782 /* Calculate the mask of bits of the mantissa which are not
3783 * representable in half-precision and will be lost.
3784 */
3785 if (zExp < 1) {
3786 /* Will be denormal in halfprec */
3787 mask = 0x00ffffff;
3788 if (zExp >= -11) {
3789 mask >>= 11 + zExp;
3790 }
3791 } else {
3792 /* Normal number in halfprec */
3793 mask = 0x00001fff;
3794 }
3795
a2f2d288 3796 switch (status->float_rounding_mode) {
c4a1c5e7
PM
3797 case float_round_nearest_even:
3798 increment = (mask + 1) >> 1;
3799 if ((zSig & mask) == increment) {
3800 increment = zSig & (increment << 1);
3801 }
3802 break;
f9288a76
PM
3803 case float_round_ties_away:
3804 increment = (mask + 1) >> 1;
3805 break;
c4a1c5e7
PM
3806 case float_round_up:
3807 increment = zSign ? 0 : mask;
3808 break;
3809 case float_round_down:
3810 increment = zSign ? mask : 0;
3811 break;
3812 default: /* round_to_zero */
3813 increment = 0;
3814 break;
3815 }
3816
3817 rounding_bumps_exp = (zSig + increment >= 0x01000000);
3818
3819 if (zExp > maxexp || (zExp == maxexp && rounding_bumps_exp)) {
3820 if (ieee) {
ff32e16e 3821 float_raise(float_flag_overflow | float_flag_inexact, status);
c4a1c5e7
PM
3822 return packFloat16(zSign, 0x1f, 0);
3823 } else {
ff32e16e 3824 float_raise(float_flag_invalid, status);
c4a1c5e7
PM
3825 return packFloat16(zSign, 0x1f, 0x3ff);
3826 }
3827 }
3828
3829 if (zExp < 0) {
3830 /* Note that flush-to-zero does not affect half-precision results */
3831 is_tiny =
a2f2d288 3832 (status->float_detect_tininess == float_tininess_before_rounding)
c4a1c5e7
PM
3833 || (zExp < -1)
3834 || (!rounding_bumps_exp);
3835 }
3836 if (zSig & mask) {
ff32e16e 3837 float_raise(float_flag_inexact, status);
c4a1c5e7 3838 if (is_tiny) {
ff32e16e 3839 float_raise(float_flag_underflow, status);
c4a1c5e7
PM
3840 }
3841 }
3842
3843 zSig += increment;
3844 if (rounding_bumps_exp) {
3845 zSig >>= 1;
3846 zExp++;
3847 }
3848
3849 if (zExp < -10) {
3850 return packFloat16(zSign, 0, 0);
3851 }
3852 if (zExp < 0) {
3853 zSig >>= -zExp;
3854 zExp = 0;
3855 }
3856 return packFloat16(zSign, zExp, zSig >> 13);
3857}
3858
210cbd49
AB
3859/*----------------------------------------------------------------------------
3860| If `a' is denormal and we are in flush-to-zero mode then set the
3861| input-denormal exception and return zero. Otherwise just return the value.
3862*----------------------------------------------------------------------------*/
3863float16 float16_squash_input_denormal(float16 a, float_status *status)
3864{
3865 if (status->flush_inputs_to_zero) {
3866 if (extractFloat16Exp(a) == 0 && extractFloat16Frac(a) != 0) {
3867 float_raise(float_flag_input_denormal, status);
3868 return make_float16(float16_val(a) & 0x8000);
3869 }
3870 }
3871 return a;
3872}
3873
0c48262d 3874static void normalizeFloat16Subnormal(uint32_t aSig, int *zExpPtr,
c4a1c5e7
PM
3875 uint32_t *zSigPtr)
3876{
3877 int8_t shiftCount = countLeadingZeros32(aSig) - 21;
3878 *zSigPtr = aSig << shiftCount;
3879 *zExpPtr = 1 - shiftCount;
3880}
3881
60011498
PB
3882/* Half precision floats come in two formats: standard IEEE and "ARM" format.
3883 The latter gains extra exponent range by omitting the NaN/Inf encodings. */
bb4d4bb3 3884
e5a41ffa 3885float32 float16_to_float32(float16 a, flag ieee, float_status *status)
60011498
PB
3886{
3887 flag aSign;
0c48262d 3888 int aExp;
bb98fe42 3889 uint32_t aSig;
60011498 3890
bb4d4bb3
PM
3891 aSign = extractFloat16Sign(a);
3892 aExp = extractFloat16Exp(a);
3893 aSig = extractFloat16Frac(a);
60011498
PB
3894
3895 if (aExp == 0x1f && ieee) {
3896 if (aSig) {
ff32e16e 3897 return commonNaNToFloat32(float16ToCommonNaN(a, status), status);
60011498 3898 }
4be8eeac 3899 return packFloat32(aSign, 0xff, 0);
60011498
PB
3900 }
3901 if (aExp == 0) {
60011498
PB
3902 if (aSig == 0) {
3903 return packFloat32(aSign, 0, 0);
3904 }
3905
c4a1c5e7
PM
3906 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3907 aExp--;
60011498
PB
3908 }
3909 return packFloat32( aSign, aExp + 0x70, aSig << 13);
3910}
3911
e5a41ffa 3912float16 float32_to_float16(float32 a, flag ieee, float_status *status)
60011498
PB
3913{
3914 flag aSign;
0c48262d 3915 int aExp;
bb98fe42 3916 uint32_t aSig;
38970efa 3917
ff32e16e 3918 a = float32_squash_input_denormal(a, status);
60011498
PB
3919
3920 aSig = extractFloat32Frac( a );
3921 aExp = extractFloat32Exp( a );
3922 aSign = extractFloat32Sign( a );
3923 if ( aExp == 0xFF ) {
3924 if (aSig) {
600e30d2 3925 /* Input is a NaN */
600e30d2 3926 if (!ieee) {
ff32e16e 3927 float_raise(float_flag_invalid, status);
600e30d2
PM
3928 return packFloat16(aSign, 0, 0);
3929 }
38970efa 3930 return commonNaNToFloat16(
ff32e16e 3931 float32ToCommonNaN(a, status), status);
60011498 3932 }
600e30d2
PM
3933 /* Infinity */
3934 if (!ieee) {
ff32e16e 3935 float_raise(float_flag_invalid, status);
600e30d2
PM
3936 return packFloat16(aSign, 0x1f, 0x3ff);
3937 }
3938 return packFloat16(aSign, 0x1f, 0);
60011498 3939 }
600e30d2 3940 if (aExp == 0 && aSig == 0) {
60011498
PB
3941 return packFloat16(aSign, 0, 0);
3942 }
38970efa
PM
3943 /* Decimal point between bits 22 and 23. Note that we add the 1 bit
3944 * even if the input is denormal; however this is harmless because
3945 * the largest possible single-precision denormal is still smaller
3946 * than the smallest representable half-precision denormal, and so we
3947 * will end up ignoring aSig and returning via the "always return zero"
3948 * codepath.
3949 */
60011498 3950 aSig |= 0x00800000;
c4a1c5e7 3951 aExp -= 0x71;
60011498 3952
ff32e16e 3953 return roundAndPackFloat16(aSign, aExp, aSig, ieee, status);
60011498
PB
3954}
3955
e5a41ffa 3956float64 float16_to_float64(float16 a, flag ieee, float_status *status)
14c9a07e
PM
3957{
3958 flag aSign;
0c48262d 3959 int aExp;
14c9a07e
PM
3960 uint32_t aSig;
3961
3962 aSign = extractFloat16Sign(a);
3963 aExp = extractFloat16Exp(a);
3964 aSig = extractFloat16Frac(a);
3965
3966 if (aExp == 0x1f && ieee) {
3967 if (aSig) {
3968 return commonNaNToFloat64(
ff32e16e 3969 float16ToCommonNaN(a, status), status);
14c9a07e
PM
3970 }
3971 return packFloat64(aSign, 0x7ff, 0);
3972 }
3973 if (aExp == 0) {
3974 if (aSig == 0) {
3975 return packFloat64(aSign, 0, 0);
3976 }
3977
3978 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3979 aExp--;
3980 }
3981 return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42);
3982}
3983
e5a41ffa 3984float16 float64_to_float16(float64 a, flag ieee, float_status *status)
14c9a07e
PM
3985{
3986 flag aSign;
0c48262d 3987 int aExp;
14c9a07e
PM
3988 uint64_t aSig;
3989 uint32_t zSig;
3990
ff32e16e 3991 a = float64_squash_input_denormal(a, status);
14c9a07e
PM
3992
3993 aSig = extractFloat64Frac(a);
3994 aExp = extractFloat64Exp(a);
3995 aSign = extractFloat64Sign(a);
3996 if (aExp == 0x7FF) {
3997 if (aSig) {
3998 /* Input is a NaN */
3999 if (!ieee) {
ff32e16e 4000 float_raise(float_flag_invalid, status);
14c9a07e
PM
4001 return packFloat16(aSign, 0, 0);
4002 }
4003 return commonNaNToFloat16(
ff32e16e 4004 float64ToCommonNaN(a, status), status);
14c9a07e
PM
4005 }
4006 /* Infinity */
4007 if (!ieee) {
ff32e16e 4008 float_raise(float_flag_invalid, status);
14c9a07e
PM
4009 return packFloat16(aSign, 0x1f, 0x3ff);
4010 }
4011 return packFloat16(aSign, 0x1f, 0);
4012 }
4013 shift64RightJamming(aSig, 29, &aSig);
4014 zSig = aSig;
4015 if (aExp == 0 && zSig == 0) {
4016 return packFloat16(aSign, 0, 0);
4017 }
4018 /* Decimal point between bits 22 and 23. Note that we add the 1 bit
4019 * even if the input is denormal; however this is harmless because
4020 * the largest possible single-precision denormal is still smaller
4021 * than the smallest representable half-precision denormal, and so we
4022 * will end up ignoring aSig and returning via the "always return zero"
4023 * codepath.
4024 */
4025 zSig |= 0x00800000;
4026 aExp -= 0x3F1;
4027
ff32e16e 4028 return roundAndPackFloat16(aSign, aExp, zSig, ieee, status);
14c9a07e
PM
4029}
4030
158142c2
FB
4031/*----------------------------------------------------------------------------
4032| Returns the result of converting the double-precision floating-point value
4033| `a' to the extended double-precision floating-point format. The conversion
4034| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4035| Arithmetic.
4036*----------------------------------------------------------------------------*/
4037
e5a41ffa 4038floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2
FB
4039{
4040 flag aSign;
0c48262d 4041 int aExp;
bb98fe42 4042 uint64_t aSig;
158142c2 4043
ff32e16e 4044 a = float64_squash_input_denormal(a, status);
158142c2
FB
4045 aSig = extractFloat64Frac( a );
4046 aExp = extractFloat64Exp( a );
4047 aSign = extractFloat64Sign( a );
4048 if ( aExp == 0x7FF ) {
ff32e16e
PM
4049 if (aSig) {
4050 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
4051 }
0f605c88
LV
4052 return packFloatx80(aSign,
4053 floatx80_infinity_high,
4054 floatx80_infinity_low);
158142c2
FB
4055 }
4056 if ( aExp == 0 ) {
4057 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4058 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4059 }
4060 return
4061 packFloatx80(
4062 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
4063
4064}
4065
158142c2
FB
4066/*----------------------------------------------------------------------------
4067| Returns the result of converting the double-precision floating-point value
4068| `a' to the quadruple-precision floating-point format. The conversion is
4069| performed according to the IEC/IEEE Standard for Binary Floating-Point
4070| Arithmetic.
4071*----------------------------------------------------------------------------*/
4072
e5a41ffa 4073float128 float64_to_float128(float64 a, float_status *status)
158142c2
FB
4074{
4075 flag aSign;
0c48262d 4076 int aExp;
bb98fe42 4077 uint64_t aSig, zSig0, zSig1;
158142c2 4078
ff32e16e 4079 a = float64_squash_input_denormal(a, status);
158142c2
FB
4080 aSig = extractFloat64Frac( a );
4081 aExp = extractFloat64Exp( a );
4082 aSign = extractFloat64Sign( a );
4083 if ( aExp == 0x7FF ) {
ff32e16e
PM
4084 if (aSig) {
4085 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
4086 }
158142c2
FB
4087 return packFloat128( aSign, 0x7FFF, 0, 0 );
4088 }
4089 if ( aExp == 0 ) {
4090 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4091 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4092 --aExp;
4093 }
4094 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
4095 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
4096
4097}
4098
158142c2
FB
4099
4100/*----------------------------------------------------------------------------
4101| Returns the remainder of the double-precision floating-point value `a'
4102| with respect to the corresponding value `b'. The operation is performed
4103| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4104*----------------------------------------------------------------------------*/
4105
e5a41ffa 4106float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 4107{
ed086f3d 4108 flag aSign, zSign;
0c48262d 4109 int aExp, bExp, expDiff;
bb98fe42
AF
4110 uint64_t aSig, bSig;
4111 uint64_t q, alternateASig;
4112 int64_t sigMean;
158142c2 4113
ff32e16e
PM
4114 a = float64_squash_input_denormal(a, status);
4115 b = float64_squash_input_denormal(b, status);
158142c2
FB
4116 aSig = extractFloat64Frac( a );
4117 aExp = extractFloat64Exp( a );
4118 aSign = extractFloat64Sign( a );
4119 bSig = extractFloat64Frac( b );
4120 bExp = extractFloat64Exp( b );
158142c2
FB
4121 if ( aExp == 0x7FF ) {
4122 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 4123 return propagateFloat64NaN(a, b, status);
158142c2 4124 }
ff32e16e 4125 float_raise(float_flag_invalid, status);
af39bc8c 4126 return float64_default_nan(status);
158142c2
FB
4127 }
4128 if ( bExp == 0x7FF ) {
ff32e16e
PM
4129 if (bSig) {
4130 return propagateFloat64NaN(a, b, status);
4131 }
158142c2
FB
4132 return a;
4133 }
4134 if ( bExp == 0 ) {
4135 if ( bSig == 0 ) {
ff32e16e 4136 float_raise(float_flag_invalid, status);
af39bc8c 4137 return float64_default_nan(status);
158142c2
FB
4138 }
4139 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4140 }
4141 if ( aExp == 0 ) {
4142 if ( aSig == 0 ) return a;
4143 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4144 }
4145 expDiff = aExp - bExp;
4146 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
4147 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4148 if ( expDiff < 0 ) {
4149 if ( expDiff < -1 ) return a;
4150 aSig >>= 1;
4151 }
4152 q = ( bSig <= aSig );
4153 if ( q ) aSig -= bSig;
4154 expDiff -= 64;
4155 while ( 0 < expDiff ) {
4156 q = estimateDiv128To64( aSig, 0, bSig );
4157 q = ( 2 < q ) ? q - 2 : 0;
4158 aSig = - ( ( bSig>>2 ) * q );
4159 expDiff -= 62;
4160 }
4161 expDiff += 64;
4162 if ( 0 < expDiff ) {
4163 q = estimateDiv128To64( aSig, 0, bSig );
4164 q = ( 2 < q ) ? q - 2 : 0;
4165 q >>= 64 - expDiff;
4166 bSig >>= 2;
4167 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4168 }
4169 else {
4170 aSig >>= 2;
4171 bSig >>= 2;
4172 }
4173 do {
4174 alternateASig = aSig;
4175 ++q;
4176 aSig -= bSig;
bb98fe42 4177 } while ( 0 <= (int64_t) aSig );
158142c2
FB
4178 sigMean = aSig + alternateASig;
4179 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4180 aSig = alternateASig;
4181 }
bb98fe42 4182 zSign = ( (int64_t) aSig < 0 );
158142c2 4183 if ( zSign ) aSig = - aSig;
ff32e16e 4184 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
4185
4186}
4187
374dfc33
AJ
4188/*----------------------------------------------------------------------------
4189| Returns the binary log of the double-precision floating-point value `a'.
4190| The operation is performed according to the IEC/IEEE Standard for Binary
4191| Floating-Point Arithmetic.
4192*----------------------------------------------------------------------------*/
e5a41ffa 4193float64 float64_log2(float64 a, float_status *status)
374dfc33
AJ
4194{
4195 flag aSign, zSign;
0c48262d 4196 int aExp;
bb98fe42 4197 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 4198 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
4199
4200 aSig = extractFloat64Frac( a );
4201 aExp = extractFloat64Exp( a );
4202 aSign = extractFloat64Sign( a );
4203
4204 if ( aExp == 0 ) {
4205 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
4206 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4207 }
4208 if ( aSign ) {
ff32e16e 4209 float_raise(float_flag_invalid, status);
af39bc8c 4210 return float64_default_nan(status);
374dfc33
AJ
4211 }
4212 if ( aExp == 0x7FF ) {
ff32e16e
PM
4213 if (aSig) {
4214 return propagateFloat64NaN(a, float64_zero, status);
4215 }
374dfc33
AJ
4216 return a;
4217 }
4218
4219 aExp -= 0x3FF;
4220 aSig |= LIT64( 0x0010000000000000 );
4221 zSign = aExp < 0;
bb98fe42 4222 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
4223 for (i = 1LL << 51; i > 0; i >>= 1) {
4224 mul64To128( aSig, aSig, &aSig0, &aSig1 );
4225 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
4226 if ( aSig & LIT64( 0x0020000000000000 ) ) {
4227 aSig >>= 1;
4228 zSig |= i;
4229 }
4230 }
4231
4232 if ( zSign )
4233 zSig = -zSig;
ff32e16e 4234 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
4235}
4236
158142c2
FB
4237/*----------------------------------------------------------------------------
4238| Returns 1 if the double-precision floating-point value `a' is equal to the
b689362d
AJ
4239| corresponding value `b', and 0 otherwise. The invalid exception is raised
4240| if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
4241| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4242*----------------------------------------------------------------------------*/
4243
e5a41ffa 4244int float64_eq(float64 a, float64 b, float_status *status)
158142c2 4245{
bb98fe42 4246 uint64_t av, bv;
ff32e16e
PM
4247 a = float64_squash_input_denormal(a, status);
4248 b = float64_squash_input_denormal(b, status);
158142c2
FB
4249
4250 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4251 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4252 ) {
ff32e16e 4253 float_raise(float_flag_invalid, status);
158142c2
FB
4254 return 0;
4255 }
f090c9d4 4256 av = float64_val(a);
a1b91bb4 4257 bv = float64_val(b);
bb98fe42 4258 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4259
4260}
4261
4262/*----------------------------------------------------------------------------
4263| Returns 1 if the double-precision floating-point value `a' is less than or
f5a64251
AJ
4264| equal to the corresponding value `b', and 0 otherwise. The invalid
4265| exception is raised if either operand is a NaN. The comparison is performed
4266| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4267*----------------------------------------------------------------------------*/
4268
e5a41ffa 4269int float64_le(float64 a, float64 b, float_status *status)
158142c2
FB
4270{
4271 flag aSign, bSign;
bb98fe42 4272 uint64_t av, bv;
ff32e16e
PM
4273 a = float64_squash_input_denormal(a, status);
4274 b = float64_squash_input_denormal(b, status);
158142c2
FB
4275
4276 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4277 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4278 ) {
ff32e16e 4279 float_raise(float_flag_invalid, status);
158142c2
FB
4280 return 0;
4281 }
4282 aSign = extractFloat64Sign( a );
4283 bSign = extractFloat64Sign( b );
f090c9d4 4284 av = float64_val(a);
a1b91bb4 4285 bv = float64_val(b);
bb98fe42 4286 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4287 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4288
4289}
4290
4291/*----------------------------------------------------------------------------
4292| Returns 1 if the double-precision floating-point value `a' is less than
f5a64251
AJ
4293| the corresponding value `b', and 0 otherwise. The invalid exception is
4294| raised if either operand is a NaN. The comparison is performed according
4295| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4296*----------------------------------------------------------------------------*/
4297
e5a41ffa 4298int float64_lt(float64 a, float64 b, float_status *status)
158142c2
FB
4299{
4300 flag aSign, bSign;
bb98fe42 4301 uint64_t av, bv;
158142c2 4302
ff32e16e
PM
4303 a = float64_squash_input_denormal(a, status);
4304 b = float64_squash_input_denormal(b, status);
158142c2
FB
4305 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4306 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4307 ) {
ff32e16e 4308 float_raise(float_flag_invalid, status);
158142c2
FB
4309 return 0;
4310 }
4311 aSign = extractFloat64Sign( a );
4312 bSign = extractFloat64Sign( b );
f090c9d4 4313 av = float64_val(a);
a1b91bb4 4314 bv = float64_val(b);
bb98fe42 4315 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4316 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4317
4318}
4319
67b7861d
AJ
4320/*----------------------------------------------------------------------------
4321| Returns 1 if the double-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4322| be compared, and 0 otherwise. The invalid exception is raised if either
4323| operand is a NaN. The comparison is performed according to the IEC/IEEE
4324| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4325*----------------------------------------------------------------------------*/
4326
e5a41ffa 4327int float64_unordered(float64 a, float64 b, float_status *status)
67b7861d 4328{
ff32e16e
PM
4329 a = float64_squash_input_denormal(a, status);
4330 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4331
4332 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4333 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4334 ) {
ff32e16e 4335 float_raise(float_flag_invalid, status);
67b7861d
AJ
4336 return 1;
4337 }
4338 return 0;
4339}
4340
158142c2
FB
4341/*----------------------------------------------------------------------------
4342| Returns 1 if the double-precision floating-point value `a' is equal to the
f5a64251
AJ
4343| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4344| exception.The comparison is performed according to the IEC/IEEE Standard
4345| for Binary Floating-Point Arithmetic.
158142c2
FB
4346*----------------------------------------------------------------------------*/
4347
e5a41ffa 4348int float64_eq_quiet(float64 a, float64 b, float_status *status)
158142c2 4349{
bb98fe42 4350 uint64_t av, bv;
ff32e16e
PM
4351 a = float64_squash_input_denormal(a, status);
4352 b = float64_squash_input_denormal(b, status);
158142c2
FB
4353
4354 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4355 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4356 ) {
af39bc8c
AM
4357 if (float64_is_signaling_nan(a, status)
4358 || float64_is_signaling_nan(b, status)) {
ff32e16e 4359 float_raise(float_flag_invalid, status);
b689362d 4360 }
158142c2
FB
4361 return 0;
4362 }
f090c9d4 4363 av = float64_val(a);
a1b91bb4 4364 bv = float64_val(b);
bb98fe42 4365 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4366
4367}
4368
4369/*----------------------------------------------------------------------------
4370| Returns 1 if the double-precision floating-point value `a' is less than or
4371| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4372| cause an exception. Otherwise, the comparison is performed according to the
4373| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4374*----------------------------------------------------------------------------*/
4375
e5a41ffa 4376int float64_le_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4377{
4378 flag aSign, bSign;
bb98fe42 4379 uint64_t av, bv;
ff32e16e
PM
4380 a = float64_squash_input_denormal(a, status);
4381 b = float64_squash_input_denormal(b, status);
158142c2
FB
4382
4383 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4384 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4385 ) {
af39bc8c
AM
4386 if (float64_is_signaling_nan(a, status)
4387 || float64_is_signaling_nan(b, status)) {
ff32e16e 4388 float_raise(float_flag_invalid, status);
158142c2
FB
4389 }
4390 return 0;
4391 }
4392 aSign = extractFloat64Sign( a );
4393 bSign = extractFloat64Sign( b );
f090c9d4 4394 av = float64_val(a);
a1b91bb4 4395 bv = float64_val(b);
bb98fe42 4396 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4397 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4398
4399}
4400
4401/*----------------------------------------------------------------------------
4402| Returns 1 if the double-precision floating-point value `a' is less than
4403| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4404| exception. Otherwise, the comparison is performed according to the IEC/IEEE
4405| Standard for Binary Floating-Point Arithmetic.
4406*----------------------------------------------------------------------------*/
4407
e5a41ffa 4408int float64_lt_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4409{
4410 flag aSign, bSign;
bb98fe42 4411 uint64_t av, bv;
ff32e16e
PM
4412 a = float64_squash_input_denormal(a, status);
4413 b = float64_squash_input_denormal(b, status);
158142c2
FB
4414
4415 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4416 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4417 ) {
af39bc8c
AM
4418 if (float64_is_signaling_nan(a, status)
4419 || float64_is_signaling_nan(b, status)) {
ff32e16e 4420 float_raise(float_flag_invalid, status);
158142c2
FB
4421 }
4422 return 0;
4423 }
4424 aSign = extractFloat64Sign( a );
4425 bSign = extractFloat64Sign( b );
f090c9d4 4426 av = float64_val(a);
a1b91bb4 4427 bv = float64_val(b);
bb98fe42 4428 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4429 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4430
4431}
4432
67b7861d
AJ
4433/*----------------------------------------------------------------------------
4434| Returns 1 if the double-precision floating-point values `a' and `b' cannot
4435| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4436| comparison is performed according to the IEC/IEEE Standard for Binary
4437| Floating-Point Arithmetic.
4438*----------------------------------------------------------------------------*/
4439
e5a41ffa 4440int float64_unordered_quiet(float64 a, float64 b, float_status *status)
67b7861d 4441{
ff32e16e
PM
4442 a = float64_squash_input_denormal(a, status);
4443 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4444
4445 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4446 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4447 ) {
af39bc8c
AM
4448 if (float64_is_signaling_nan(a, status)
4449 || float64_is_signaling_nan(b, status)) {
ff32e16e 4450 float_raise(float_flag_invalid, status);
67b7861d
AJ
4451 }
4452 return 1;
4453 }
4454 return 0;
4455}
4456
158142c2
FB
4457/*----------------------------------------------------------------------------
4458| Returns the result of converting the extended double-precision floating-
4459| point value `a' to the 32-bit two's complement integer format. The
4460| conversion is performed according to the IEC/IEEE Standard for Binary
4461| Floating-Point Arithmetic---which means in particular that the conversion
4462| is rounded according to the current rounding mode. If `a' is a NaN, the
4463| largest positive integer is returned. Otherwise, if the conversion
4464| overflows, the largest integer with the same sign as `a' is returned.
4465*----------------------------------------------------------------------------*/
4466
f4014512 4467int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2
FB
4468{
4469 flag aSign;
f4014512 4470 int32_t aExp, shiftCount;
bb98fe42 4471 uint64_t aSig;
158142c2 4472
d1eb8f2a
AD
4473 if (floatx80_invalid_encoding(a)) {
4474 float_raise(float_flag_invalid, status);
4475 return 1 << 31;
4476 }
158142c2
FB
4477 aSig = extractFloatx80Frac( a );
4478 aExp = extractFloatx80Exp( a );
4479 aSign = extractFloatx80Sign( a );
bb98fe42 4480 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4481 shiftCount = 0x4037 - aExp;
4482 if ( shiftCount <= 0 ) shiftCount = 1;
4483 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 4484 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
4485
4486}
4487
4488/*----------------------------------------------------------------------------
4489| Returns the result of converting the extended double-precision floating-
4490| point value `a' to the 32-bit two's complement integer format. The
4491| conversion is performed according to the IEC/IEEE Standard for Binary
4492| Floating-Point Arithmetic, except that the conversion is always rounded
4493| toward zero. If `a' is a NaN, the largest positive integer is returned.
4494| Otherwise, if the conversion overflows, the largest integer with the same
4495| sign as `a' is returned.
4496*----------------------------------------------------------------------------*/
4497
f4014512 4498int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4499{
4500 flag aSign;
f4014512 4501 int32_t aExp, shiftCount;
bb98fe42 4502 uint64_t aSig, savedASig;
b3a6a2e0 4503 int32_t z;
158142c2 4504
d1eb8f2a
AD
4505 if (floatx80_invalid_encoding(a)) {
4506 float_raise(float_flag_invalid, status);
4507 return 1 << 31;
4508 }
158142c2
FB
4509 aSig = extractFloatx80Frac( a );
4510 aExp = extractFloatx80Exp( a );
4511 aSign = extractFloatx80Sign( a );
4512 if ( 0x401E < aExp ) {
bb98fe42 4513 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4514 goto invalid;
4515 }
4516 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4517 if (aExp || aSig) {
4518 status->float_exception_flags |= float_flag_inexact;
4519 }
158142c2
FB
4520 return 0;
4521 }
4522 shiftCount = 0x403E - aExp;
4523 savedASig = aSig;
4524 aSig >>= shiftCount;
4525 z = aSig;
4526 if ( aSign ) z = - z;
4527 if ( ( z < 0 ) ^ aSign ) {
4528 invalid:
ff32e16e 4529 float_raise(float_flag_invalid, status);
bb98fe42 4530 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
4531 }
4532 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 4533 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4534 }
4535 return z;
4536
4537}
4538
4539/*----------------------------------------------------------------------------
4540| Returns the result of converting the extended double-precision floating-
4541| point value `a' to the 64-bit two's complement integer format. The
4542| conversion is performed according to the IEC/IEEE Standard for Binary
4543| Floating-Point Arithmetic---which means in particular that the conversion
4544| is rounded according to the current rounding mode. If `a' is a NaN,
4545| the largest positive integer is returned. Otherwise, if the conversion
4546| overflows, the largest integer with the same sign as `a' is returned.
4547*----------------------------------------------------------------------------*/
4548
f42c2224 4549int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2
FB
4550{
4551 flag aSign;
f4014512 4552 int32_t aExp, shiftCount;
bb98fe42 4553 uint64_t aSig, aSigExtra;
158142c2 4554
d1eb8f2a
AD
4555 if (floatx80_invalid_encoding(a)) {
4556 float_raise(float_flag_invalid, status);
4557 return 1ULL << 63;
4558 }
158142c2
FB
4559 aSig = extractFloatx80Frac( a );
4560 aExp = extractFloatx80Exp( a );
4561 aSign = extractFloatx80Sign( a );
4562 shiftCount = 0x403E - aExp;
4563 if ( shiftCount <= 0 ) {
4564 if ( shiftCount ) {
ff32e16e 4565 float_raise(float_flag_invalid, status);
0f605c88 4566 if (!aSign || floatx80_is_any_nan(a)) {
158142c2
FB
4567 return LIT64( 0x7FFFFFFFFFFFFFFF );
4568 }
bb98fe42 4569 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4570 }
4571 aSigExtra = 0;
4572 }
4573 else {
4574 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4575 }
ff32e16e 4576 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
4577
4578}
4579
4580/*----------------------------------------------------------------------------
4581| Returns the result of converting the extended double-precision floating-
4582| point value `a' to the 64-bit two's complement integer format. The
4583| conversion is performed according to the IEC/IEEE Standard for Binary
4584| Floating-Point Arithmetic, except that the conversion is always rounded
4585| toward zero. If `a' is a NaN, the largest positive integer is returned.
4586| Otherwise, if the conversion overflows, the largest integer with the same
4587| sign as `a' is returned.
4588*----------------------------------------------------------------------------*/
4589
f42c2224 4590int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4591{
4592 flag aSign;
f4014512 4593 int32_t aExp, shiftCount;
bb98fe42 4594 uint64_t aSig;
f42c2224 4595 int64_t z;
158142c2 4596
d1eb8f2a
AD
4597 if (floatx80_invalid_encoding(a)) {
4598 float_raise(float_flag_invalid, status);
4599 return 1ULL << 63;
4600 }
158142c2
FB
4601 aSig = extractFloatx80Frac( a );
4602 aExp = extractFloatx80Exp( a );
4603 aSign = extractFloatx80Sign( a );
4604 shiftCount = aExp - 0x403E;
4605 if ( 0 <= shiftCount ) {
4606 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4607 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 4608 float_raise(float_flag_invalid, status);
158142c2
FB
4609 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4610 return LIT64( 0x7FFFFFFFFFFFFFFF );
4611 }
4612 }
bb98fe42 4613 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4614 }
4615 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4616 if (aExp | aSig) {
4617 status->float_exception_flags |= float_flag_inexact;
4618 }
158142c2
FB
4619 return 0;
4620 }
4621 z = aSig>>( - shiftCount );
bb98fe42 4622 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 4623 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4624 }
4625 if ( aSign ) z = - z;
4626 return z;
4627
4628}
4629
4630/*----------------------------------------------------------------------------
4631| Returns the result of converting the extended double-precision floating-
4632| point value `a' to the single-precision floating-point format. The
4633| conversion is performed according to the IEC/IEEE Standard for Binary
4634| Floating-Point Arithmetic.
4635*----------------------------------------------------------------------------*/
4636
e5a41ffa 4637float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2
FB
4638{
4639 flag aSign;
f4014512 4640 int32_t aExp;
bb98fe42 4641 uint64_t aSig;
158142c2 4642
d1eb8f2a
AD
4643 if (floatx80_invalid_encoding(a)) {
4644 float_raise(float_flag_invalid, status);
4645 return float32_default_nan(status);
4646 }
158142c2
FB
4647 aSig = extractFloatx80Frac( a );
4648 aExp = extractFloatx80Exp( a );
4649 aSign = extractFloatx80Sign( a );
4650 if ( aExp == 0x7FFF ) {
bb98fe42 4651 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4652 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4653 }
4654 return packFloat32( aSign, 0xFF, 0 );
4655 }
4656 shift64RightJamming( aSig, 33, &aSig );
4657 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 4658 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
4659
4660}
4661
4662/*----------------------------------------------------------------------------
4663| Returns the result of converting the extended double-precision floating-
4664| point value `a' to the double-precision floating-point format. The
4665| conversion is performed according to the IEC/IEEE Standard for Binary
4666| Floating-Point Arithmetic.
4667*----------------------------------------------------------------------------*/
4668
e5a41ffa 4669float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2
FB
4670{
4671 flag aSign;
f4014512 4672 int32_t aExp;
bb98fe42 4673 uint64_t aSig, zSig;
158142c2 4674
d1eb8f2a
AD
4675 if (floatx80_invalid_encoding(a)) {
4676 float_raise(float_flag_invalid, status);
4677 return float64_default_nan(status);
4678 }
158142c2
FB
4679 aSig = extractFloatx80Frac( a );
4680 aExp = extractFloatx80Exp( a );
4681 aSign = extractFloatx80Sign( a );
4682 if ( aExp == 0x7FFF ) {
bb98fe42 4683 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4684 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4685 }
4686 return packFloat64( aSign, 0x7FF, 0 );
4687 }
4688 shift64RightJamming( aSig, 1, &zSig );
4689 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 4690 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
4691
4692}
4693
158142c2
FB
4694/*----------------------------------------------------------------------------
4695| Returns the result of converting the extended double-precision floating-
4696| point value `a' to the quadruple-precision floating-point format. The
4697| conversion is performed according to the IEC/IEEE Standard for Binary
4698| Floating-Point Arithmetic.
4699*----------------------------------------------------------------------------*/
4700
e5a41ffa 4701float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2
FB
4702{
4703 flag aSign;
0c48262d 4704 int aExp;
bb98fe42 4705 uint64_t aSig, zSig0, zSig1;
158142c2 4706
d1eb8f2a
AD
4707 if (floatx80_invalid_encoding(a)) {
4708 float_raise(float_flag_invalid, status);
4709 return float128_default_nan(status);
4710 }
158142c2
FB
4711 aSig = extractFloatx80Frac( a );
4712 aExp = extractFloatx80Exp( a );
4713 aSign = extractFloatx80Sign( a );
bb98fe42 4714 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4715 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4716 }
4717 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
4718 return packFloat128( aSign, aExp, zSig0, zSig1 );
4719
4720}
4721
0f721292
LV
4722/*----------------------------------------------------------------------------
4723| Rounds the extended double-precision floating-point value `a'
4724| to the precision provided by floatx80_rounding_precision and returns the
4725| result as an extended double-precision floating-point value.
4726| The operation is performed according to the IEC/IEEE Standard for Binary
4727| Floating-Point Arithmetic.
4728*----------------------------------------------------------------------------*/
4729
4730floatx80 floatx80_round(floatx80 a, float_status *status)
4731{
4732 return roundAndPackFloatx80(status->floatx80_rounding_precision,
4733 extractFloatx80Sign(a),
4734 extractFloatx80Exp(a),
4735 extractFloatx80Frac(a), 0, status);
4736}
4737
158142c2
FB
4738/*----------------------------------------------------------------------------
4739| Rounds the extended double-precision floating-point value `a' to an integer,
4740| and returns the result as an extended quadruple-precision floating-point
4741| value. The operation is performed according to the IEC/IEEE Standard for
4742| Binary Floating-Point Arithmetic.
4743*----------------------------------------------------------------------------*/
4744
e5a41ffa 4745floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2
FB
4746{
4747 flag aSign;
f4014512 4748 int32_t aExp;
bb98fe42 4749 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
4750 floatx80 z;
4751
d1eb8f2a
AD
4752 if (floatx80_invalid_encoding(a)) {
4753 float_raise(float_flag_invalid, status);
4754 return floatx80_default_nan(status);
4755 }
158142c2
FB
4756 aExp = extractFloatx80Exp( a );
4757 if ( 0x403E <= aExp ) {
bb98fe42 4758 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 4759 return propagateFloatx80NaN(a, a, status);
158142c2
FB
4760 }
4761 return a;
4762 }
4763 if ( aExp < 0x3FFF ) {
4764 if ( ( aExp == 0 )
bb98fe42 4765 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
158142c2
FB
4766 return a;
4767 }
a2f2d288 4768 status->float_exception_flags |= float_flag_inexact;
158142c2 4769 aSign = extractFloatx80Sign( a );
a2f2d288 4770 switch (status->float_rounding_mode) {
158142c2 4771 case float_round_nearest_even:
bb98fe42 4772 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
4773 ) {
4774 return
4775 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
4776 }
4777 break;
f9288a76
PM
4778 case float_round_ties_away:
4779 if (aExp == 0x3FFE) {
4780 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
4781 }
4782 break;
158142c2
FB
4783 case float_round_down:
4784 return
4785 aSign ?
4786 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
4787 : packFloatx80( 0, 0, 0 );
4788 case float_round_up:
4789 return
4790 aSign ? packFloatx80( 1, 0, 0 )
4791 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
4792 }
4793 return packFloatx80( aSign, 0, 0 );
4794 }
4795 lastBitMask = 1;
4796 lastBitMask <<= 0x403E - aExp;
4797 roundBitsMask = lastBitMask - 1;
4798 z = a;
a2f2d288 4799 switch (status->float_rounding_mode) {
dc355b76 4800 case float_round_nearest_even:
158142c2 4801 z.low += lastBitMask>>1;
dc355b76
PM
4802 if ((z.low & roundBitsMask) == 0) {
4803 z.low &= ~lastBitMask;
4804 }
4805 break;
f9288a76
PM
4806 case float_round_ties_away:
4807 z.low += lastBitMask >> 1;
4808 break;
dc355b76
PM
4809 case float_round_to_zero:
4810 break;
4811 case float_round_up:
4812 if (!extractFloatx80Sign(z)) {
4813 z.low += roundBitsMask;
4814 }
4815 break;
4816 case float_round_down:
4817 if (extractFloatx80Sign(z)) {
158142c2
FB
4818 z.low += roundBitsMask;
4819 }
dc355b76
PM
4820 break;
4821 default:
4822 abort();
158142c2
FB
4823 }
4824 z.low &= ~ roundBitsMask;
4825 if ( z.low == 0 ) {
4826 ++z.high;
4827 z.low = LIT64( 0x8000000000000000 );
4828 }
a2f2d288
PM
4829 if (z.low != a.low) {
4830 status->float_exception_flags |= float_flag_inexact;
4831 }
158142c2
FB
4832 return z;
4833
4834}
4835
4836/*----------------------------------------------------------------------------
4837| Returns the result of adding the absolute values of the extended double-
4838| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
4839| negated before being returned. `zSign' is ignored if the result is a NaN.
4840| The addition is performed according to the IEC/IEEE Standard for Binary
4841| Floating-Point Arithmetic.
4842*----------------------------------------------------------------------------*/
4843
e5a41ffa
PM
4844static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
4845 float_status *status)
158142c2 4846{
f4014512 4847 int32_t aExp, bExp, zExp;
bb98fe42 4848 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 4849 int32_t expDiff;
158142c2
FB
4850
4851 aSig = extractFloatx80Frac( a );
4852 aExp = extractFloatx80Exp( a );
4853 bSig = extractFloatx80Frac( b );
4854 bExp = extractFloatx80Exp( b );
4855 expDiff = aExp - bExp;
4856 if ( 0 < expDiff ) {
4857 if ( aExp == 0x7FFF ) {
ff32e16e
PM
4858 if ((uint64_t)(aSig << 1)) {
4859 return propagateFloatx80NaN(a, b, status);
4860 }
158142c2
FB
4861 return a;
4862 }
4863 if ( bExp == 0 ) --expDiff;
4864 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4865 zExp = aExp;
4866 }
4867 else if ( expDiff < 0 ) {
4868 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4869 if ((uint64_t)(bSig << 1)) {
4870 return propagateFloatx80NaN(a, b, status);
4871 }
0f605c88
LV
4872 return packFloatx80(zSign,
4873 floatx80_infinity_high,
4874 floatx80_infinity_low);
158142c2
FB
4875 }
4876 if ( aExp == 0 ) ++expDiff;
4877 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4878 zExp = bExp;
4879 }
4880 else {
4881 if ( aExp == 0x7FFF ) {
bb98fe42 4882 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 4883 return propagateFloatx80NaN(a, b, status);
158142c2
FB
4884 }
4885 return a;
4886 }
4887 zSig1 = 0;
4888 zSig0 = aSig + bSig;
4889 if ( aExp == 0 ) {
4890 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
4891 goto roundAndPack;
4892 }
4893 zExp = aExp;
4894 goto shiftRight1;
4895 }
4896 zSig0 = aSig + bSig;
bb98fe42 4897 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
4898 shiftRight1:
4899 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
4900 zSig0 |= LIT64( 0x8000000000000000 );
4901 ++zExp;
4902 roundAndPack:
a2f2d288 4903 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 4904 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
4905}
4906
4907/*----------------------------------------------------------------------------
4908| Returns the result of subtracting the absolute values of the extended
4909| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
4910| difference is negated before being returned. `zSign' is ignored if the
4911| result is a NaN. The subtraction is performed according to the IEC/IEEE
4912| Standard for Binary Floating-Point Arithmetic.
4913*----------------------------------------------------------------------------*/
4914
e5a41ffa
PM
4915static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
4916 float_status *status)
158142c2 4917{
f4014512 4918 int32_t aExp, bExp, zExp;
bb98fe42 4919 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 4920 int32_t expDiff;
158142c2
FB
4921
4922 aSig = extractFloatx80Frac( a );
4923 aExp = extractFloatx80Exp( a );
4924 bSig = extractFloatx80Frac( b );
4925 bExp = extractFloatx80Exp( b );
4926 expDiff = aExp - bExp;
4927 if ( 0 < expDiff ) goto aExpBigger;
4928 if ( expDiff < 0 ) goto bExpBigger;
4929 if ( aExp == 0x7FFF ) {
bb98fe42 4930 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 4931 return propagateFloatx80NaN(a, b, status);
158142c2 4932 }
ff32e16e 4933 float_raise(float_flag_invalid, status);
af39bc8c 4934 return floatx80_default_nan(status);
158142c2
FB
4935 }
4936 if ( aExp == 0 ) {
4937 aExp = 1;
4938 bExp = 1;
4939 }
4940 zSig1 = 0;
4941 if ( bSig < aSig ) goto aBigger;
4942 if ( aSig < bSig ) goto bBigger;
a2f2d288 4943 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
4944 bExpBigger:
4945 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4946 if ((uint64_t)(bSig << 1)) {
4947 return propagateFloatx80NaN(a, b, status);
4948 }
0f605c88
LV
4949 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
4950 floatx80_infinity_low);
158142c2
FB
4951 }
4952 if ( aExp == 0 ) ++expDiff;
4953 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4954 bBigger:
4955 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
4956 zExp = bExp;
4957 zSign ^= 1;
4958 goto normalizeRoundAndPack;
4959 aExpBigger:
4960 if ( aExp == 0x7FFF ) {
ff32e16e
PM
4961 if ((uint64_t)(aSig << 1)) {
4962 return propagateFloatx80NaN(a, b, status);
4963 }
158142c2
FB
4964 return a;
4965 }
4966 if ( bExp == 0 ) --expDiff;
4967 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4968 aBigger:
4969 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
4970 zExp = aExp;
4971 normalizeRoundAndPack:
a2f2d288 4972 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 4973 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
4974}
4975
4976/*----------------------------------------------------------------------------
4977| Returns the result of adding the extended double-precision floating-point
4978| values `a' and `b'. The operation is performed according to the IEC/IEEE
4979| Standard for Binary Floating-Point Arithmetic.
4980*----------------------------------------------------------------------------*/
4981
e5a41ffa 4982floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
4983{
4984 flag aSign, bSign;
4985
d1eb8f2a
AD
4986 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4987 float_raise(float_flag_invalid, status);
4988 return floatx80_default_nan(status);
4989 }
158142c2
FB
4990 aSign = extractFloatx80Sign( a );
4991 bSign = extractFloatx80Sign( b );
4992 if ( aSign == bSign ) {
ff32e16e 4993 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4994 }
4995 else {
ff32e16e 4996 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4997 }
4998
4999}
5000
5001/*----------------------------------------------------------------------------
5002| Returns the result of subtracting the extended double-precision floating-
5003| point values `a' and `b'. The operation is performed according to the
5004| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5005*----------------------------------------------------------------------------*/
5006
e5a41ffa 5007floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5008{
5009 flag aSign, bSign;
5010
d1eb8f2a
AD
5011 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5012 float_raise(float_flag_invalid, status);
5013 return floatx80_default_nan(status);
5014 }
158142c2
FB
5015 aSign = extractFloatx80Sign( a );
5016 bSign = extractFloatx80Sign( b );
5017 if ( aSign == bSign ) {
ff32e16e 5018 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5019 }
5020 else {
ff32e16e 5021 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5022 }
5023
5024}
5025
5026/*----------------------------------------------------------------------------
5027| Returns the result of multiplying the extended double-precision floating-
5028| point values `a' and `b'. The operation is performed according to the
5029| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5030*----------------------------------------------------------------------------*/
5031
e5a41ffa 5032floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5033{
5034 flag aSign, bSign, zSign;
f4014512 5035 int32_t aExp, bExp, zExp;
bb98fe42 5036 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 5037
d1eb8f2a
AD
5038 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5039 float_raise(float_flag_invalid, status);
5040 return floatx80_default_nan(status);
5041 }
158142c2
FB
5042 aSig = extractFloatx80Frac( a );
5043 aExp = extractFloatx80Exp( a );
5044 aSign = extractFloatx80Sign( a );
5045 bSig = extractFloatx80Frac( b );
5046 bExp = extractFloatx80Exp( b );
5047 bSign = extractFloatx80Sign( b );
5048 zSign = aSign ^ bSign;
5049 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5050 if ( (uint64_t) ( aSig<<1 )
5051 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5052 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5053 }
5054 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
5055 return packFloatx80(zSign, floatx80_infinity_high,
5056 floatx80_infinity_low);
158142c2
FB
5057 }
5058 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5059 if ((uint64_t)(bSig << 1)) {
5060 return propagateFloatx80NaN(a, b, status);
5061 }
158142c2
FB
5062 if ( ( aExp | aSig ) == 0 ) {
5063 invalid:
ff32e16e 5064 float_raise(float_flag_invalid, status);
af39bc8c 5065 return floatx80_default_nan(status);
158142c2 5066 }
0f605c88
LV
5067 return packFloatx80(zSign, floatx80_infinity_high,
5068 floatx80_infinity_low);
158142c2
FB
5069 }
5070 if ( aExp == 0 ) {
5071 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5072 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5073 }
5074 if ( bExp == 0 ) {
5075 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5076 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5077 }
5078 zExp = aExp + bExp - 0x3FFE;
5079 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 5080 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
5081 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5082 --zExp;
5083 }
a2f2d288 5084 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5085 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5086}
5087
5088/*----------------------------------------------------------------------------
5089| Returns the result of dividing the extended double-precision floating-point
5090| value `a' by the corresponding value `b'. The operation is performed
5091| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5092*----------------------------------------------------------------------------*/
5093
e5a41ffa 5094floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5095{
5096 flag aSign, bSign, zSign;
f4014512 5097 int32_t aExp, bExp, zExp;
bb98fe42
AF
5098 uint64_t aSig, bSig, zSig0, zSig1;
5099 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 5100
d1eb8f2a
AD
5101 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5102 float_raise(float_flag_invalid, status);
5103 return floatx80_default_nan(status);
5104 }
158142c2
FB
5105 aSig = extractFloatx80Frac( a );
5106 aExp = extractFloatx80Exp( a );
5107 aSign = extractFloatx80Sign( a );
5108 bSig = extractFloatx80Frac( b );
5109 bExp = extractFloatx80Exp( b );
5110 bSign = extractFloatx80Sign( b );
5111 zSign = aSign ^ bSign;
5112 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5113 if ((uint64_t)(aSig << 1)) {
5114 return propagateFloatx80NaN(a, b, status);
5115 }
158142c2 5116 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5117 if ((uint64_t)(bSig << 1)) {
5118 return propagateFloatx80NaN(a, b, status);
5119 }
158142c2
FB
5120 goto invalid;
5121 }
0f605c88
LV
5122 return packFloatx80(zSign, floatx80_infinity_high,
5123 floatx80_infinity_low);
158142c2
FB
5124 }
5125 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5126 if ((uint64_t)(bSig << 1)) {
5127 return propagateFloatx80NaN(a, b, status);
5128 }
158142c2
FB
5129 return packFloatx80( zSign, 0, 0 );
5130 }
5131 if ( bExp == 0 ) {
5132 if ( bSig == 0 ) {
5133 if ( ( aExp | aSig ) == 0 ) {
5134 invalid:
ff32e16e 5135 float_raise(float_flag_invalid, status);
af39bc8c 5136 return floatx80_default_nan(status);
158142c2 5137 }
ff32e16e 5138 float_raise(float_flag_divbyzero, status);
0f605c88
LV
5139 return packFloatx80(zSign, floatx80_infinity_high,
5140 floatx80_infinity_low);
158142c2
FB
5141 }
5142 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5143 }
5144 if ( aExp == 0 ) {
5145 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5146 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5147 }
5148 zExp = aExp - bExp + 0x3FFE;
5149 rem1 = 0;
5150 if ( bSig <= aSig ) {
5151 shift128Right( aSig, 0, 1, &aSig, &rem1 );
5152 ++zExp;
5153 }
5154 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
5155 mul64To128( bSig, zSig0, &term0, &term1 );
5156 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 5157 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5158 --zSig0;
5159 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
5160 }
5161 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 5162 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
5163 mul64To128( bSig, zSig1, &term1, &term2 );
5164 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 5165 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5166 --zSig1;
5167 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
5168 }
5169 zSig1 |= ( ( rem1 | rem2 ) != 0 );
5170 }
a2f2d288 5171 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5172 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5173}
5174
5175/*----------------------------------------------------------------------------
5176| Returns the remainder of the extended double-precision floating-point value
5177| `a' with respect to the corresponding value `b'. The operation is performed
5178| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5179*----------------------------------------------------------------------------*/
5180
e5a41ffa 5181floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
158142c2 5182{
ed086f3d 5183 flag aSign, zSign;
f4014512 5184 int32_t aExp, bExp, expDiff;
bb98fe42
AF
5185 uint64_t aSig0, aSig1, bSig;
5186 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 5187
d1eb8f2a
AD
5188 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5189 float_raise(float_flag_invalid, status);
5190 return floatx80_default_nan(status);
5191 }
158142c2
FB
5192 aSig0 = extractFloatx80Frac( a );
5193 aExp = extractFloatx80Exp( a );
5194 aSign = extractFloatx80Sign( a );
5195 bSig = extractFloatx80Frac( b );
5196 bExp = extractFloatx80Exp( b );
158142c2 5197 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5198 if ( (uint64_t) ( aSig0<<1 )
5199 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5200 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5201 }
5202 goto invalid;
5203 }
5204 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5205 if ((uint64_t)(bSig << 1)) {
5206 return propagateFloatx80NaN(a, b, status);
5207 }
158142c2
FB
5208 return a;
5209 }
5210 if ( bExp == 0 ) {
5211 if ( bSig == 0 ) {
5212 invalid:
ff32e16e 5213 float_raise(float_flag_invalid, status);
af39bc8c 5214 return floatx80_default_nan(status);
158142c2
FB
5215 }
5216 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5217 }
5218 if ( aExp == 0 ) {
bb98fe42 5219 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
158142c2
FB
5220 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5221 }
5222 bSig |= LIT64( 0x8000000000000000 );
5223 zSign = aSign;
5224 expDiff = aExp - bExp;
5225 aSig1 = 0;
5226 if ( expDiff < 0 ) {
5227 if ( expDiff < -1 ) return a;
5228 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5229 expDiff = 0;
5230 }
5231 q = ( bSig <= aSig0 );
5232 if ( q ) aSig0 -= bSig;
5233 expDiff -= 64;
5234 while ( 0 < expDiff ) {
5235 q = estimateDiv128To64( aSig0, aSig1, bSig );
5236 q = ( 2 < q ) ? q - 2 : 0;
5237 mul64To128( bSig, q, &term0, &term1 );
5238 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5239 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5240 expDiff -= 62;
5241 }
5242 expDiff += 64;
5243 if ( 0 < expDiff ) {
5244 q = estimateDiv128To64( aSig0, aSig1, bSig );
5245 q = ( 2 < q ) ? q - 2 : 0;
5246 q >>= 64 - expDiff;
5247 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5248 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5249 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5250 while ( le128( term0, term1, aSig0, aSig1 ) ) {
5251 ++q;
5252 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5253 }
5254 }
5255 else {
5256 term1 = 0;
5257 term0 = bSig;
5258 }
5259 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
5260 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
5261 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
5262 && ( q & 1 ) )
5263 ) {
5264 aSig0 = alternateASig0;
5265 aSig1 = alternateASig1;
5266 zSign = ! zSign;
5267 }
5268 return
5269 normalizeRoundAndPackFloatx80(
ff32e16e 5270 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
5271
5272}
5273
5274/*----------------------------------------------------------------------------
5275| Returns the square root of the extended double-precision floating-point
5276| value `a'. The operation is performed according to the IEC/IEEE Standard
5277| for Binary Floating-Point Arithmetic.
5278*----------------------------------------------------------------------------*/
5279
e5a41ffa 5280floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2
FB
5281{
5282 flag aSign;
f4014512 5283 int32_t aExp, zExp;
bb98fe42
AF
5284 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5285 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 5286
d1eb8f2a
AD
5287 if (floatx80_invalid_encoding(a)) {
5288 float_raise(float_flag_invalid, status);
5289 return floatx80_default_nan(status);
5290 }
158142c2
FB
5291 aSig0 = extractFloatx80Frac( a );
5292 aExp = extractFloatx80Exp( a );
5293 aSign = extractFloatx80Sign( a );
5294 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5295 if ((uint64_t)(aSig0 << 1)) {
5296 return propagateFloatx80NaN(a, a, status);
5297 }
158142c2
FB
5298 if ( ! aSign ) return a;
5299 goto invalid;
5300 }
5301 if ( aSign ) {
5302 if ( ( aExp | aSig0 ) == 0 ) return a;
5303 invalid:
ff32e16e 5304 float_raise(float_flag_invalid, status);
af39bc8c 5305 return floatx80_default_nan(status);
158142c2
FB
5306 }
5307 if ( aExp == 0 ) {
5308 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5309 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5310 }
5311 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5312 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5313 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5314 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5315 doubleZSig0 = zSig0<<1;
5316 mul64To128( zSig0, zSig0, &term0, &term1 );
5317 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 5318 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5319 --zSig0;
5320 doubleZSig0 -= 2;
5321 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5322 }
5323 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5324 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5325 if ( zSig1 == 0 ) zSig1 = 1;
5326 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5327 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5328 mul64To128( zSig1, zSig1, &term2, &term3 );
5329 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 5330 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5331 --zSig1;
5332 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5333 term3 |= 1;
5334 term2 |= doubleZSig0;
5335 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5336 }
5337 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5338 }
5339 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5340 zSig0 |= doubleZSig0;
a2f2d288
PM
5341 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5342 0, zExp, zSig0, zSig1, status);
158142c2
FB
5343}
5344
5345/*----------------------------------------------------------------------------
b689362d
AJ
5346| Returns 1 if the extended double-precision floating-point value `a' is equal
5347| to the corresponding value `b', and 0 otherwise. The invalid exception is
5348| raised if either operand is a NaN. Otherwise, the comparison is performed
5349| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5350*----------------------------------------------------------------------------*/
5351
e5a41ffa 5352int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5353{
5354
d1eb8f2a
AD
5355 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5356 || (extractFloatx80Exp(a) == 0x7FFF
5357 && (uint64_t) (extractFloatx80Frac(a) << 1))
5358 || (extractFloatx80Exp(b) == 0x7FFF
5359 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5360 ) {
ff32e16e 5361 float_raise(float_flag_invalid, status);
158142c2
FB
5362 return 0;
5363 }
5364 return
5365 ( a.low == b.low )
5366 && ( ( a.high == b.high )
5367 || ( ( a.low == 0 )
bb98fe42 5368 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5369 );
5370
5371}
5372
5373/*----------------------------------------------------------------------------
5374| Returns 1 if the extended double-precision floating-point value `a' is
5375| less than or equal to the corresponding value `b', and 0 otherwise. The
f5a64251
AJ
5376| invalid exception is raised if either operand is a NaN. The comparison is
5377| performed according to the IEC/IEEE Standard for Binary Floating-Point
5378| Arithmetic.
158142c2
FB
5379*----------------------------------------------------------------------------*/
5380
e5a41ffa 5381int floatx80_le(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5382{
5383 flag aSign, bSign;
5384
d1eb8f2a
AD
5385 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5386 || (extractFloatx80Exp(a) == 0x7FFF
5387 && (uint64_t) (extractFloatx80Frac(a) << 1))
5388 || (extractFloatx80Exp(b) == 0x7FFF
5389 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5390 ) {
ff32e16e 5391 float_raise(float_flag_invalid, status);
158142c2
FB
5392 return 0;
5393 }
5394 aSign = extractFloatx80Sign( a );
5395 bSign = extractFloatx80Sign( b );
5396 if ( aSign != bSign ) {
5397 return
5398 aSign
bb98fe42 5399 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5400 == 0 );
5401 }
5402 return
5403 aSign ? le128( b.high, b.low, a.high, a.low )
5404 : le128( a.high, a.low, b.high, b.low );
5405
5406}
5407
5408/*----------------------------------------------------------------------------
5409| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5410| less than the corresponding value `b', and 0 otherwise. The invalid
5411| exception is raised if either operand is a NaN. The comparison is performed
5412| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5413*----------------------------------------------------------------------------*/
5414
e5a41ffa 5415int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5416{
5417 flag aSign, bSign;
5418
d1eb8f2a
AD
5419 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5420 || (extractFloatx80Exp(a) == 0x7FFF
5421 && (uint64_t) (extractFloatx80Frac(a) << 1))
5422 || (extractFloatx80Exp(b) == 0x7FFF
5423 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5424 ) {
ff32e16e 5425 float_raise(float_flag_invalid, status);
158142c2
FB
5426 return 0;
5427 }
5428 aSign = extractFloatx80Sign( a );
5429 bSign = extractFloatx80Sign( b );
5430 if ( aSign != bSign ) {
5431 return
5432 aSign
bb98fe42 5433 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5434 != 0 );
5435 }
5436 return
5437 aSign ? lt128( b.high, b.low, a.high, a.low )
5438 : lt128( a.high, a.low, b.high, b.low );
5439
5440}
5441
67b7861d
AJ
5442/*----------------------------------------------------------------------------
5443| Returns 1 if the extended double-precision floating-point values `a' and `b'
f5a64251
AJ
5444| cannot be compared, and 0 otherwise. The invalid exception is raised if
5445| either operand is a NaN. The comparison is performed according to the
5446| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
67b7861d 5447*----------------------------------------------------------------------------*/
e5a41ffa 5448int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
67b7861d 5449{
d1eb8f2a
AD
5450 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5451 || (extractFloatx80Exp(a) == 0x7FFF
5452 && (uint64_t) (extractFloatx80Frac(a) << 1))
5453 || (extractFloatx80Exp(b) == 0x7FFF
5454 && (uint64_t) (extractFloatx80Frac(b) << 1))
67b7861d 5455 ) {
ff32e16e 5456 float_raise(float_flag_invalid, status);
67b7861d
AJ
5457 return 1;
5458 }
5459 return 0;
5460}
5461
158142c2 5462/*----------------------------------------------------------------------------
b689362d 5463| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5464| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5465| cause an exception. The comparison is performed according to the IEC/IEEE
5466| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5467*----------------------------------------------------------------------------*/
5468
e5a41ffa 5469int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5470{
5471
d1eb8f2a
AD
5472 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5473 float_raise(float_flag_invalid, status);
5474 return 0;
5475 }
158142c2 5476 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5477 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5478 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5479 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5480 ) {
af39bc8c
AM
5481 if (floatx80_is_signaling_nan(a, status)
5482 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5483 float_raise(float_flag_invalid, status);
b689362d 5484 }
158142c2
FB
5485 return 0;
5486 }
5487 return
5488 ( a.low == b.low )
5489 && ( ( a.high == b.high )
5490 || ( ( a.low == 0 )
bb98fe42 5491 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5492 );
5493
5494}
5495
5496/*----------------------------------------------------------------------------
5497| Returns 1 if the extended double-precision floating-point value `a' is less
5498| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
5499| do not cause an exception. Otherwise, the comparison is performed according
5500| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5501*----------------------------------------------------------------------------*/
5502
e5a41ffa 5503int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5504{
5505 flag aSign, bSign;
5506
d1eb8f2a
AD
5507 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5508 float_raise(float_flag_invalid, status);
5509 return 0;
5510 }
158142c2 5511 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5512 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5513 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5514 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5515 ) {
af39bc8c
AM
5516 if (floatx80_is_signaling_nan(a, status)
5517 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5518 float_raise(float_flag_invalid, status);
158142c2
FB
5519 }
5520 return 0;
5521 }
5522 aSign = extractFloatx80Sign( a );
5523 bSign = extractFloatx80Sign( b );
5524 if ( aSign != bSign ) {
5525 return
5526 aSign
bb98fe42 5527 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5528 == 0 );
5529 }
5530 return
5531 aSign ? le128( b.high, b.low, a.high, a.low )
5532 : le128( a.high, a.low, b.high, b.low );
5533
5534}
5535
5536/*----------------------------------------------------------------------------
5537| Returns 1 if the extended double-precision floating-point value `a' is less
5538| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
5539| an exception. Otherwise, the comparison is performed according to the
5540| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5541*----------------------------------------------------------------------------*/
5542
e5a41ffa 5543int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5544{
5545 flag aSign, bSign;
5546
d1eb8f2a
AD
5547 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5548 float_raise(float_flag_invalid, status);
5549 return 0;
5550 }
158142c2 5551 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5552 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5553 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5554 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5555 ) {
af39bc8c
AM
5556 if (floatx80_is_signaling_nan(a, status)
5557 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5558 float_raise(float_flag_invalid, status);
158142c2
FB
5559 }
5560 return 0;
5561 }
5562 aSign = extractFloatx80Sign( a );
5563 bSign = extractFloatx80Sign( b );
5564 if ( aSign != bSign ) {
5565 return
5566 aSign
bb98fe42 5567 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5568 != 0 );
5569 }
5570 return
5571 aSign ? lt128( b.high, b.low, a.high, a.low )
5572 : lt128( a.high, a.low, b.high, b.low );
5573
5574}
5575
67b7861d
AJ
5576/*----------------------------------------------------------------------------
5577| Returns 1 if the extended double-precision floating-point values `a' and `b'
5578| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
5579| The comparison is performed according to the IEC/IEEE Standard for Binary
5580| Floating-Point Arithmetic.
5581*----------------------------------------------------------------------------*/
e5a41ffa 5582int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
67b7861d 5583{
d1eb8f2a
AD
5584 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5585 float_raise(float_flag_invalid, status);
5586 return 1;
5587 }
67b7861d
AJ
5588 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5589 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5590 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5591 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5592 ) {
af39bc8c
AM
5593 if (floatx80_is_signaling_nan(a, status)
5594 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5595 float_raise(float_flag_invalid, status);
67b7861d
AJ
5596 }
5597 return 1;
5598 }
5599 return 0;
5600}
5601
158142c2
FB
5602/*----------------------------------------------------------------------------
5603| Returns the result of converting the quadruple-precision floating-point
5604| value `a' to the 32-bit two's complement integer format. The conversion
5605| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5606| Arithmetic---which means in particular that the conversion is rounded
5607| according to the current rounding mode. If `a' is a NaN, the largest
5608| positive integer is returned. Otherwise, if the conversion overflows, the
5609| largest integer with the same sign as `a' is returned.
5610*----------------------------------------------------------------------------*/
5611
f4014512 5612int32_t float128_to_int32(float128 a, float_status *status)
158142c2
FB
5613{
5614 flag aSign;
f4014512 5615 int32_t aExp, shiftCount;
bb98fe42 5616 uint64_t aSig0, aSig1;
158142c2
FB
5617
5618 aSig1 = extractFloat128Frac1( a );
5619 aSig0 = extractFloat128Frac0( a );
5620 aExp = extractFloat128Exp( a );
5621 aSign = extractFloat128Sign( a );
5622 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5623 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5624 aSig0 |= ( aSig1 != 0 );
5625 shiftCount = 0x4028 - aExp;
5626 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 5627 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
5628
5629}
5630
5631/*----------------------------------------------------------------------------
5632| Returns the result of converting the quadruple-precision floating-point
5633| value `a' to the 32-bit two's complement integer format. The conversion
5634| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5635| Arithmetic, except that the conversion is always rounded toward zero. If
5636| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
5637| conversion overflows, the largest integer with the same sign as `a' is
5638| returned.
5639*----------------------------------------------------------------------------*/
5640
f4014512 5641int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2
FB
5642{
5643 flag aSign;
f4014512 5644 int32_t aExp, shiftCount;
bb98fe42 5645 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 5646 int32_t z;
158142c2
FB
5647
5648 aSig1 = extractFloat128Frac1( a );
5649 aSig0 = extractFloat128Frac0( a );
5650 aExp = extractFloat128Exp( a );
5651 aSign = extractFloat128Sign( a );
5652 aSig0 |= ( aSig1 != 0 );
5653 if ( 0x401E < aExp ) {
5654 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5655 goto invalid;
5656 }
5657 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5658 if (aExp || aSig0) {
5659 status->float_exception_flags |= float_flag_inexact;
5660 }
158142c2
FB
5661 return 0;
5662 }
5663 aSig0 |= LIT64( 0x0001000000000000 );
5664 shiftCount = 0x402F - aExp;
5665 savedASig = aSig0;
5666 aSig0 >>= shiftCount;
5667 z = aSig0;
5668 if ( aSign ) z = - z;
5669 if ( ( z < 0 ) ^ aSign ) {
5670 invalid:
ff32e16e 5671 float_raise(float_flag_invalid, status);
bb98fe42 5672 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5673 }
5674 if ( ( aSig0<<shiftCount ) != savedASig ) {
a2f2d288 5675 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5676 }
5677 return z;
5678
5679}
5680
5681/*----------------------------------------------------------------------------
5682| Returns the result of converting the quadruple-precision floating-point
5683| value `a' to the 64-bit two's complement integer format. The conversion
5684| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5685| Arithmetic---which means in particular that the conversion is rounded
5686| according to the current rounding mode. If `a' is a NaN, the largest
5687| positive integer is returned. Otherwise, if the conversion overflows, the
5688| largest integer with the same sign as `a' is returned.
5689*----------------------------------------------------------------------------*/
5690
f42c2224 5691int64_t float128_to_int64(float128 a, float_status *status)
158142c2
FB
5692{
5693 flag aSign;
f4014512 5694 int32_t aExp, shiftCount;
bb98fe42 5695 uint64_t aSig0, aSig1;
158142c2
FB
5696
5697 aSig1 = extractFloat128Frac1( a );
5698 aSig0 = extractFloat128Frac0( a );
5699 aExp = extractFloat128Exp( a );
5700 aSign = extractFloat128Sign( a );
5701 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5702 shiftCount = 0x402F - aExp;
5703 if ( shiftCount <= 0 ) {
5704 if ( 0x403E < aExp ) {
ff32e16e 5705 float_raise(float_flag_invalid, status);
158142c2
FB
5706 if ( ! aSign
5707 || ( ( aExp == 0x7FFF )
5708 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5709 )
5710 ) {
5711 return LIT64( 0x7FFFFFFFFFFFFFFF );
5712 }
bb98fe42 5713 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5714 }
5715 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5716 }
5717 else {
5718 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5719 }
ff32e16e 5720 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
5721
5722}
5723
5724/*----------------------------------------------------------------------------
5725| Returns the result of converting the quadruple-precision floating-point
5726| value `a' to the 64-bit two's complement integer format. The conversion
5727| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5728| Arithmetic, except that the conversion is always rounded toward zero.
5729| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
5730| the conversion overflows, the largest integer with the same sign as `a' is
5731| returned.
5732*----------------------------------------------------------------------------*/
5733
f42c2224 5734int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2
FB
5735{
5736 flag aSign;
f4014512 5737 int32_t aExp, shiftCount;
bb98fe42 5738 uint64_t aSig0, aSig1;
f42c2224 5739 int64_t z;
158142c2
FB
5740
5741 aSig1 = extractFloat128Frac1( a );
5742 aSig0 = extractFloat128Frac0( a );
5743 aExp = extractFloat128Exp( a );
5744 aSign = extractFloat128Sign( a );
5745 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5746 shiftCount = aExp - 0x402F;
5747 if ( 0 < shiftCount ) {
5748 if ( 0x403E <= aExp ) {
5749 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5750 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
5751 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
a2f2d288
PM
5752 if (aSig1) {
5753 status->float_exception_flags |= float_flag_inexact;
5754 }
158142c2
FB
5755 }
5756 else {
ff32e16e 5757 float_raise(float_flag_invalid, status);
158142c2
FB
5758 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
5759 return LIT64( 0x7FFFFFFFFFFFFFFF );
5760 }
5761 }
bb98fe42 5762 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5763 }
5764 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 5765 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
a2f2d288 5766 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5767 }
5768 }
5769 else {
5770 if ( aExp < 0x3FFF ) {
5771 if ( aExp | aSig0 | aSig1 ) {
a2f2d288 5772 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5773 }
5774 return 0;
5775 }
5776 z = aSig0>>( - shiftCount );
5777 if ( aSig1
bb98fe42 5778 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
a2f2d288 5779 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5780 }
5781 }
5782 if ( aSign ) z = - z;
5783 return z;
5784
5785}
5786
2e6d8568
BR
5787/*----------------------------------------------------------------------------
5788| Returns the result of converting the quadruple-precision floating-point value
5789| `a' to the 64-bit unsigned integer format. The conversion is
5790| performed according to the IEC/IEEE Standard for Binary Floating-Point
5791| Arithmetic---which means in particular that the conversion is rounded
5792| according to the current rounding mode. If `a' is a NaN, the largest
5793| positive integer is returned. If the conversion overflows, the
5794| largest unsigned integer is returned. If 'a' is negative, the value is
5795| rounded and zero is returned; negative values that do not round to zero
5796| will raise the inexact exception.
5797*----------------------------------------------------------------------------*/
5798
5799uint64_t float128_to_uint64(float128 a, float_status *status)
5800{
5801 flag aSign;
5802 int aExp;
5803 int shiftCount;
5804 uint64_t aSig0, aSig1;
5805
5806 aSig0 = extractFloat128Frac0(a);
5807 aSig1 = extractFloat128Frac1(a);
5808 aExp = extractFloat128Exp(a);
5809 aSign = extractFloat128Sign(a);
5810 if (aSign && (aExp > 0x3FFE)) {
5811 float_raise(float_flag_invalid, status);
5812 if (float128_is_any_nan(a)) {
5813 return LIT64(0xFFFFFFFFFFFFFFFF);
5814 } else {
5815 return 0;
5816 }
5817 }
5818 if (aExp) {
5819 aSig0 |= LIT64(0x0001000000000000);
5820 }
5821 shiftCount = 0x402F - aExp;
5822 if (shiftCount <= 0) {
5823 if (0x403E < aExp) {
5824 float_raise(float_flag_invalid, status);
5825 return LIT64(0xFFFFFFFFFFFFFFFF);
5826 }
5827 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
5828 } else {
5829 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
5830 }
5831 return roundAndPackUint64(aSign, aSig0, aSig1, status);
5832}
5833
5834uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
5835{
5836 uint64_t v;
5837 signed char current_rounding_mode = status->float_rounding_mode;
5838
5839 set_float_rounding_mode(float_round_to_zero, status);
5840 v = float128_to_uint64(a, status);
5841 set_float_rounding_mode(current_rounding_mode, status);
5842
5843 return v;
5844}
5845
158142c2
FB
5846/*----------------------------------------------------------------------------
5847| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
5848| value `a' to the 32-bit unsigned integer format. The conversion
5849| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5850| Arithmetic except that the conversion is always rounded toward zero.
5851| If `a' is a NaN, the largest positive integer is returned. Otherwise,
5852| if the conversion overflows, the largest unsigned integer is returned.
5853| If 'a' is negative, the value is rounded and zero is returned; negative
5854| values that do not round to zero will raise the inexact exception.
5855*----------------------------------------------------------------------------*/
5856
5857uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
5858{
5859 uint64_t v;
5860 uint32_t res;
5861 int old_exc_flags = get_float_exception_flags(status);
5862
5863 v = float128_to_uint64_round_to_zero(a, status);
5864 if (v > 0xffffffff) {
5865 res = 0xffffffff;
5866 } else {
5867 return v;
5868 }
5869 set_float_exception_flags(old_exc_flags, status);
5870 float_raise(float_flag_invalid, status);
5871 return res;
5872}
5873
5874/*----------------------------------------------------------------------------
5875| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
5876| value `a' to the single-precision floating-point format. The conversion
5877| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5878| Arithmetic.
5879*----------------------------------------------------------------------------*/
5880
e5a41ffa 5881float32 float128_to_float32(float128 a, float_status *status)
158142c2
FB
5882{
5883 flag aSign;
f4014512 5884 int32_t aExp;
bb98fe42
AF
5885 uint64_t aSig0, aSig1;
5886 uint32_t zSig;
158142c2
FB
5887
5888 aSig1 = extractFloat128Frac1( a );
5889 aSig0 = extractFloat128Frac0( a );
5890 aExp = extractFloat128Exp( a );
5891 aSign = extractFloat128Sign( a );
5892 if ( aExp == 0x7FFF ) {
5893 if ( aSig0 | aSig1 ) {
ff32e16e 5894 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
5895 }
5896 return packFloat32( aSign, 0xFF, 0 );
5897 }
5898 aSig0 |= ( aSig1 != 0 );
5899 shift64RightJamming( aSig0, 18, &aSig0 );
5900 zSig = aSig0;
5901 if ( aExp || zSig ) {
5902 zSig |= 0x40000000;
5903 aExp -= 0x3F81;
5904 }
ff32e16e 5905 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
5906
5907}
5908
5909/*----------------------------------------------------------------------------
5910| Returns the result of converting the quadruple-precision floating-point
5911| value `a' to the double-precision floating-point format. The conversion
5912| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5913| Arithmetic.
5914*----------------------------------------------------------------------------*/
5915
e5a41ffa 5916float64 float128_to_float64(float128 a, float_status *status)
158142c2
FB
5917{
5918 flag aSign;
f4014512 5919 int32_t aExp;
bb98fe42 5920 uint64_t aSig0, aSig1;
158142c2
FB
5921
5922 aSig1 = extractFloat128Frac1( a );
5923 aSig0 = extractFloat128Frac0( a );
5924 aExp = extractFloat128Exp( a );
5925 aSign = extractFloat128Sign( a );
5926 if ( aExp == 0x7FFF ) {
5927 if ( aSig0 | aSig1 ) {
ff32e16e 5928 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
5929 }
5930 return packFloat64( aSign, 0x7FF, 0 );
5931 }
5932 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5933 aSig0 |= ( aSig1 != 0 );
5934 if ( aExp || aSig0 ) {
5935 aSig0 |= LIT64( 0x4000000000000000 );
5936 aExp -= 0x3C01;
5937 }
ff32e16e 5938 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
5939
5940}
5941
158142c2
FB
5942/*----------------------------------------------------------------------------
5943| Returns the result of converting the quadruple-precision floating-point
5944| value `a' to the extended double-precision floating-point format. The
5945| conversion is performed according to the IEC/IEEE Standard for Binary
5946| Floating-Point Arithmetic.
5947*----------------------------------------------------------------------------*/
5948
e5a41ffa 5949floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2
FB
5950{
5951 flag aSign;
f4014512 5952 int32_t aExp;
bb98fe42 5953 uint64_t aSig0, aSig1;
158142c2
FB
5954
5955 aSig1 = extractFloat128Frac1( a );
5956 aSig0 = extractFloat128Frac0( a );
5957 aExp = extractFloat128Exp( a );
5958 aSign = extractFloat128Sign( a );
5959 if ( aExp == 0x7FFF ) {
5960 if ( aSig0 | aSig1 ) {
ff32e16e 5961 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
158142c2 5962 }
0f605c88
LV
5963 return packFloatx80(aSign, floatx80_infinity_high,
5964 floatx80_infinity_low);
158142c2
FB
5965 }
5966 if ( aExp == 0 ) {
5967 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
5968 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5969 }
5970 else {
5971 aSig0 |= LIT64( 0x0001000000000000 );
5972 }
5973 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 5974 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
5975
5976}
5977
158142c2
FB
5978/*----------------------------------------------------------------------------
5979| Rounds the quadruple-precision floating-point value `a' to an integer, and
5980| returns the result as a quadruple-precision floating-point value. The
5981| operation is performed according to the IEC/IEEE Standard for Binary
5982| Floating-Point Arithmetic.
5983*----------------------------------------------------------------------------*/
5984
e5a41ffa 5985float128 float128_round_to_int(float128 a, float_status *status)
158142c2
FB
5986{
5987 flag aSign;
f4014512 5988 int32_t aExp;
bb98fe42 5989 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5990 float128 z;
5991
5992 aExp = extractFloat128Exp( a );
5993 if ( 0x402F <= aExp ) {
5994 if ( 0x406F <= aExp ) {
5995 if ( ( aExp == 0x7FFF )
5996 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
5997 ) {
ff32e16e 5998 return propagateFloat128NaN(a, a, status);
158142c2
FB
5999 }
6000 return a;
6001 }
6002 lastBitMask = 1;
6003 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6004 roundBitsMask = lastBitMask - 1;
6005 z = a;
a2f2d288 6006 switch (status->float_rounding_mode) {
dc355b76 6007 case float_round_nearest_even:
158142c2
FB
6008 if ( lastBitMask ) {
6009 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6010 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6011 }
6012 else {
bb98fe42 6013 if ( (int64_t) z.low < 0 ) {
158142c2 6014 ++z.high;
bb98fe42 6015 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
6016 }
6017 }
dc355b76 6018 break;
f9288a76
PM
6019 case float_round_ties_away:
6020 if (lastBitMask) {
6021 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6022 } else {
6023 if ((int64_t) z.low < 0) {
6024 ++z.high;
6025 }
6026 }
6027 break;
dc355b76
PM
6028 case float_round_to_zero:
6029 break;
6030 case float_round_up:
6031 if (!extractFloat128Sign(z)) {
6032 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6033 }
6034 break;
6035 case float_round_down:
6036 if (extractFloat128Sign(z)) {
6037 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 6038 }
dc355b76
PM
6039 break;
6040 default:
6041 abort();
158142c2
FB
6042 }
6043 z.low &= ~ roundBitsMask;
6044 }
6045 else {
6046 if ( aExp < 0x3FFF ) {
bb98fe42 6047 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
a2f2d288 6048 status->float_exception_flags |= float_flag_inexact;
158142c2 6049 aSign = extractFloat128Sign( a );
a2f2d288 6050 switch (status->float_rounding_mode) {
158142c2
FB
6051 case float_round_nearest_even:
6052 if ( ( aExp == 0x3FFE )
6053 && ( extractFloat128Frac0( a )
6054 | extractFloat128Frac1( a ) )
6055 ) {
6056 return packFloat128( aSign, 0x3FFF, 0, 0 );
6057 }
6058 break;
f9288a76
PM
6059 case float_round_ties_away:
6060 if (aExp == 0x3FFE) {
6061 return packFloat128(aSign, 0x3FFF, 0, 0);
6062 }
6063 break;
158142c2
FB
6064 case float_round_down:
6065 return
6066 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6067 : packFloat128( 0, 0, 0, 0 );
6068 case float_round_up:
6069 return
6070 aSign ? packFloat128( 1, 0, 0, 0 )
6071 : packFloat128( 0, 0x3FFF, 0, 0 );
6072 }
6073 return packFloat128( aSign, 0, 0, 0 );
6074 }
6075 lastBitMask = 1;
6076 lastBitMask <<= 0x402F - aExp;
6077 roundBitsMask = lastBitMask - 1;
6078 z.low = 0;
6079 z.high = a.high;
a2f2d288 6080 switch (status->float_rounding_mode) {
dc355b76 6081 case float_round_nearest_even:
158142c2
FB
6082 z.high += lastBitMask>>1;
6083 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6084 z.high &= ~ lastBitMask;
6085 }
dc355b76 6086 break;
f9288a76
PM
6087 case float_round_ties_away:
6088 z.high += lastBitMask>>1;
6089 break;
dc355b76
PM
6090 case float_round_to_zero:
6091 break;
6092 case float_round_up:
6093 if (!extractFloat128Sign(z)) {
158142c2
FB
6094 z.high |= ( a.low != 0 );
6095 z.high += roundBitsMask;
6096 }
dc355b76
PM
6097 break;
6098 case float_round_down:
6099 if (extractFloat128Sign(z)) {
6100 z.high |= (a.low != 0);
6101 z.high += roundBitsMask;
6102 }
6103 break;
6104 default:
6105 abort();
158142c2
FB
6106 }
6107 z.high &= ~ roundBitsMask;
6108 }
6109 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
a2f2d288 6110 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6111 }
6112 return z;
6113
6114}
6115
6116/*----------------------------------------------------------------------------
6117| Returns the result of adding the absolute values of the quadruple-precision
6118| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
6119| before being returned. `zSign' is ignored if the result is a NaN.
6120| The addition is performed according to the IEC/IEEE Standard for Binary
6121| Floating-Point Arithmetic.
6122*----------------------------------------------------------------------------*/
6123
e5a41ffa
PM
6124static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
6125 float_status *status)
158142c2 6126{
f4014512 6127 int32_t aExp, bExp, zExp;
bb98fe42 6128 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 6129 int32_t expDiff;
158142c2
FB
6130
6131 aSig1 = extractFloat128Frac1( a );
6132 aSig0 = extractFloat128Frac0( a );
6133 aExp = extractFloat128Exp( a );
6134 bSig1 = extractFloat128Frac1( b );
6135 bSig0 = extractFloat128Frac0( b );
6136 bExp = extractFloat128Exp( b );
6137 expDiff = aExp - bExp;
6138 if ( 0 < expDiff ) {
6139 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6140 if (aSig0 | aSig1) {
6141 return propagateFloat128NaN(a, b, status);
6142 }
158142c2
FB
6143 return a;
6144 }
6145 if ( bExp == 0 ) {
6146 --expDiff;
6147 }
6148 else {
6149 bSig0 |= LIT64( 0x0001000000000000 );
6150 }
6151 shift128ExtraRightJamming(
6152 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6153 zExp = aExp;
6154 }
6155 else if ( expDiff < 0 ) {
6156 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6157 if (bSig0 | bSig1) {
6158 return propagateFloat128NaN(a, b, status);
6159 }
158142c2
FB
6160 return packFloat128( zSign, 0x7FFF, 0, 0 );
6161 }
6162 if ( aExp == 0 ) {
6163 ++expDiff;
6164 }
6165 else {
6166 aSig0 |= LIT64( 0x0001000000000000 );
6167 }
6168 shift128ExtraRightJamming(
6169 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6170 zExp = bExp;
6171 }
6172 else {
6173 if ( aExp == 0x7FFF ) {
6174 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6175 return propagateFloat128NaN(a, b, status);
158142c2
FB
6176 }
6177 return a;
6178 }
6179 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 6180 if ( aExp == 0 ) {
a2f2d288 6181 if (status->flush_to_zero) {
e6afc87f 6182 if (zSig0 | zSig1) {
ff32e16e 6183 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
6184 }
6185 return packFloat128(zSign, 0, 0, 0);
6186 }
fe76d976
PB
6187 return packFloat128( zSign, 0, zSig0, zSig1 );
6188 }
158142c2
FB
6189 zSig2 = 0;
6190 zSig0 |= LIT64( 0x0002000000000000 );
6191 zExp = aExp;
6192 goto shiftRight1;
6193 }
6194 aSig0 |= LIT64( 0x0001000000000000 );
6195 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6196 --zExp;
6197 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
6198 ++zExp;
6199 shiftRight1:
6200 shift128ExtraRightJamming(
6201 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6202 roundAndPack:
ff32e16e 6203 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6204
6205}
6206
6207/*----------------------------------------------------------------------------
6208| Returns the result of subtracting the absolute values of the quadruple-
6209| precision floating-point values `a' and `b'. If `zSign' is 1, the
6210| difference is negated before being returned. `zSign' is ignored if the
6211| result is a NaN. The subtraction is performed according to the IEC/IEEE
6212| Standard for Binary Floating-Point Arithmetic.
6213*----------------------------------------------------------------------------*/
6214
e5a41ffa
PM
6215static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
6216 float_status *status)
158142c2 6217{
f4014512 6218 int32_t aExp, bExp, zExp;
bb98fe42 6219 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 6220 int32_t expDiff;
158142c2
FB
6221
6222 aSig1 = extractFloat128Frac1( a );
6223 aSig0 = extractFloat128Frac0( a );
6224 aExp = extractFloat128Exp( a );
6225 bSig1 = extractFloat128Frac1( b );
6226 bSig0 = extractFloat128Frac0( b );
6227 bExp = extractFloat128Exp( b );
6228 expDiff = aExp - bExp;
6229 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6230 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
6231 if ( 0 < expDiff ) goto aExpBigger;
6232 if ( expDiff < 0 ) goto bExpBigger;
6233 if ( aExp == 0x7FFF ) {
6234 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6235 return propagateFloat128NaN(a, b, status);
158142c2 6236 }
ff32e16e 6237 float_raise(float_flag_invalid, status);
af39bc8c 6238 return float128_default_nan(status);
158142c2
FB
6239 }
6240 if ( aExp == 0 ) {
6241 aExp = 1;
6242 bExp = 1;
6243 }
6244 if ( bSig0 < aSig0 ) goto aBigger;
6245 if ( aSig0 < bSig0 ) goto bBigger;
6246 if ( bSig1 < aSig1 ) goto aBigger;
6247 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
6248 return packFloat128(status->float_rounding_mode == float_round_down,
6249 0, 0, 0);
158142c2
FB
6250 bExpBigger:
6251 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6252 if (bSig0 | bSig1) {
6253 return propagateFloat128NaN(a, b, status);
6254 }
158142c2
FB
6255 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
6256 }
6257 if ( aExp == 0 ) {
6258 ++expDiff;
6259 }
6260 else {
6261 aSig0 |= LIT64( 0x4000000000000000 );
6262 }
6263 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6264 bSig0 |= LIT64( 0x4000000000000000 );
6265 bBigger:
6266 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
6267 zExp = bExp;
6268 zSign ^= 1;
6269 goto normalizeRoundAndPack;
6270 aExpBigger:
6271 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6272 if (aSig0 | aSig1) {
6273 return propagateFloat128NaN(a, b, status);
6274 }
158142c2
FB
6275 return a;
6276 }
6277 if ( bExp == 0 ) {
6278 --expDiff;
6279 }
6280 else {
6281 bSig0 |= LIT64( 0x4000000000000000 );
6282 }
6283 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
6284 aSig0 |= LIT64( 0x4000000000000000 );
6285 aBigger:
6286 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6287 zExp = aExp;
6288 normalizeRoundAndPack:
6289 --zExp;
ff32e16e
PM
6290 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
6291 status);
158142c2
FB
6292
6293}
6294
6295/*----------------------------------------------------------------------------
6296| Returns the result of adding the quadruple-precision floating-point values
6297| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
6298| for Binary Floating-Point Arithmetic.
6299*----------------------------------------------------------------------------*/
6300
e5a41ffa 6301float128 float128_add(float128 a, float128 b, float_status *status)
158142c2
FB
6302{
6303 flag aSign, bSign;
6304
6305 aSign = extractFloat128Sign( a );
6306 bSign = extractFloat128Sign( b );
6307 if ( aSign == bSign ) {
ff32e16e 6308 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6309 }
6310 else {
ff32e16e 6311 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6312 }
6313
6314}
6315
6316/*----------------------------------------------------------------------------
6317| Returns the result of subtracting the quadruple-precision floating-point
6318| values `a' and `b'. The operation is performed according to the IEC/IEEE
6319| Standard for Binary Floating-Point Arithmetic.
6320*----------------------------------------------------------------------------*/
6321
e5a41ffa 6322float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2
FB
6323{
6324 flag aSign, bSign;
6325
6326 aSign = extractFloat128Sign( a );
6327 bSign = extractFloat128Sign( b );
6328 if ( aSign == bSign ) {
ff32e16e 6329 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6330 }
6331 else {
ff32e16e 6332 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6333 }
6334
6335}
6336
6337/*----------------------------------------------------------------------------
6338| Returns the result of multiplying the quadruple-precision floating-point
6339| values `a' and `b'. The operation is performed according to the IEC/IEEE
6340| Standard for Binary Floating-Point Arithmetic.
6341*----------------------------------------------------------------------------*/
6342
e5a41ffa 6343float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2
FB
6344{
6345 flag aSign, bSign, zSign;
f4014512 6346 int32_t aExp, bExp, zExp;
bb98fe42 6347 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
6348
6349 aSig1 = extractFloat128Frac1( a );
6350 aSig0 = extractFloat128Frac0( a );
6351 aExp = extractFloat128Exp( a );
6352 aSign = extractFloat128Sign( a );
6353 bSig1 = extractFloat128Frac1( b );
6354 bSig0 = extractFloat128Frac0( b );
6355 bExp = extractFloat128Exp( b );
6356 bSign = extractFloat128Sign( b );
6357 zSign = aSign ^ bSign;
6358 if ( aExp == 0x7FFF ) {
6359 if ( ( aSig0 | aSig1 )
6360 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6361 return propagateFloat128NaN(a, b, status);
158142c2
FB
6362 }
6363 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6364 return packFloat128( zSign, 0x7FFF, 0, 0 );
6365 }
6366 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6367 if (bSig0 | bSig1) {
6368 return propagateFloat128NaN(a, b, status);
6369 }
158142c2
FB
6370 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6371 invalid:
ff32e16e 6372 float_raise(float_flag_invalid, status);
af39bc8c 6373 return float128_default_nan(status);
158142c2
FB
6374 }
6375 return packFloat128( zSign, 0x7FFF, 0, 0 );
6376 }
6377 if ( aExp == 0 ) {
6378 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6379 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6380 }
6381 if ( bExp == 0 ) {
6382 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6383 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6384 }
6385 zExp = aExp + bExp - 0x4000;
6386 aSig0 |= LIT64( 0x0001000000000000 );
6387 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6388 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6389 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6390 zSig2 |= ( zSig3 != 0 );
6391 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6392 shift128ExtraRightJamming(
6393 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6394 ++zExp;
6395 }
ff32e16e 6396 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6397
6398}
6399
6400/*----------------------------------------------------------------------------
6401| Returns the result of dividing the quadruple-precision floating-point value
6402| `a' by the corresponding value `b'. The operation is performed according to
6403| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6404*----------------------------------------------------------------------------*/
6405
e5a41ffa 6406float128 float128_div(float128 a, float128 b, float_status *status)
158142c2
FB
6407{
6408 flag aSign, bSign, zSign;
f4014512 6409 int32_t aExp, bExp, zExp;
bb98fe42
AF
6410 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6411 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6412
6413 aSig1 = extractFloat128Frac1( a );
6414 aSig0 = extractFloat128Frac0( a );
6415 aExp = extractFloat128Exp( a );
6416 aSign = extractFloat128Sign( a );
6417 bSig1 = extractFloat128Frac1( b );
6418 bSig0 = extractFloat128Frac0( b );
6419 bExp = extractFloat128Exp( b );
6420 bSign = extractFloat128Sign( b );
6421 zSign = aSign ^ bSign;
6422 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6423 if (aSig0 | aSig1) {
6424 return propagateFloat128NaN(a, b, status);
6425 }
158142c2 6426 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6427 if (bSig0 | bSig1) {
6428 return propagateFloat128NaN(a, b, status);
6429 }
158142c2
FB
6430 goto invalid;
6431 }
6432 return packFloat128( zSign, 0x7FFF, 0, 0 );
6433 }
6434 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6435 if (bSig0 | bSig1) {
6436 return propagateFloat128NaN(a, b, status);
6437 }
158142c2
FB
6438 return packFloat128( zSign, 0, 0, 0 );
6439 }
6440 if ( bExp == 0 ) {
6441 if ( ( bSig0 | bSig1 ) == 0 ) {
6442 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6443 invalid:
ff32e16e 6444 float_raise(float_flag_invalid, status);
af39bc8c 6445 return float128_default_nan(status);
158142c2 6446 }
ff32e16e 6447 float_raise(float_flag_divbyzero, status);
158142c2
FB
6448 return packFloat128( zSign, 0x7FFF, 0, 0 );
6449 }
6450 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6451 }
6452 if ( aExp == 0 ) {
6453 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6454 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6455 }
6456 zExp = aExp - bExp + 0x3FFD;
6457 shortShift128Left(
6458 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6459 shortShift128Left(
6460 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6461 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6462 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6463 ++zExp;
6464 }
6465 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6466 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6467 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 6468 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6469 --zSig0;
6470 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6471 }
6472 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6473 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6474 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6475 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6476 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6477 --zSig1;
6478 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6479 }
6480 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6481 }
6482 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 6483 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6484
6485}
6486
6487/*----------------------------------------------------------------------------
6488| Returns the remainder of the quadruple-precision floating-point value `a'
6489| with respect to the corresponding value `b'. The operation is performed
6490| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6491*----------------------------------------------------------------------------*/
6492
e5a41ffa 6493float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 6494{
ed086f3d 6495 flag aSign, zSign;
f4014512 6496 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6497 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6498 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6499 int64_t sigMean0;
158142c2
FB
6500
6501 aSig1 = extractFloat128Frac1( a );
6502 aSig0 = extractFloat128Frac0( a );
6503 aExp = extractFloat128Exp( a );
6504 aSign = extractFloat128Sign( a );
6505 bSig1 = extractFloat128Frac1( b );
6506 bSig0 = extractFloat128Frac0( b );
6507 bExp = extractFloat128Exp( b );
158142c2
FB
6508 if ( aExp == 0x7FFF ) {
6509 if ( ( aSig0 | aSig1 )
6510 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6511 return propagateFloat128NaN(a, b, status);
158142c2
FB
6512 }
6513 goto invalid;
6514 }
6515 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6516 if (bSig0 | bSig1) {
6517 return propagateFloat128NaN(a, b, status);
6518 }
158142c2
FB
6519 return a;
6520 }
6521 if ( bExp == 0 ) {
6522 if ( ( bSig0 | bSig1 ) == 0 ) {
6523 invalid:
ff32e16e 6524 float_raise(float_flag_invalid, status);
af39bc8c 6525 return float128_default_nan(status);
158142c2
FB
6526 }
6527 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6528 }
6529 if ( aExp == 0 ) {
6530 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6531 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6532 }
6533 expDiff = aExp - bExp;
6534 if ( expDiff < -1 ) return a;
6535 shortShift128Left(
6536 aSig0 | LIT64( 0x0001000000000000 ),
6537 aSig1,
6538 15 - ( expDiff < 0 ),
6539 &aSig0,
6540 &aSig1
6541 );
6542 shortShift128Left(
6543 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6544 q = le128( bSig0, bSig1, aSig0, aSig1 );
6545 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6546 expDiff -= 64;
6547 while ( 0 < expDiff ) {
6548 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6549 q = ( 4 < q ) ? q - 4 : 0;
6550 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6551 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6552 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6553 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6554 expDiff -= 61;
6555 }
6556 if ( -64 < expDiff ) {
6557 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6558 q = ( 4 < q ) ? q - 4 : 0;
6559 q >>= - expDiff;
6560 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6561 expDiff += 52;
6562 if ( expDiff < 0 ) {
6563 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6564 }
6565 else {
6566 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6567 }
6568 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6569 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6570 }
6571 else {
6572 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6573 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6574 }
6575 do {
6576 alternateASig0 = aSig0;
6577 alternateASig1 = aSig1;
6578 ++q;
6579 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 6580 } while ( 0 <= (int64_t) aSig0 );
158142c2 6581 add128(
bb98fe42 6582 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
6583 if ( ( sigMean0 < 0 )
6584 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6585 aSig0 = alternateASig0;
6586 aSig1 = alternateASig1;
6587 }
bb98fe42 6588 zSign = ( (int64_t) aSig0 < 0 );
158142c2 6589 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
6590 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6591 status);
158142c2
FB
6592}
6593
6594/*----------------------------------------------------------------------------
6595| Returns the square root of the quadruple-precision floating-point value `a'.
6596| The operation is performed according to the IEC/IEEE Standard for Binary
6597| Floating-Point Arithmetic.
6598*----------------------------------------------------------------------------*/
6599
e5a41ffa 6600float128 float128_sqrt(float128 a, float_status *status)
158142c2
FB
6601{
6602 flag aSign;
f4014512 6603 int32_t aExp, zExp;
bb98fe42
AF
6604 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6605 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6606
6607 aSig1 = extractFloat128Frac1( a );
6608 aSig0 = extractFloat128Frac0( a );
6609 aExp = extractFloat128Exp( a );
6610 aSign = extractFloat128Sign( a );
6611 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6612 if (aSig0 | aSig1) {
6613 return propagateFloat128NaN(a, a, status);
6614 }
158142c2
FB
6615 if ( ! aSign ) return a;
6616 goto invalid;
6617 }
6618 if ( aSign ) {
6619 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6620 invalid:
ff32e16e 6621 float_raise(float_flag_invalid, status);
af39bc8c 6622 return float128_default_nan(status);
158142c2
FB
6623 }
6624 if ( aExp == 0 ) {
6625 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6626 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6627 }
6628 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6629 aSig0 |= LIT64( 0x0001000000000000 );
6630 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6631 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6632 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6633 doubleZSig0 = zSig0<<1;
6634 mul64To128( zSig0, zSig0, &term0, &term1 );
6635 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6636 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6637 --zSig0;
6638 doubleZSig0 -= 2;
6639 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6640 }
6641 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6642 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6643 if ( zSig1 == 0 ) zSig1 = 1;
6644 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6645 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6646 mul64To128( zSig1, zSig1, &term2, &term3 );
6647 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6648 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6649 --zSig1;
6650 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6651 term3 |= 1;
6652 term2 |= doubleZSig0;
6653 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6654 }
6655 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6656 }
6657 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 6658 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6659
6660}
6661
6662/*----------------------------------------------------------------------------
6663| Returns 1 if the quadruple-precision floating-point value `a' is equal to
b689362d
AJ
6664| the corresponding value `b', and 0 otherwise. The invalid exception is
6665| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
6666| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6667*----------------------------------------------------------------------------*/
6668
e5a41ffa 6669int float128_eq(float128 a, float128 b, float_status *status)
158142c2
FB
6670{
6671
6672 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6673 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6674 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6675 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6676 ) {
ff32e16e 6677 float_raise(float_flag_invalid, status);
158142c2
FB
6678 return 0;
6679 }
6680 return
6681 ( a.low == b.low )
6682 && ( ( a.high == b.high )
6683 || ( ( a.low == 0 )
bb98fe42 6684 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6685 );
6686
6687}
6688
6689/*----------------------------------------------------------------------------
6690| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6691| or equal to the corresponding value `b', and 0 otherwise. The invalid
6692| exception is raised if either operand is a NaN. The comparison is performed
6693| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6694*----------------------------------------------------------------------------*/
6695
e5a41ffa 6696int float128_le(float128 a, float128 b, float_status *status)
158142c2
FB
6697{
6698 flag aSign, bSign;
6699
6700 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6701 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6702 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6703 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6704 ) {
ff32e16e 6705 float_raise(float_flag_invalid, status);
158142c2
FB
6706 return 0;
6707 }
6708 aSign = extractFloat128Sign( a );
6709 bSign = extractFloat128Sign( b );
6710 if ( aSign != bSign ) {
6711 return
6712 aSign
bb98fe42 6713 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6714 == 0 );
6715 }
6716 return
6717 aSign ? le128( b.high, b.low, a.high, a.low )
6718 : le128( a.high, a.low, b.high, b.low );
6719
6720}
6721
6722/*----------------------------------------------------------------------------
6723| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6724| the corresponding value `b', and 0 otherwise. The invalid exception is
6725| raised if either operand is a NaN. The comparison is performed according
6726| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6727*----------------------------------------------------------------------------*/
6728
e5a41ffa 6729int float128_lt(float128 a, float128 b, float_status *status)
158142c2
FB
6730{
6731 flag aSign, bSign;
6732
6733 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6734 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6735 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6736 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6737 ) {
ff32e16e 6738 float_raise(float_flag_invalid, status);
158142c2
FB
6739 return 0;
6740 }
6741 aSign = extractFloat128Sign( a );
6742 bSign = extractFloat128Sign( b );
6743 if ( aSign != bSign ) {
6744 return
6745 aSign
bb98fe42 6746 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6747 != 0 );
6748 }
6749 return
6750 aSign ? lt128( b.high, b.low, a.high, a.low )
6751 : lt128( a.high, a.low, b.high, b.low );
6752
6753}
6754
67b7861d
AJ
6755/*----------------------------------------------------------------------------
6756| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
f5a64251
AJ
6757| be compared, and 0 otherwise. The invalid exception is raised if either
6758| operand is a NaN. The comparison is performed according to the IEC/IEEE
6759| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
6760*----------------------------------------------------------------------------*/
6761
e5a41ffa 6762int float128_unordered(float128 a, float128 b, float_status *status)
67b7861d
AJ
6763{
6764 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6765 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6766 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6767 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6768 ) {
ff32e16e 6769 float_raise(float_flag_invalid, status);
67b7861d
AJ
6770 return 1;
6771 }
6772 return 0;
6773}
6774
158142c2
FB
6775/*----------------------------------------------------------------------------
6776| Returns 1 if the quadruple-precision floating-point value `a' is equal to
f5a64251
AJ
6777| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6778| exception. The comparison is performed according to the IEC/IEEE Standard
6779| for Binary Floating-Point Arithmetic.
158142c2
FB
6780*----------------------------------------------------------------------------*/
6781
e5a41ffa 6782int float128_eq_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6783{
6784
6785 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6786 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6787 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6788 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6789 ) {
af39bc8c
AM
6790 if (float128_is_signaling_nan(a, status)
6791 || float128_is_signaling_nan(b, status)) {
ff32e16e 6792 float_raise(float_flag_invalid, status);
b689362d 6793 }
158142c2
FB
6794 return 0;
6795 }
6796 return
6797 ( a.low == b.low )
6798 && ( ( a.high == b.high )
6799 || ( ( a.low == 0 )
bb98fe42 6800 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6801 );
6802
6803}
6804
6805/*----------------------------------------------------------------------------
6806| Returns 1 if the quadruple-precision floating-point value `a' is less than
6807| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6808| cause an exception. Otherwise, the comparison is performed according to the
6809| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6810*----------------------------------------------------------------------------*/
6811
e5a41ffa 6812int float128_le_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6813{
6814 flag aSign, bSign;
6815
6816 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6817 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6818 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6819 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6820 ) {
af39bc8c
AM
6821 if (float128_is_signaling_nan(a, status)
6822 || float128_is_signaling_nan(b, status)) {
ff32e16e 6823 float_raise(float_flag_invalid, status);
158142c2
FB
6824 }
6825 return 0;
6826 }
6827 aSign = extractFloat128Sign( a );
6828 bSign = extractFloat128Sign( b );
6829 if ( aSign != bSign ) {
6830 return
6831 aSign
bb98fe42 6832 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6833 == 0 );
6834 }
6835 return
6836 aSign ? le128( b.high, b.low, a.high, a.low )
6837 : le128( a.high, a.low, b.high, b.low );
6838
6839}
6840
6841/*----------------------------------------------------------------------------
6842| Returns 1 if the quadruple-precision floating-point value `a' is less than
6843| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6844| exception. Otherwise, the comparison is performed according to the IEC/IEEE
6845| Standard for Binary Floating-Point Arithmetic.
6846*----------------------------------------------------------------------------*/
6847
e5a41ffa 6848int float128_lt_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6849{
6850 flag aSign, bSign;
6851
6852 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6853 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6854 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6855 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6856 ) {
af39bc8c
AM
6857 if (float128_is_signaling_nan(a, status)
6858 || float128_is_signaling_nan(b, status)) {
ff32e16e 6859 float_raise(float_flag_invalid, status);
158142c2
FB
6860 }
6861 return 0;
6862 }
6863 aSign = extractFloat128Sign( a );
6864 bSign = extractFloat128Sign( b );
6865 if ( aSign != bSign ) {
6866 return
6867 aSign
bb98fe42 6868 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6869 != 0 );
6870 }
6871 return
6872 aSign ? lt128( b.high, b.low, a.high, a.low )
6873 : lt128( a.high, a.low, b.high, b.low );
6874
6875}
6876
67b7861d
AJ
6877/*----------------------------------------------------------------------------
6878| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
6879| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
6880| comparison is performed according to the IEC/IEEE Standard for Binary
6881| Floating-Point Arithmetic.
6882*----------------------------------------------------------------------------*/
6883
e5a41ffa 6884int float128_unordered_quiet(float128 a, float128 b, float_status *status)
67b7861d
AJ
6885{
6886 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6887 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6888 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6889 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6890 ) {
af39bc8c
AM
6891 if (float128_is_signaling_nan(a, status)
6892 || float128_is_signaling_nan(b, status)) {
ff32e16e 6893 float_raise(float_flag_invalid, status);
67b7861d
AJ
6894 }
6895 return 1;
6896 }
6897 return 0;
6898}
6899
e5a41ffa
PM
6900static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
6901 int is_quiet, float_status *status)
f6714d36
AJ
6902{
6903 flag aSign, bSign;
6904
d1eb8f2a
AD
6905 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6906 float_raise(float_flag_invalid, status);
6907 return float_relation_unordered;
6908 }
f6714d36
AJ
6909 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6910 ( extractFloatx80Frac( a )<<1 ) ) ||
6911 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6912 ( extractFloatx80Frac( b )<<1 ) )) {
6913 if (!is_quiet ||
af39bc8c
AM
6914 floatx80_is_signaling_nan(a, status) ||
6915 floatx80_is_signaling_nan(b, status)) {
ff32e16e 6916 float_raise(float_flag_invalid, status);
f6714d36
AJ
6917 }
6918 return float_relation_unordered;
6919 }
6920 aSign = extractFloatx80Sign( a );
6921 bSign = extractFloatx80Sign( b );
6922 if ( aSign != bSign ) {
6923
6924 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6925 ( ( a.low | b.low ) == 0 ) ) {
6926 /* zero case */
6927 return float_relation_equal;
6928 } else {
6929 return 1 - (2 * aSign);
6930 }
6931 } else {
6932 if (a.low == b.low && a.high == b.high) {
6933 return float_relation_equal;
6934 } else {
6935 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6936 }
6937 }
6938}
6939
e5a41ffa 6940int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 6941{
ff32e16e 6942 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
6943}
6944
e5a41ffa 6945int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
f6714d36 6946{
ff32e16e 6947 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
6948}
6949
e5a41ffa
PM
6950static inline int float128_compare_internal(float128 a, float128 b,
6951 int is_quiet, float_status *status)
1f587329
BS
6952{
6953 flag aSign, bSign;
6954
6955 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6956 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
6957 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
6958 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
6959 if (!is_quiet ||
af39bc8c
AM
6960 float128_is_signaling_nan(a, status) ||
6961 float128_is_signaling_nan(b, status)) {
ff32e16e 6962 float_raise(float_flag_invalid, status);
1f587329
BS
6963 }
6964 return float_relation_unordered;
6965 }
6966 aSign = extractFloat128Sign( a );
6967 bSign = extractFloat128Sign( b );
6968 if ( aSign != bSign ) {
6969 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
6970 /* zero case */
6971 return float_relation_equal;
6972 } else {
6973 return 1 - (2 * aSign);
6974 }
6975 } else {
6976 if (a.low == b.low && a.high == b.high) {
6977 return float_relation_equal;
6978 } else {
6979 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6980 }
6981 }
6982}
6983
e5a41ffa 6984int float128_compare(float128 a, float128 b, float_status *status)
1f587329 6985{
ff32e16e 6986 return float128_compare_internal(a, b, 0, status);
1f587329
BS
6987}
6988
e5a41ffa 6989int float128_compare_quiet(float128 a, float128 b, float_status *status)
1f587329 6990{
ff32e16e 6991 return float128_compare_internal(a, b, 1, status);
1f587329
BS
6992}
6993
e5a41ffa 6994floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb
PB
6995{
6996 flag aSign;
326b9e98 6997 int32_t aExp;
bb98fe42 6998 uint64_t aSig;
9ee6e8bb 6999
d1eb8f2a
AD
7000 if (floatx80_invalid_encoding(a)) {
7001 float_raise(float_flag_invalid, status);
7002 return floatx80_default_nan(status);
7003 }
9ee6e8bb
PB
7004 aSig = extractFloatx80Frac( a );
7005 aExp = extractFloatx80Exp( a );
7006 aSign = extractFloatx80Sign( a );
7007
326b9e98
AJ
7008 if ( aExp == 0x7FFF ) {
7009 if ( aSig<<1 ) {
ff32e16e 7010 return propagateFloatx80NaN(a, a, status);
326b9e98 7011 }
9ee6e8bb
PB
7012 return a;
7013 }
326b9e98 7014
3c85c37f
PM
7015 if (aExp == 0) {
7016 if (aSig == 0) {
7017 return a;
7018 }
7019 aExp++;
7020 }
69397542 7021
326b9e98
AJ
7022 if (n > 0x10000) {
7023 n = 0x10000;
7024 } else if (n < -0x10000) {
7025 n = -0x10000;
7026 }
7027
9ee6e8bb 7028 aExp += n;
a2f2d288
PM
7029 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7030 aSign, aExp, aSig, 0, status);
9ee6e8bb 7031}
9ee6e8bb 7032
e5a41ffa 7033float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb
PB
7034{
7035 flag aSign;
326b9e98 7036 int32_t aExp;
bb98fe42 7037 uint64_t aSig0, aSig1;
9ee6e8bb
PB
7038
7039 aSig1 = extractFloat128Frac1( a );
7040 aSig0 = extractFloat128Frac0( a );
7041 aExp = extractFloat128Exp( a );
7042 aSign = extractFloat128Sign( a );
7043 if ( aExp == 0x7FFF ) {
326b9e98 7044 if ( aSig0 | aSig1 ) {
ff32e16e 7045 return propagateFloat128NaN(a, a, status);
326b9e98 7046 }
9ee6e8bb
PB
7047 return a;
7048 }
3c85c37f 7049 if (aExp != 0) {
69397542 7050 aSig0 |= LIT64( 0x0001000000000000 );
3c85c37f 7051 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 7052 return a;
3c85c37f
PM
7053 } else {
7054 aExp++;
7055 }
69397542 7056
326b9e98
AJ
7057 if (n > 0x10000) {
7058 n = 0x10000;
7059 } else if (n < -0x10000) {
7060 n = -0x10000;
7061 }
7062
69397542
PB
7063 aExp += n - 1;
7064 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 7065 , status);
9ee6e8bb
PB
7066
7067}