]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
tests/fp/fp-test: add floating point tests
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
6fff2167 86#include "qemu/bitops.h"
6b4c305c 87#include "fpu/softfloat.h"
158142c2 88
dc355b76 89/* We only need stdlib for abort() */
dc355b76 90
158142c2
FB
91/*----------------------------------------------------------------------------
92| Primitive arithmetic functions, including multi-word arithmetic, and
93| division and square root approximations. (Can be specialized to target if
94| desired.)
95*----------------------------------------------------------------------------*/
88857aca 96#include "fpu/softfloat-macros.h"
158142c2 97
bb4d4bb3
PM
98/*----------------------------------------------------------------------------
99| Returns the fraction bits of the half-precision floating-point value `a'.
100*----------------------------------------------------------------------------*/
101
a49db98d 102static inline uint32_t extractFloat16Frac(float16 a)
bb4d4bb3
PM
103{
104 return float16_val(a) & 0x3ff;
105}
106
107/*----------------------------------------------------------------------------
108| Returns the exponent bits of the half-precision floating-point value `a'.
109*----------------------------------------------------------------------------*/
110
0c48262d 111static inline int extractFloat16Exp(float16 a)
bb4d4bb3
PM
112{
113 return (float16_val(a) >> 10) & 0x1f;
114}
115
d97544c9
AB
116/*----------------------------------------------------------------------------
117| Returns the fraction bits of the single-precision floating-point value `a'.
118*----------------------------------------------------------------------------*/
119
120static inline uint32_t extractFloat32Frac(float32 a)
121{
122 return float32_val(a) & 0x007FFFFF;
123}
124
125/*----------------------------------------------------------------------------
126| Returns the exponent bits of the single-precision floating-point value `a'.
127*----------------------------------------------------------------------------*/
128
129static inline int extractFloat32Exp(float32 a)
130{
131 return (float32_val(a) >> 23) & 0xFF;
132}
133
134/*----------------------------------------------------------------------------
135| Returns the sign bit of the single-precision floating-point value `a'.
136*----------------------------------------------------------------------------*/
137
138static inline flag extractFloat32Sign(float32 a)
139{
140 return float32_val(a) >> 31;
141}
142
143/*----------------------------------------------------------------------------
144| Returns the fraction bits of the double-precision floating-point value `a'.
145*----------------------------------------------------------------------------*/
146
147static inline uint64_t extractFloat64Frac(float64 a)
148{
149 return float64_val(a) & LIT64(0x000FFFFFFFFFFFFF);
150}
151
152/*----------------------------------------------------------------------------
153| Returns the exponent bits of the double-precision floating-point value `a'.
154*----------------------------------------------------------------------------*/
155
156static inline int extractFloat64Exp(float64 a)
157{
158 return (float64_val(a) >> 52) & 0x7FF;
159}
160
161/*----------------------------------------------------------------------------
162| Returns the sign bit of the double-precision floating-point value `a'.
163*----------------------------------------------------------------------------*/
164
165static inline flag extractFloat64Sign(float64 a)
166{
167 return float64_val(a) >> 63;
168}
169
a90119b5
AB
/*
 * Classification of a decomposed floating point number.
 *
 * The enumerators are ordered so that both NaN kinds come last: any
 * class value >= float_class_qnan is a NaN.  Keep that ordering when
 * adding new classes.  The enum is packed so it occupies as little
 * storage as possible.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified = 0,
    float_class_zero         = 1,
    float_class_normal       = 2,
    float_class_inf          = 3,
    float_class_qnan         = 4,  /* all NaNs from here */
    float_class_snan         = 5,
} FloatClass;
183
247d1f21
RH
184/* Simple helpers for checking if, or what kind of, NaN we have */
185static inline __attribute__((unused)) bool is_nan(FloatClass c)
186{
187 return unlikely(c >= float_class_qnan);
188}
189
190static inline __attribute__((unused)) bool is_snan(FloatClass c)
191{
192 return c == float_class_snan;
193}
194
195static inline __attribute__((unused)) bool is_qnan(FloatClass c)
196{
197 return c == float_class_qnan;
198}
199
a90119b5
AB
200/*
201 * Structure holding all of the decomposed parts of a float. The
202 * exponent is unbiased and the fraction is normalized. All
203 * calculations are done with a 64 bit fraction and then rounded as
204 * appropriate for the final format.
205 *
206 * Thanks to the packed FloatClass a decent compiler should be able to
207 * fit the whole structure into registers and avoid using the stack
208 * for parameter passing.
209 */
210
211typedef struct {
212 uint64_t frac;
213 int32_t exp;
214 FloatClass cls;
215 bool sign;
216} FloatParts;
217
/*
 * Position of the binary point inside the 64-bit decomposed fraction.
 * The implicit (integer) bit of a normalized fraction sits at this bit,
 * and the bit directly above it is used to detect carry-out.
 */
#define DECOMPOSED_BINARY_POINT    (64 - 2)
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
#define DECOMPOSED_OVERFLOW_BIT    (DECOMPOSED_IMPLICIT_BIT << 1)

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/*
 * Expand the FloatFmt fields that follow from the exponent width E and
 * the fraction width F.  Intended for use inside a FloatFmt initializer;
 * further designated initializers (e.g. .arm_althp) may follow it.
 *
 * Every use of E and F is parenthesized so that an expression argument
 * such as FLOAT_PARAMS(5, 8 + 2) expands with the intended precedence.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = (E),                                           \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                         \
    .exp_max        = (1 << (E)) - 1,                                \
    .frac_size      = (F),                                           \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                 \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),       \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1), \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1, \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1
259
260static const FloatFmt float16_params = {
261 FLOAT_PARAMS(5, 10)
262};
263
6fed16b2
AB
264static const FloatFmt float16_params_ahp = {
265 FLOAT_PARAMS(5, 10),
266 .arm_althp = true
267};
268
a90119b5
AB
269static const FloatFmt float32_params = {
270 FLOAT_PARAMS(8, 23)
271};
272
273static const FloatFmt float64_params = {
274 FLOAT_PARAMS(11, 52)
275};
276
6fff2167
AB
277/* Unpack a float to parts, but do not canonicalize. */
278static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
279{
280 const int sign_pos = fmt.frac_size + fmt.exp_size;
281
282 return (FloatParts) {
283 .cls = float_class_unclassified,
284 .sign = extract64(raw, sign_pos, 1),
285 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
286 .frac = extract64(raw, 0, fmt.frac_size),
287 };
288}
289
290static inline FloatParts float16_unpack_raw(float16 f)
291{
292 return unpack_raw(float16_params, f);
293}
294
295static inline FloatParts float32_unpack_raw(float32 f)
296{
297 return unpack_raw(float32_params, f);
298}
299
300static inline FloatParts float64_unpack_raw(float64 f)
301{
302 return unpack_raw(float64_params, f);
303}
304
305/* Pack a float from parts, but do not canonicalize. */
306static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
307{
308 const int sign_pos = fmt.frac_size + fmt.exp_size;
309 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
310 return deposit64(ret, sign_pos, 1, p.sign);
311}
312
313static inline float16 float16_pack_raw(FloatParts p)
314{
315 return make_float16(pack_raw(float16_params, p));
316}
317
318static inline float32 float32_pack_raw(FloatParts p)
319{
320 return make_float32(pack_raw(float32_params, p));
321}
322
323static inline float64 float64_pack_raw(FloatParts p)
324{
325 return make_float64(pack_raw(float64_params, p));
326}
327
0664335a
RH
328/*----------------------------------------------------------------------------
329| Functions and definitions to determine: (1) whether tininess for underflow
330| is detected before or after rounding by default, (2) what (if anything)
331| happens when exceptions are raised, (3) how signaling NaNs are distinguished
332| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
333| are propagated from function inputs to output. These details are target-
334| specific.
335*----------------------------------------------------------------------------*/
336#include "softfloat-specialize.h"
337
6fff2167
AB
338/* Canonicalize EXP and FRAC, setting CLS. */
339static FloatParts canonicalize(FloatParts part, const FloatFmt *parm,
340 float_status *status)
341{
ca3a3d5a 342 if (part.exp == parm->exp_max && !parm->arm_althp) {
6fff2167
AB
343 if (part.frac == 0) {
344 part.cls = float_class_inf;
345 } else {
94933df0 346 part.frac <<= parm->frac_shift;
298b468e
RH
347 part.cls = (parts_is_snan_frac(part.frac, status)
348 ? float_class_snan : float_class_qnan);
6fff2167
AB
349 }
350 } else if (part.exp == 0) {
351 if (likely(part.frac == 0)) {
352 part.cls = float_class_zero;
353 } else if (status->flush_inputs_to_zero) {
354 float_raise(float_flag_input_denormal, status);
355 part.cls = float_class_zero;
356 part.frac = 0;
357 } else {
358 int shift = clz64(part.frac) - 1;
359 part.cls = float_class_normal;
360 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
361 part.frac <<= shift;
362 }
363 } else {
364 part.cls = float_class_normal;
365 part.exp -= parm->exp_bias;
366 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
367 }
368 return part;
369}
370
371/* Round and uncanonicalize a floating-point number by parts. There
372 * are FRAC_SHIFT bits that may require rounding at the bottom of the
373 * fraction; these bits will be removed. The exponent will be biased
374 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
375 */
376
377static FloatParts round_canonical(FloatParts p, float_status *s,
378 const FloatFmt *parm)
379{
380 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
381 const uint64_t round_mask = parm->round_mask;
382 const uint64_t roundeven_mask = parm->roundeven_mask;
383 const int exp_max = parm->exp_max;
384 const int frac_shift = parm->frac_shift;
385 uint64_t frac, inc;
386 int exp, flags = 0;
387 bool overflow_norm;
388
389 frac = p.frac;
390 exp = p.exp;
391
392 switch (p.cls) {
393 case float_class_normal:
394 switch (s->float_rounding_mode) {
395 case float_round_nearest_even:
396 overflow_norm = false;
397 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
398 break;
399 case float_round_ties_away:
400 overflow_norm = false;
401 inc = frac_lsbm1;
402 break;
403 case float_round_to_zero:
404 overflow_norm = true;
405 inc = 0;
406 break;
407 case float_round_up:
408 inc = p.sign ? 0 : round_mask;
409 overflow_norm = p.sign;
410 break;
411 case float_round_down:
412 inc = p.sign ? round_mask : 0;
413 overflow_norm = !p.sign;
414 break;
415 default:
416 g_assert_not_reached();
417 }
418
419 exp += parm->exp_bias;
420 if (likely(exp > 0)) {
421 if (frac & round_mask) {
422 flags |= float_flag_inexact;
423 frac += inc;
424 if (frac & DECOMPOSED_OVERFLOW_BIT) {
425 frac >>= 1;
426 exp++;
427 }
428 }
429 frac >>= frac_shift;
430
ca3a3d5a
AB
431 if (parm->arm_althp) {
432 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
433 if (unlikely(exp > exp_max)) {
434 /* Overflow. Return the maximum normal. */
435 flags = float_flag_invalid;
436 exp = exp_max;
437 frac = -1;
438 }
439 } else if (unlikely(exp >= exp_max)) {
6fff2167
AB
440 flags |= float_flag_overflow | float_flag_inexact;
441 if (overflow_norm) {
442 exp = exp_max - 1;
443 frac = -1;
444 } else {
445 p.cls = float_class_inf;
446 goto do_inf;
447 }
448 }
449 } else if (s->flush_to_zero) {
450 flags |= float_flag_output_denormal;
451 p.cls = float_class_zero;
452 goto do_zero;
453 } else {
454 bool is_tiny = (s->float_detect_tininess
455 == float_tininess_before_rounding)
456 || (exp < 0)
457 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
458
459 shift64RightJamming(frac, 1 - exp, &frac);
460 if (frac & round_mask) {
461 /* Need to recompute round-to-even. */
462 if (s->float_rounding_mode == float_round_nearest_even) {
463 inc = ((frac & roundeven_mask) != frac_lsbm1
464 ? frac_lsbm1 : 0);
465 }
466 flags |= float_flag_inexact;
467 frac += inc;
468 }
469
470 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
471 frac >>= frac_shift;
472
473 if (is_tiny && (flags & float_flag_inexact)) {
474 flags |= float_flag_underflow;
475 }
476 if (exp == 0 && frac == 0) {
477 p.cls = float_class_zero;
478 }
479 }
480 break;
481
482 case float_class_zero:
483 do_zero:
484 exp = 0;
485 frac = 0;
486 break;
487
488 case float_class_inf:
489 do_inf:
ca3a3d5a 490 assert(!parm->arm_althp);
6fff2167
AB
491 exp = exp_max;
492 frac = 0;
493 break;
494
495 case float_class_qnan:
496 case float_class_snan:
ca3a3d5a 497 assert(!parm->arm_althp);
6fff2167 498 exp = exp_max;
94933df0 499 frac >>= parm->frac_shift;
6fff2167
AB
500 break;
501
502 default:
503 g_assert_not_reached();
504 }
505
506 float_raise(flags, s);
507 p.exp = exp;
508 p.frac = frac;
509 return p;
510}
511
6fed16b2
AB
512/* Explicit FloatFmt version */
513static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
514 const FloatFmt *params)
515{
516 return canonicalize(float16_unpack_raw(f), params, s);
517}
518
6fff2167
AB
519static FloatParts float16_unpack_canonical(float16 f, float_status *s)
520{
6fed16b2
AB
521 return float16a_unpack_canonical(f, s, &float16_params);
522}
523
524static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
525 const FloatFmt *params)
526{
527 return float16_pack_raw(round_canonical(p, s, params));
6fff2167
AB
528}
529
530static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
531{
6fed16b2 532 return float16a_round_pack_canonical(p, s, &float16_params);
6fff2167
AB
533}
534
535static FloatParts float32_unpack_canonical(float32 f, float_status *s)
536{
537 return canonicalize(float32_unpack_raw(f), &float32_params, s);
538}
539
540static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
541{
0bcfbcbe 542 return float32_pack_raw(round_canonical(p, s, &float32_params));
6fff2167
AB
543}
544
545static FloatParts float64_unpack_canonical(float64 f, float_status *s)
546{
547 return canonicalize(float64_unpack_raw(f), &float64_params, s);
548}
549
550static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
551{
0bcfbcbe 552 return float64_pack_raw(round_canonical(p, s, &float64_params));
6fff2167
AB
553}
554
dbe4d53a
AB
555static FloatParts return_nan(FloatParts a, float_status *s)
556{
557 switch (a.cls) {
558 case float_class_snan:
559 s->float_exception_flags |= float_flag_invalid;
0bcfbcbe 560 a = parts_silence_nan(a, s);
dbe4d53a
AB
561 /* fall through */
562 case float_class_qnan:
563 if (s->default_nan_mode) {
f7e598e2 564 return parts_default_nan(s);
dbe4d53a
AB
565 }
566 break;
567
568 default:
569 g_assert_not_reached();
570 }
571 return a;
572}
573
6fff2167
AB
574static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
575{
576 if (is_snan(a.cls) || is_snan(b.cls)) {
577 s->float_exception_flags |= float_flag_invalid;
578 }
579
580 if (s->default_nan_mode) {
f7e598e2 581 return parts_default_nan(s);
6fff2167 582 } else {
4f251cfd 583 if (pickNaN(a.cls, b.cls,
6fff2167
AB
584 a.frac > b.frac ||
585 (a.frac == b.frac && a.sign < b.sign))) {
586 a = b;
587 }
0bcfbcbe
RH
588 if (is_snan(a.cls)) {
589 return parts_silence_nan(a, s);
590 }
6fff2167
AB
591 }
592 return a;
593}
594
d446830a
AB
595static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
596 bool inf_zero, float_status *s)
597{
1839189b
PM
598 int which;
599
d446830a
AB
600 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
601 s->float_exception_flags |= float_flag_invalid;
602 }
603
3bd2dec1 604 which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
1839189b 605
d446830a 606 if (s->default_nan_mode) {
1839189b
PM
607 /* Note that this check is after pickNaNMulAdd so that function
608 * has an opportunity to set the Invalid flag.
609 */
f7e598e2 610 which = 3;
1839189b 611 }
d446830a 612
1839189b
PM
613 switch (which) {
614 case 0:
615 break;
616 case 1:
617 a = b;
618 break;
619 case 2:
620 a = c;
621 break;
622 case 3:
f7e598e2 623 return parts_default_nan(s);
1839189b
PM
624 default:
625 g_assert_not_reached();
d446830a 626 }
1839189b 627
0bcfbcbe
RH
628 if (is_snan(a.cls)) {
629 return parts_silence_nan(a, s);
630 }
d446830a
AB
631 return a;
632}
633
6fff2167
AB
634/*
635 * Returns the result of adding or subtracting the values of the
636 * floating-point values `a' and `b'. The operation is performed
637 * according to the IEC/IEEE Standard for Binary Floating-Point
638 * Arithmetic.
639 */
640
641static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
642 float_status *s)
643{
644 bool a_sign = a.sign;
645 bool b_sign = b.sign ^ subtract;
646
647 if (a_sign != b_sign) {
648 /* Subtraction */
649
650 if (a.cls == float_class_normal && b.cls == float_class_normal) {
651 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
652 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
653 a.frac = a.frac - b.frac;
654 } else {
655 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
656 a.frac = b.frac - a.frac;
657 a.exp = b.exp;
658 a_sign ^= 1;
659 }
660
661 if (a.frac == 0) {
662 a.cls = float_class_zero;
663 a.sign = s->float_rounding_mode == float_round_down;
664 } else {
665 int shift = clz64(a.frac) - 1;
666 a.frac = a.frac << shift;
667 a.exp = a.exp - shift;
668 a.sign = a_sign;
669 }
670 return a;
671 }
672 if (is_nan(a.cls) || is_nan(b.cls)) {
673 return pick_nan(a, b, s);
674 }
675 if (a.cls == float_class_inf) {
676 if (b.cls == float_class_inf) {
677 float_raise(float_flag_invalid, s);
f7e598e2 678 return parts_default_nan(s);
6fff2167
AB
679 }
680 return a;
681 }
682 if (a.cls == float_class_zero && b.cls == float_class_zero) {
683 a.sign = s->float_rounding_mode == float_round_down;
684 return a;
685 }
686 if (a.cls == float_class_zero || b.cls == float_class_inf) {
687 b.sign = a_sign ^ 1;
688 return b;
689 }
690 if (b.cls == float_class_zero) {
691 return a;
692 }
693 } else {
694 /* Addition */
695 if (a.cls == float_class_normal && b.cls == float_class_normal) {
696 if (a.exp > b.exp) {
697 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
698 } else if (a.exp < b.exp) {
699 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
700 a.exp = b.exp;
701 }
702 a.frac += b.frac;
703 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
64d450a0 704 shift64RightJamming(a.frac, 1, &a.frac);
6fff2167
AB
705 a.exp += 1;
706 }
707 return a;
708 }
709 if (is_nan(a.cls) || is_nan(b.cls)) {
710 return pick_nan(a, b, s);
711 }
712 if (a.cls == float_class_inf || b.cls == float_class_zero) {
713 return a;
714 }
715 if (b.cls == float_class_inf || a.cls == float_class_zero) {
716 b.sign = b_sign;
717 return b;
718 }
719 }
720 g_assert_not_reached();
721}
722
723/*
724 * Returns the result of adding or subtracting the floating-point
725 * values `a' and `b'. The operation is performed according to the
726 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
727 */
728
729float16 __attribute__((flatten)) float16_add(float16 a, float16 b,
730 float_status *status)
731{
732 FloatParts pa = float16_unpack_canonical(a, status);
733 FloatParts pb = float16_unpack_canonical(b, status);
734 FloatParts pr = addsub_floats(pa, pb, false, status);
735
736 return float16_round_pack_canonical(pr, status);
737}
738
739float32 __attribute__((flatten)) float32_add(float32 a, float32 b,
740 float_status *status)
741{
742 FloatParts pa = float32_unpack_canonical(a, status);
743 FloatParts pb = float32_unpack_canonical(b, status);
744 FloatParts pr = addsub_floats(pa, pb, false, status);
745
746 return float32_round_pack_canonical(pr, status);
747}
748
749float64 __attribute__((flatten)) float64_add(float64 a, float64 b,
750 float_status *status)
751{
752 FloatParts pa = float64_unpack_canonical(a, status);
753 FloatParts pb = float64_unpack_canonical(b, status);
754 FloatParts pr = addsub_floats(pa, pb, false, status);
755
756 return float64_round_pack_canonical(pr, status);
757}
758
759float16 __attribute__((flatten)) float16_sub(float16 a, float16 b,
760 float_status *status)
761{
762 FloatParts pa = float16_unpack_canonical(a, status);
763 FloatParts pb = float16_unpack_canonical(b, status);
764 FloatParts pr = addsub_floats(pa, pb, true, status);
765
766 return float16_round_pack_canonical(pr, status);
767}
768
769float32 __attribute__((flatten)) float32_sub(float32 a, float32 b,
770 float_status *status)
771{
772 FloatParts pa = float32_unpack_canonical(a, status);
773 FloatParts pb = float32_unpack_canonical(b, status);
774 FloatParts pr = addsub_floats(pa, pb, true, status);
775
776 return float32_round_pack_canonical(pr, status);
777}
778
779float64 __attribute__((flatten)) float64_sub(float64 a, float64 b,
780 float_status *status)
781{
782 FloatParts pa = float64_unpack_canonical(a, status);
783 FloatParts pb = float64_unpack_canonical(b, status);
784 FloatParts pr = addsub_floats(pa, pb, true, status);
785
786 return float64_round_pack_canonical(pr, status);
787}
788
74d707e2
AB
789/*
790 * Returns the result of multiplying the floating-point values `a' and
791 * `b'. The operation is performed according to the IEC/IEEE Standard
792 * for Binary Floating-Point Arithmetic.
793 */
794
795static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
796{
797 bool sign = a.sign ^ b.sign;
798
799 if (a.cls == float_class_normal && b.cls == float_class_normal) {
800 uint64_t hi, lo;
801 int exp = a.exp + b.exp;
802
803 mul64To128(a.frac, b.frac, &hi, &lo);
804 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
805 if (lo & DECOMPOSED_OVERFLOW_BIT) {
806 shift64RightJamming(lo, 1, &lo);
807 exp += 1;
808 }
809
810 /* Re-use a */
811 a.exp = exp;
812 a.sign = sign;
813 a.frac = lo;
814 return a;
815 }
816 /* handle all the NaN cases */
817 if (is_nan(a.cls) || is_nan(b.cls)) {
818 return pick_nan(a, b, s);
819 }
820 /* Inf * Zero == NaN */
821 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
822 (a.cls == float_class_zero && b.cls == float_class_inf)) {
823 s->float_exception_flags |= float_flag_invalid;
f7e598e2 824 return parts_default_nan(s);
74d707e2
AB
825 }
826 /* Multiply by 0 or Inf */
827 if (a.cls == float_class_inf || a.cls == float_class_zero) {
828 a.sign = sign;
829 return a;
830 }
831 if (b.cls == float_class_inf || b.cls == float_class_zero) {
832 b.sign = sign;
833 return b;
834 }
835 g_assert_not_reached();
836}
837
838float16 __attribute__((flatten)) float16_mul(float16 a, float16 b,
839 float_status *status)
840{
841 FloatParts pa = float16_unpack_canonical(a, status);
842 FloatParts pb = float16_unpack_canonical(b, status);
843 FloatParts pr = mul_floats(pa, pb, status);
844
845 return float16_round_pack_canonical(pr, status);
846}
847
848float32 __attribute__((flatten)) float32_mul(float32 a, float32 b,
849 float_status *status)
850{
851 FloatParts pa = float32_unpack_canonical(a, status);
852 FloatParts pb = float32_unpack_canonical(b, status);
853 FloatParts pr = mul_floats(pa, pb, status);
854
855 return float32_round_pack_canonical(pr, status);
856}
857
858float64 __attribute__((flatten)) float64_mul(float64 a, float64 b,
859 float_status *status)
860{
861 FloatParts pa = float64_unpack_canonical(a, status);
862 FloatParts pb = float64_unpack_canonical(b, status);
863 FloatParts pr = mul_floats(pa, pb, status);
864
865 return float64_round_pack_canonical(pr, status);
866}
867
d446830a
AB
868/*
869 * Returns the result of multiplying the floating-point values `a' and
870 * `b' then adding 'c', with no intermediate rounding step after the
871 * multiplication. The operation is performed according to the
872 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
873 * The flags argument allows the caller to select negation of the
874 * addend, the intermediate product, or the final result. (The
875 * difference between this and having the caller do a separate
876 * negation is that negating externally will flip the sign bit on
877 * NaNs.)
878 */
879
880static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
881 int flags, float_status *s)
882{
883 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
884 ((1 << float_class_inf) | (1 << float_class_zero));
885 bool p_sign;
886 bool sign_flip = flags & float_muladd_negate_result;
887 FloatClass p_class;
888 uint64_t hi, lo;
889 int p_exp;
890
891 /* It is implementation-defined whether the cases of (0,inf,qnan)
892 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
893 * they return if they do), so we have to hand this information
894 * off to the target-specific pick-a-NaN routine.
895 */
896 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
897 return pick_nan_muladd(a, b, c, inf_zero, s);
898 }
899
900 if (inf_zero) {
901 s->float_exception_flags |= float_flag_invalid;
f7e598e2 902 return parts_default_nan(s);
d446830a
AB
903 }
904
905 if (flags & float_muladd_negate_c) {
906 c.sign ^= 1;
907 }
908
909 p_sign = a.sign ^ b.sign;
910
911 if (flags & float_muladd_negate_product) {
912 p_sign ^= 1;
913 }
914
915 if (a.cls == float_class_inf || b.cls == float_class_inf) {
916 p_class = float_class_inf;
917 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
918 p_class = float_class_zero;
919 } else {
920 p_class = float_class_normal;
921 }
922
923 if (c.cls == float_class_inf) {
924 if (p_class == float_class_inf && p_sign != c.sign) {
925 s->float_exception_flags |= float_flag_invalid;
f7e598e2 926 return parts_default_nan(s);
d446830a
AB
927 } else {
928 a.cls = float_class_inf;
929 a.sign = c.sign ^ sign_flip;
f7e598e2 930 return a;
d446830a 931 }
d446830a
AB
932 }
933
934 if (p_class == float_class_inf) {
935 a.cls = float_class_inf;
936 a.sign = p_sign ^ sign_flip;
937 return a;
938 }
939
940 if (p_class == float_class_zero) {
941 if (c.cls == float_class_zero) {
942 if (p_sign != c.sign) {
943 p_sign = s->float_rounding_mode == float_round_down;
944 }
945 c.sign = p_sign;
946 } else if (flags & float_muladd_halve_result) {
947 c.exp -= 1;
948 }
949 c.sign ^= sign_flip;
950 return c;
951 }
952
953 /* a & b should be normals now... */
954 assert(a.cls == float_class_normal &&
955 b.cls == float_class_normal);
956
957 p_exp = a.exp + b.exp;
958
959 /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
960 * result.
961 */
962 mul64To128(a.frac, b.frac, &hi, &lo);
963 /* binary point now at bit 124 */
964
965 /* check for overflow */
966 if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
967 shift128RightJamming(hi, lo, 1, &hi, &lo);
968 p_exp += 1;
969 }
970
971 /* + add/sub */
972 if (c.cls == float_class_zero) {
973 /* move binary point back to 62 */
974 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
975 } else {
976 int exp_diff = p_exp - c.exp;
977 if (p_sign == c.sign) {
978 /* Addition */
979 if (exp_diff <= 0) {
980 shift128RightJamming(hi, lo,
981 DECOMPOSED_BINARY_POINT - exp_diff,
982 &hi, &lo);
983 lo += c.frac;
984 p_exp = c.exp;
985 } else {
986 uint64_t c_hi, c_lo;
987 /* shift c to the same binary point as the product (124) */
988 c_hi = c.frac >> 2;
989 c_lo = 0;
990 shift128RightJamming(c_hi, c_lo,
991 exp_diff,
992 &c_hi, &c_lo);
993 add128(hi, lo, c_hi, c_lo, &hi, &lo);
994 /* move binary point back to 62 */
995 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
996 }
997
998 if (lo & DECOMPOSED_OVERFLOW_BIT) {
999 shift64RightJamming(lo, 1, &lo);
1000 p_exp += 1;
1001 }
1002
1003 } else {
1004 /* Subtraction */
1005 uint64_t c_hi, c_lo;
1006 /* make C binary point match product at bit 124 */
1007 c_hi = c.frac >> 2;
1008 c_lo = 0;
1009
1010 if (exp_diff <= 0) {
1011 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1012 if (exp_diff == 0
1013 &&
1014 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1015 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1016 } else {
1017 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1018 p_sign ^= 1;
1019 p_exp = c.exp;
1020 }
1021 } else {
1022 shift128RightJamming(c_hi, c_lo,
1023 exp_diff,
1024 &c_hi, &c_lo);
1025 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1026 }
1027
1028 if (hi == 0 && lo == 0) {
1029 a.cls = float_class_zero;
1030 a.sign = s->float_rounding_mode == float_round_down;
1031 a.sign ^= sign_flip;
1032 return a;
1033 } else {
1034 int shift;
1035 if (hi != 0) {
1036 shift = clz64(hi);
1037 } else {
1038 shift = clz64(lo) + 64;
1039 }
1040 /* Normalizing to a binary point of 124 is the
1041 correct adjust for the exponent. However since we're
1042 shifting, we might as well put the binary point back
1043 at 62 where we really want it. Therefore shift as
1044 if we're leaving 1 bit at the top of the word, but
1045 adjust the exponent as if we're leaving 3 bits. */
1046 shift -= 1;
1047 if (shift >= 64) {
1048 lo = lo << (shift - 64);
1049 } else {
1050 hi = (hi << shift) | (lo >> (64 - shift));
1051 lo = hi | ((lo << shift) != 0);
1052 }
1053 p_exp -= shift - 2;
1054 }
1055 }
1056 }
1057
1058 if (flags & float_muladd_halve_result) {
1059 p_exp -= 1;
1060 }
1061
1062 /* finally prepare our result */
1063 a.cls = float_class_normal;
1064 a.sign = p_sign ^ sign_flip;
1065 a.exp = p_exp;
1066 a.frac = lo;
1067
1068 return a;
1069}
1070
1071float16 __attribute__((flatten)) float16_muladd(float16 a, float16 b, float16 c,
1072 int flags, float_status *status)
1073{
1074 FloatParts pa = float16_unpack_canonical(a, status);
1075 FloatParts pb = float16_unpack_canonical(b, status);
1076 FloatParts pc = float16_unpack_canonical(c, status);
1077 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1078
1079 return float16_round_pack_canonical(pr, status);
1080}
1081
1082float32 __attribute__((flatten)) float32_muladd(float32 a, float32 b, float32 c,
1083 int flags, float_status *status)
1084{
1085 FloatParts pa = float32_unpack_canonical(a, status);
1086 FloatParts pb = float32_unpack_canonical(b, status);
1087 FloatParts pc = float32_unpack_canonical(c, status);
1088 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1089
1090 return float32_round_pack_canonical(pr, status);
1091}
1092
1093float64 __attribute__((flatten)) float64_muladd(float64 a, float64 b, float64 c,
1094 int flags, float_status *status)
1095{
1096 FloatParts pa = float64_unpack_canonical(a, status);
1097 FloatParts pb = float64_unpack_canonical(b, status);
1098 FloatParts pc = float64_unpack_canonical(c, status);
1099 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1100
1101 return float64_round_pack_canonical(pr, status);
1102}
1103
cf07323d
AB
1104/*
1105 * Returns the result of dividing the floating-point value `a' by the
1106 * corresponding value `b'. The operation is performed according to
1107 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1108 */
1109
1110static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1111{
1112 bool sign = a.sign ^ b.sign;
1113
1114 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1115 uint64_t temp_lo, temp_hi;
1116 int exp = a.exp - b.exp;
1117 if (a.frac < b.frac) {
1118 exp -= 1;
1119 shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1,
1120 &temp_hi, &temp_lo);
1121 } else {
1122 shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT,
1123 &temp_hi, &temp_lo);
1124 }
1125 /* LSB of quot is set if inexact which roundandpack will use
1126 * to set flags. Yet again we re-use a for the result */
1127 a.frac = div128To64(temp_lo, temp_hi, b.frac);
1128 a.sign = sign;
1129 a.exp = exp;
1130 return a;
1131 }
1132 /* handle all the NaN cases */
1133 if (is_nan(a.cls) || is_nan(b.cls)) {
1134 return pick_nan(a, b, s);
1135 }
1136 /* 0/0 or Inf/Inf */
1137 if (a.cls == b.cls
1138 &&
1139 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1140 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1141 return parts_default_nan(s);
cf07323d 1142 }
9cb4e398
AB
1143 /* Inf / x or 0 / x */
1144 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1145 a.sign = sign;
1146 return a;
1147 }
cf07323d
AB
1148 /* Div 0 => Inf */
1149 if (b.cls == float_class_zero) {
1150 s->float_exception_flags |= float_flag_divbyzero;
1151 a.cls = float_class_inf;
1152 a.sign = sign;
1153 return a;
1154 }
cf07323d
AB
1155 /* Div by Inf */
1156 if (b.cls == float_class_inf) {
1157 a.cls = float_class_zero;
1158 a.sign = sign;
1159 return a;
1160 }
1161 g_assert_not_reached();
1162}
1163
1164float16 float16_div(float16 a, float16 b, float_status *status)
1165{
1166 FloatParts pa = float16_unpack_canonical(a, status);
1167 FloatParts pb = float16_unpack_canonical(b, status);
1168 FloatParts pr = div_floats(pa, pb, status);
1169
1170 return float16_round_pack_canonical(pr, status);
1171}
1172
1173float32 float32_div(float32 a, float32 b, float_status *status)
1174{
1175 FloatParts pa = float32_unpack_canonical(a, status);
1176 FloatParts pb = float32_unpack_canonical(b, status);
1177 FloatParts pr = div_floats(pa, pb, status);
1178
1179 return float32_round_pack_canonical(pr, status);
1180}
1181
1182float64 float64_div(float64 a, float64 b, float_status *status)
1183{
1184 FloatParts pa = float64_unpack_canonical(a, status);
1185 FloatParts pb = float64_unpack_canonical(b, status);
1186 FloatParts pr = div_floats(pa, pb, status);
1187
1188 return float64_round_pack_canonical(pr, status);
1189}
1190
6fed16b2
AB
1191/*
1192 * Float to Float conversions
1193 *
1194 * Returns the result of converting one float format to another. The
1195 * conversion is performed according to the IEC/IEEE Standard for
1196 * Binary Floating-Point Arithmetic.
1197 *
1198 * The float_to_float helper only needs to take care of raising
1199 * invalid exceptions and handling the conversion on NaNs.
1200 */
1201
1202static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1203 float_status *s)
1204{
1205 if (dstf->arm_althp) {
1206 switch (a.cls) {
1207 case float_class_qnan:
1208 case float_class_snan:
1209 /* There is no NaN in the destination format. Raise Invalid
1210 * and return a zero with the sign of the input NaN.
1211 */
1212 s->float_exception_flags |= float_flag_invalid;
1213 a.cls = float_class_zero;
1214 a.frac = 0;
1215 a.exp = 0;
1216 break;
1217
1218 case float_class_inf:
1219 /* There is no Inf in the destination format. Raise Invalid
1220 * and return the maximum normal with the correct sign.
1221 */
1222 s->float_exception_flags |= float_flag_invalid;
1223 a.cls = float_class_normal;
1224 a.exp = dstf->exp_max;
1225 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1226 break;
1227
1228 default:
1229 break;
1230 }
1231 } else if (is_nan(a.cls)) {
1232 if (is_snan(a.cls)) {
1233 s->float_exception_flags |= float_flag_invalid;
1234 a = parts_silence_nan(a, s);
1235 }
1236 if (s->default_nan_mode) {
1237 return parts_default_nan(s);
1238 }
1239 }
1240 return a;
1241}
1242
1243float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1244{
1245 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1246 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1247 FloatParts pr = float_to_float(p, &float32_params, s);
1248 return float32_round_pack_canonical(pr, s);
1249}
1250
1251float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1252{
1253 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1254 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1255 FloatParts pr = float_to_float(p, &float64_params, s);
1256 return float64_round_pack_canonical(pr, s);
1257}
1258
1259float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1260{
1261 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1262 FloatParts p = float32_unpack_canonical(a, s);
1263 FloatParts pr = float_to_float(p, fmt16, s);
1264 return float16a_round_pack_canonical(pr, s, fmt16);
1265}
1266
1267float64 float32_to_float64(float32 a, float_status *s)
1268{
1269 FloatParts p = float32_unpack_canonical(a, s);
1270 FloatParts pr = float_to_float(p, &float64_params, s);
1271 return float64_round_pack_canonical(pr, s);
1272}
1273
1274float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1275{
1276 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1277 FloatParts p = float64_unpack_canonical(a, s);
1278 FloatParts pr = float_to_float(p, fmt16, s);
1279 return float16a_round_pack_canonical(pr, s, fmt16);
1280}
1281
1282float32 float64_to_float32(float64 a, float_status *s)
1283{
1284 FloatParts p = float64_unpack_canonical(a, s);
1285 FloatParts pr = float_to_float(p, &float32_params, s);
1286 return float32_round_pack_canonical(pr, s);
1287}
1288
dbe4d53a
AB
1289/*
1290 * Rounds the floating-point value `a' to an integer, and returns the
1291 * result as a floating-point value. The operation is performed
1292 * according to the IEC/IEEE Standard for Binary Floating-Point
1293 * Arithmetic.
1294 */
1295
2f6c74be
RH
1296static FloatParts round_to_int(FloatParts a, int rmode,
1297 int scale, float_status *s)
dbe4d53a 1298{
2f6c74be
RH
1299 switch (a.cls) {
1300 case float_class_qnan:
1301 case float_class_snan:
dbe4d53a 1302 return return_nan(a, s);
dbe4d53a 1303
dbe4d53a
AB
1304 case float_class_zero:
1305 case float_class_inf:
dbe4d53a
AB
1306 /* already "integral" */
1307 break;
2f6c74be 1308
dbe4d53a 1309 case float_class_normal:
2f6c74be
RH
1310 scale = MIN(MAX(scale, -0x10000), 0x10000);
1311 a.exp += scale;
1312
dbe4d53a
AB
1313 if (a.exp >= DECOMPOSED_BINARY_POINT) {
1314 /* already integral */
1315 break;
1316 }
1317 if (a.exp < 0) {
1318 bool one;
1319 /* all fractional */
1320 s->float_exception_flags |= float_flag_inexact;
2f6c74be 1321 switch (rmode) {
dbe4d53a
AB
1322 case float_round_nearest_even:
1323 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
1324 break;
1325 case float_round_ties_away:
1326 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
1327 break;
1328 case float_round_to_zero:
1329 one = false;
1330 break;
1331 case float_round_up:
1332 one = !a.sign;
1333 break;
1334 case float_round_down:
1335 one = a.sign;
1336 break;
1337 default:
1338 g_assert_not_reached();
1339 }
1340
1341 if (one) {
1342 a.frac = DECOMPOSED_IMPLICIT_BIT;
1343 a.exp = 0;
1344 } else {
1345 a.cls = float_class_zero;
1346 }
1347 } else {
1348 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
1349 uint64_t frac_lsbm1 = frac_lsb >> 1;
1350 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
1351 uint64_t rnd_mask = rnd_even_mask >> 1;
1352 uint64_t inc;
1353
2f6c74be 1354 switch (rmode) {
dbe4d53a
AB
1355 case float_round_nearest_even:
1356 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
1357 break;
1358 case float_round_ties_away:
1359 inc = frac_lsbm1;
1360 break;
1361 case float_round_to_zero:
1362 inc = 0;
1363 break;
1364 case float_round_up:
1365 inc = a.sign ? 0 : rnd_mask;
1366 break;
1367 case float_round_down:
1368 inc = a.sign ? rnd_mask : 0;
1369 break;
1370 default:
1371 g_assert_not_reached();
1372 }
1373
1374 if (a.frac & rnd_mask) {
1375 s->float_exception_flags |= float_flag_inexact;
1376 a.frac += inc;
1377 a.frac &= ~rnd_mask;
1378 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
1379 a.frac >>= 1;
1380 a.exp++;
1381 }
1382 }
1383 }
1384 break;
1385 default:
1386 g_assert_not_reached();
1387 }
1388 return a;
1389}
1390
1391float16 float16_round_to_int(float16 a, float_status *s)
1392{
1393 FloatParts pa = float16_unpack_canonical(a, s);
2f6c74be 1394 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
1395 return float16_round_pack_canonical(pr, s);
1396}
1397
1398float32 float32_round_to_int(float32 a, float_status *s)
1399{
1400 FloatParts pa = float32_unpack_canonical(a, s);
2f6c74be 1401 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
1402 return float32_round_pack_canonical(pr, s);
1403}
1404
1405float64 float64_round_to_int(float64 a, float_status *s)
1406{
1407 FloatParts pa = float64_unpack_canonical(a, s);
2f6c74be 1408 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
1409 return float64_round_pack_canonical(pr, s);
1410}
1411
ab52f973
AB
1412/*
1413 * Returns the result of converting the floating-point value `a' to
1414 * the two's complement integer format. The conversion is performed
1415 * according to the IEC/IEEE Standard for Binary Floating-Point
1416 * Arithmetic---which means in particular that the conversion is
1417 * rounded according to the current rounding mode. If `a' is a NaN,
1418 * the largest positive integer is returned. Otherwise, if the
1419 * conversion overflows, the largest integer with the same sign as `a'
1420 * is returned.
1421*/
1422
2f6c74be 1423static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale,
ab52f973
AB
1424 int64_t min, int64_t max,
1425 float_status *s)
1426{
1427 uint64_t r;
1428 int orig_flags = get_float_exception_flags(s);
2f6c74be 1429 FloatParts p = round_to_int(in, rmode, scale, s);
ab52f973
AB
1430
1431 switch (p.cls) {
1432 case float_class_snan:
1433 case float_class_qnan:
801bc563 1434 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1435 return max;
1436 case float_class_inf:
801bc563 1437 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1438 return p.sign ? min : max;
1439 case float_class_zero:
1440 return 0;
1441 case float_class_normal:
1442 if (p.exp < DECOMPOSED_BINARY_POINT) {
1443 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1444 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1445 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1446 } else {
1447 r = UINT64_MAX;
1448 }
1449 if (p.sign) {
33358375 1450 if (r <= -(uint64_t) min) {
ab52f973
AB
1451 return -r;
1452 } else {
1453 s->float_exception_flags = orig_flags | float_flag_invalid;
1454 return min;
1455 }
1456 } else {
33358375 1457 if (r <= max) {
ab52f973
AB
1458 return r;
1459 } else {
1460 s->float_exception_flags = orig_flags | float_flag_invalid;
1461 return max;
1462 }
1463 }
1464 default:
1465 g_assert_not_reached();
1466 }
1467}
1468
2f6c74be
RH
1469int16_t float16_to_int16_scalbn(float16 a, int rmode, int scale,
1470 float_status *s)
1471{
1472 return round_to_int_and_pack(float16_unpack_canonical(a, s),
1473 rmode, scale, INT16_MIN, INT16_MAX, s);
1474}
1475
1476int32_t float16_to_int32_scalbn(float16 a, int rmode, int scale,
1477 float_status *s)
1478{
1479 return round_to_int_and_pack(float16_unpack_canonical(a, s),
1480 rmode, scale, INT32_MIN, INT32_MAX, s);
1481}
1482
1483int64_t float16_to_int64_scalbn(float16 a, int rmode, int scale,
1484 float_status *s)
1485{
1486 return round_to_int_and_pack(float16_unpack_canonical(a, s),
1487 rmode, scale, INT64_MIN, INT64_MAX, s);
1488}
1489
1490int16_t float32_to_int16_scalbn(float32 a, int rmode, int scale,
1491 float_status *s)
1492{
1493 return round_to_int_and_pack(float32_unpack_canonical(a, s),
1494 rmode, scale, INT16_MIN, INT16_MAX, s);
1495}
1496
1497int32_t float32_to_int32_scalbn(float32 a, int rmode, int scale,
1498 float_status *s)
1499{
1500 return round_to_int_and_pack(float32_unpack_canonical(a, s),
1501 rmode, scale, INT32_MIN, INT32_MAX, s);
1502}
1503
1504int64_t float32_to_int64_scalbn(float32 a, int rmode, int scale,
1505 float_status *s)
1506{
1507 return round_to_int_and_pack(float32_unpack_canonical(a, s),
1508 rmode, scale, INT64_MIN, INT64_MAX, s);
1509}
1510
1511int16_t float64_to_int16_scalbn(float64 a, int rmode, int scale,
1512 float_status *s)
1513{
1514 return round_to_int_and_pack(float64_unpack_canonical(a, s),
1515 rmode, scale, INT16_MIN, INT16_MAX, s);
1516}
1517
1518int32_t float64_to_int32_scalbn(float64 a, int rmode, int scale,
1519 float_status *s)
1520{
1521 return round_to_int_and_pack(float64_unpack_canonical(a, s),
1522 rmode, scale, INT32_MIN, INT32_MAX, s);
1523}
1524
1525int64_t float64_to_int64_scalbn(float64 a, int rmode, int scale,
1526 float_status *s)
1527{
1528 return round_to_int_and_pack(float64_unpack_canonical(a, s),
1529 rmode, scale, INT64_MIN, INT64_MAX, s);
1530}
1531
1532int16_t float16_to_int16(float16 a, float_status *s)
1533{
1534 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
1535}
1536
1537int32_t float16_to_int32(float16 a, float_status *s)
1538{
1539 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
1540}
1541
1542int64_t float16_to_int64(float16 a, float_status *s)
1543{
1544 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
1545}
1546
1547int16_t float32_to_int16(float32 a, float_status *s)
1548{
1549 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
1550}
1551
1552int32_t float32_to_int32(float32 a, float_status *s)
1553{
1554 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
1555}
1556
1557int64_t float32_to_int64(float32 a, float_status *s)
1558{
1559 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
1560}
1561
1562int16_t float64_to_int16(float64 a, float_status *s)
1563{
1564 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
1565}
1566
1567int32_t float64_to_int32(float64 a, float_status *s)
1568{
1569 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
1570}
1571
1572int64_t float64_to_int64(float64 a, float_status *s)
1573{
1574 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
1575}
1576
1577int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
1578{
1579 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
1580}
1581
1582int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
1583{
1584 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
1585}
1586
1587int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
1588{
1589 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
1590}
1591
2f6c74be
RH
1592int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
1593{
1594 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
1595}
ab52f973 1596
2f6c74be
RH
1597int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
1598{
1599 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
1600}
1601
1602int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
1603{
1604 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
1605}
1606
1607int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
1608{
1609 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
1610}
ab52f973 1611
2f6c74be
RH
1612int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
1613{
1614 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
1615}
ab52f973 1616
2f6c74be
RH
1617int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
1618{
1619 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
1620}
ab52f973
AB
1621
1622/*
1623 * Returns the result of converting the floating-point value `a' to
1624 * the unsigned integer format. The conversion is performed according
1625 * to the IEC/IEEE Standard for Binary Floating-Point
1626 * Arithmetic---which means in particular that the conversion is
1627 * rounded according to the current rounding mode. If `a' is a NaN,
1628 * the largest unsigned integer is returned. Otherwise, if the
1629 * conversion overflows, the largest unsigned integer is returned. If
1630 * `a' is negative, the result is rounded and zero is returned;
1631 * negative values that do not round to zero will raise the invalid
1632 * exception flag.
1633 */
1634
2f6c74be
RH
1635static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale,
1636 uint64_t max, float_status *s)
ab52f973
AB
1637{
1638 int orig_flags = get_float_exception_flags(s);
2f6c74be
RH
1639 FloatParts p = round_to_int(in, rmode, scale, s);
1640 uint64_t r;
ab52f973
AB
1641
1642 switch (p.cls) {
1643 case float_class_snan:
1644 case float_class_qnan:
1645 s->float_exception_flags = orig_flags | float_flag_invalid;
1646 return max;
1647 case float_class_inf:
801bc563 1648 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1649 return p.sign ? 0 : max;
1650 case float_class_zero:
1651 return 0;
1652 case float_class_normal:
ab52f973
AB
1653 if (p.sign) {
1654 s->float_exception_flags = orig_flags | float_flag_invalid;
1655 return 0;
1656 }
1657
1658 if (p.exp < DECOMPOSED_BINARY_POINT) {
1659 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1660 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1661 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1662 } else {
1663 s->float_exception_flags = orig_flags | float_flag_invalid;
1664 return max;
1665 }
1666
1667 /* For uint64 this will never trip, but if p.exp is too large
1668 * to shift a decomposed fraction we shall have exited via the
1669 * 3rd leg above.
1670 */
1671 if (r > max) {
1672 s->float_exception_flags = orig_flags | float_flag_invalid;
1673 return max;
ab52f973 1674 }
2f6c74be 1675 return r;
ab52f973
AB
1676 default:
1677 g_assert_not_reached();
1678 }
1679}
1680
2f6c74be
RH
1681uint16_t float16_to_uint16_scalbn(float16 a, int rmode, int scale,
1682 float_status *s)
1683{
1684 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
1685 rmode, scale, UINT16_MAX, s);
1686}
1687
1688uint32_t float16_to_uint32_scalbn(float16 a, int rmode, int scale,
1689 float_status *s)
1690{
1691 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
1692 rmode, scale, UINT32_MAX, s);
1693}
1694
1695uint64_t float16_to_uint64_scalbn(float16 a, int rmode, int scale,
1696 float_status *s)
1697{
1698 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
1699 rmode, scale, UINT64_MAX, s);
1700}
1701
1702uint16_t float32_to_uint16_scalbn(float32 a, int rmode, int scale,
1703 float_status *s)
1704{
1705 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
1706 rmode, scale, UINT16_MAX, s);
1707}
1708
1709uint32_t float32_to_uint32_scalbn(float32 a, int rmode, int scale,
1710 float_status *s)
1711{
1712 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
1713 rmode, scale, UINT32_MAX, s);
1714}
1715
1716uint64_t float32_to_uint64_scalbn(float32 a, int rmode, int scale,
1717 float_status *s)
1718{
1719 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
1720 rmode, scale, UINT64_MAX, s);
1721}
1722
1723uint16_t float64_to_uint16_scalbn(float64 a, int rmode, int scale,
1724 float_status *s)
1725{
1726 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
1727 rmode, scale, UINT16_MAX, s);
1728}
1729
1730uint32_t float64_to_uint32_scalbn(float64 a, int rmode, int scale,
1731 float_status *s)
1732{
1733 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
1734 rmode, scale, UINT32_MAX, s);
1735}
1736
1737uint64_t float64_to_uint64_scalbn(float64 a, int rmode, int scale,
1738 float_status *s)
1739{
1740 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
1741 rmode, scale, UINT64_MAX, s);
1742}
1743
1744uint16_t float16_to_uint16(float16 a, float_status *s)
1745{
1746 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
1747}
1748
1749uint32_t float16_to_uint32(float16 a, float_status *s)
1750{
1751 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
1752}
1753
1754uint64_t float16_to_uint64(float16 a, float_status *s)
1755{
1756 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
1757}
1758
1759uint16_t float32_to_uint16(float32 a, float_status *s)
1760{
1761 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
1762}
1763
1764uint32_t float32_to_uint32(float32 a, float_status *s)
1765{
1766 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
1767}
1768
1769uint64_t float32_to_uint64(float32 a, float_status *s)
1770{
1771 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
1772}
1773
1774uint16_t float64_to_uint16(float64 a, float_status *s)
1775{
1776 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
1777}
1778
1779uint32_t float64_to_uint32(float64 a, float_status *s)
1780{
1781 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
1782}
1783
1784uint64_t float64_to_uint64(float64 a, float_status *s)
1785{
1786 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
1787}
1788
1789uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
1790{
1791 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
1792}
1793
1794uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
1795{
1796 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
1797}
1798
1799uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
1800{
1801 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
1802}
1803
1804uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
1805{
1806 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
1807}
1808
1809uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
1810{
1811 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
1812}
1813
1814uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
1815{
1816 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
1817}
1818
1819uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
1820{
1821 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
1822}
1823
1824uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
1825{
1826 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
1827}
1828
1829uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
1830{
1831 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
1832}
ab52f973 1833
c02e1fb8
AB
1834/*
1835 * Integer to float conversions
1836 *
1837 * Returns the result of converting the two's complement integer `a'
1838 * to the floating-point format. The conversion is performed according
1839 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1840 */
1841
2abdfe24 1842static FloatParts int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 1843{
2abdfe24
RH
1844 FloatParts r = { .sign = false };
1845
c02e1fb8
AB
1846 if (a == 0) {
1847 r.cls = float_class_zero;
c02e1fb8 1848 } else {
2abdfe24
RH
1849 uint64_t f = a;
1850 int shift;
1851
1852 r.cls = float_class_normal;
c02e1fb8 1853 if (a < 0) {
2abdfe24 1854 f = -f;
c02e1fb8 1855 r.sign = true;
c02e1fb8 1856 }
2abdfe24
RH
1857 shift = clz64(f) - 1;
1858 scale = MIN(MAX(scale, -0x10000), 0x10000);
1859
1860 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
1861 r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
c02e1fb8
AB
1862 }
1863
1864 return r;
1865}
1866
2abdfe24 1867float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 1868{
2abdfe24 1869 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
1870 return float16_round_pack_canonical(pa, status);
1871}
1872
2abdfe24
RH
1873float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
1874{
1875 return int64_to_float16_scalbn(a, scale, status);
1876}
1877
1878float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
1879{
1880 return int64_to_float16_scalbn(a, scale, status);
1881}
1882
1883float16 int64_to_float16(int64_t a, float_status *status)
1884{
1885 return int64_to_float16_scalbn(a, 0, status);
1886}
1887
c02e1fb8
AB
1888float16 int32_to_float16(int32_t a, float_status *status)
1889{
2abdfe24 1890 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
1891}
1892
1893float16 int16_to_float16(int16_t a, float_status *status)
1894{
2abdfe24 1895 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
1896}
1897
2abdfe24 1898float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 1899{
2abdfe24 1900 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
1901 return float32_round_pack_canonical(pa, status);
1902}
1903
2abdfe24
RH
1904float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
1905{
1906 return int64_to_float32_scalbn(a, scale, status);
1907}
1908
1909float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
1910{
1911 return int64_to_float32_scalbn(a, scale, status);
1912}
1913
1914float32 int64_to_float32(int64_t a, float_status *status)
1915{
1916 return int64_to_float32_scalbn(a, 0, status);
1917}
1918
c02e1fb8
AB
1919float32 int32_to_float32(int32_t a, float_status *status)
1920{
2abdfe24 1921 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
1922}
1923
1924float32 int16_to_float32(int16_t a, float_status *status)
1925{
2abdfe24 1926 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
1927}
1928
2abdfe24 1929float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 1930{
2abdfe24 1931 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
1932 return float64_round_pack_canonical(pa, status);
1933}
1934
2abdfe24
RH
1935float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
1936{
1937 return int64_to_float64_scalbn(a, scale, status);
1938}
1939
1940float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
1941{
1942 return int64_to_float64_scalbn(a, scale, status);
1943}
1944
1945float64 int64_to_float64(int64_t a, float_status *status)
1946{
1947 return int64_to_float64_scalbn(a, 0, status);
1948}
1949
c02e1fb8
AB
1950float64 int32_to_float64(int32_t a, float_status *status)
1951{
2abdfe24 1952 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
1953}
1954
1955float64 int16_to_float64(int16_t a, float_status *status)
1956{
2abdfe24 1957 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
1958}
1959
1960
1961/*
1962 * Unsigned Integer to float conversions
1963 *
1964 * Returns the result of converting the unsigned integer `a' to the
1965 * floating-point format. The conversion is performed according to the
1966 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1967 */
1968
2abdfe24 1969static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 1970{
2abdfe24 1971 FloatParts r = { .sign = false };
c02e1fb8
AB
1972
1973 if (a == 0) {
1974 r.cls = float_class_zero;
1975 } else {
2abdfe24 1976 scale = MIN(MAX(scale, -0x10000), 0x10000);
c02e1fb8 1977 r.cls = float_class_normal;
2abdfe24
RH
1978 if ((int64_t)a < 0) {
1979 r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
1980 shift64RightJamming(a, 1, &a);
c02e1fb8
AB
1981 r.frac = a;
1982 } else {
2abdfe24
RH
1983 int shift = clz64(a) - 1;
1984 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
1985 r.frac = a << shift;
c02e1fb8
AB
1986 }
1987 }
1988
1989 return r;
1990}
1991
2abdfe24 1992float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 1993{
2abdfe24 1994 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
1995 return float16_round_pack_canonical(pa, status);
1996}
1997
2abdfe24
RH
1998float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
1999{
2000 return uint64_to_float16_scalbn(a, scale, status);
2001}
2002
2003float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
2004{
2005 return uint64_to_float16_scalbn(a, scale, status);
2006}
2007
2008float16 uint64_to_float16(uint64_t a, float_status *status)
2009{
2010 return uint64_to_float16_scalbn(a, 0, status);
2011}
2012
c02e1fb8
AB
2013float16 uint32_to_float16(uint32_t a, float_status *status)
2014{
2abdfe24 2015 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2016}
2017
2018float16 uint16_to_float16(uint16_t a, float_status *status)
2019{
2abdfe24 2020 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2021}
2022
2abdfe24 2023float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2024{
2abdfe24 2025 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2026 return float32_round_pack_canonical(pa, status);
2027}
2028
2abdfe24
RH
2029float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
2030{
2031 return uint64_to_float32_scalbn(a, scale, status);
2032}
2033
2034float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
2035{
2036 return uint64_to_float32_scalbn(a, scale, status);
2037}
2038
2039float32 uint64_to_float32(uint64_t a, float_status *status)
2040{
2041 return uint64_to_float32_scalbn(a, 0, status);
2042}
2043
c02e1fb8
AB
2044float32 uint32_to_float32(uint32_t a, float_status *status)
2045{
2abdfe24 2046 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2047}
2048
2049float32 uint16_to_float32(uint16_t a, float_status *status)
2050{
2abdfe24 2051 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2052}
2053
2abdfe24 2054float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2055{
2abdfe24 2056 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2057 return float64_round_pack_canonical(pa, status);
2058}
2059
2abdfe24
RH
2060float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
2061{
2062 return uint64_to_float64_scalbn(a, scale, status);
2063}
2064
2065float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
2066{
2067 return uint64_to_float64_scalbn(a, scale, status);
2068}
2069
2070float64 uint64_to_float64(uint64_t a, float_status *status)
2071{
2072 return uint64_to_float64_scalbn(a, 0, status);
2073}
2074
c02e1fb8
AB
2075float64 uint32_to_float64(uint32_t a, float_status *status)
2076{
2abdfe24 2077 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2078}
2079
2080float64 uint16_to_float64(uint16_t a, float_status *status)
2081{
2abdfe24 2082 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2083}
2084
89360067
AB
2085/* Float Min/Max */
2086/* min() and max() functions. These can't be implemented as
2087 * 'compare and pick one input' because that would mishandle
2088 * NaNs and +0 vs -0.
2089 *
2090 * minnum() and maxnum() functions. These are similar to the min()
2091 * and max() functions but if one of the arguments is a QNaN and
2092 * the other is numerical then the numerical argument is returned.
2093 * SNaNs will get quietened before being returned.
2094 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
2095 * and maxNum() operations. min() and max() are the typical min/max
2096 * semantics provided by many CPUs which predate that specification.
2097 *
2098 * minnummag() and maxnummag() functions correspond to minNumMag()
2099 * and maxNumMag() from IEEE 754-2008.
2100 */
2101static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
2102 bool ieee, bool ismag, float_status *s)
2103{
2104 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
2105 if (ieee) {
2106 /* Takes two floating-point values `a' and `b', one of
2107 * which is a NaN, and returns the appropriate NaN
2108 * result. If either `a' or `b' is a signaling NaN,
2109 * the invalid exception is raised.
2110 */
2111 if (is_snan(a.cls) || is_snan(b.cls)) {
2112 return pick_nan(a, b, s);
2113 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
2114 return b;
2115 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
2116 return a;
2117 }
2118 }
2119 return pick_nan(a, b, s);
2120 } else {
2121 int a_exp, b_exp;
89360067
AB
2122
2123 switch (a.cls) {
2124 case float_class_normal:
2125 a_exp = a.exp;
2126 break;
2127 case float_class_inf:
2128 a_exp = INT_MAX;
2129 break;
2130 case float_class_zero:
2131 a_exp = INT_MIN;
2132 break;
2133 default:
2134 g_assert_not_reached();
2135 break;
2136 }
2137 switch (b.cls) {
2138 case float_class_normal:
2139 b_exp = b.exp;
2140 break;
2141 case float_class_inf:
2142 b_exp = INT_MAX;
2143 break;
2144 case float_class_zero:
2145 b_exp = INT_MIN;
2146 break;
2147 default:
2148 g_assert_not_reached();
2149 break;
2150 }
2151
6245327a
EC
2152 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
2153 bool a_less = a_exp < b_exp;
2154 if (a_exp == b_exp) {
2155 a_less = a.frac < b.frac;
2156 }
2157 return a_less ^ ismin ? b : a;
89360067
AB
2158 }
2159
6245327a 2160 if (a.sign == b.sign) {
89360067
AB
2161 bool a_less = a_exp < b_exp;
2162 if (a_exp == b_exp) {
2163 a_less = a.frac < b.frac;
2164 }
6245327a 2165 return a.sign ^ a_less ^ ismin ? b : a;
89360067 2166 } else {
6245327a 2167 return a.sign ^ ismin ? b : a;
89360067
AB
2168 }
2169 }
2170}
2171
2172#define MINMAX(sz, name, ismin, isiee, ismag) \
2173float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
2174 float_status *s) \
2175{ \
2176 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2177 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2178 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
2179 \
2180 return float ## sz ## _round_pack_canonical(pr, s); \
2181}
2182
2183MINMAX(16, min, true, false, false)
2184MINMAX(16, minnum, true, true, false)
2185MINMAX(16, minnummag, true, true, true)
2186MINMAX(16, max, false, false, false)
2187MINMAX(16, maxnum, false, true, false)
2188MINMAX(16, maxnummag, false, true, true)
2189
2190MINMAX(32, min, true, false, false)
2191MINMAX(32, minnum, true, true, false)
2192MINMAX(32, minnummag, true, true, true)
2193MINMAX(32, max, false, false, false)
2194MINMAX(32, maxnum, false, true, false)
2195MINMAX(32, maxnummag, false, true, true)
2196
2197MINMAX(64, min, true, false, false)
2198MINMAX(64, minnum, true, true, false)
2199MINMAX(64, minnummag, true, true, true)
2200MINMAX(64, max, false, false, false)
2201MINMAX(64, maxnum, false, true, false)
2202MINMAX(64, maxnummag, false, true, true)
2203
2204#undef MINMAX
2205
0c4c9092
AB
2206/* Floating point compare */
2207static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
2208 float_status *s)
2209{
2210 if (is_nan(a.cls) || is_nan(b.cls)) {
2211 if (!is_quiet ||
2212 a.cls == float_class_snan ||
2213 b.cls == float_class_snan) {
2214 s->float_exception_flags |= float_flag_invalid;
2215 }
2216 return float_relation_unordered;
2217 }
2218
2219 if (a.cls == float_class_zero) {
2220 if (b.cls == float_class_zero) {
2221 return float_relation_equal;
2222 }
2223 return b.sign ? float_relation_greater : float_relation_less;
2224 } else if (b.cls == float_class_zero) {
2225 return a.sign ? float_relation_less : float_relation_greater;
2226 }
2227
2228 /* The only really important thing about infinity is its sign. If
2229 * both are infinities the sign marks the smallest of the two.
2230 */
2231 if (a.cls == float_class_inf) {
2232 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
2233 return float_relation_equal;
2234 }
2235 return a.sign ? float_relation_less : float_relation_greater;
2236 } else if (b.cls == float_class_inf) {
2237 return b.sign ? float_relation_greater : float_relation_less;
2238 }
2239
2240 if (a.sign != b.sign) {
2241 return a.sign ? float_relation_less : float_relation_greater;
2242 }
2243
2244 if (a.exp == b.exp) {
2245 if (a.frac == b.frac) {
2246 return float_relation_equal;
2247 }
2248 if (a.sign) {
2249 return a.frac > b.frac ?
2250 float_relation_less : float_relation_greater;
2251 } else {
2252 return a.frac > b.frac ?
2253 float_relation_greater : float_relation_less;
2254 }
2255 } else {
2256 if (a.sign) {
2257 return a.exp > b.exp ? float_relation_less : float_relation_greater;
2258 } else {
2259 return a.exp > b.exp ? float_relation_greater : float_relation_less;
2260 }
2261 }
2262}
2263
2264#define COMPARE(sz) \
2265int float ## sz ## _compare(float ## sz a, float ## sz b, \
2266 float_status *s) \
2267{ \
2268 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2269 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2270 return compare_floats(pa, pb, false, s); \
2271} \
2272int float ## sz ## _compare_quiet(float ## sz a, float ## sz b, \
2273 float_status *s) \
2274{ \
2275 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2276 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2277 return compare_floats(pa, pb, true, s); \
2278}
2279
2280COMPARE(16)
2281COMPARE(32)
2282COMPARE(64)
2283
2284#undef COMPARE
2285
0bfc9f19
AB
2286/* Multiply A by 2 raised to the power N. */
2287static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
2288{
2289 if (unlikely(is_nan(a.cls))) {
2290 return return_nan(a, s);
2291 }
2292 if (a.cls == float_class_normal) {
ce8d4082
RH
2293 /* The largest float type (even though not supported by FloatParts)
2294 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
2295 * still allows rounding to infinity, without allowing overflow
2296 * within the int32_t that backs FloatParts.exp.
2297 */
2298 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
2299 a.exp += n;
2300 }
2301 return a;
2302}
2303
2304float16 float16_scalbn(float16 a, int n, float_status *status)
2305{
2306 FloatParts pa = float16_unpack_canonical(a, status);
2307 FloatParts pr = scalbn_decomposed(pa, n, status);
2308 return float16_round_pack_canonical(pr, status);
2309}
2310
2311float32 float32_scalbn(float32 a, int n, float_status *status)
2312{
2313 FloatParts pa = float32_unpack_canonical(a, status);
2314 FloatParts pr = scalbn_decomposed(pa, n, status);
2315 return float32_round_pack_canonical(pr, status);
2316}
2317
2318float64 float64_scalbn(float64 a, int n, float_status *status)
2319{
2320 FloatParts pa = float64_unpack_canonical(a, status);
2321 FloatParts pr = scalbn_decomposed(pa, n, status);
2322 return float64_round_pack_canonical(pr, status);
2323}
2324
c13bb2da
AB
2325/*
2326 * Square Root
2327 *
2328 * The old softfloat code did an approximation step before zeroing in
2329 * on the final result. However, for simplicity we just compute the
2330 * square root by iterating down from the implicit bit to enough extra
2331 * bits to ensure we get a correctly rounded result.
2332 *
2333 * This does mean however the calculation is slower than before,
2334 * especially for 64 bit floats.
2335 */
2336
2337static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
2338{
2339 uint64_t a_frac, r_frac, s_frac;
2340 int bit, last_bit;
2341
2342 if (is_nan(a.cls)) {
2343 return return_nan(a, s);
2344 }
2345 if (a.cls == float_class_zero) {
2346 return a; /* sqrt(+-0) = +-0 */
2347 }
2348 if (a.sign) {
2349 s->float_exception_flags |= float_flag_invalid;
f7e598e2 2350 return parts_default_nan(s);
c13bb2da
AB
2351 }
2352 if (a.cls == float_class_inf) {
2353 return a; /* sqrt(+inf) = +inf */
2354 }
2355
2356 assert(a.cls == float_class_normal);
2357
2358 /* We need two overflow bits at the top. Adding room for that is a
2359 * right shift. If the exponent is odd, we can discard the low bit
2360 * by multiplying the fraction by 2; that's a left shift. Combine
2361 * those and we shift right if the exponent is even.
2362 */
2363 a_frac = a.frac;
2364 if (!(a.exp & 1)) {
2365 a_frac >>= 1;
2366 }
2367 a.exp >>= 1;
2368
2369 /* Bit-by-bit computation of sqrt. */
2370 r_frac = 0;
2371 s_frac = 0;
2372
2373 /* Iterate from implicit bit down to the 3 extra bits to compute a
2374 * properly rounded result. Remember we've inserted one more bit
2375 * at the top, so these positions are one less.
2376 */
2377 bit = DECOMPOSED_BINARY_POINT - 1;
2378 last_bit = MAX(p->frac_shift - 4, 0);
2379 do {
2380 uint64_t q = 1ULL << bit;
2381 uint64_t t_frac = s_frac + q;
2382 if (t_frac <= a_frac) {
2383 s_frac = t_frac + q;
2384 a_frac -= t_frac;
2385 r_frac += q;
2386 }
2387 a_frac <<= 1;
2388 } while (--bit >= last_bit);
2389
2390 /* Undo the right shift done above. If there is any remaining
2391 * fraction, the result is inexact. Set the sticky bit.
2392 */
2393 a.frac = (r_frac << 1) + (a_frac != 0);
2394
2395 return a;
2396}
2397
2398float16 __attribute__((flatten)) float16_sqrt(float16 a, float_status *status)
2399{
2400 FloatParts pa = float16_unpack_canonical(a, status);
2401 FloatParts pr = sqrt_float(pa, status, &float16_params);
2402 return float16_round_pack_canonical(pr, status);
2403}
2404
2405float32 __attribute__((flatten)) float32_sqrt(float32 a, float_status *status)
2406{
2407 FloatParts pa = float32_unpack_canonical(a, status);
2408 FloatParts pr = sqrt_float(pa, status, &float32_params);
2409 return float32_round_pack_canonical(pr, status);
2410}
2411
2412float64 __attribute__((flatten)) float64_sqrt(float64 a, float_status *status)
2413{
2414 FloatParts pa = float64_unpack_canonical(a, status);
2415 FloatParts pr = sqrt_float(pa, status, &float64_params);
2416 return float64_round_pack_canonical(pr, status);
2417}
2418
0218a16e
RH
2419/*----------------------------------------------------------------------------
2420| The pattern for a default generated NaN.
2421*----------------------------------------------------------------------------*/
2422
2423float16 float16_default_nan(float_status *status)
2424{
2425 FloatParts p = parts_default_nan(status);
2426 p.frac >>= float16_params.frac_shift;
2427 return float16_pack_raw(p);
2428}
2429
2430float32 float32_default_nan(float_status *status)
2431{
2432 FloatParts p = parts_default_nan(status);
2433 p.frac >>= float32_params.frac_shift;
2434 return float32_pack_raw(p);
2435}
2436
2437float64 float64_default_nan(float_status *status)
2438{
2439 FloatParts p = parts_default_nan(status);
2440 p.frac >>= float64_params.frac_shift;
2441 return float64_pack_raw(p);
2442}
2443
2444float128 float128_default_nan(float_status *status)
2445{
2446 FloatParts p = parts_default_nan(status);
2447 float128 r;
2448
2449 /* Extrapolate from the choices made by parts_default_nan to fill
2450 * in the quad-floating format. If the low bit is set, assume we
2451 * want to set all non-snan bits.
2452 */
2453 r.low = -(p.frac & 1);
2454 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
2455 r.high |= LIT64(0x7FFF000000000000);
2456 r.high |= (uint64_t)p.sign << 63;
2457
2458 return r;
2459}
c13bb2da 2460
158142c2 2461/*----------------------------------------------------------------------------
377ed926
RH
2462| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
2463*----------------------------------------------------------------------------*/
2464
2465float16 float16_silence_nan(float16 a, float_status *status)
2466{
2467 FloatParts p = float16_unpack_raw(a);
2468 p.frac <<= float16_params.frac_shift;
2469 p = parts_silence_nan(p, status);
2470 p.frac >>= float16_params.frac_shift;
2471 return float16_pack_raw(p);
2472}
2473
2474float32 float32_silence_nan(float32 a, float_status *status)
2475{
2476 FloatParts p = float32_unpack_raw(a);
2477 p.frac <<= float32_params.frac_shift;
2478 p = parts_silence_nan(p, status);
2479 p.frac >>= float32_params.frac_shift;
2480 return float32_pack_raw(p);
2481}
2482
2483float64 float64_silence_nan(float64 a, float_status *status)
2484{
2485 FloatParts p = float64_unpack_raw(a);
2486 p.frac <<= float64_params.frac_shift;
2487 p = parts_silence_nan(p, status);
2488 p.frac >>= float64_params.frac_shift;
2489 return float64_pack_raw(p);
2490}
2491
2492/*----------------------------------------------------------------------------
158142c2
FB
2493| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
2494| and 7, and returns the properly rounded 32-bit integer corresponding to the
2495| input. If `zSign' is 1, the input is negated before being converted to an
2496| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
2497| is simply rounded to an integer, with the inexact exception raised if the
2498| input cannot be represented exactly as an integer. However, if the fixed-
2499| point input is too large, the invalid exception is raised and the largest
2500| positive or negative integer is returned.
2501*----------------------------------------------------------------------------*/
2502
f4014512 2503static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
158142c2 2504{
8f506c70 2505 int8_t roundingMode;
158142c2 2506 flag roundNearestEven;
8f506c70 2507 int8_t roundIncrement, roundBits;
760e1416 2508 int32_t z;
158142c2 2509
a2f2d288 2510 roundingMode = status->float_rounding_mode;
158142c2 2511 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2512 switch (roundingMode) {
2513 case float_round_nearest_even:
f9288a76 2514 case float_round_ties_away:
dc355b76
PM
2515 roundIncrement = 0x40;
2516 break;
2517 case float_round_to_zero:
2518 roundIncrement = 0;
2519 break;
2520 case float_round_up:
2521 roundIncrement = zSign ? 0 : 0x7f;
2522 break;
2523 case float_round_down:
2524 roundIncrement = zSign ? 0x7f : 0;
2525 break;
2526 default:
2527 abort();
158142c2
FB
2528 }
2529 roundBits = absZ & 0x7F;
2530 absZ = ( absZ + roundIncrement )>>7;
2531 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2532 z = absZ;
2533 if ( zSign ) z = - z;
2534 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 2535 float_raise(float_flag_invalid, status);
bb98fe42 2536 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2 2537 }
a2f2d288
PM
2538 if (roundBits) {
2539 status->float_exception_flags |= float_flag_inexact;
2540 }
158142c2
FB
2541 return z;
2542
2543}
2544
2545/*----------------------------------------------------------------------------
2546| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2547| `absZ1', with binary point between bits 63 and 64 (between the input words),
2548| and returns the properly rounded 64-bit integer corresponding to the input.
2549| If `zSign' is 1, the input is negated before being converted to an integer.
2550| Ordinarily, the fixed-point input is simply rounded to an integer, with
2551| the inexact exception raised if the input cannot be represented exactly as
2552| an integer. However, if the fixed-point input is too large, the invalid
2553| exception is raised and the largest positive or negative integer is
2554| returned.
2555*----------------------------------------------------------------------------*/
2556
f42c2224 2557static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 2558 float_status *status)
158142c2 2559{
8f506c70 2560 int8_t roundingMode;
158142c2 2561 flag roundNearestEven, increment;
760e1416 2562 int64_t z;
158142c2 2563
a2f2d288 2564 roundingMode = status->float_rounding_mode;
158142c2 2565 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2566 switch (roundingMode) {
2567 case float_round_nearest_even:
f9288a76 2568 case float_round_ties_away:
dc355b76
PM
2569 increment = ((int64_t) absZ1 < 0);
2570 break;
2571 case float_round_to_zero:
2572 increment = 0;
2573 break;
2574 case float_round_up:
2575 increment = !zSign && absZ1;
2576 break;
2577 case float_round_down:
2578 increment = zSign && absZ1;
2579 break;
2580 default:
2581 abort();
158142c2
FB
2582 }
2583 if ( increment ) {
2584 ++absZ0;
2585 if ( absZ0 == 0 ) goto overflow;
bb98fe42 2586 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
2587 }
2588 z = absZ0;
2589 if ( zSign ) z = - z;
2590 if ( z && ( ( z < 0 ) ^ zSign ) ) {
2591 overflow:
ff32e16e 2592 float_raise(float_flag_invalid, status);
158142c2 2593 return
bb98fe42 2594 zSign ? (int64_t) LIT64( 0x8000000000000000 )
158142c2
FB
2595 : LIT64( 0x7FFFFFFFFFFFFFFF );
2596 }
a2f2d288
PM
2597 if (absZ1) {
2598 status->float_exception_flags |= float_flag_inexact;
2599 }
158142c2
FB
2600 return z;
2601
2602}
2603
fb3ea83a
TM
2604/*----------------------------------------------------------------------------
2605| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2606| `absZ1', with binary point between bits 63 and 64 (between the input words),
2607| and returns the properly rounded 64-bit unsigned integer corresponding to the
2608| input. Ordinarily, the fixed-point input is simply rounded to an integer,
2609| with the inexact exception raised if the input cannot be represented exactly
2610| as an integer. However, if the fixed-point input is too large, the invalid
2611| exception is raised and the largest unsigned integer is returned.
2612*----------------------------------------------------------------------------*/
2613
f42c2224 2614static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
e5a41ffa 2615 uint64_t absZ1, float_status *status)
fb3ea83a 2616{
8f506c70 2617 int8_t roundingMode;
fb3ea83a
TM
2618 flag roundNearestEven, increment;
2619
a2f2d288 2620 roundingMode = status->float_rounding_mode;
fb3ea83a 2621 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
2622 switch (roundingMode) {
2623 case float_round_nearest_even:
f9288a76 2624 case float_round_ties_away:
dc355b76
PM
2625 increment = ((int64_t)absZ1 < 0);
2626 break;
2627 case float_round_to_zero:
2628 increment = 0;
2629 break;
2630 case float_round_up:
2631 increment = !zSign && absZ1;
2632 break;
2633 case float_round_down:
2634 increment = zSign && absZ1;
2635 break;
2636 default:
2637 abort();
fb3ea83a
TM
2638 }
2639 if (increment) {
2640 ++absZ0;
2641 if (absZ0 == 0) {
ff32e16e 2642 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2643 return LIT64(0xFFFFFFFFFFFFFFFF);
2644 }
2645 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
2646 }
2647
2648 if (zSign && absZ0) {
ff32e16e 2649 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2650 return 0;
2651 }
2652
2653 if (absZ1) {
a2f2d288 2654 status->float_exception_flags |= float_flag_inexact;
fb3ea83a
TM
2655 }
2656 return absZ0;
2657}
2658
37d18660
PM
2659/*----------------------------------------------------------------------------
2660| If `a' is denormal and we are in flush-to-zero mode then set the
2661| input-denormal exception and return zero. Otherwise just return the value.
2662*----------------------------------------------------------------------------*/
e5a41ffa 2663float32 float32_squash_input_denormal(float32 a, float_status *status)
37d18660 2664{
a2f2d288 2665 if (status->flush_inputs_to_zero) {
37d18660 2666 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
ff32e16e 2667 float_raise(float_flag_input_denormal, status);
37d18660
PM
2668 return make_float32(float32_val(a) & 0x80000000);
2669 }
2670 }
2671 return a;
2672}
2673
158142c2
FB
2674/*----------------------------------------------------------------------------
2675| Normalizes the subnormal single-precision floating-point value represented
2676| by the denormalized significand `aSig'. The normalized exponent and
2677| significand are stored at the locations pointed to by `zExpPtr' and
2678| `zSigPtr', respectively.
2679*----------------------------------------------------------------------------*/
2680
static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* Left shift that moves the leading one of aSig up to bit 23. */
    const int8_t shift = countLeadingZeros32(aSig) - 8;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
2691
158142c2
FB
2692/*----------------------------------------------------------------------------
2693| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2694| and significand `zSig', and returns the proper single-precision floating-
2695| point value corresponding to the abstract input. Ordinarily, the abstract
2696| value is simply rounded and packed into the single-precision format, with
2697| the inexact exception raised if the abstract input cannot be represented
2698| exactly. However, if the abstract value is too large, the overflow and
2699| inexact exceptions are raised and an infinity or maximal finite value is
2700| returned. If the abstract value is too small, the input value is rounded to
2701| a subnormal number, and the underflow and inexact exceptions are raised if
2702| the abstract input cannot be represented exactly as a subnormal single-
2703| precision floating-point number.
2704| The input significand `zSig' has its binary point between bits 30
2705| and 29, which is 7 bits to the left of the usual location. This shifted
2706| significand must be normalized or smaller. If `zSig' is not normalized,
2707| `zExp' must be 0; in that case, the result returned is a subnormal number,
2708| and it must not require rounding. In the usual case that `zSig' is
2709| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2710| The handling of underflow and overflow follows the IEC/IEEE Standard for
2711| Binary Floating-Point Arithmetic.
2712*----------------------------------------------------------------------------*/
2713
0c48262d 2714static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2715 float_status *status)
158142c2 2716{
8f506c70 2717 int8_t roundingMode;
158142c2 2718 flag roundNearestEven;
8f506c70 2719 int8_t roundIncrement, roundBits;
158142c2
FB
2720 flag isTiny;
2721
a2f2d288 2722 roundingMode = status->float_rounding_mode;
158142c2 2723 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2724 switch (roundingMode) {
2725 case float_round_nearest_even:
f9288a76 2726 case float_round_ties_away:
dc355b76
PM
2727 roundIncrement = 0x40;
2728 break;
2729 case float_round_to_zero:
2730 roundIncrement = 0;
2731 break;
2732 case float_round_up:
2733 roundIncrement = zSign ? 0 : 0x7f;
2734 break;
2735 case float_round_down:
2736 roundIncrement = zSign ? 0x7f : 0;
2737 break;
2738 default:
2739 abort();
2740 break;
158142c2
FB
2741 }
2742 roundBits = zSig & 0x7F;
bb98fe42 2743 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
2744 if ( ( 0xFD < zExp )
2745 || ( ( zExp == 0xFD )
bb98fe42 2746 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2747 ) {
ff32e16e 2748 float_raise(float_flag_overflow | float_flag_inexact, status);
f090c9d4 2749 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
158142c2
FB
2750 }
2751 if ( zExp < 0 ) {
a2f2d288 2752 if (status->flush_to_zero) {
ff32e16e 2753 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2754 return packFloat32(zSign, 0, 0);
2755 }
158142c2 2756 isTiny =
a2f2d288
PM
2757 (status->float_detect_tininess
2758 == float_tininess_before_rounding)
158142c2
FB
2759 || ( zExp < -1 )
2760 || ( zSig + roundIncrement < 0x80000000 );
2761 shift32RightJamming( zSig, - zExp, &zSig );
2762 zExp = 0;
2763 roundBits = zSig & 0x7F;
ff32e16e
PM
2764 if (isTiny && roundBits) {
2765 float_raise(float_flag_underflow, status);
2766 }
158142c2
FB
2767 }
2768 }
a2f2d288
PM
2769 if (roundBits) {
2770 status->float_exception_flags |= float_flag_inexact;
2771 }
158142c2
FB
2772 zSig = ( zSig + roundIncrement )>>7;
2773 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2774 if ( zSig == 0 ) zExp = 0;
2775 return packFloat32( zSign, zExp, zSig );
2776
2777}
2778
2779/*----------------------------------------------------------------------------
2780| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2781| and significand `zSig', and returns the proper single-precision floating-
2782| point value corresponding to the abstract input. This routine is just like
2783| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
2784| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2785| floating-point exponent.
2786*----------------------------------------------------------------------------*/
2787
2788static float32
0c48262d 2789 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2790 float_status *status)
158142c2 2791{
8f506c70 2792 int8_t shiftCount;
158142c2
FB
2793
2794 shiftCount = countLeadingZeros32( zSig ) - 1;
ff32e16e
PM
2795 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
2796 status);
158142c2
FB
2797
2798}
2799
37d18660
PM
2800/*----------------------------------------------------------------------------
2801| If `a' is denormal and we are in flush-to-zero mode then set the
2802| input-denormal exception and return zero. Otherwise just return the value.
2803*----------------------------------------------------------------------------*/
e5a41ffa 2804float64 float64_squash_input_denormal(float64 a, float_status *status)
37d18660 2805{
a2f2d288 2806 if (status->flush_inputs_to_zero) {
37d18660 2807 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
ff32e16e 2808 float_raise(float_flag_input_denormal, status);
37d18660
PM
2809 return make_float64(float64_val(a) & (1ULL << 63));
2810 }
2811 }
2812 return a;
2813}
2814
158142c2
FB
2815/*----------------------------------------------------------------------------
2816| Normalizes the subnormal double-precision floating-point value represented
2817| by the denormalized significand `aSig'. The normalized exponent and
2818| significand are stored at the locations pointed to by `zExpPtr' and
2819| `zSigPtr', respectively.
2820*----------------------------------------------------------------------------*/
2821
static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* Left shift that moves the leading one of aSig up to bit 52. */
    const int8_t shift = countLeadingZeros64(aSig) - 11;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
2832
2833/*----------------------------------------------------------------------------
2834| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2835| double-precision floating-point value, returning the result. After being
2836| shifted into the proper positions, the three fields are simply added
2837| together to form the result. This means that any integer portion of `zSig'
2838| will be added into the exponent. Since a properly normalized significand
2839| will have an integer portion equal to 1, the `zExp' input should be 1 less
2840| than the desired result exponent whenever `zSig' is a complete, normalized
2841| significand.
2842*----------------------------------------------------------------------------*/
2843
0c48262d 2844static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
158142c2
FB
2845{
2846
f090c9d4 2847 return make_float64(
bb98fe42 2848 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
2849
2850}
2851
2852/*----------------------------------------------------------------------------
2853| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2854| and significand `zSig', and returns the proper double-precision floating-
2855| point value corresponding to the abstract input. Ordinarily, the abstract
2856| value is simply rounded and packed into the double-precision format, with
2857| the inexact exception raised if the abstract input cannot be represented
2858| exactly. However, if the abstract value is too large, the overflow and
2859| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
2860| returned. If the abstract value is too small, the input value is rounded to
2861| a subnormal number, and the underflow and inexact exceptions are raised if
2862| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
2863| precision floating-point number.
2864| The input significand `zSig' has its binary point between bits 62
2865| and 61, which is 10 bits to the left of the usual location. This shifted
2866| significand must be normalized or smaller. If `zSig' is not normalized,
2867| `zExp' must be 0; in that case, the result returned is a subnormal number,
2868| and it must not require rounding. In the usual case that `zSig' is
2869| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2870| The handling of underflow and overflow follows the IEC/IEEE Standard for
2871| Binary Floating-Point Arithmetic.
2872*----------------------------------------------------------------------------*/
2873
0c48262d 2874static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2875 float_status *status)
158142c2 2876{
8f506c70 2877 int8_t roundingMode;
158142c2 2878 flag roundNearestEven;
0c48262d 2879 int roundIncrement, roundBits;
158142c2
FB
2880 flag isTiny;
2881
a2f2d288 2882 roundingMode = status->float_rounding_mode;
158142c2 2883 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2884 switch (roundingMode) {
2885 case float_round_nearest_even:
f9288a76 2886 case float_round_ties_away:
dc355b76
PM
2887 roundIncrement = 0x200;
2888 break;
2889 case float_round_to_zero:
2890 roundIncrement = 0;
2891 break;
2892 case float_round_up:
2893 roundIncrement = zSign ? 0 : 0x3ff;
2894 break;
2895 case float_round_down:
2896 roundIncrement = zSign ? 0x3ff : 0;
2897 break;
9ee6f678
BR
2898 case float_round_to_odd:
2899 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2900 break;
dc355b76
PM
2901 default:
2902 abort();
158142c2
FB
2903 }
2904 roundBits = zSig & 0x3FF;
bb98fe42 2905 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
2906 if ( ( 0x7FD < zExp )
2907 || ( ( zExp == 0x7FD )
bb98fe42 2908 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2909 ) {
9ee6f678
BR
2910 bool overflow_to_inf = roundingMode != float_round_to_odd &&
2911 roundIncrement != 0;
ff32e16e 2912 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 2913 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
2914 }
2915 if ( zExp < 0 ) {
a2f2d288 2916 if (status->flush_to_zero) {
ff32e16e 2917 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2918 return packFloat64(zSign, 0, 0);
2919 }
158142c2 2920 isTiny =
a2f2d288
PM
2921 (status->float_detect_tininess
2922 == float_tininess_before_rounding)
158142c2
FB
2923 || ( zExp < -1 )
2924 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
2925 shift64RightJamming( zSig, - zExp, &zSig );
2926 zExp = 0;
2927 roundBits = zSig & 0x3FF;
ff32e16e
PM
2928 if (isTiny && roundBits) {
2929 float_raise(float_flag_underflow, status);
2930 }
9ee6f678
BR
2931 if (roundingMode == float_round_to_odd) {
2932 /*
2933 * For round-to-odd case, the roundIncrement depends on
2934 * zSig which just changed.
2935 */
2936 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2937 }
158142c2
FB
2938 }
2939 }
a2f2d288
PM
2940 if (roundBits) {
2941 status->float_exception_flags |= float_flag_inexact;
2942 }
158142c2
FB
2943 zSig = ( zSig + roundIncrement )>>10;
2944 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
2945 if ( zSig == 0 ) zExp = 0;
2946 return packFloat64( zSign, zExp, zSig );
2947
2948}
2949
2950/*----------------------------------------------------------------------------
2951| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2952| and significand `zSig', and returns the proper double-precision floating-
2953| point value corresponding to the abstract input. This routine is just like
2954| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
2955| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2956| floating-point exponent.
2957*----------------------------------------------------------------------------*/
2958
2959static float64
0c48262d 2960 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2961 float_status *status)
158142c2 2962{
8f506c70 2963 int8_t shiftCount;
158142c2
FB
2964
2965 shiftCount = countLeadingZeros64( zSig ) - 1;
ff32e16e
PM
2966 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
2967 status);
158142c2
FB
2968
2969}
2970
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision significand `aSig' by
| shifting out its leading zero bits.  The shifted significand is stored at
| the location pointed to by `zSigPtr', and the matching exponent, 1 minus the
| shift distance, at the location pointed to by `zExpPtr'.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    const int8_t leadingZeros = countLeadingZeros64(aSig);

    *zSigPtr = aSig << leadingZeros;
    *zExpPtr = 1 - leadingZeros;
}
2987
2988/*----------------------------------------------------------------------------
2989| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2990| and extended significand formed by the concatenation of `zSig0' and `zSig1',
2991| and returns the proper extended double-precision floating-point value
2992| corresponding to the abstract input. Ordinarily, the abstract value is
2993| rounded and packed into the extended double-precision format, with the
2994| inexact exception raised if the abstract input cannot be represented
2995| exactly. However, if the abstract value is too large, the overflow and
2996| inexact exceptions are raised and an infinity or maximal finite value is
2997| returned. If the abstract value is too small, the input value is rounded to
2998| a subnormal number, and the underflow and inexact exceptions are raised if
2999| the abstract input cannot be represented exactly as a subnormal extended
3000| double-precision floating-point number.
3001| If `roundingPrecision' is 32 or 64, the result is rounded to the same
3002| number of bits as single or double precision, respectively. Otherwise, the
3003| result is rounded to the full precision of the extended double-precision
3004| format.
3005| The input significand must be normalized or smaller. If the input
3006| significand is not normalized, `zExp' must be 0; in that case, the result
3007| returned is a subnormal number, and it must not require rounding. The
3008| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
3009| Floating-Point Arithmetic.
3010*----------------------------------------------------------------------------*/
3011
88857aca
LV
3012floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
3013 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
3014 float_status *status)
158142c2 3015{
8f506c70 3016 int8_t roundingMode;
158142c2 3017 flag roundNearestEven, increment, isTiny;
f42c2224 3018 int64_t roundIncrement, roundMask, roundBits;
158142c2 3019
a2f2d288 3020 roundingMode = status->float_rounding_mode;
158142c2
FB
3021 roundNearestEven = ( roundingMode == float_round_nearest_even );
3022 if ( roundingPrecision == 80 ) goto precision80;
3023 if ( roundingPrecision == 64 ) {
3024 roundIncrement = LIT64( 0x0000000000000400 );
3025 roundMask = LIT64( 0x00000000000007FF );
3026 }
3027 else if ( roundingPrecision == 32 ) {
3028 roundIncrement = LIT64( 0x0000008000000000 );
3029 roundMask = LIT64( 0x000000FFFFFFFFFF );
3030 }
3031 else {
3032 goto precision80;
3033 }
3034 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
3035 switch (roundingMode) {
3036 case float_round_nearest_even:
f9288a76 3037 case float_round_ties_away:
dc355b76
PM
3038 break;
3039 case float_round_to_zero:
3040 roundIncrement = 0;
3041 break;
3042 case float_round_up:
3043 roundIncrement = zSign ? 0 : roundMask;
3044 break;
3045 case float_round_down:
3046 roundIncrement = zSign ? roundMask : 0;
3047 break;
3048 default:
3049 abort();
158142c2
FB
3050 }
3051 roundBits = zSig0 & roundMask;
bb98fe42 3052 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
3053 if ( ( 0x7FFE < zExp )
3054 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
3055 ) {
3056 goto overflow;
3057 }
3058 if ( zExp <= 0 ) {
a2f2d288 3059 if (status->flush_to_zero) {
ff32e16e 3060 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3061 return packFloatx80(zSign, 0, 0);
3062 }
158142c2 3063 isTiny =
a2f2d288
PM
3064 (status->float_detect_tininess
3065 == float_tininess_before_rounding)
158142c2
FB
3066 || ( zExp < 0 )
3067 || ( zSig0 <= zSig0 + roundIncrement );
3068 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
3069 zExp = 0;
3070 roundBits = zSig0 & roundMask;
ff32e16e
PM
3071 if (isTiny && roundBits) {
3072 float_raise(float_flag_underflow, status);
3073 }
a2f2d288
PM
3074 if (roundBits) {
3075 status->float_exception_flags |= float_flag_inexact;
3076 }
158142c2 3077 zSig0 += roundIncrement;
bb98fe42 3078 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
3079 roundIncrement = roundMask + 1;
3080 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3081 roundMask |= roundIncrement;
3082 }
3083 zSig0 &= ~ roundMask;
3084 return packFloatx80( zSign, zExp, zSig0 );
3085 }
3086 }
a2f2d288
PM
3087 if (roundBits) {
3088 status->float_exception_flags |= float_flag_inexact;
3089 }
158142c2
FB
3090 zSig0 += roundIncrement;
3091 if ( zSig0 < roundIncrement ) {
3092 ++zExp;
3093 zSig0 = LIT64( 0x8000000000000000 );
3094 }
3095 roundIncrement = roundMask + 1;
3096 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3097 roundMask |= roundIncrement;
3098 }
3099 zSig0 &= ~ roundMask;
3100 if ( zSig0 == 0 ) zExp = 0;
3101 return packFloatx80( zSign, zExp, zSig0 );
3102 precision80:
dc355b76
PM
3103 switch (roundingMode) {
3104 case float_round_nearest_even:
f9288a76 3105 case float_round_ties_away:
dc355b76
PM
3106 increment = ((int64_t)zSig1 < 0);
3107 break;
3108 case float_round_to_zero:
3109 increment = 0;
3110 break;
3111 case float_round_up:
3112 increment = !zSign && zSig1;
3113 break;
3114 case float_round_down:
3115 increment = zSign && zSig1;
3116 break;
3117 default:
3118 abort();
158142c2 3119 }
bb98fe42 3120 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
3121 if ( ( 0x7FFE < zExp )
3122 || ( ( zExp == 0x7FFE )
3123 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
3124 && increment
3125 )
3126 ) {
3127 roundMask = 0;
3128 overflow:
ff32e16e 3129 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
3130 if ( ( roundingMode == float_round_to_zero )
3131 || ( zSign && ( roundingMode == float_round_up ) )
3132 || ( ! zSign && ( roundingMode == float_round_down ) )
3133 ) {
3134 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
3135 }
0f605c88
LV
3136 return packFloatx80(zSign,
3137 floatx80_infinity_high,
3138 floatx80_infinity_low);
158142c2
FB
3139 }
3140 if ( zExp <= 0 ) {
3141 isTiny =
a2f2d288
PM
3142 (status->float_detect_tininess
3143 == float_tininess_before_rounding)
158142c2
FB
3144 || ( zExp < 0 )
3145 || ! increment
3146 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
3147 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
3148 zExp = 0;
ff32e16e
PM
3149 if (isTiny && zSig1) {
3150 float_raise(float_flag_underflow, status);
3151 }
a2f2d288
PM
3152 if (zSig1) {
3153 status->float_exception_flags |= float_flag_inexact;
3154 }
dc355b76
PM
3155 switch (roundingMode) {
3156 case float_round_nearest_even:
f9288a76 3157 case float_round_ties_away:
dc355b76
PM
3158 increment = ((int64_t)zSig1 < 0);
3159 break;
3160 case float_round_to_zero:
3161 increment = 0;
3162 break;
3163 case float_round_up:
3164 increment = !zSign && zSig1;
3165 break;
3166 case float_round_down:
3167 increment = zSign && zSig1;
3168 break;
3169 default:
3170 abort();
158142c2
FB
3171 }
3172 if ( increment ) {
3173 ++zSig0;
3174 zSig0 &=
bb98fe42
AF
3175 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
3176 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
3177 }
3178 return packFloatx80( zSign, zExp, zSig0 );
3179 }
3180 }
a2f2d288
PM
3181 if (zSig1) {
3182 status->float_exception_flags |= float_flag_inexact;
3183 }
158142c2
FB
3184 if ( increment ) {
3185 ++zSig0;
3186 if ( zSig0 == 0 ) {
3187 ++zExp;
3188 zSig0 = LIT64( 0x8000000000000000 );
3189 }
3190 else {
bb98fe42 3191 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
3192 }
3193 }
3194 else {
3195 if ( zSig0 == 0 ) zExp = 0;
3196 }
3197 return packFloatx80( zSign, zExp, zSig0 );
3198
3199}
3200
3201/*----------------------------------------------------------------------------
3202| Takes an abstract floating-point value having sign `zSign', exponent
3203| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
3204| and returns the proper extended double-precision floating-point value
3205| corresponding to the abstract input. This routine is just like
3206| `roundAndPackFloatx80' except that the input significand does not have to be
3207| normalized.
3208*----------------------------------------------------------------------------*/
3209
88857aca
LV
3210floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
3211 flag zSign, int32_t zExp,
3212 uint64_t zSig0, uint64_t zSig1,
3213 float_status *status)
158142c2 3214{
8f506c70 3215 int8_t shiftCount;
158142c2
FB
3216
3217 if ( zSig0 == 0 ) {
3218 zSig0 = zSig1;
3219 zSig1 = 0;
3220 zExp -= 64;
3221 }
3222 shiftCount = countLeadingZeros64( zSig0 );
3223 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3224 zExp -= shiftCount;
ff32e16e
PM
3225 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
3226 zSig0, zSig1, status);
158142c2
FB
3227
3228}
3229
158142c2
FB
3230/*----------------------------------------------------------------------------
3231| Returns the least-significant 64 fraction bits of the quadruple-precision
3232| floating-point value `a'.
3233*----------------------------------------------------------------------------*/
3234
a49db98d 3235static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
3236{
3237
3238 return a.low;
3239
3240}
3241
3242/*----------------------------------------------------------------------------
3243| Returns the most-significant 48 fraction bits of the quadruple-precision
3244| floating-point value `a'.
3245*----------------------------------------------------------------------------*/
3246
a49db98d 3247static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
3248{
3249
3250 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
3251
3252}
3253
3254/*----------------------------------------------------------------------------
3255| Returns the exponent bits of the quadruple-precision floating-point value
3256| `a'.
3257*----------------------------------------------------------------------------*/
3258
f4014512 3259static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
3260{
3261
3262 return ( a.high>>48 ) & 0x7FFF;
3263
3264}
3265
3266/*----------------------------------------------------------------------------
3267| Returns the sign bit of the quadruple-precision floating-point value `a'.
3268*----------------------------------------------------------------------------*/
3269
a49db98d 3270static inline flag extractFloat128Sign( float128 a )
158142c2
FB
3271{
3272
3273 return a.high>>63;
3274
3275}
3276
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision significand formed by the
| concatenation of `aSig0' and `aSig1'.  The significand is shifted so that
| its leading 1 lands on bit 48 of the high word.  The normalized exponent is
| stored at the location pointed to by `zExpPtr', the most significant 49
| bits of the normalized significand at `zSig0Ptr', and the least significant
| 64 bits at `zSig1Ptr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t lshift;

    if (aSig0 != 0) {
        /* Leading 1 is in the high word: a plain 128-bit left shift. */
        lshift = countLeadingZeros64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, lshift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - lshift;
        return;
    }

    /* High word is empty: the low word supplies the whole significand. */
    lshift = countLeadingZeros64(aSig1) - 15;
    if (lshift >= 0) {
        *zSig0Ptr = aSig1 << lshift;
        *zSig1Ptr = 0;
    } else {
        /* Fewer than 15 leading zeros: the value straddles both words. */
        *zSig0Ptr = aSig1 >> (-lshift);
        *zSig1Ptr = aSig1 << (lshift & 63);
    }
    *zExpPtr = -lshift - 63;
}
3317
3318/*----------------------------------------------------------------------------
3319| Packs the sign `zSign', the exponent `zExp', and the significand formed
3320| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
3321| floating-point value, returning the result. After being shifted into the
3322| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
3323| added together to form the most significant 32 bits of the result. This
3324| means that any integer portion of `zSig0' will be added into the exponent.
3325| Since a properly normalized significand will have an integer portion equal
3326| to 1, the `zExp' input should be 1 less than the desired result exponent
3327| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
3328| significand.
3329*----------------------------------------------------------------------------*/
3330
a49db98d 3331static inline float128
f4014512 3332 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
158142c2
FB
3333{
3334 float128 z;
3335
3336 z.low = zSig1;
bb98fe42 3337 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
158142c2
FB
3338 return z;
3339
3340}
3341
3342/*----------------------------------------------------------------------------
3343| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3344| and extended significand formed by the concatenation of `zSig0', `zSig1',
3345| and `zSig2', and returns the proper quadruple-precision floating-point value
3346| corresponding to the abstract input. Ordinarily, the abstract value is
3347| simply rounded and packed into the quadruple-precision format, with the
3348| inexact exception raised if the abstract input cannot be represented
3349| exactly. However, if the abstract value is too large, the overflow and
3350| inexact exceptions are raised and an infinity or maximal finite value is
3351| returned. If the abstract value is too small, the input value is rounded to
3352| a subnormal number, and the underflow and inexact exceptions are raised if
3353| the abstract input cannot be represented exactly as a subnormal quadruple-
3354| precision floating-point number.
3355| The input significand must be normalized or smaller. If the input
3356| significand is not normalized, `zExp' must be 0; in that case, the result
3357| returned is a subnormal number, and it must not require rounding. In the
3358| usual case that the input significand is normalized, `zExp' must be 1 less
3359| than the ``true'' floating-point exponent. The handling of underflow and
3360| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3361*----------------------------------------------------------------------------*/
3362
f4014512 3363static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
3364 uint64_t zSig0, uint64_t zSig1,
3365 uint64_t zSig2, float_status *status)
158142c2 3366{
8f506c70 3367 int8_t roundingMode;
158142c2
FB
3368 flag roundNearestEven, increment, isTiny;
3369
a2f2d288 3370 roundingMode = status->float_rounding_mode;
158142c2 3371 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3372 switch (roundingMode) {
3373 case float_round_nearest_even:
f9288a76 3374 case float_round_ties_away:
dc355b76
PM
3375 increment = ((int64_t)zSig2 < 0);
3376 break;
3377 case float_round_to_zero:
3378 increment = 0;
3379 break;
3380 case float_round_up:
3381 increment = !zSign && zSig2;
3382 break;
3383 case float_round_down:
3384 increment = zSign && zSig2;
3385 break;
9ee6f678
BR
3386 case float_round_to_odd:
3387 increment = !(zSig1 & 0x1) && zSig2;
3388 break;
dc355b76
PM
3389 default:
3390 abort();
158142c2 3391 }
bb98fe42 3392 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
3393 if ( ( 0x7FFD < zExp )
3394 || ( ( zExp == 0x7FFD )
3395 && eq128(
3396 LIT64( 0x0001FFFFFFFFFFFF ),
3397 LIT64( 0xFFFFFFFFFFFFFFFF ),
3398 zSig0,
3399 zSig1
3400 )
3401 && increment
3402 )
3403 ) {
ff32e16e 3404 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
3405 if ( ( roundingMode == float_round_to_zero )
3406 || ( zSign && ( roundingMode == float_round_up ) )
3407 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 3408 || (roundingMode == float_round_to_odd)
158142c2
FB
3409 ) {
3410 return
3411 packFloat128(
3412 zSign,
3413 0x7FFE,
3414 LIT64( 0x0000FFFFFFFFFFFF ),
3415 LIT64( 0xFFFFFFFFFFFFFFFF )
3416 );
3417 }
3418 return packFloat128( zSign, 0x7FFF, 0, 0 );
3419 }
3420 if ( zExp < 0 ) {
a2f2d288 3421 if (status->flush_to_zero) {
ff32e16e 3422 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3423 return packFloat128(zSign, 0, 0, 0);
3424 }
158142c2 3425 isTiny =
a2f2d288
PM
3426 (status->float_detect_tininess
3427 == float_tininess_before_rounding)
158142c2
FB
3428 || ( zExp < -1 )
3429 || ! increment
3430 || lt128(
3431 zSig0,
3432 zSig1,
3433 LIT64( 0x0001FFFFFFFFFFFF ),
3434 LIT64( 0xFFFFFFFFFFFFFFFF )
3435 );
3436 shift128ExtraRightJamming(
3437 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
3438 zExp = 0;
ff32e16e
PM
3439 if (isTiny && zSig2) {
3440 float_raise(float_flag_underflow, status);
3441 }
dc355b76
PM
3442 switch (roundingMode) {
3443 case float_round_nearest_even:
f9288a76 3444 case float_round_ties_away:
dc355b76
PM
3445 increment = ((int64_t)zSig2 < 0);
3446 break;
3447 case float_round_to_zero:
3448 increment = 0;
3449 break;
3450 case float_round_up:
3451 increment = !zSign && zSig2;
3452 break;
3453 case float_round_down:
3454 increment = zSign && zSig2;
3455 break;
9ee6f678
BR
3456 case float_round_to_odd:
3457 increment = !(zSig1 & 0x1) && zSig2;
3458 break;
dc355b76
PM
3459 default:
3460 abort();
158142c2
FB
3461 }
3462 }
3463 }
a2f2d288
PM
3464 if (zSig2) {
3465 status->float_exception_flags |= float_flag_inexact;
3466 }
158142c2
FB
3467 if ( increment ) {
3468 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
3469 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
3470 }
3471 else {
3472 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
3473 }
3474 return packFloat128( zSign, zExp, zSig0, zSig1 );
3475
3476}
3477
3478/*----------------------------------------------------------------------------
3479| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3480| and significand formed by the concatenation of `zSig0' and `zSig1', and
3481| returns the proper quadruple-precision floating-point value corresponding
3482| to the abstract input. This routine is just like `roundAndPackFloat128'
3483| except that the input significand has fewer bits and does not have to be
3484| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
3485| point exponent.
3486*----------------------------------------------------------------------------*/
3487
f4014512 3488static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
3489 uint64_t zSig0, uint64_t zSig1,
3490 float_status *status)
158142c2 3491{
8f506c70 3492 int8_t shiftCount;
bb98fe42 3493 uint64_t zSig2;
158142c2
FB
3494
3495 if ( zSig0 == 0 ) {
3496 zSig0 = zSig1;
3497 zSig1 = 0;
3498 zExp -= 64;
3499 }
3500 shiftCount = countLeadingZeros64( zSig0 ) - 15;
3501 if ( 0 <= shiftCount ) {
3502 zSig2 = 0;
3503 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3504 }
3505 else {
3506 shift128ExtraRightJamming(
3507 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
3508 }
3509 zExp -= shiftCount;
ff32e16e 3510 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
3511
3512}
3513
158142c2 3514
158142c2
FB
3515/*----------------------------------------------------------------------------
3516| Returns the result of converting the 32-bit two's complement integer `a'
3517| to the extended double-precision floating-point format. The conversion
3518| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3519| Arithmetic.
3520*----------------------------------------------------------------------------*/
3521
e5a41ffa 3522floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2
FB
3523{
3524 flag zSign;
3a87d009 3525 uint32_t absA;
8f506c70 3526 int8_t shiftCount;
bb98fe42 3527 uint64_t zSig;
158142c2
FB
3528
3529 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3530 zSign = ( a < 0 );
3531 absA = zSign ? - a : a;
3532 shiftCount = countLeadingZeros32( absA ) + 32;
3533 zSig = absA;
3534 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
3535
3536}
3537
158142c2
FB
3538/*----------------------------------------------------------------------------
3539| Returns the result of converting the 32-bit two's complement integer `a' to
3540| the quadruple-precision floating-point format. The conversion is performed
3541| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3542*----------------------------------------------------------------------------*/
3543
e5a41ffa 3544float128 int32_to_float128(int32_t a, float_status *status)
158142c2
FB
3545{
3546 flag zSign;
3a87d009 3547 uint32_t absA;
8f506c70 3548 int8_t shiftCount;
bb98fe42 3549 uint64_t zSig0;
158142c2
FB
3550
3551 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3552 zSign = ( a < 0 );
3553 absA = zSign ? - a : a;
3554 shiftCount = countLeadingZeros32( absA ) + 17;
3555 zSig0 = absA;
3556 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
3557
3558}
3559
158142c2
FB
3560/*----------------------------------------------------------------------------
3561| Returns the result of converting the 64-bit two's complement integer `a'
3562| to the extended double-precision floating-point format. The conversion
3563| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3564| Arithmetic.
3565*----------------------------------------------------------------------------*/
3566
e5a41ffa 3567floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2
FB
3568{
3569 flag zSign;
182f42fd 3570 uint64_t absA;
8f506c70 3571 int8_t shiftCount;
158142c2
FB
3572
3573 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3574 zSign = ( a < 0 );
3575 absA = zSign ? - a : a;
3576 shiftCount = countLeadingZeros64( absA );
3577 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
3578
3579}
3580
158142c2
FB
3581/*----------------------------------------------------------------------------
3582| Returns the result of converting the 64-bit two's complement integer `a' to
3583| the quadruple-precision floating-point format. The conversion is performed
3584| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3585*----------------------------------------------------------------------------*/
3586
e5a41ffa 3587float128 int64_to_float128(int64_t a, float_status *status)
158142c2
FB
3588{
3589 flag zSign;
182f42fd 3590 uint64_t absA;
8f506c70 3591 int8_t shiftCount;
f4014512 3592 int32_t zExp;
bb98fe42 3593 uint64_t zSig0, zSig1;
158142c2
FB
3594
3595 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3596 zSign = ( a < 0 );
3597 absA = zSign ? - a : a;
3598 shiftCount = countLeadingZeros64( absA ) + 49;
3599 zExp = 0x406E - shiftCount;
3600 if ( 64 <= shiftCount ) {
3601 zSig1 = 0;
3602 zSig0 = absA;
3603 shiftCount -= 64;
3604 }
3605 else {
3606 zSig1 = absA;
3607 zSig0 = 0;
3608 }
3609 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3610 return packFloat128( zSign, zExp, zSig0, zSig1 );
3611
3612}
3613
6bb8e0f1
PM
3614/*----------------------------------------------------------------------------
3615| Returns the result of converting the 64-bit unsigned integer `a'
3616| to the quadruple-precision floating-point format. The conversion is performed
3617| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3618*----------------------------------------------------------------------------*/
3619
e5a41ffa 3620float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
3621{
3622 if (a == 0) {
3623 return float128_zero;
3624 }
6603d506 3625 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
3626}
3627
158142c2
FB
3628/*----------------------------------------------------------------------------
3629| Returns the result of converting the single-precision floating-point value
3630| `a' to the extended double-precision floating-point format. The conversion
3631| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3632| Arithmetic.
3633*----------------------------------------------------------------------------*/
3634
e5a41ffa 3635floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2
FB
3636{
3637 flag aSign;
0c48262d 3638 int aExp;
bb98fe42 3639 uint32_t aSig;
158142c2 3640
ff32e16e 3641 a = float32_squash_input_denormal(a, status);
158142c2
FB
3642 aSig = extractFloat32Frac( a );
3643 aExp = extractFloat32Exp( a );
3644 aSign = extractFloat32Sign( a );
3645 if ( aExp == 0xFF ) {
ff32e16e
PM
3646 if (aSig) {
3647 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
3648 }
0f605c88
LV
3649 return packFloatx80(aSign,
3650 floatx80_infinity_high,
3651 floatx80_infinity_low);
158142c2
FB
3652 }
3653 if ( aExp == 0 ) {
3654 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3655 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3656 }
3657 aSig |= 0x00800000;
bb98fe42 3658 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
3659
3660}
3661
158142c2
FB
3662/*----------------------------------------------------------------------------
3663| Returns the result of converting the single-precision floating-point value
3664| `a' to the double-precision floating-point format. The conversion is
3665| performed according to the IEC/IEEE Standard for Binary Floating-Point
3666| Arithmetic.
3667*----------------------------------------------------------------------------*/
3668
e5a41ffa 3669float128 float32_to_float128(float32 a, float_status *status)
158142c2
FB
3670{
3671 flag aSign;
0c48262d 3672 int aExp;
bb98fe42 3673 uint32_t aSig;
158142c2 3674
ff32e16e 3675 a = float32_squash_input_denormal(a, status);
158142c2
FB
3676 aSig = extractFloat32Frac( a );
3677 aExp = extractFloat32Exp( a );
3678 aSign = extractFloat32Sign( a );
3679 if ( aExp == 0xFF ) {
ff32e16e
PM
3680 if (aSig) {
3681 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
3682 }
158142c2
FB
3683 return packFloat128( aSign, 0x7FFF, 0, 0 );
3684 }
3685 if ( aExp == 0 ) {
3686 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3687 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3688 --aExp;
3689 }
bb98fe42 3690 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
3691
3692}
3693
158142c2
FB
3694/*----------------------------------------------------------------------------
3695| Returns the remainder of the single-precision floating-point value `a'
3696| with respect to the corresponding value `b'. The operation is performed
3697| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3698*----------------------------------------------------------------------------*/
3699
e5a41ffa 3700float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 3701{
ed086f3d 3702 flag aSign, zSign;
0c48262d 3703 int aExp, bExp, expDiff;
bb98fe42
AF
3704 uint32_t aSig, bSig;
3705 uint32_t q;
3706 uint64_t aSig64, bSig64, q64;
3707 uint32_t alternateASig;
3708 int32_t sigMean;
ff32e16e
PM
3709 a = float32_squash_input_denormal(a, status);
3710 b = float32_squash_input_denormal(b, status);
158142c2
FB
3711
3712 aSig = extractFloat32Frac( a );
3713 aExp = extractFloat32Exp( a );
3714 aSign = extractFloat32Sign( a );
3715 bSig = extractFloat32Frac( b );
3716 bExp = extractFloat32Exp( b );
158142c2
FB
3717 if ( aExp == 0xFF ) {
3718 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 3719 return propagateFloat32NaN(a, b, status);
158142c2 3720 }
ff32e16e 3721 float_raise(float_flag_invalid, status);
af39bc8c 3722 return float32_default_nan(status);
158142c2
FB
3723 }
3724 if ( bExp == 0xFF ) {
ff32e16e
PM
3725 if (bSig) {
3726 return propagateFloat32NaN(a, b, status);
3727 }
158142c2
FB
3728 return a;
3729 }
3730 if ( bExp == 0 ) {
3731 if ( bSig == 0 ) {
ff32e16e 3732 float_raise(float_flag_invalid, status);
af39bc8c 3733 return float32_default_nan(status);
158142c2
FB
3734 }
3735 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
3736 }
3737 if ( aExp == 0 ) {
3738 if ( aSig == 0 ) return a;
3739 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3740 }
3741 expDiff = aExp - bExp;
3742 aSig |= 0x00800000;
3743 bSig |= 0x00800000;
3744 if ( expDiff < 32 ) {
3745 aSig <<= 8;
3746 bSig <<= 8;
3747 if ( expDiff < 0 ) {
3748 if ( expDiff < -1 ) return a;
3749 aSig >>= 1;
3750 }
3751 q = ( bSig <= aSig );
3752 if ( q ) aSig -= bSig;
3753 if ( 0 < expDiff ) {
bb98fe42 3754 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
3755 q >>= 32 - expDiff;
3756 bSig >>= 2;
3757 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3758 }
3759 else {
3760 aSig >>= 2;
3761 bSig >>= 2;
3762 }
3763 }
3764 else {
3765 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
3766 aSig64 = ( (uint64_t) aSig )<<40;
3767 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
3768 expDiff -= 64;
3769 while ( 0 < expDiff ) {
3770 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3771 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3772 aSig64 = - ( ( bSig * q64 )<<38 );
3773 expDiff -= 62;
3774 }
3775 expDiff += 64;
3776 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3777 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3778 q = q64>>( 64 - expDiff );
3779 bSig <<= 6;
3780 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
3781 }
3782 do {
3783 alternateASig = aSig;
3784 ++q;
3785 aSig -= bSig;
bb98fe42 3786 } while ( 0 <= (int32_t) aSig );
158142c2
FB
3787 sigMean = aSig + alternateASig;
3788 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3789 aSig = alternateASig;
3790 }
bb98fe42 3791 zSign = ( (int32_t) aSig < 0 );
158142c2 3792 if ( zSign ) aSig = - aSig;
ff32e16e 3793 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
3794}
3795
369be8f6 3796
158142c2 3797
8229c991
AJ
3798/*----------------------------------------------------------------------------
3799| Returns the binary exponential of the single-precision floating-point value
3800| `a'. The operation is performed according to the IEC/IEEE Standard for
3801| Binary Floating-Point Arithmetic.
3802|
3803| Uses the following identities:
3804|
3805| 1. -------------------------------------------------------------------------
3806| x x*ln(2)
3807| 2 = e
3808|
3809| 2. -------------------------------------------------------------------------
3810| 2 3 4 5 n
3811| x x x x x x x
3812| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
3813| 1! 2! 3! 4! 5! n!
3814*----------------------------------------------------------------------------*/
3815
3816static const float64 float32_exp2_coefficients[15] =
3817{
d5138cf4
PM
3818 const_float64( 0x3ff0000000000000ll ), /* 1 */
3819 const_float64( 0x3fe0000000000000ll ), /* 2 */
3820 const_float64( 0x3fc5555555555555ll ), /* 3 */
3821 const_float64( 0x3fa5555555555555ll ), /* 4 */
3822 const_float64( 0x3f81111111111111ll ), /* 5 */
3823 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
3824 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
3825 const_float64( 0x3efa01a01a01a01all ), /* 8 */
3826 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
3827 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
3828 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
3829 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
3830 const_float64( 0x3de6124613a86d09ll ), /* 13 */
3831 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
3832 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
3833};
3834
e5a41ffa 3835float32 float32_exp2(float32 a, float_status *status)
8229c991
AJ
3836{
3837 flag aSign;
0c48262d 3838 int aExp;
bb98fe42 3839 uint32_t aSig;
8229c991
AJ
3840 float64 r, x, xn;
3841 int i;
ff32e16e 3842 a = float32_squash_input_denormal(a, status);
8229c991
AJ
3843
3844 aSig = extractFloat32Frac( a );
3845 aExp = extractFloat32Exp( a );
3846 aSign = extractFloat32Sign( a );
3847
3848 if ( aExp == 0xFF) {
ff32e16e
PM
3849 if (aSig) {
3850 return propagateFloat32NaN(a, float32_zero, status);
3851 }
8229c991
AJ
3852 return (aSign) ? float32_zero : a;
3853 }
3854 if (aExp == 0) {
3855 if (aSig == 0) return float32_one;
3856 }
3857
ff32e16e 3858 float_raise(float_flag_inexact, status);
8229c991
AJ
3859
3860 /* ******************************* */
3861 /* using float64 for approximation */
3862 /* ******************************* */
ff32e16e
PM
3863 x = float32_to_float64(a, status);
3864 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
3865
3866 xn = x;
3867 r = float64_one;
3868 for (i = 0 ; i < 15 ; i++) {
3869 float64 f;
3870
ff32e16e
PM
3871 f = float64_mul(xn, float32_exp2_coefficients[i], status);
3872 r = float64_add(r, f, status);
8229c991 3873
ff32e16e 3874 xn = float64_mul(xn, x, status);
8229c991
AJ
3875 }
3876
3877 return float64_to_float32(r, status);
3878}
3879
374dfc33
AJ
3880/*----------------------------------------------------------------------------
3881| Returns the binary log of the single-precision floating-point value `a'.
3882| The operation is performed according to the IEC/IEEE Standard for Binary
3883| Floating-Point Arithmetic.
3884*----------------------------------------------------------------------------*/
e5a41ffa 3885float32 float32_log2(float32 a, float_status *status)
374dfc33
AJ
3886{
3887 flag aSign, zSign;
0c48262d 3888 int aExp;
bb98fe42 3889 uint32_t aSig, zSig, i;
374dfc33 3890
ff32e16e 3891 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
3892 aSig = extractFloat32Frac( a );
3893 aExp = extractFloat32Exp( a );
3894 aSign = extractFloat32Sign( a );
3895
3896 if ( aExp == 0 ) {
3897 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
3898 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3899 }
3900 if ( aSign ) {
ff32e16e 3901 float_raise(float_flag_invalid, status);
af39bc8c 3902 return float32_default_nan(status);
374dfc33
AJ
3903 }
3904 if ( aExp == 0xFF ) {
ff32e16e
PM
3905 if (aSig) {
3906 return propagateFloat32NaN(a, float32_zero, status);
3907 }
374dfc33
AJ
3908 return a;
3909 }
3910
3911 aExp -= 0x7F;
3912 aSig |= 0x00800000;
3913 zSign = aExp < 0;
3914 zSig = aExp << 23;
3915
3916 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 3917 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
3918 if ( aSig & 0x01000000 ) {
3919 aSig >>= 1;
3920 zSig |= i;
3921 }
3922 }
3923
3924 if ( zSign )
3925 zSig = -zSig;
3926
ff32e16e 3927 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
3928}
3929
158142c2
FB
3930/*----------------------------------------------------------------------------
3931| Returns 1 if the single-precision floating-point value `a' is equal to
b689362d
AJ
3932| the corresponding value `b', and 0 otherwise. The invalid exception is
3933| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
3934| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3935*----------------------------------------------------------------------------*/
3936
e5a41ffa 3937int float32_eq(float32 a, float32 b, float_status *status)
158142c2 3938{
b689362d 3939 uint32_t av, bv;
ff32e16e
PM
3940 a = float32_squash_input_denormal(a, status);
3941 b = float32_squash_input_denormal(b, status);
158142c2
FB
3942
3943 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3944 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3945 ) {
ff32e16e 3946 float_raise(float_flag_invalid, status);
158142c2
FB
3947 return 0;
3948 }
b689362d
AJ
3949 av = float32_val(a);
3950 bv = float32_val(b);
3951 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
3952}
3953
3954/*----------------------------------------------------------------------------
3955| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
3956| or equal to the corresponding value `b', and 0 otherwise. The invalid
3957| exception is raised if either operand is a NaN. The comparison is performed
3958| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3959*----------------------------------------------------------------------------*/
3960
e5a41ffa 3961int float32_le(float32 a, float32 b, float_status *status)
158142c2
FB
3962{
3963 flag aSign, bSign;
bb98fe42 3964 uint32_t av, bv;
ff32e16e
PM
3965 a = float32_squash_input_denormal(a, status);
3966 b = float32_squash_input_denormal(b, status);
158142c2
FB
3967
3968 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3969 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3970 ) {
ff32e16e 3971 float_raise(float_flag_invalid, status);
158142c2
FB
3972 return 0;
3973 }
3974 aSign = extractFloat32Sign( a );
3975 bSign = extractFloat32Sign( b );
f090c9d4
PB
3976 av = float32_val(a);
3977 bv = float32_val(b);
bb98fe42 3978 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 3979 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
3980
3981}
3982
3983/*----------------------------------------------------------------------------
3984| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
3985| the corresponding value `b', and 0 otherwise. The invalid exception is
3986| raised if either operand is a NaN. The comparison is performed according
3987| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3988*----------------------------------------------------------------------------*/
3989
e5a41ffa 3990int float32_lt(float32 a, float32 b, float_status *status)
158142c2
FB
3991{
3992 flag aSign, bSign;
bb98fe42 3993 uint32_t av, bv;
ff32e16e
PM
3994 a = float32_squash_input_denormal(a, status);
3995 b = float32_squash_input_denormal(b, status);
158142c2
FB
3996
3997 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3998 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3999 ) {
ff32e16e 4000 float_raise(float_flag_invalid, status);
158142c2
FB
4001 return 0;
4002 }
4003 aSign = extractFloat32Sign( a );
4004 bSign = extractFloat32Sign( b );
f090c9d4
PB
4005 av = float32_val(a);
4006 bv = float32_val(b);
bb98fe42 4007 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4008 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4009
4010}
4011
67b7861d
AJ
4012/*----------------------------------------------------------------------------
4013| Returns 1 if the single-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4014| be compared, and 0 otherwise. The invalid exception is raised if either
4015| operand is a NaN. The comparison is performed according to the IEC/IEEE
4016| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4017*----------------------------------------------------------------------------*/
4018
e5a41ffa 4019int float32_unordered(float32 a, float32 b, float_status *status)
67b7861d 4020{
ff32e16e
PM
4021 a = float32_squash_input_denormal(a, status);
4022 b = float32_squash_input_denormal(b, status);
67b7861d
AJ
4023
4024 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4025 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4026 ) {
ff32e16e 4027 float_raise(float_flag_invalid, status);
67b7861d
AJ
4028 return 1;
4029 }
4030 return 0;
4031}
b689362d 4032
158142c2
FB
4033/*----------------------------------------------------------------------------
4034| Returns 1 if the single-precision floating-point value `a' is equal to
f5a64251
AJ
4035| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4036| exception. The comparison is performed according to the IEC/IEEE Standard
4037| for Binary Floating-Point Arithmetic.
158142c2
FB
4038*----------------------------------------------------------------------------*/
4039
e5a41ffa 4040int float32_eq_quiet(float32 a, float32 b, float_status *status)
158142c2 4041{
ff32e16e
PM
4042 a = float32_squash_input_denormal(a, status);
4043 b = float32_squash_input_denormal(b, status);
158142c2
FB
4044
4045 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4046 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4047 ) {
af39bc8c
AM
4048 if (float32_is_signaling_nan(a, status)
4049 || float32_is_signaling_nan(b, status)) {
ff32e16e 4050 float_raise(float_flag_invalid, status);
b689362d 4051 }
158142c2
FB
4052 return 0;
4053 }
b689362d
AJ
4054 return ( float32_val(a) == float32_val(b) ) ||
4055 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
158142c2
FB
4056}
4057
4058/*----------------------------------------------------------------------------
4059| Returns 1 if the single-precision floating-point value `a' is less than or
4060| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4061| cause an exception. Otherwise, the comparison is performed according to the
4062| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4063*----------------------------------------------------------------------------*/
4064
e5a41ffa 4065int float32_le_quiet(float32 a, float32 b, float_status *status)
158142c2
FB
4066{
4067 flag aSign, bSign;
bb98fe42 4068 uint32_t av, bv;
ff32e16e
PM
4069 a = float32_squash_input_denormal(a, status);
4070 b = float32_squash_input_denormal(b, status);
158142c2
FB
4071
4072 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4073 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4074 ) {
af39bc8c
AM
4075 if (float32_is_signaling_nan(a, status)
4076 || float32_is_signaling_nan(b, status)) {
ff32e16e 4077 float_raise(float_flag_invalid, status);
158142c2
FB
4078 }
4079 return 0;
4080 }
4081 aSign = extractFloat32Sign( a );
4082 bSign = extractFloat32Sign( b );
f090c9d4
PB
4083 av = float32_val(a);
4084 bv = float32_val(b);
bb98fe42 4085 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4086 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4087
4088}
4089
4090/*----------------------------------------------------------------------------
4091| Returns 1 if the single-precision floating-point value `a' is less than
4092| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4093| exception. Otherwise, the comparison is performed according to the IEC/IEEE
ab52f973 4094| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4095*----------------------------------------------------------------------------*/
4096
ab52f973 4097int float32_lt_quiet(float32 a, float32 b, float_status *status)
158142c2 4098{
ab52f973
AB
4099 flag aSign, bSign;
4100 uint32_t av, bv;
4101 a = float32_squash_input_denormal(a, status);
4102 b = float32_squash_input_denormal(b, status);
158142c2 4103
ab52f973
AB
4104 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4105 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4106 ) {
4107 if (float32_is_signaling_nan(a, status)
4108 || float32_is_signaling_nan(b, status)) {
ff32e16e 4109 float_raise(float_flag_invalid, status);
158142c2 4110 }
ab52f973 4111 return 0;
158142c2 4112 }
ab52f973
AB
4113 aSign = extractFloat32Sign( a );
4114 bSign = extractFloat32Sign( b );
4115 av = float32_val(a);
4116 bv = float32_val(b);
4117 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
4118 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4119
4120}
4121
4122/*----------------------------------------------------------------------------
ab52f973
AB
4123| Returns 1 if the single-precision floating-point values `a' and `b' cannot
4124| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4125| comparison is performed according to the IEC/IEEE Standard for Binary
4126| Floating-Point Arithmetic.
158142c2
FB
4127*----------------------------------------------------------------------------*/
4128
ab52f973 4129int float32_unordered_quiet(float32 a, float32 b, float_status *status)
158142c2 4130{
ab52f973
AB
4131 a = float32_squash_input_denormal(a, status);
4132 b = float32_squash_input_denormal(b, status);
158142c2 4133
ab52f973
AB
4134 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4135 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4136 ) {
4137 if (float32_is_signaling_nan(a, status)
4138 || float32_is_signaling_nan(b, status)) {
4139 float_raise(float_flag_invalid, status);
158142c2 4140 }
ab52f973 4141 return 1;
158142c2 4142 }
ab52f973 4143 return 0;
158142c2
FB
4144}
4145
210cbd49
AB
4146/*----------------------------------------------------------------------------
4147| If `a' is denormal and we are in flush-to-zero mode then set the
4148| input-denormal exception and return zero. Otherwise just return the value.
4149*----------------------------------------------------------------------------*/
4150float16 float16_squash_input_denormal(float16 a, float_status *status)
4151{
4152 if (status->flush_inputs_to_zero) {
4153 if (extractFloat16Exp(a) == 0 && extractFloat16Frac(a) != 0) {
4154 float_raise(float_flag_input_denormal, status);
4155 return make_float16(float16_val(a) & 0x8000);
4156 }
4157 }
4158 return a;
4159}
4160
158142c2
FB
4161/*----------------------------------------------------------------------------
4162| Returns the result of converting the double-precision floating-point value
4163| `a' to the extended double-precision floating-point format. The conversion
4164| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4165| Arithmetic.
4166*----------------------------------------------------------------------------*/
4167
e5a41ffa 4168floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2
FB
4169{
4170 flag aSign;
0c48262d 4171 int aExp;
bb98fe42 4172 uint64_t aSig;
158142c2 4173
ff32e16e 4174 a = float64_squash_input_denormal(a, status);
158142c2
FB
4175 aSig = extractFloat64Frac( a );
4176 aExp = extractFloat64Exp( a );
4177 aSign = extractFloat64Sign( a );
4178 if ( aExp == 0x7FF ) {
ff32e16e
PM
4179 if (aSig) {
4180 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
4181 }
0f605c88
LV
4182 return packFloatx80(aSign,
4183 floatx80_infinity_high,
4184 floatx80_infinity_low);
158142c2
FB
4185 }
4186 if ( aExp == 0 ) {
4187 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4188 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4189 }
4190 return
4191 packFloatx80(
4192 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
4193
4194}
4195
158142c2
FB
4196/*----------------------------------------------------------------------------
4197| Returns the result of converting the double-precision floating-point value
4198| `a' to the quadruple-precision floating-point format. The conversion is
4199| performed according to the IEC/IEEE Standard for Binary Floating-Point
4200| Arithmetic.
4201*----------------------------------------------------------------------------*/
4202
e5a41ffa 4203float128 float64_to_float128(float64 a, float_status *status)
158142c2
FB
4204{
4205 flag aSign;
0c48262d 4206 int aExp;
bb98fe42 4207 uint64_t aSig, zSig0, zSig1;
158142c2 4208
ff32e16e 4209 a = float64_squash_input_denormal(a, status);
158142c2
FB
4210 aSig = extractFloat64Frac( a );
4211 aExp = extractFloat64Exp( a );
4212 aSign = extractFloat64Sign( a );
4213 if ( aExp == 0x7FF ) {
ff32e16e
PM
4214 if (aSig) {
4215 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
4216 }
158142c2
FB
4217 return packFloat128( aSign, 0x7FFF, 0, 0 );
4218 }
4219 if ( aExp == 0 ) {
4220 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4221 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4222 --aExp;
4223 }
4224 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
4225 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
4226
4227}
4228
158142c2
FB
4229
4230/*----------------------------------------------------------------------------
4231| Returns the remainder of the double-precision floating-point value `a'
4232| with respect to the corresponding value `b'. The operation is performed
4233| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4234*----------------------------------------------------------------------------*/
4235
e5a41ffa 4236float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 4237{
ed086f3d 4238 flag aSign, zSign;
0c48262d 4239 int aExp, bExp, expDiff;
bb98fe42
AF
4240 uint64_t aSig, bSig;
4241 uint64_t q, alternateASig;
4242 int64_t sigMean;
158142c2 4243
ff32e16e
PM
4244 a = float64_squash_input_denormal(a, status);
4245 b = float64_squash_input_denormal(b, status);
158142c2
FB
4246 aSig = extractFloat64Frac( a );
4247 aExp = extractFloat64Exp( a );
4248 aSign = extractFloat64Sign( a );
4249 bSig = extractFloat64Frac( b );
4250 bExp = extractFloat64Exp( b );
158142c2
FB
4251 if ( aExp == 0x7FF ) {
4252 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 4253 return propagateFloat64NaN(a, b, status);
158142c2 4254 }
ff32e16e 4255 float_raise(float_flag_invalid, status);
af39bc8c 4256 return float64_default_nan(status);
158142c2
FB
4257 }
4258 if ( bExp == 0x7FF ) {
ff32e16e
PM
4259 if (bSig) {
4260 return propagateFloat64NaN(a, b, status);
4261 }
158142c2
FB
4262 return a;
4263 }
4264 if ( bExp == 0 ) {
4265 if ( bSig == 0 ) {
ff32e16e 4266 float_raise(float_flag_invalid, status);
af39bc8c 4267 return float64_default_nan(status);
158142c2
FB
4268 }
4269 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4270 }
4271 if ( aExp == 0 ) {
4272 if ( aSig == 0 ) return a;
4273 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4274 }
4275 expDiff = aExp - bExp;
4276 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
4277 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4278 if ( expDiff < 0 ) {
4279 if ( expDiff < -1 ) return a;
4280 aSig >>= 1;
4281 }
4282 q = ( bSig <= aSig );
4283 if ( q ) aSig -= bSig;
4284 expDiff -= 64;
4285 while ( 0 < expDiff ) {
4286 q = estimateDiv128To64( aSig, 0, bSig );
4287 q = ( 2 < q ) ? q - 2 : 0;
4288 aSig = - ( ( bSig>>2 ) * q );
4289 expDiff -= 62;
4290 }
4291 expDiff += 64;
4292 if ( 0 < expDiff ) {
4293 q = estimateDiv128To64( aSig, 0, bSig );
4294 q = ( 2 < q ) ? q - 2 : 0;
4295 q >>= 64 - expDiff;
4296 bSig >>= 2;
4297 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4298 }
4299 else {
4300 aSig >>= 2;
4301 bSig >>= 2;
4302 }
4303 do {
4304 alternateASig = aSig;
4305 ++q;
4306 aSig -= bSig;
bb98fe42 4307 } while ( 0 <= (int64_t) aSig );
158142c2
FB
4308 sigMean = aSig + alternateASig;
4309 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4310 aSig = alternateASig;
4311 }
bb98fe42 4312 zSign = ( (int64_t) aSig < 0 );
158142c2 4313 if ( zSign ) aSig = - aSig;
ff32e16e 4314 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
4315
4316}
4317
374dfc33
AJ
4318/*----------------------------------------------------------------------------
4319| Returns the binary log of the double-precision floating-point value `a'.
4320| The operation is performed according to the IEC/IEEE Standard for Binary
4321| Floating-Point Arithmetic.
4322*----------------------------------------------------------------------------*/
e5a41ffa 4323float64 float64_log2(float64 a, float_status *status)
374dfc33
AJ
4324{
4325 flag aSign, zSign;
0c48262d 4326 int aExp;
bb98fe42 4327 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 4328 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
4329
4330 aSig = extractFloat64Frac( a );
4331 aExp = extractFloat64Exp( a );
4332 aSign = extractFloat64Sign( a );
4333
4334 if ( aExp == 0 ) {
4335 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
4336 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4337 }
4338 if ( aSign ) {
ff32e16e 4339 float_raise(float_flag_invalid, status);
af39bc8c 4340 return float64_default_nan(status);
374dfc33
AJ
4341 }
4342 if ( aExp == 0x7FF ) {
ff32e16e
PM
4343 if (aSig) {
4344 return propagateFloat64NaN(a, float64_zero, status);
4345 }
374dfc33
AJ
4346 return a;
4347 }
4348
4349 aExp -= 0x3FF;
4350 aSig |= LIT64( 0x0010000000000000 );
4351 zSign = aExp < 0;
bb98fe42 4352 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
4353 for (i = 1LL << 51; i > 0; i >>= 1) {
4354 mul64To128( aSig, aSig, &aSig0, &aSig1 );
4355 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
4356 if ( aSig & LIT64( 0x0020000000000000 ) ) {
4357 aSig >>= 1;
4358 zSig |= i;
4359 }
4360 }
4361
4362 if ( zSign )
4363 zSig = -zSig;
ff32e16e 4364 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
4365}
4366
158142c2
FB
4367/*----------------------------------------------------------------------------
4368| Returns 1 if the double-precision floating-point value `a' is equal to the
b689362d
AJ
4369| corresponding value `b', and 0 otherwise. The invalid exception is raised
4370| if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
4371| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4372*----------------------------------------------------------------------------*/
4373
e5a41ffa 4374int float64_eq(float64 a, float64 b, float_status *status)
158142c2 4375{
bb98fe42 4376 uint64_t av, bv;
ff32e16e
PM
4377 a = float64_squash_input_denormal(a, status);
4378 b = float64_squash_input_denormal(b, status);
158142c2
FB
4379
4380 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4381 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4382 ) {
ff32e16e 4383 float_raise(float_flag_invalid, status);
158142c2
FB
4384 return 0;
4385 }
f090c9d4 4386 av = float64_val(a);
a1b91bb4 4387 bv = float64_val(b);
bb98fe42 4388 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4389
4390}
4391
4392/*----------------------------------------------------------------------------
4393| Returns 1 if the double-precision floating-point value `a' is less than or
f5a64251
AJ
4394| equal to the corresponding value `b', and 0 otherwise. The invalid
4395| exception is raised if either operand is a NaN. The comparison is performed
4396| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4397*----------------------------------------------------------------------------*/
4398
e5a41ffa 4399int float64_le(float64 a, float64 b, float_status *status)
158142c2
FB
4400{
4401 flag aSign, bSign;
bb98fe42 4402 uint64_t av, bv;
ff32e16e
PM
4403 a = float64_squash_input_denormal(a, status);
4404 b = float64_squash_input_denormal(b, status);
158142c2
FB
4405
4406 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4407 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4408 ) {
ff32e16e 4409 float_raise(float_flag_invalid, status);
158142c2
FB
4410 return 0;
4411 }
4412 aSign = extractFloat64Sign( a );
4413 bSign = extractFloat64Sign( b );
f090c9d4 4414 av = float64_val(a);
a1b91bb4 4415 bv = float64_val(b);
bb98fe42 4416 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4417 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4418
4419}
4420
4421/*----------------------------------------------------------------------------
4422| Returns 1 if the double-precision floating-point value `a' is less than
f5a64251
AJ
4423| the corresponding value `b', and 0 otherwise. The invalid exception is
4424| raised if either operand is a NaN. The comparison is performed according
4425| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4426*----------------------------------------------------------------------------*/
4427
e5a41ffa 4428int float64_lt(float64 a, float64 b, float_status *status)
158142c2
FB
4429{
4430 flag aSign, bSign;
bb98fe42 4431 uint64_t av, bv;
158142c2 4432
ff32e16e
PM
4433 a = float64_squash_input_denormal(a, status);
4434 b = float64_squash_input_denormal(b, status);
158142c2
FB
4435 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4436 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4437 ) {
ff32e16e 4438 float_raise(float_flag_invalid, status);
158142c2
FB
4439 return 0;
4440 }
4441 aSign = extractFloat64Sign( a );
4442 bSign = extractFloat64Sign( b );
f090c9d4 4443 av = float64_val(a);
a1b91bb4 4444 bv = float64_val(b);
bb98fe42 4445 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4446 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4447
4448}
4449
67b7861d
AJ
4450/*----------------------------------------------------------------------------
4451| Returns 1 if the double-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4452| be compared, and 0 otherwise. The invalid exception is raised if either
4453| operand is a NaN. The comparison is performed according to the IEC/IEEE
4454| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4455*----------------------------------------------------------------------------*/
4456
e5a41ffa 4457int float64_unordered(float64 a, float64 b, float_status *status)
67b7861d 4458{
ff32e16e
PM
4459 a = float64_squash_input_denormal(a, status);
4460 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4461
4462 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4463 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4464 ) {
ff32e16e 4465 float_raise(float_flag_invalid, status);
67b7861d
AJ
4466 return 1;
4467 }
4468 return 0;
4469}
4470
158142c2
FB
4471/*----------------------------------------------------------------------------
4472| Returns 1 if the double-precision floating-point value `a' is equal to the
f5a64251
AJ
4473| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4474| exception.The comparison is performed according to the IEC/IEEE Standard
4475| for Binary Floating-Point Arithmetic.
158142c2
FB
4476*----------------------------------------------------------------------------*/
4477
e5a41ffa 4478int float64_eq_quiet(float64 a, float64 b, float_status *status)
158142c2 4479{
bb98fe42 4480 uint64_t av, bv;
ff32e16e
PM
4481 a = float64_squash_input_denormal(a, status);
4482 b = float64_squash_input_denormal(b, status);
158142c2
FB
4483
4484 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4485 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4486 ) {
af39bc8c
AM
4487 if (float64_is_signaling_nan(a, status)
4488 || float64_is_signaling_nan(b, status)) {
ff32e16e 4489 float_raise(float_flag_invalid, status);
b689362d 4490 }
158142c2
FB
4491 return 0;
4492 }
f090c9d4 4493 av = float64_val(a);
a1b91bb4 4494 bv = float64_val(b);
bb98fe42 4495 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4496
4497}
4498
4499/*----------------------------------------------------------------------------
4500| Returns 1 if the double-precision floating-point value `a' is less than or
4501| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4502| cause an exception. Otherwise, the comparison is performed according to the
4503| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4504*----------------------------------------------------------------------------*/
4505
e5a41ffa 4506int float64_le_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4507{
4508 flag aSign, bSign;
bb98fe42 4509 uint64_t av, bv;
ff32e16e
PM
4510 a = float64_squash_input_denormal(a, status);
4511 b = float64_squash_input_denormal(b, status);
158142c2
FB
4512
4513 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4514 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4515 ) {
af39bc8c
AM
4516 if (float64_is_signaling_nan(a, status)
4517 || float64_is_signaling_nan(b, status)) {
ff32e16e 4518 float_raise(float_flag_invalid, status);
158142c2
FB
4519 }
4520 return 0;
4521 }
4522 aSign = extractFloat64Sign( a );
4523 bSign = extractFloat64Sign( b );
f090c9d4 4524 av = float64_val(a);
a1b91bb4 4525 bv = float64_val(b);
bb98fe42 4526 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4527 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4528
4529}
4530
4531/*----------------------------------------------------------------------------
4532| Returns 1 if the double-precision floating-point value `a' is less than
4533| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4534| exception. Otherwise, the comparison is performed according to the IEC/IEEE
4535| Standard for Binary Floating-Point Arithmetic.
4536*----------------------------------------------------------------------------*/
4537
e5a41ffa 4538int float64_lt_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4539{
4540 flag aSign, bSign;
bb98fe42 4541 uint64_t av, bv;
ff32e16e
PM
4542 a = float64_squash_input_denormal(a, status);
4543 b = float64_squash_input_denormal(b, status);
158142c2
FB
4544
4545 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4546 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4547 ) {
af39bc8c
AM
4548 if (float64_is_signaling_nan(a, status)
4549 || float64_is_signaling_nan(b, status)) {
ff32e16e 4550 float_raise(float_flag_invalid, status);
158142c2
FB
4551 }
4552 return 0;
4553 }
4554 aSign = extractFloat64Sign( a );
4555 bSign = extractFloat64Sign( b );
f090c9d4 4556 av = float64_val(a);
a1b91bb4 4557 bv = float64_val(b);
bb98fe42 4558 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4559 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4560
4561}
4562
67b7861d
AJ
4563/*----------------------------------------------------------------------------
4564| Returns 1 if the double-precision floating-point values `a' and `b' cannot
4565| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4566| comparison is performed according to the IEC/IEEE Standard for Binary
4567| Floating-Point Arithmetic.
4568*----------------------------------------------------------------------------*/
4569
e5a41ffa 4570int float64_unordered_quiet(float64 a, float64 b, float_status *status)
67b7861d 4571{
ff32e16e
PM
4572 a = float64_squash_input_denormal(a, status);
4573 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4574
4575 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4576 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4577 ) {
af39bc8c
AM
4578 if (float64_is_signaling_nan(a, status)
4579 || float64_is_signaling_nan(b, status)) {
ff32e16e 4580 float_raise(float_flag_invalid, status);
67b7861d
AJ
4581 }
4582 return 1;
4583 }
4584 return 0;
4585}
4586
158142c2
FB
4587/*----------------------------------------------------------------------------
4588| Returns the result of converting the extended double-precision floating-
4589| point value `a' to the 32-bit two's complement integer format. The
4590| conversion is performed according to the IEC/IEEE Standard for Binary
4591| Floating-Point Arithmetic---which means in particular that the conversion
4592| is rounded according to the current rounding mode. If `a' is a NaN, the
4593| largest positive integer is returned. Otherwise, if the conversion
4594| overflows, the largest integer with the same sign as `a' is returned.
4595*----------------------------------------------------------------------------*/
4596
f4014512 4597int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2
FB
4598{
4599 flag aSign;
f4014512 4600 int32_t aExp, shiftCount;
bb98fe42 4601 uint64_t aSig;
158142c2 4602
d1eb8f2a
AD
4603 if (floatx80_invalid_encoding(a)) {
4604 float_raise(float_flag_invalid, status);
4605 return 1 << 31;
4606 }
158142c2
FB
4607 aSig = extractFloatx80Frac( a );
4608 aExp = extractFloatx80Exp( a );
4609 aSign = extractFloatx80Sign( a );
bb98fe42 4610 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4611 shiftCount = 0x4037 - aExp;
4612 if ( shiftCount <= 0 ) shiftCount = 1;
4613 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 4614 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
4615
4616}
4617
4618/*----------------------------------------------------------------------------
4619| Returns the result of converting the extended double-precision floating-
4620| point value `a' to the 32-bit two's complement integer format. The
4621| conversion is performed according to the IEC/IEEE Standard for Binary
4622| Floating-Point Arithmetic, except that the conversion is always rounded
4623| toward zero. If `a' is a NaN, the largest positive integer is returned.
4624| Otherwise, if the conversion overflows, the largest integer with the same
4625| sign as `a' is returned.
4626*----------------------------------------------------------------------------*/
4627
f4014512 4628int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4629{
4630 flag aSign;
f4014512 4631 int32_t aExp, shiftCount;
bb98fe42 4632 uint64_t aSig, savedASig;
b3a6a2e0 4633 int32_t z;
158142c2 4634
d1eb8f2a
AD
4635 if (floatx80_invalid_encoding(a)) {
4636 float_raise(float_flag_invalid, status);
4637 return 1 << 31;
4638 }
158142c2
FB
4639 aSig = extractFloatx80Frac( a );
4640 aExp = extractFloatx80Exp( a );
4641 aSign = extractFloatx80Sign( a );
4642 if ( 0x401E < aExp ) {
bb98fe42 4643 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4644 goto invalid;
4645 }
4646 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4647 if (aExp || aSig) {
4648 status->float_exception_flags |= float_flag_inexact;
4649 }
158142c2
FB
4650 return 0;
4651 }
4652 shiftCount = 0x403E - aExp;
4653 savedASig = aSig;
4654 aSig >>= shiftCount;
4655 z = aSig;
4656 if ( aSign ) z = - z;
4657 if ( ( z < 0 ) ^ aSign ) {
4658 invalid:
ff32e16e 4659 float_raise(float_flag_invalid, status);
bb98fe42 4660 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
4661 }
4662 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 4663 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4664 }
4665 return z;
4666
4667}
4668
4669/*----------------------------------------------------------------------------
4670| Returns the result of converting the extended double-precision floating-
4671| point value `a' to the 64-bit two's complement integer format. The
4672| conversion is performed according to the IEC/IEEE Standard for Binary
4673| Floating-Point Arithmetic---which means in particular that the conversion
4674| is rounded according to the current rounding mode. If `a' is a NaN,
4675| the largest positive integer is returned. Otherwise, if the conversion
4676| overflows, the largest integer with the same sign as `a' is returned.
4677*----------------------------------------------------------------------------*/
4678
f42c2224 4679int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2
FB
4680{
4681 flag aSign;
f4014512 4682 int32_t aExp, shiftCount;
bb98fe42 4683 uint64_t aSig, aSigExtra;
158142c2 4684
d1eb8f2a
AD
4685 if (floatx80_invalid_encoding(a)) {
4686 float_raise(float_flag_invalid, status);
4687 return 1ULL << 63;
4688 }
158142c2
FB
4689 aSig = extractFloatx80Frac( a );
4690 aExp = extractFloatx80Exp( a );
4691 aSign = extractFloatx80Sign( a );
4692 shiftCount = 0x403E - aExp;
4693 if ( shiftCount <= 0 ) {
4694 if ( shiftCount ) {
ff32e16e 4695 float_raise(float_flag_invalid, status);
0f605c88 4696 if (!aSign || floatx80_is_any_nan(a)) {
158142c2
FB
4697 return LIT64( 0x7FFFFFFFFFFFFFFF );
4698 }
bb98fe42 4699 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4700 }
4701 aSigExtra = 0;
4702 }
4703 else {
4704 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4705 }
ff32e16e 4706 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
4707
4708}
4709
4710/*----------------------------------------------------------------------------
4711| Returns the result of converting the extended double-precision floating-
4712| point value `a' to the 64-bit two's complement integer format. The
4713| conversion is performed according to the IEC/IEEE Standard for Binary
4714| Floating-Point Arithmetic, except that the conversion is always rounded
4715| toward zero. If `a' is a NaN, the largest positive integer is returned.
4716| Otherwise, if the conversion overflows, the largest integer with the same
4717| sign as `a' is returned.
4718*----------------------------------------------------------------------------*/
4719
f42c2224 4720int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4721{
4722 flag aSign;
f4014512 4723 int32_t aExp, shiftCount;
bb98fe42 4724 uint64_t aSig;
f42c2224 4725 int64_t z;
158142c2 4726
d1eb8f2a
AD
4727 if (floatx80_invalid_encoding(a)) {
4728 float_raise(float_flag_invalid, status);
4729 return 1ULL << 63;
4730 }
158142c2
FB
4731 aSig = extractFloatx80Frac( a );
4732 aExp = extractFloatx80Exp( a );
4733 aSign = extractFloatx80Sign( a );
4734 shiftCount = aExp - 0x403E;
4735 if ( 0 <= shiftCount ) {
4736 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4737 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 4738 float_raise(float_flag_invalid, status);
158142c2
FB
4739 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4740 return LIT64( 0x7FFFFFFFFFFFFFFF );
4741 }
4742 }
bb98fe42 4743 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4744 }
4745 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4746 if (aExp | aSig) {
4747 status->float_exception_flags |= float_flag_inexact;
4748 }
158142c2
FB
4749 return 0;
4750 }
4751 z = aSig>>( - shiftCount );
bb98fe42 4752 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 4753 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4754 }
4755 if ( aSign ) z = - z;
4756 return z;
4757
4758}
4759
4760/*----------------------------------------------------------------------------
4761| Returns the result of converting the extended double-precision floating-
4762| point value `a' to the single-precision floating-point format. The
4763| conversion is performed according to the IEC/IEEE Standard for Binary
4764| Floating-Point Arithmetic.
4765*----------------------------------------------------------------------------*/
4766
e5a41ffa 4767float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2
FB
4768{
4769 flag aSign;
f4014512 4770 int32_t aExp;
bb98fe42 4771 uint64_t aSig;
158142c2 4772
d1eb8f2a
AD
4773 if (floatx80_invalid_encoding(a)) {
4774 float_raise(float_flag_invalid, status);
4775 return float32_default_nan(status);
4776 }
158142c2
FB
4777 aSig = extractFloatx80Frac( a );
4778 aExp = extractFloatx80Exp( a );
4779 aSign = extractFloatx80Sign( a );
4780 if ( aExp == 0x7FFF ) {
bb98fe42 4781 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4782 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4783 }
4784 return packFloat32( aSign, 0xFF, 0 );
4785 }
4786 shift64RightJamming( aSig, 33, &aSig );
4787 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 4788 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
4789
4790}
4791
4792/*----------------------------------------------------------------------------
4793| Returns the result of converting the extended double-precision floating-
4794| point value `a' to the double-precision floating-point format. The
4795| conversion is performed according to the IEC/IEEE Standard for Binary
4796| Floating-Point Arithmetic.
4797*----------------------------------------------------------------------------*/
4798
e5a41ffa 4799float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2
FB
4800{
4801 flag aSign;
f4014512 4802 int32_t aExp;
bb98fe42 4803 uint64_t aSig, zSig;
158142c2 4804
d1eb8f2a
AD
4805 if (floatx80_invalid_encoding(a)) {
4806 float_raise(float_flag_invalid, status);
4807 return float64_default_nan(status);
4808 }
158142c2
FB
4809 aSig = extractFloatx80Frac( a );
4810 aExp = extractFloatx80Exp( a );
4811 aSign = extractFloatx80Sign( a );
4812 if ( aExp == 0x7FFF ) {
bb98fe42 4813 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4814 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4815 }
4816 return packFloat64( aSign, 0x7FF, 0 );
4817 }
4818 shift64RightJamming( aSig, 1, &zSig );
4819 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 4820 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
4821
4822}
4823
158142c2
FB
4824/*----------------------------------------------------------------------------
4825| Returns the result of converting the extended double-precision floating-
4826| point value `a' to the quadruple-precision floating-point format. The
4827| conversion is performed according to the IEC/IEEE Standard for Binary
4828| Floating-Point Arithmetic.
4829*----------------------------------------------------------------------------*/
4830
e5a41ffa 4831float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2
FB
4832{
4833 flag aSign;
0c48262d 4834 int aExp;
bb98fe42 4835 uint64_t aSig, zSig0, zSig1;
158142c2 4836
d1eb8f2a
AD
4837 if (floatx80_invalid_encoding(a)) {
4838 float_raise(float_flag_invalid, status);
4839 return float128_default_nan(status);
4840 }
158142c2
FB
4841 aSig = extractFloatx80Frac( a );
4842 aExp = extractFloatx80Exp( a );
4843 aSign = extractFloatx80Sign( a );
bb98fe42 4844 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4845 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4846 }
4847 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
4848 return packFloat128( aSign, aExp, zSig0, zSig1 );
4849
4850}
4851
0f721292
LV
4852/*----------------------------------------------------------------------------
4853| Rounds the extended double-precision floating-point value `a'
4854| to the precision provided by floatx80_rounding_precision and returns the
4855| result as an extended double-precision floating-point value.
4856| The operation is performed according to the IEC/IEEE Standard for Binary
4857| Floating-Point Arithmetic.
4858*----------------------------------------------------------------------------*/
4859
4860floatx80 floatx80_round(floatx80 a, float_status *status)
4861{
4862 return roundAndPackFloatx80(status->floatx80_rounding_precision,
4863 extractFloatx80Sign(a),
4864 extractFloatx80Exp(a),
4865 extractFloatx80Frac(a), 0, status);
4866}
4867
158142c2
FB
4868/*----------------------------------------------------------------------------
4869| Rounds the extended double-precision floating-point value `a' to an integer,
4870| and returns the result as an extended quadruple-precision floating-point
4871| value. The operation is performed according to the IEC/IEEE Standard for
4872| Binary Floating-Point Arithmetic.
4873*----------------------------------------------------------------------------*/
4874
e5a41ffa 4875floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2
FB
4876{
4877 flag aSign;
f4014512 4878 int32_t aExp;
bb98fe42 4879 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
4880 floatx80 z;
4881
d1eb8f2a
AD
4882 if (floatx80_invalid_encoding(a)) {
4883 float_raise(float_flag_invalid, status);
4884 return floatx80_default_nan(status);
4885 }
158142c2
FB
4886 aExp = extractFloatx80Exp( a );
4887 if ( 0x403E <= aExp ) {
bb98fe42 4888 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 4889 return propagateFloatx80NaN(a, a, status);
158142c2
FB
4890 }
4891 return a;
4892 }
4893 if ( aExp < 0x3FFF ) {
4894 if ( ( aExp == 0 )
bb98fe42 4895 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
158142c2
FB
4896 return a;
4897 }
a2f2d288 4898 status->float_exception_flags |= float_flag_inexact;
158142c2 4899 aSign = extractFloatx80Sign( a );
a2f2d288 4900 switch (status->float_rounding_mode) {
158142c2 4901 case float_round_nearest_even:
bb98fe42 4902 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
4903 ) {
4904 return
4905 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
4906 }
4907 break;
f9288a76
PM
4908 case float_round_ties_away:
4909 if (aExp == 0x3FFE) {
4910 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
4911 }
4912 break;
158142c2
FB
4913 case float_round_down:
4914 return
4915 aSign ?
4916 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
4917 : packFloatx80( 0, 0, 0 );
4918 case float_round_up:
4919 return
4920 aSign ? packFloatx80( 1, 0, 0 )
4921 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
4922 }
4923 return packFloatx80( aSign, 0, 0 );
4924 }
4925 lastBitMask = 1;
4926 lastBitMask <<= 0x403E - aExp;
4927 roundBitsMask = lastBitMask - 1;
4928 z = a;
a2f2d288 4929 switch (status->float_rounding_mode) {
dc355b76 4930 case float_round_nearest_even:
158142c2 4931 z.low += lastBitMask>>1;
dc355b76
PM
4932 if ((z.low & roundBitsMask) == 0) {
4933 z.low &= ~lastBitMask;
4934 }
4935 break;
f9288a76
PM
4936 case float_round_ties_away:
4937 z.low += lastBitMask >> 1;
4938 break;
dc355b76
PM
4939 case float_round_to_zero:
4940 break;
4941 case float_round_up:
4942 if (!extractFloatx80Sign(z)) {
4943 z.low += roundBitsMask;
4944 }
4945 break;
4946 case float_round_down:
4947 if (extractFloatx80Sign(z)) {
158142c2
FB
4948 z.low += roundBitsMask;
4949 }
dc355b76
PM
4950 break;
4951 default:
4952 abort();
158142c2
FB
4953 }
4954 z.low &= ~ roundBitsMask;
4955 if ( z.low == 0 ) {
4956 ++z.high;
4957 z.low = LIT64( 0x8000000000000000 );
4958 }
a2f2d288
PM
4959 if (z.low != a.low) {
4960 status->float_exception_flags |= float_flag_inexact;
4961 }
158142c2
FB
4962 return z;
4963
4964}
4965
4966/*----------------------------------------------------------------------------
4967| Returns the result of adding the absolute values of the extended double-
4968| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
4969| negated before being returned. `zSign' is ignored if the result is a NaN.
4970| The addition is performed according to the IEC/IEEE Standard for Binary
4971| Floating-Point Arithmetic.
4972*----------------------------------------------------------------------------*/
4973
e5a41ffa
PM
4974static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
4975 float_status *status)
158142c2 4976{
f4014512 4977 int32_t aExp, bExp, zExp;
bb98fe42 4978 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 4979 int32_t expDiff;
158142c2
FB
4980
4981 aSig = extractFloatx80Frac( a );
4982 aExp = extractFloatx80Exp( a );
4983 bSig = extractFloatx80Frac( b );
4984 bExp = extractFloatx80Exp( b );
4985 expDiff = aExp - bExp;
4986 if ( 0 < expDiff ) {
4987 if ( aExp == 0x7FFF ) {
ff32e16e
PM
4988 if ((uint64_t)(aSig << 1)) {
4989 return propagateFloatx80NaN(a, b, status);
4990 }
158142c2
FB
4991 return a;
4992 }
4993 if ( bExp == 0 ) --expDiff;
4994 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4995 zExp = aExp;
4996 }
4997 else if ( expDiff < 0 ) {
4998 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4999 if ((uint64_t)(bSig << 1)) {
5000 return propagateFloatx80NaN(a, b, status);
5001 }
0f605c88
LV
5002 return packFloatx80(zSign,
5003 floatx80_infinity_high,
5004 floatx80_infinity_low);
158142c2
FB
5005 }
5006 if ( aExp == 0 ) ++expDiff;
5007 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5008 zExp = bExp;
5009 }
5010 else {
5011 if ( aExp == 0x7FFF ) {
bb98fe42 5012 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5013 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5014 }
5015 return a;
5016 }
5017 zSig1 = 0;
5018 zSig0 = aSig + bSig;
5019 if ( aExp == 0 ) {
5020 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5021 goto roundAndPack;
5022 }
5023 zExp = aExp;
5024 goto shiftRight1;
5025 }
5026 zSig0 = aSig + bSig;
bb98fe42 5027 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5028 shiftRight1:
5029 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
5030 zSig0 |= LIT64( 0x8000000000000000 );
5031 ++zExp;
5032 roundAndPack:
a2f2d288 5033 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5034 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5035}
5036
5037/*----------------------------------------------------------------------------
5038| Returns the result of subtracting the absolute values of the extended
5039| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5040| difference is negated before being returned. `zSign' is ignored if the
5041| result is a NaN. The subtraction is performed according to the IEC/IEEE
5042| Standard for Binary Floating-Point Arithmetic.
5043*----------------------------------------------------------------------------*/
5044
e5a41ffa
PM
5045static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5046 float_status *status)
158142c2 5047{
f4014512 5048 int32_t aExp, bExp, zExp;
bb98fe42 5049 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5050 int32_t expDiff;
158142c2
FB
5051
5052 aSig = extractFloatx80Frac( a );
5053 aExp = extractFloatx80Exp( a );
5054 bSig = extractFloatx80Frac( b );
5055 bExp = extractFloatx80Exp( b );
5056 expDiff = aExp - bExp;
5057 if ( 0 < expDiff ) goto aExpBigger;
5058 if ( expDiff < 0 ) goto bExpBigger;
5059 if ( aExp == 0x7FFF ) {
bb98fe42 5060 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5061 return propagateFloatx80NaN(a, b, status);
158142c2 5062 }
ff32e16e 5063 float_raise(float_flag_invalid, status);
af39bc8c 5064 return floatx80_default_nan(status);
158142c2
FB
5065 }
5066 if ( aExp == 0 ) {
5067 aExp = 1;
5068 bExp = 1;
5069 }
5070 zSig1 = 0;
5071 if ( bSig < aSig ) goto aBigger;
5072 if ( aSig < bSig ) goto bBigger;
a2f2d288 5073 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5074 bExpBigger:
5075 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5076 if ((uint64_t)(bSig << 1)) {
5077 return propagateFloatx80NaN(a, b, status);
5078 }
0f605c88
LV
5079 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5080 floatx80_infinity_low);
158142c2
FB
5081 }
5082 if ( aExp == 0 ) ++expDiff;
5083 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5084 bBigger:
5085 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5086 zExp = bExp;
5087 zSign ^= 1;
5088 goto normalizeRoundAndPack;
5089 aExpBigger:
5090 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5091 if ((uint64_t)(aSig << 1)) {
5092 return propagateFloatx80NaN(a, b, status);
5093 }
158142c2
FB
5094 return a;
5095 }
5096 if ( bExp == 0 ) --expDiff;
5097 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5098 aBigger:
5099 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5100 zExp = aExp;
5101 normalizeRoundAndPack:
a2f2d288 5102 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5103 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5104}
5105
5106/*----------------------------------------------------------------------------
5107| Returns the result of adding the extended double-precision floating-point
5108| values `a' and `b'. The operation is performed according to the IEC/IEEE
5109| Standard for Binary Floating-Point Arithmetic.
5110*----------------------------------------------------------------------------*/
5111
e5a41ffa 5112floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5113{
5114 flag aSign, bSign;
5115
d1eb8f2a
AD
5116 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5117 float_raise(float_flag_invalid, status);
5118 return floatx80_default_nan(status);
5119 }
158142c2
FB
5120 aSign = extractFloatx80Sign( a );
5121 bSign = extractFloatx80Sign( b );
5122 if ( aSign == bSign ) {
ff32e16e 5123 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5124 }
5125 else {
ff32e16e 5126 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5127 }
5128
5129}
5130
5131/*----------------------------------------------------------------------------
5132| Returns the result of subtracting the extended double-precision floating-
5133| point values `a' and `b'. The operation is performed according to the
5134| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5135*----------------------------------------------------------------------------*/
5136
e5a41ffa 5137floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5138{
5139 flag aSign, bSign;
5140
d1eb8f2a
AD
5141 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5142 float_raise(float_flag_invalid, status);
5143 return floatx80_default_nan(status);
5144 }
158142c2
FB
5145 aSign = extractFloatx80Sign( a );
5146 bSign = extractFloatx80Sign( b );
5147 if ( aSign == bSign ) {
ff32e16e 5148 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5149 }
5150 else {
ff32e16e 5151 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5152 }
5153
5154}
5155
5156/*----------------------------------------------------------------------------
5157| Returns the result of multiplying the extended double-precision floating-
5158| point values `a' and `b'. The operation is performed according to the
5159| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5160*----------------------------------------------------------------------------*/
5161
e5a41ffa 5162floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5163{
5164 flag aSign, bSign, zSign;
f4014512 5165 int32_t aExp, bExp, zExp;
bb98fe42 5166 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 5167
d1eb8f2a
AD
5168 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5169 float_raise(float_flag_invalid, status);
5170 return floatx80_default_nan(status);
5171 }
158142c2
FB
5172 aSig = extractFloatx80Frac( a );
5173 aExp = extractFloatx80Exp( a );
5174 aSign = extractFloatx80Sign( a );
5175 bSig = extractFloatx80Frac( b );
5176 bExp = extractFloatx80Exp( b );
5177 bSign = extractFloatx80Sign( b );
5178 zSign = aSign ^ bSign;
5179 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5180 if ( (uint64_t) ( aSig<<1 )
5181 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5182 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5183 }
5184 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
5185 return packFloatx80(zSign, floatx80_infinity_high,
5186 floatx80_infinity_low);
158142c2
FB
5187 }
5188 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5189 if ((uint64_t)(bSig << 1)) {
5190 return propagateFloatx80NaN(a, b, status);
5191 }
158142c2
FB
5192 if ( ( aExp | aSig ) == 0 ) {
5193 invalid:
ff32e16e 5194 float_raise(float_flag_invalid, status);
af39bc8c 5195 return floatx80_default_nan(status);
158142c2 5196 }
0f605c88
LV
5197 return packFloatx80(zSign, floatx80_infinity_high,
5198 floatx80_infinity_low);
158142c2
FB
5199 }
5200 if ( aExp == 0 ) {
5201 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5202 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5203 }
5204 if ( bExp == 0 ) {
5205 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5206 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5207 }
5208 zExp = aExp + bExp - 0x3FFE;
5209 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 5210 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
5211 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5212 --zExp;
5213 }
a2f2d288 5214 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5215 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5216}
5217
5218/*----------------------------------------------------------------------------
5219| Returns the result of dividing the extended double-precision floating-point
5220| value `a' by the corresponding value `b'. The operation is performed
5221| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5222*----------------------------------------------------------------------------*/
5223
e5a41ffa 5224floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5225{
5226 flag aSign, bSign, zSign;
f4014512 5227 int32_t aExp, bExp, zExp;
bb98fe42
AF
5228 uint64_t aSig, bSig, zSig0, zSig1;
5229 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 5230
d1eb8f2a
AD
5231 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5232 float_raise(float_flag_invalid, status);
5233 return floatx80_default_nan(status);
5234 }
158142c2
FB
5235 aSig = extractFloatx80Frac( a );
5236 aExp = extractFloatx80Exp( a );
5237 aSign = extractFloatx80Sign( a );
5238 bSig = extractFloatx80Frac( b );
5239 bExp = extractFloatx80Exp( b );
5240 bSign = extractFloatx80Sign( b );
5241 zSign = aSign ^ bSign;
5242 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5243 if ((uint64_t)(aSig << 1)) {
5244 return propagateFloatx80NaN(a, b, status);
5245 }
158142c2 5246 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5247 if ((uint64_t)(bSig << 1)) {
5248 return propagateFloatx80NaN(a, b, status);
5249 }
158142c2
FB
5250 goto invalid;
5251 }
0f605c88
LV
5252 return packFloatx80(zSign, floatx80_infinity_high,
5253 floatx80_infinity_low);
158142c2
FB
5254 }
5255 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5256 if ((uint64_t)(bSig << 1)) {
5257 return propagateFloatx80NaN(a, b, status);
5258 }
158142c2
FB
5259 return packFloatx80( zSign, 0, 0 );
5260 }
5261 if ( bExp == 0 ) {
5262 if ( bSig == 0 ) {
5263 if ( ( aExp | aSig ) == 0 ) {
5264 invalid:
ff32e16e 5265 float_raise(float_flag_invalid, status);
af39bc8c 5266 return floatx80_default_nan(status);
158142c2 5267 }
ff32e16e 5268 float_raise(float_flag_divbyzero, status);
0f605c88
LV
5269 return packFloatx80(zSign, floatx80_infinity_high,
5270 floatx80_infinity_low);
158142c2
FB
5271 }
5272 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5273 }
5274 if ( aExp == 0 ) {
5275 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5276 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5277 }
5278 zExp = aExp - bExp + 0x3FFE;
5279 rem1 = 0;
5280 if ( bSig <= aSig ) {
5281 shift128Right( aSig, 0, 1, &aSig, &rem1 );
5282 ++zExp;
5283 }
5284 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
5285 mul64To128( bSig, zSig0, &term0, &term1 );
5286 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 5287 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5288 --zSig0;
5289 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
5290 }
5291 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 5292 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
5293 mul64To128( bSig, zSig1, &term1, &term2 );
5294 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 5295 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5296 --zSig1;
5297 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
5298 }
5299 zSig1 |= ( ( rem1 | rem2 ) != 0 );
5300 }
a2f2d288 5301 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5302 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5303}
5304
5305/*----------------------------------------------------------------------------
5306| Returns the remainder of the extended double-precision floating-point value
5307| `a' with respect to the corresponding value `b'. The operation is performed
5308| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5309*----------------------------------------------------------------------------*/
5310
e5a41ffa 5311floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
158142c2 5312{
ed086f3d 5313 flag aSign, zSign;
f4014512 5314 int32_t aExp, bExp, expDiff;
bb98fe42
AF
5315 uint64_t aSig0, aSig1, bSig;
5316 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 5317
d1eb8f2a
AD
5318 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5319 float_raise(float_flag_invalid, status);
5320 return floatx80_default_nan(status);
5321 }
158142c2
FB
5322 aSig0 = extractFloatx80Frac( a );
5323 aExp = extractFloatx80Exp( a );
5324 aSign = extractFloatx80Sign( a );
5325 bSig = extractFloatx80Frac( b );
5326 bExp = extractFloatx80Exp( b );
158142c2 5327 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5328 if ( (uint64_t) ( aSig0<<1 )
5329 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5330 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5331 }
5332 goto invalid;
5333 }
5334 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5335 if ((uint64_t)(bSig << 1)) {
5336 return propagateFloatx80NaN(a, b, status);
5337 }
158142c2
FB
5338 return a;
5339 }
5340 if ( bExp == 0 ) {
5341 if ( bSig == 0 ) {
5342 invalid:
ff32e16e 5343 float_raise(float_flag_invalid, status);
af39bc8c 5344 return floatx80_default_nan(status);
158142c2
FB
5345 }
5346 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5347 }
5348 if ( aExp == 0 ) {
bb98fe42 5349 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
158142c2
FB
5350 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5351 }
5352 bSig |= LIT64( 0x8000000000000000 );
5353 zSign = aSign;
5354 expDiff = aExp - bExp;
5355 aSig1 = 0;
5356 if ( expDiff < 0 ) {
5357 if ( expDiff < -1 ) return a;
5358 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5359 expDiff = 0;
5360 }
5361 q = ( bSig <= aSig0 );
5362 if ( q ) aSig0 -= bSig;
5363 expDiff -= 64;
5364 while ( 0 < expDiff ) {
5365 q = estimateDiv128To64( aSig0, aSig1, bSig );
5366 q = ( 2 < q ) ? q - 2 : 0;
5367 mul64To128( bSig, q, &term0, &term1 );
5368 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5369 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5370 expDiff -= 62;
5371 }
5372 expDiff += 64;
5373 if ( 0 < expDiff ) {
5374 q = estimateDiv128To64( aSig0, aSig1, bSig );
5375 q = ( 2 < q ) ? q - 2 : 0;
5376 q >>= 64 - expDiff;
5377 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5378 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5379 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5380 while ( le128( term0, term1, aSig0, aSig1 ) ) {
5381 ++q;
5382 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5383 }
5384 }
5385 else {
5386 term1 = 0;
5387 term0 = bSig;
5388 }
5389 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
5390 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
5391 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
5392 && ( q & 1 ) )
5393 ) {
5394 aSig0 = alternateASig0;
5395 aSig1 = alternateASig1;
5396 zSign = ! zSign;
5397 }
5398 return
5399 normalizeRoundAndPackFloatx80(
ff32e16e 5400 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
5401
5402}
5403
5404/*----------------------------------------------------------------------------
5405| Returns the square root of the extended double-precision floating-point
5406| value `a'. The operation is performed according to the IEC/IEEE Standard
5407| for Binary Floating-Point Arithmetic.
5408*----------------------------------------------------------------------------*/
5409
e5a41ffa 5410floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2
FB
5411{
5412 flag aSign;
f4014512 5413 int32_t aExp, zExp;
bb98fe42
AF
5414 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5415 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 5416
d1eb8f2a
AD
5417 if (floatx80_invalid_encoding(a)) {
5418 float_raise(float_flag_invalid, status);
5419 return floatx80_default_nan(status);
5420 }
158142c2
FB
5421 aSig0 = extractFloatx80Frac( a );
5422 aExp = extractFloatx80Exp( a );
5423 aSign = extractFloatx80Sign( a );
5424 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5425 if ((uint64_t)(aSig0 << 1)) {
5426 return propagateFloatx80NaN(a, a, status);
5427 }
158142c2
FB
5428 if ( ! aSign ) return a;
5429 goto invalid;
5430 }
5431 if ( aSign ) {
5432 if ( ( aExp | aSig0 ) == 0 ) return a;
5433 invalid:
ff32e16e 5434 float_raise(float_flag_invalid, status);
af39bc8c 5435 return floatx80_default_nan(status);
158142c2
FB
5436 }
5437 if ( aExp == 0 ) {
5438 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5439 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5440 }
5441 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5442 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5443 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5444 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5445 doubleZSig0 = zSig0<<1;
5446 mul64To128( zSig0, zSig0, &term0, &term1 );
5447 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 5448 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5449 --zSig0;
5450 doubleZSig0 -= 2;
5451 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5452 }
5453 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5454 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5455 if ( zSig1 == 0 ) zSig1 = 1;
5456 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5457 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5458 mul64To128( zSig1, zSig1, &term2, &term3 );
5459 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 5460 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5461 --zSig1;
5462 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5463 term3 |= 1;
5464 term2 |= doubleZSig0;
5465 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5466 }
5467 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5468 }
5469 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5470 zSig0 |= doubleZSig0;
a2f2d288
PM
5471 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5472 0, zExp, zSig0, zSig1, status);
158142c2
FB
5473}
5474
5475/*----------------------------------------------------------------------------
b689362d
AJ
5476| Returns 1 if the extended double-precision floating-point value `a' is equal
5477| to the corresponding value `b', and 0 otherwise. The invalid exception is
5478| raised if either operand is a NaN. Otherwise, the comparison is performed
5479| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5480*----------------------------------------------------------------------------*/
5481
e5a41ffa 5482int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5483{
5484
d1eb8f2a
AD
5485 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5486 || (extractFloatx80Exp(a) == 0x7FFF
5487 && (uint64_t) (extractFloatx80Frac(a) << 1))
5488 || (extractFloatx80Exp(b) == 0x7FFF
5489 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5490 ) {
ff32e16e 5491 float_raise(float_flag_invalid, status);
158142c2
FB
5492 return 0;
5493 }
5494 return
5495 ( a.low == b.low )
5496 && ( ( a.high == b.high )
5497 || ( ( a.low == 0 )
bb98fe42 5498 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5499 );
5500
5501}
5502
5503/*----------------------------------------------------------------------------
5504| Returns 1 if the extended double-precision floating-point value `a' is
5505| less than or equal to the corresponding value `b', and 0 otherwise. The
f5a64251
AJ
5506| invalid exception is raised if either operand is a NaN. The comparison is
5507| performed according to the IEC/IEEE Standard for Binary Floating-Point
5508| Arithmetic.
158142c2
FB
5509*----------------------------------------------------------------------------*/
5510
e5a41ffa 5511int floatx80_le(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5512{
5513 flag aSign, bSign;
5514
d1eb8f2a
AD
5515 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5516 || (extractFloatx80Exp(a) == 0x7FFF
5517 && (uint64_t) (extractFloatx80Frac(a) << 1))
5518 || (extractFloatx80Exp(b) == 0x7FFF
5519 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5520 ) {
ff32e16e 5521 float_raise(float_flag_invalid, status);
158142c2
FB
5522 return 0;
5523 }
5524 aSign = extractFloatx80Sign( a );
5525 bSign = extractFloatx80Sign( b );
5526 if ( aSign != bSign ) {
5527 return
5528 aSign
bb98fe42 5529 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5530 == 0 );
5531 }
5532 return
5533 aSign ? le128( b.high, b.low, a.high, a.low )
5534 : le128( a.high, a.low, b.high, b.low );
5535
5536}
5537
5538/*----------------------------------------------------------------------------
5539| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5540| less than the corresponding value `b', and 0 otherwise. The invalid
5541| exception is raised if either operand is a NaN. The comparison is performed
5542| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5543*----------------------------------------------------------------------------*/
5544
e5a41ffa 5545int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5546{
5547 flag aSign, bSign;
5548
d1eb8f2a
AD
5549 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5550 || (extractFloatx80Exp(a) == 0x7FFF
5551 && (uint64_t) (extractFloatx80Frac(a) << 1))
5552 || (extractFloatx80Exp(b) == 0x7FFF
5553 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5554 ) {
ff32e16e 5555 float_raise(float_flag_invalid, status);
158142c2
FB
5556 return 0;
5557 }
5558 aSign = extractFloatx80Sign( a );
5559 bSign = extractFloatx80Sign( b );
5560 if ( aSign != bSign ) {
5561 return
5562 aSign
bb98fe42 5563 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5564 != 0 );
5565 }
5566 return
5567 aSign ? lt128( b.high, b.low, a.high, a.low )
5568 : lt128( a.high, a.low, b.high, b.low );
5569
5570}
5571
67b7861d
AJ
5572/*----------------------------------------------------------------------------
5573| Returns 1 if the extended double-precision floating-point values `a' and `b'
f5a64251
AJ
5574| cannot be compared, and 0 otherwise. The invalid exception is raised if
5575| either operand is a NaN. The comparison is performed according to the
5576| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
67b7861d 5577*----------------------------------------------------------------------------*/
e5a41ffa 5578int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
67b7861d 5579{
d1eb8f2a
AD
5580 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5581 || (extractFloatx80Exp(a) == 0x7FFF
5582 && (uint64_t) (extractFloatx80Frac(a) << 1))
5583 || (extractFloatx80Exp(b) == 0x7FFF
5584 && (uint64_t) (extractFloatx80Frac(b) << 1))
67b7861d 5585 ) {
ff32e16e 5586 float_raise(float_flag_invalid, status);
67b7861d
AJ
5587 return 1;
5588 }
5589 return 0;
5590}
5591
158142c2 5592/*----------------------------------------------------------------------------
b689362d 5593| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5594| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5595| cause an exception. The comparison is performed according to the IEC/IEEE
5596| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5597*----------------------------------------------------------------------------*/
5598
e5a41ffa 5599int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5600{
5601
d1eb8f2a
AD
5602 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5603 float_raise(float_flag_invalid, status);
5604 return 0;
5605 }
158142c2 5606 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5607 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5608 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5609 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5610 ) {
af39bc8c
AM
5611 if (floatx80_is_signaling_nan(a, status)
5612 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5613 float_raise(float_flag_invalid, status);
b689362d 5614 }
158142c2
FB
5615 return 0;
5616 }
5617 return
5618 ( a.low == b.low )
5619 && ( ( a.high == b.high )
5620 || ( ( a.low == 0 )
bb98fe42 5621 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5622 );
5623
5624}
5625
5626/*----------------------------------------------------------------------------
5627| Returns 1 if the extended double-precision floating-point value `a' is less
5628| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
5629| do not cause an exception. Otherwise, the comparison is performed according
5630| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5631*----------------------------------------------------------------------------*/
5632
e5a41ffa 5633int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5634{
5635 flag aSign, bSign;
5636
d1eb8f2a
AD
5637 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5638 float_raise(float_flag_invalid, status);
5639 return 0;
5640 }
158142c2 5641 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5642 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5643 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5644 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5645 ) {
af39bc8c
AM
5646 if (floatx80_is_signaling_nan(a, status)
5647 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5648 float_raise(float_flag_invalid, status);
158142c2
FB
5649 }
5650 return 0;
5651 }
5652 aSign = extractFloatx80Sign( a );
5653 bSign = extractFloatx80Sign( b );
5654 if ( aSign != bSign ) {
5655 return
5656 aSign
bb98fe42 5657 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5658 == 0 );
5659 }
5660 return
5661 aSign ? le128( b.high, b.low, a.high, a.low )
5662 : le128( a.high, a.low, b.high, b.low );
5663
5664}
5665
5666/*----------------------------------------------------------------------------
5667| Returns 1 if the extended double-precision floating-point value `a' is less
5668| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
5669| an exception. Otherwise, the comparison is performed according to the
5670| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5671*----------------------------------------------------------------------------*/
5672
e5a41ffa 5673int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5674{
5675 flag aSign, bSign;
5676
d1eb8f2a
AD
5677 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5678 float_raise(float_flag_invalid, status);
5679 return 0;
5680 }
158142c2 5681 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5682 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5683 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5684 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5685 ) {
af39bc8c
AM
5686 if (floatx80_is_signaling_nan(a, status)
5687 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5688 float_raise(float_flag_invalid, status);
158142c2
FB
5689 }
5690 return 0;
5691 }
5692 aSign = extractFloatx80Sign( a );
5693 bSign = extractFloatx80Sign( b );
5694 if ( aSign != bSign ) {
5695 return
5696 aSign
bb98fe42 5697 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5698 != 0 );
5699 }
5700 return
5701 aSign ? lt128( b.high, b.low, a.high, a.low )
5702 : lt128( a.high, a.low, b.high, b.low );
5703
5704}
5705
67b7861d
AJ
5706/*----------------------------------------------------------------------------
5707| Returns 1 if the extended double-precision floating-point values `a' and `b'
5708| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
5709| The comparison is performed according to the IEC/IEEE Standard for Binary
5710| Floating-Point Arithmetic.
5711*----------------------------------------------------------------------------*/
e5a41ffa 5712int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
67b7861d 5713{
d1eb8f2a
AD
5714 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5715 float_raise(float_flag_invalid, status);
5716 return 1;
5717 }
67b7861d
AJ
5718 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5719 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5720 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5721 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5722 ) {
af39bc8c
AM
5723 if (floatx80_is_signaling_nan(a, status)
5724 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5725 float_raise(float_flag_invalid, status);
67b7861d
AJ
5726 }
5727 return 1;
5728 }
5729 return 0;
5730}
5731
158142c2
FB
5732/*----------------------------------------------------------------------------
5733| Returns the result of converting the quadruple-precision floating-point
5734| value `a' to the 32-bit two's complement integer format. The conversion
5735| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5736| Arithmetic---which means in particular that the conversion is rounded
5737| according to the current rounding mode. If `a' is a NaN, the largest
5738| positive integer is returned. Otherwise, if the conversion overflows, the
5739| largest integer with the same sign as `a' is returned.
5740*----------------------------------------------------------------------------*/
5741
f4014512 5742int32_t float128_to_int32(float128 a, float_status *status)
158142c2
FB
5743{
5744 flag aSign;
f4014512 5745 int32_t aExp, shiftCount;
bb98fe42 5746 uint64_t aSig0, aSig1;
158142c2
FB
5747
5748 aSig1 = extractFloat128Frac1( a );
5749 aSig0 = extractFloat128Frac0( a );
5750 aExp = extractFloat128Exp( a );
5751 aSign = extractFloat128Sign( a );
5752 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5753 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5754 aSig0 |= ( aSig1 != 0 );
5755 shiftCount = 0x4028 - aExp;
5756 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 5757 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
5758
5759}
5760
5761/*----------------------------------------------------------------------------
5762| Returns the result of converting the quadruple-precision floating-point
5763| value `a' to the 32-bit two's complement integer format. The conversion
5764| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5765| Arithmetic, except that the conversion is always rounded toward zero. If
5766| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
5767| conversion overflows, the largest integer with the same sign as `a' is
5768| returned.
5769*----------------------------------------------------------------------------*/
5770
f4014512 5771int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2
FB
5772{
5773 flag aSign;
f4014512 5774 int32_t aExp, shiftCount;
bb98fe42 5775 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 5776 int32_t z;
158142c2
FB
5777
5778 aSig1 = extractFloat128Frac1( a );
5779 aSig0 = extractFloat128Frac0( a );
5780 aExp = extractFloat128Exp( a );
5781 aSign = extractFloat128Sign( a );
5782 aSig0 |= ( aSig1 != 0 );
5783 if ( 0x401E < aExp ) {
5784 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5785 goto invalid;
5786 }
5787 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5788 if (aExp || aSig0) {
5789 status->float_exception_flags |= float_flag_inexact;
5790 }
158142c2
FB
5791 return 0;
5792 }
5793 aSig0 |= LIT64( 0x0001000000000000 );
5794 shiftCount = 0x402F - aExp;
5795 savedASig = aSig0;
5796 aSig0 >>= shiftCount;
5797 z = aSig0;
5798 if ( aSign ) z = - z;
5799 if ( ( z < 0 ) ^ aSign ) {
5800 invalid:
ff32e16e 5801 float_raise(float_flag_invalid, status);
bb98fe42 5802 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5803 }
5804 if ( ( aSig0<<shiftCount ) != savedASig ) {
a2f2d288 5805 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5806 }
5807 return z;
5808
5809}
5810
5811/*----------------------------------------------------------------------------
5812| Returns the result of converting the quadruple-precision floating-point
5813| value `a' to the 64-bit two's complement integer format. The conversion
5814| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5815| Arithmetic---which means in particular that the conversion is rounded
5816| according to the current rounding mode. If `a' is a NaN, the largest
5817| positive integer is returned. Otherwise, if the conversion overflows, the
5818| largest integer with the same sign as `a' is returned.
5819*----------------------------------------------------------------------------*/
5820
f42c2224 5821int64_t float128_to_int64(float128 a, float_status *status)
158142c2
FB
5822{
5823 flag aSign;
f4014512 5824 int32_t aExp, shiftCount;
bb98fe42 5825 uint64_t aSig0, aSig1;
158142c2
FB
5826
5827 aSig1 = extractFloat128Frac1( a );
5828 aSig0 = extractFloat128Frac0( a );
5829 aExp = extractFloat128Exp( a );
5830 aSign = extractFloat128Sign( a );
5831 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5832 shiftCount = 0x402F - aExp;
5833 if ( shiftCount <= 0 ) {
5834 if ( 0x403E < aExp ) {
ff32e16e 5835 float_raise(float_flag_invalid, status);
158142c2
FB
5836 if ( ! aSign
5837 || ( ( aExp == 0x7FFF )
5838 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5839 )
5840 ) {
5841 return LIT64( 0x7FFFFFFFFFFFFFFF );
5842 }
bb98fe42 5843 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5844 }
5845 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5846 }
5847 else {
5848 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5849 }
ff32e16e 5850 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
5851
5852}
5853
5854/*----------------------------------------------------------------------------
5855| Returns the result of converting the quadruple-precision floating-point
5856| value `a' to the 64-bit two's complement integer format. The conversion
5857| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5858| Arithmetic, except that the conversion is always rounded toward zero.
5859| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
5860| the conversion overflows, the largest integer with the same sign as `a' is
5861| returned.
5862*----------------------------------------------------------------------------*/
5863
f42c2224 5864int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2
FB
5865{
5866 flag aSign;
f4014512 5867 int32_t aExp, shiftCount;
bb98fe42 5868 uint64_t aSig0, aSig1;
f42c2224 5869 int64_t z;
158142c2
FB
5870
5871 aSig1 = extractFloat128Frac1( a );
5872 aSig0 = extractFloat128Frac0( a );
5873 aExp = extractFloat128Exp( a );
5874 aSign = extractFloat128Sign( a );
5875 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5876 shiftCount = aExp - 0x402F;
5877 if ( 0 < shiftCount ) {
5878 if ( 0x403E <= aExp ) {
5879 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5880 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
5881 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
a2f2d288
PM
5882 if (aSig1) {
5883 status->float_exception_flags |= float_flag_inexact;
5884 }
158142c2
FB
5885 }
5886 else {
ff32e16e 5887 float_raise(float_flag_invalid, status);
158142c2
FB
5888 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
5889 return LIT64( 0x7FFFFFFFFFFFFFFF );
5890 }
5891 }
bb98fe42 5892 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5893 }
5894 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 5895 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
a2f2d288 5896 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5897 }
5898 }
5899 else {
5900 if ( aExp < 0x3FFF ) {
5901 if ( aExp | aSig0 | aSig1 ) {
a2f2d288 5902 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5903 }
5904 return 0;
5905 }
5906 z = aSig0>>( - shiftCount );
5907 if ( aSig1
bb98fe42 5908 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
a2f2d288 5909 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5910 }
5911 }
5912 if ( aSign ) z = - z;
5913 return z;
5914
5915}
5916
2e6d8568
BR
5917/*----------------------------------------------------------------------------
5918| Returns the result of converting the quadruple-precision floating-point value
5919| `a' to the 64-bit unsigned integer format. The conversion is
5920| performed according to the IEC/IEEE Standard for Binary Floating-Point
5921| Arithmetic---which means in particular that the conversion is rounded
5922| according to the current rounding mode. If `a' is a NaN, the largest
5923| positive integer is returned. If the conversion overflows, the
5924| largest unsigned integer is returned. If 'a' is negative, the value is
5925| rounded and zero is returned; negative values that do not round to zero
5926| will raise the inexact exception.
5927*----------------------------------------------------------------------------*/
5928
5929uint64_t float128_to_uint64(float128 a, float_status *status)
5930{
5931 flag aSign;
5932 int aExp;
5933 int shiftCount;
5934 uint64_t aSig0, aSig1;
5935
5936 aSig0 = extractFloat128Frac0(a);
5937 aSig1 = extractFloat128Frac1(a);
5938 aExp = extractFloat128Exp(a);
5939 aSign = extractFloat128Sign(a);
5940 if (aSign && (aExp > 0x3FFE)) {
5941 float_raise(float_flag_invalid, status);
5942 if (float128_is_any_nan(a)) {
5943 return LIT64(0xFFFFFFFFFFFFFFFF);
5944 } else {
5945 return 0;
5946 }
5947 }
5948 if (aExp) {
5949 aSig0 |= LIT64(0x0001000000000000);
5950 }
5951 shiftCount = 0x402F - aExp;
5952 if (shiftCount <= 0) {
5953 if (0x403E < aExp) {
5954 float_raise(float_flag_invalid, status);
5955 return LIT64(0xFFFFFFFFFFFFFFFF);
5956 }
5957 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
5958 } else {
5959 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
5960 }
5961 return roundAndPackUint64(aSign, aSig0, aSig1, status);
5962}
5963
5964uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
5965{
5966 uint64_t v;
5967 signed char current_rounding_mode = status->float_rounding_mode;
5968
5969 set_float_rounding_mode(float_round_to_zero, status);
5970 v = float128_to_uint64(a, status);
5971 set_float_rounding_mode(current_rounding_mode, status);
5972
5973 return v;
5974}
5975
158142c2
FB
5976/*----------------------------------------------------------------------------
5977| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
5978| value `a' to the 32-bit unsigned integer format. The conversion
5979| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5980| Arithmetic except that the conversion is always rounded toward zero.
5981| If `a' is a NaN, the largest positive integer is returned. Otherwise,
5982| if the conversion overflows, the largest unsigned integer is returned.
5983| If 'a' is negative, the value is rounded and zero is returned; negative
5984| values that do not round to zero will raise the inexact exception.
5985*----------------------------------------------------------------------------*/
5986
5987uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
5988{
5989 uint64_t v;
5990 uint32_t res;
5991 int old_exc_flags = get_float_exception_flags(status);
5992
5993 v = float128_to_uint64_round_to_zero(a, status);
5994 if (v > 0xffffffff) {
5995 res = 0xffffffff;
5996 } else {
5997 return v;
5998 }
5999 set_float_exception_flags(old_exc_flags, status);
6000 float_raise(float_flag_invalid, status);
6001 return res;
6002}
6003
6004/*----------------------------------------------------------------------------
6005| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
6006| value `a' to the single-precision floating-point format. The conversion
6007| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6008| Arithmetic.
6009*----------------------------------------------------------------------------*/
6010
e5a41ffa 6011float32 float128_to_float32(float128 a, float_status *status)
158142c2
FB
6012{
6013 flag aSign;
f4014512 6014 int32_t aExp;
bb98fe42
AF
6015 uint64_t aSig0, aSig1;
6016 uint32_t zSig;
158142c2
FB
6017
6018 aSig1 = extractFloat128Frac1( a );
6019 aSig0 = extractFloat128Frac0( a );
6020 aExp = extractFloat128Exp( a );
6021 aSign = extractFloat128Sign( a );
6022 if ( aExp == 0x7FFF ) {
6023 if ( aSig0 | aSig1 ) {
ff32e16e 6024 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
6025 }
6026 return packFloat32( aSign, 0xFF, 0 );
6027 }
6028 aSig0 |= ( aSig1 != 0 );
6029 shift64RightJamming( aSig0, 18, &aSig0 );
6030 zSig = aSig0;
6031 if ( aExp || zSig ) {
6032 zSig |= 0x40000000;
6033 aExp -= 0x3F81;
6034 }
ff32e16e 6035 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
6036
6037}
6038
6039/*----------------------------------------------------------------------------
6040| Returns the result of converting the quadruple-precision floating-point
6041| value `a' to the double-precision floating-point format. The conversion
6042| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6043| Arithmetic.
6044*----------------------------------------------------------------------------*/
6045
e5a41ffa 6046float64 float128_to_float64(float128 a, float_status *status)
158142c2
FB
6047{
6048 flag aSign;
f4014512 6049 int32_t aExp;
bb98fe42 6050 uint64_t aSig0, aSig1;
158142c2
FB
6051
6052 aSig1 = extractFloat128Frac1( a );
6053 aSig0 = extractFloat128Frac0( a );
6054 aExp = extractFloat128Exp( a );
6055 aSign = extractFloat128Sign( a );
6056 if ( aExp == 0x7FFF ) {
6057 if ( aSig0 | aSig1 ) {
ff32e16e 6058 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
6059 }
6060 return packFloat64( aSign, 0x7FF, 0 );
6061 }
6062 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6063 aSig0 |= ( aSig1 != 0 );
6064 if ( aExp || aSig0 ) {
6065 aSig0 |= LIT64( 0x4000000000000000 );
6066 aExp -= 0x3C01;
6067 }
ff32e16e 6068 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
6069
6070}
6071
158142c2
FB
6072/*----------------------------------------------------------------------------
6073| Returns the result of converting the quadruple-precision floating-point
6074| value `a' to the extended double-precision floating-point format. The
6075| conversion is performed according to the IEC/IEEE Standard for Binary
6076| Floating-Point Arithmetic.
6077*----------------------------------------------------------------------------*/
6078
e5a41ffa 6079floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2
FB
6080{
6081 flag aSign;
f4014512 6082 int32_t aExp;
bb98fe42 6083 uint64_t aSig0, aSig1;
158142c2
FB
6084
6085 aSig1 = extractFloat128Frac1( a );
6086 aSig0 = extractFloat128Frac0( a );
6087 aExp = extractFloat128Exp( a );
6088 aSign = extractFloat128Sign( a );
6089 if ( aExp == 0x7FFF ) {
6090 if ( aSig0 | aSig1 ) {
ff32e16e 6091 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
158142c2 6092 }
0f605c88
LV
6093 return packFloatx80(aSign, floatx80_infinity_high,
6094 floatx80_infinity_low);
158142c2
FB
6095 }
6096 if ( aExp == 0 ) {
6097 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6098 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6099 }
6100 else {
6101 aSig0 |= LIT64( 0x0001000000000000 );
6102 }
6103 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6104 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6105
6106}
6107
158142c2
FB
6108/*----------------------------------------------------------------------------
6109| Rounds the quadruple-precision floating-point value `a' to an integer, and
6110| returns the result as a quadruple-precision floating-point value. The
6111| operation is performed according to the IEC/IEEE Standard for Binary
6112| Floating-Point Arithmetic.
6113*----------------------------------------------------------------------------*/
6114
e5a41ffa 6115float128 float128_round_to_int(float128 a, float_status *status)
158142c2
FB
6116{
6117 flag aSign;
f4014512 6118 int32_t aExp;
bb98fe42 6119 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
6120 float128 z;
6121
6122 aExp = extractFloat128Exp( a );
6123 if ( 0x402F <= aExp ) {
6124 if ( 0x406F <= aExp ) {
6125 if ( ( aExp == 0x7FFF )
6126 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6127 ) {
ff32e16e 6128 return propagateFloat128NaN(a, a, status);
158142c2
FB
6129 }
6130 return a;
6131 }
6132 lastBitMask = 1;
6133 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6134 roundBitsMask = lastBitMask - 1;
6135 z = a;
a2f2d288 6136 switch (status->float_rounding_mode) {
dc355b76 6137 case float_round_nearest_even:
158142c2
FB
6138 if ( lastBitMask ) {
6139 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6140 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6141 }
6142 else {
bb98fe42 6143 if ( (int64_t) z.low < 0 ) {
158142c2 6144 ++z.high;
bb98fe42 6145 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
6146 }
6147 }
dc355b76 6148 break;
f9288a76
PM
6149 case float_round_ties_away:
6150 if (lastBitMask) {
6151 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6152 } else {
6153 if ((int64_t) z.low < 0) {
6154 ++z.high;
6155 }
6156 }
6157 break;
dc355b76
PM
6158 case float_round_to_zero:
6159 break;
6160 case float_round_up:
6161 if (!extractFloat128Sign(z)) {
6162 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6163 }
6164 break;
6165 case float_round_down:
6166 if (extractFloat128Sign(z)) {
6167 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 6168 }
dc355b76
PM
6169 break;
6170 default:
6171 abort();
158142c2
FB
6172 }
6173 z.low &= ~ roundBitsMask;
6174 }
6175 else {
6176 if ( aExp < 0x3FFF ) {
bb98fe42 6177 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
a2f2d288 6178 status->float_exception_flags |= float_flag_inexact;
158142c2 6179 aSign = extractFloat128Sign( a );
a2f2d288 6180 switch (status->float_rounding_mode) {
158142c2
FB
6181 case float_round_nearest_even:
6182 if ( ( aExp == 0x3FFE )
6183 && ( extractFloat128Frac0( a )
6184 | extractFloat128Frac1( a ) )
6185 ) {
6186 return packFloat128( aSign, 0x3FFF, 0, 0 );
6187 }
6188 break;
f9288a76
PM
6189 case float_round_ties_away:
6190 if (aExp == 0x3FFE) {
6191 return packFloat128(aSign, 0x3FFF, 0, 0);
6192 }
6193 break;
158142c2
FB
6194 case float_round_down:
6195 return
6196 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6197 : packFloat128( 0, 0, 0, 0 );
6198 case float_round_up:
6199 return
6200 aSign ? packFloat128( 1, 0, 0, 0 )
6201 : packFloat128( 0, 0x3FFF, 0, 0 );
6202 }
6203 return packFloat128( aSign, 0, 0, 0 );
6204 }
6205 lastBitMask = 1;
6206 lastBitMask <<= 0x402F - aExp;
6207 roundBitsMask = lastBitMask - 1;
6208 z.low = 0;
6209 z.high = a.high;
a2f2d288 6210 switch (status->float_rounding_mode) {
dc355b76 6211 case float_round_nearest_even:
158142c2
FB
6212 z.high += lastBitMask>>1;
6213 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6214 z.high &= ~ lastBitMask;
6215 }
dc355b76 6216 break;
f9288a76
PM
6217 case float_round_ties_away:
6218 z.high += lastBitMask>>1;
6219 break;
dc355b76
PM
6220 case float_round_to_zero:
6221 break;
6222 case float_round_up:
6223 if (!extractFloat128Sign(z)) {
158142c2
FB
6224 z.high |= ( a.low != 0 );
6225 z.high += roundBitsMask;
6226 }
dc355b76
PM
6227 break;
6228 case float_round_down:
6229 if (extractFloat128Sign(z)) {
6230 z.high |= (a.low != 0);
6231 z.high += roundBitsMask;
6232 }
6233 break;
6234 default:
6235 abort();
158142c2
FB
6236 }
6237 z.high &= ~ roundBitsMask;
6238 }
6239 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
a2f2d288 6240 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6241 }
6242 return z;
6243
6244}
6245
6246/*----------------------------------------------------------------------------
6247| Returns the result of adding the absolute values of the quadruple-precision
6248| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
6249| before being returned. `zSign' is ignored if the result is a NaN.
6250| The addition is performed according to the IEC/IEEE Standard for Binary
6251| Floating-Point Arithmetic.
6252*----------------------------------------------------------------------------*/
6253
e5a41ffa
PM
6254static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
6255 float_status *status)
158142c2 6256{
f4014512 6257 int32_t aExp, bExp, zExp;
bb98fe42 6258 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 6259 int32_t expDiff;
158142c2
FB
6260
6261 aSig1 = extractFloat128Frac1( a );
6262 aSig0 = extractFloat128Frac0( a );
6263 aExp = extractFloat128Exp( a );
6264 bSig1 = extractFloat128Frac1( b );
6265 bSig0 = extractFloat128Frac0( b );
6266 bExp = extractFloat128Exp( b );
6267 expDiff = aExp - bExp;
6268 if ( 0 < expDiff ) {
6269 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6270 if (aSig0 | aSig1) {
6271 return propagateFloat128NaN(a, b, status);
6272 }
158142c2
FB
6273 return a;
6274 }
6275 if ( bExp == 0 ) {
6276 --expDiff;
6277 }
6278 else {
6279 bSig0 |= LIT64( 0x0001000000000000 );
6280 }
6281 shift128ExtraRightJamming(
6282 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6283 zExp = aExp;
6284 }
6285 else if ( expDiff < 0 ) {
6286 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6287 if (bSig0 | bSig1) {
6288 return propagateFloat128NaN(a, b, status);
6289 }
158142c2
FB
6290 return packFloat128( zSign, 0x7FFF, 0, 0 );
6291 }
6292 if ( aExp == 0 ) {
6293 ++expDiff;
6294 }
6295 else {
6296 aSig0 |= LIT64( 0x0001000000000000 );
6297 }
6298 shift128ExtraRightJamming(
6299 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6300 zExp = bExp;
6301 }
6302 else {
6303 if ( aExp == 0x7FFF ) {
6304 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6305 return propagateFloat128NaN(a, b, status);
158142c2
FB
6306 }
6307 return a;
6308 }
6309 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 6310 if ( aExp == 0 ) {
a2f2d288 6311 if (status->flush_to_zero) {
e6afc87f 6312 if (zSig0 | zSig1) {
ff32e16e 6313 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
6314 }
6315 return packFloat128(zSign, 0, 0, 0);
6316 }
fe76d976
PB
6317 return packFloat128( zSign, 0, zSig0, zSig1 );
6318 }
158142c2
FB
6319 zSig2 = 0;
6320 zSig0 |= LIT64( 0x0002000000000000 );
6321 zExp = aExp;
6322 goto shiftRight1;
6323 }
6324 aSig0 |= LIT64( 0x0001000000000000 );
6325 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6326 --zExp;
6327 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
6328 ++zExp;
6329 shiftRight1:
6330 shift128ExtraRightJamming(
6331 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6332 roundAndPack:
ff32e16e 6333 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6334
6335}
6336
6337/*----------------------------------------------------------------------------
6338| Returns the result of subtracting the absolute values of the quadruple-
6339| precision floating-point values `a' and `b'. If `zSign' is 1, the
6340| difference is negated before being returned. `zSign' is ignored if the
6341| result is a NaN. The subtraction is performed according to the IEC/IEEE
6342| Standard for Binary Floating-Point Arithmetic.
6343*----------------------------------------------------------------------------*/
6344
e5a41ffa
PM
6345static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
6346 float_status *status)
158142c2 6347{
f4014512 6348 int32_t aExp, bExp, zExp;
bb98fe42 6349 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 6350 int32_t expDiff;
158142c2
FB
6351
6352 aSig1 = extractFloat128Frac1( a );
6353 aSig0 = extractFloat128Frac0( a );
6354 aExp = extractFloat128Exp( a );
6355 bSig1 = extractFloat128Frac1( b );
6356 bSig0 = extractFloat128Frac0( b );
6357 bExp = extractFloat128Exp( b );
6358 expDiff = aExp - bExp;
6359 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6360 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
6361 if ( 0 < expDiff ) goto aExpBigger;
6362 if ( expDiff < 0 ) goto bExpBigger;
6363 if ( aExp == 0x7FFF ) {
6364 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6365 return propagateFloat128NaN(a, b, status);
158142c2 6366 }
ff32e16e 6367 float_raise(float_flag_invalid, status);
af39bc8c 6368 return float128_default_nan(status);
158142c2
FB
6369 }
6370 if ( aExp == 0 ) {
6371 aExp = 1;
6372 bExp = 1;
6373 }
6374 if ( bSig0 < aSig0 ) goto aBigger;
6375 if ( aSig0 < bSig0 ) goto bBigger;
6376 if ( bSig1 < aSig1 ) goto aBigger;
6377 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
6378 return packFloat128(status->float_rounding_mode == float_round_down,
6379 0, 0, 0);
158142c2
FB
6380 bExpBigger:
6381 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6382 if (bSig0 | bSig1) {
6383 return propagateFloat128NaN(a, b, status);
6384 }
158142c2
FB
6385 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
6386 }
6387 if ( aExp == 0 ) {
6388 ++expDiff;
6389 }
6390 else {
6391 aSig0 |= LIT64( 0x4000000000000000 );
6392 }
6393 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6394 bSig0 |= LIT64( 0x4000000000000000 );
6395 bBigger:
6396 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
6397 zExp = bExp;
6398 zSign ^= 1;
6399 goto normalizeRoundAndPack;
6400 aExpBigger:
6401 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6402 if (aSig0 | aSig1) {
6403 return propagateFloat128NaN(a, b, status);
6404 }
158142c2
FB
6405 return a;
6406 }
6407 if ( bExp == 0 ) {
6408 --expDiff;
6409 }
6410 else {
6411 bSig0 |= LIT64( 0x4000000000000000 );
6412 }
6413 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
6414 aSig0 |= LIT64( 0x4000000000000000 );
6415 aBigger:
6416 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6417 zExp = aExp;
6418 normalizeRoundAndPack:
6419 --zExp;
ff32e16e
PM
6420 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
6421 status);
158142c2
FB
6422
6423}
6424
6425/*----------------------------------------------------------------------------
6426| Returns the result of adding the quadruple-precision floating-point values
6427| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
6428| for Binary Floating-Point Arithmetic.
6429*----------------------------------------------------------------------------*/
6430
e5a41ffa 6431float128 float128_add(float128 a, float128 b, float_status *status)
158142c2
FB
6432{
6433 flag aSign, bSign;
6434
6435 aSign = extractFloat128Sign( a );
6436 bSign = extractFloat128Sign( b );
6437 if ( aSign == bSign ) {
ff32e16e 6438 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6439 }
6440 else {
ff32e16e 6441 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6442 }
6443
6444}
6445
6446/*----------------------------------------------------------------------------
6447| Returns the result of subtracting the quadruple-precision floating-point
6448| values `a' and `b'. The operation is performed according to the IEC/IEEE
6449| Standard for Binary Floating-Point Arithmetic.
6450*----------------------------------------------------------------------------*/
6451
e5a41ffa 6452float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2
FB
6453{
6454 flag aSign, bSign;
6455
6456 aSign = extractFloat128Sign( a );
6457 bSign = extractFloat128Sign( b );
6458 if ( aSign == bSign ) {
ff32e16e 6459 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6460 }
6461 else {
ff32e16e 6462 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6463 }
6464
6465}
6466
6467/*----------------------------------------------------------------------------
6468| Returns the result of multiplying the quadruple-precision floating-point
6469| values `a' and `b'. The operation is performed according to the IEC/IEEE
6470| Standard for Binary Floating-Point Arithmetic.
6471*----------------------------------------------------------------------------*/
6472
e5a41ffa 6473float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2
FB
6474{
6475 flag aSign, bSign, zSign;
f4014512 6476 int32_t aExp, bExp, zExp;
bb98fe42 6477 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
6478
6479 aSig1 = extractFloat128Frac1( a );
6480 aSig0 = extractFloat128Frac0( a );
6481 aExp = extractFloat128Exp( a );
6482 aSign = extractFloat128Sign( a );
6483 bSig1 = extractFloat128Frac1( b );
6484 bSig0 = extractFloat128Frac0( b );
6485 bExp = extractFloat128Exp( b );
6486 bSign = extractFloat128Sign( b );
6487 zSign = aSign ^ bSign;
6488 if ( aExp == 0x7FFF ) {
6489 if ( ( aSig0 | aSig1 )
6490 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6491 return propagateFloat128NaN(a, b, status);
158142c2
FB
6492 }
6493 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6494 return packFloat128( zSign, 0x7FFF, 0, 0 );
6495 }
6496 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6497 if (bSig0 | bSig1) {
6498 return propagateFloat128NaN(a, b, status);
6499 }
158142c2
FB
6500 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6501 invalid:
ff32e16e 6502 float_raise(float_flag_invalid, status);
af39bc8c 6503 return float128_default_nan(status);
158142c2
FB
6504 }
6505 return packFloat128( zSign, 0x7FFF, 0, 0 );
6506 }
6507 if ( aExp == 0 ) {
6508 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6509 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6510 }
6511 if ( bExp == 0 ) {
6512 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6513 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6514 }
6515 zExp = aExp + bExp - 0x4000;
6516 aSig0 |= LIT64( 0x0001000000000000 );
6517 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6518 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6519 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6520 zSig2 |= ( zSig3 != 0 );
6521 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6522 shift128ExtraRightJamming(
6523 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6524 ++zExp;
6525 }
ff32e16e 6526 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6527
6528}
6529
6530/*----------------------------------------------------------------------------
6531| Returns the result of dividing the quadruple-precision floating-point value
6532| `a' by the corresponding value `b'. The operation is performed according to
6533| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6534*----------------------------------------------------------------------------*/
6535
e5a41ffa 6536float128 float128_div(float128 a, float128 b, float_status *status)
158142c2
FB
6537{
6538 flag aSign, bSign, zSign;
f4014512 6539 int32_t aExp, bExp, zExp;
bb98fe42
AF
6540 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6541 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6542
6543 aSig1 = extractFloat128Frac1( a );
6544 aSig0 = extractFloat128Frac0( a );
6545 aExp = extractFloat128Exp( a );
6546 aSign = extractFloat128Sign( a );
6547 bSig1 = extractFloat128Frac1( b );
6548 bSig0 = extractFloat128Frac0( b );
6549 bExp = extractFloat128Exp( b );
6550 bSign = extractFloat128Sign( b );
6551 zSign = aSign ^ bSign;
6552 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6553 if (aSig0 | aSig1) {
6554 return propagateFloat128NaN(a, b, status);
6555 }
158142c2 6556 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6557 if (bSig0 | bSig1) {
6558 return propagateFloat128NaN(a, b, status);
6559 }
158142c2
FB
6560 goto invalid;
6561 }
6562 return packFloat128( zSign, 0x7FFF, 0, 0 );
6563 }
6564 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6565 if (bSig0 | bSig1) {
6566 return propagateFloat128NaN(a, b, status);
6567 }
158142c2
FB
6568 return packFloat128( zSign, 0, 0, 0 );
6569 }
6570 if ( bExp == 0 ) {
6571 if ( ( bSig0 | bSig1 ) == 0 ) {
6572 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6573 invalid:
ff32e16e 6574 float_raise(float_flag_invalid, status);
af39bc8c 6575 return float128_default_nan(status);
158142c2 6576 }
ff32e16e 6577 float_raise(float_flag_divbyzero, status);
158142c2
FB
6578 return packFloat128( zSign, 0x7FFF, 0, 0 );
6579 }
6580 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6581 }
6582 if ( aExp == 0 ) {
6583 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6584 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6585 }
6586 zExp = aExp - bExp + 0x3FFD;
6587 shortShift128Left(
6588 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6589 shortShift128Left(
6590 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6591 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6592 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6593 ++zExp;
6594 }
6595 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6596 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6597 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 6598 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6599 --zSig0;
6600 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6601 }
6602 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6603 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6604 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6605 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6606 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6607 --zSig1;
6608 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6609 }
6610 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6611 }
6612 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 6613 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6614
6615}
6616
6617/*----------------------------------------------------------------------------
6618| Returns the remainder of the quadruple-precision floating-point value `a'
6619| with respect to the corresponding value `b'. The operation is performed
6620| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6621*----------------------------------------------------------------------------*/
6622
e5a41ffa 6623float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 6624{
ed086f3d 6625 flag aSign, zSign;
f4014512 6626 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6627 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6628 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6629 int64_t sigMean0;
158142c2
FB
6630
6631 aSig1 = extractFloat128Frac1( a );
6632 aSig0 = extractFloat128Frac0( a );
6633 aExp = extractFloat128Exp( a );
6634 aSign = extractFloat128Sign( a );
6635 bSig1 = extractFloat128Frac1( b );
6636 bSig0 = extractFloat128Frac0( b );
6637 bExp = extractFloat128Exp( b );
158142c2
FB
6638 if ( aExp == 0x7FFF ) {
6639 if ( ( aSig0 | aSig1 )
6640 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6641 return propagateFloat128NaN(a, b, status);
158142c2
FB
6642 }
6643 goto invalid;
6644 }
6645 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6646 if (bSig0 | bSig1) {
6647 return propagateFloat128NaN(a, b, status);
6648 }
158142c2
FB
6649 return a;
6650 }
6651 if ( bExp == 0 ) {
6652 if ( ( bSig0 | bSig1 ) == 0 ) {
6653 invalid:
ff32e16e 6654 float_raise(float_flag_invalid, status);
af39bc8c 6655 return float128_default_nan(status);
158142c2
FB
6656 }
6657 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6658 }
6659 if ( aExp == 0 ) {
6660 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6661 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6662 }
6663 expDiff = aExp - bExp;
6664 if ( expDiff < -1 ) return a;
6665 shortShift128Left(
6666 aSig0 | LIT64( 0x0001000000000000 ),
6667 aSig1,
6668 15 - ( expDiff < 0 ),
6669 &aSig0,
6670 &aSig1
6671 );
6672 shortShift128Left(
6673 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6674 q = le128( bSig0, bSig1, aSig0, aSig1 );
6675 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6676 expDiff -= 64;
6677 while ( 0 < expDiff ) {
6678 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6679 q = ( 4 < q ) ? q - 4 : 0;
6680 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6681 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6682 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6683 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6684 expDiff -= 61;
6685 }
6686 if ( -64 < expDiff ) {
6687 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6688 q = ( 4 < q ) ? q - 4 : 0;
6689 q >>= - expDiff;
6690 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6691 expDiff += 52;
6692 if ( expDiff < 0 ) {
6693 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6694 }
6695 else {
6696 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6697 }
6698 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6699 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6700 }
6701 else {
6702 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6703 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6704 }
6705 do {
6706 alternateASig0 = aSig0;
6707 alternateASig1 = aSig1;
6708 ++q;
6709 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 6710 } while ( 0 <= (int64_t) aSig0 );
158142c2 6711 add128(
bb98fe42 6712 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
6713 if ( ( sigMean0 < 0 )
6714 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6715 aSig0 = alternateASig0;
6716 aSig1 = alternateASig1;
6717 }
bb98fe42 6718 zSign = ( (int64_t) aSig0 < 0 );
158142c2 6719 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
6720 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6721 status);
158142c2
FB
6722}
6723
6724/*----------------------------------------------------------------------------
6725| Returns the square root of the quadruple-precision floating-point value `a'.
6726| The operation is performed according to the IEC/IEEE Standard for Binary
6727| Floating-Point Arithmetic.
6728*----------------------------------------------------------------------------*/
6729
e5a41ffa 6730float128 float128_sqrt(float128 a, float_status *status)
158142c2
FB
6731{
6732 flag aSign;
f4014512 6733 int32_t aExp, zExp;
bb98fe42
AF
6734 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6735 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6736
6737 aSig1 = extractFloat128Frac1( a );
6738 aSig0 = extractFloat128Frac0( a );
6739 aExp = extractFloat128Exp( a );
6740 aSign = extractFloat128Sign( a );
6741 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6742 if (aSig0 | aSig1) {
6743 return propagateFloat128NaN(a, a, status);
6744 }
158142c2
FB
6745 if ( ! aSign ) return a;
6746 goto invalid;
6747 }
6748 if ( aSign ) {
6749 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6750 invalid:
ff32e16e 6751 float_raise(float_flag_invalid, status);
af39bc8c 6752 return float128_default_nan(status);
158142c2
FB
6753 }
6754 if ( aExp == 0 ) {
6755 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6756 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6757 }
6758 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6759 aSig0 |= LIT64( 0x0001000000000000 );
6760 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6761 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6762 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6763 doubleZSig0 = zSig0<<1;
6764 mul64To128( zSig0, zSig0, &term0, &term1 );
6765 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6766 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6767 --zSig0;
6768 doubleZSig0 -= 2;
6769 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6770 }
6771 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6772 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6773 if ( zSig1 == 0 ) zSig1 = 1;
6774 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6775 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6776 mul64To128( zSig1, zSig1, &term2, &term3 );
6777 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6778 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6779 --zSig1;
6780 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6781 term3 |= 1;
6782 term2 |= doubleZSig0;
6783 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6784 }
6785 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6786 }
6787 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 6788 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6789
6790}
6791
6792/*----------------------------------------------------------------------------
6793| Returns 1 if the quadruple-precision floating-point value `a' is equal to
b689362d
AJ
6794| the corresponding value `b', and 0 otherwise. The invalid exception is
6795| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
6796| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6797*----------------------------------------------------------------------------*/
6798
e5a41ffa 6799int float128_eq(float128 a, float128 b, float_status *status)
158142c2
FB
6800{
6801
6802 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6803 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6804 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6805 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6806 ) {
ff32e16e 6807 float_raise(float_flag_invalid, status);
158142c2
FB
6808 return 0;
6809 }
6810 return
6811 ( a.low == b.low )
6812 && ( ( a.high == b.high )
6813 || ( ( a.low == 0 )
bb98fe42 6814 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6815 );
6816
6817}
6818
6819/*----------------------------------------------------------------------------
6820| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6821| or equal to the corresponding value `b', and 0 otherwise. The invalid
6822| exception is raised if either operand is a NaN. The comparison is performed
6823| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6824*----------------------------------------------------------------------------*/
6825
e5a41ffa 6826int float128_le(float128 a, float128 b, float_status *status)
158142c2
FB
6827{
6828 flag aSign, bSign;
6829
6830 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6831 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6832 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6833 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6834 ) {
ff32e16e 6835 float_raise(float_flag_invalid, status);
158142c2
FB
6836 return 0;
6837 }
6838 aSign = extractFloat128Sign( a );
6839 bSign = extractFloat128Sign( b );
6840 if ( aSign != bSign ) {
6841 return
6842 aSign
bb98fe42 6843 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6844 == 0 );
6845 }
6846 return
6847 aSign ? le128( b.high, b.low, a.high, a.low )
6848 : le128( a.high, a.low, b.high, b.low );
6849
6850}
6851
6852/*----------------------------------------------------------------------------
6853| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6854| the corresponding value `b', and 0 otherwise. The invalid exception is
6855| raised if either operand is a NaN. The comparison is performed according
6856| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6857*----------------------------------------------------------------------------*/
6858
e5a41ffa 6859int float128_lt(float128 a, float128 b, float_status *status)
158142c2
FB
6860{
6861 flag aSign, bSign;
6862
6863 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6864 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6865 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6866 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6867 ) {
ff32e16e 6868 float_raise(float_flag_invalid, status);
158142c2
FB
6869 return 0;
6870 }
6871 aSign = extractFloat128Sign( a );
6872 bSign = extractFloat128Sign( b );
6873 if ( aSign != bSign ) {
6874 return
6875 aSign
bb98fe42 6876 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6877 != 0 );
6878 }
6879 return
6880 aSign ? lt128( b.high, b.low, a.high, a.low )
6881 : lt128( a.high, a.low, b.high, b.low );
6882
6883}
6884
67b7861d
AJ
6885/*----------------------------------------------------------------------------
6886| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
f5a64251
AJ
6887| be compared, and 0 otherwise. The invalid exception is raised if either
6888| operand is a NaN. The comparison is performed according to the IEC/IEEE
6889| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
6890*----------------------------------------------------------------------------*/
6891
e5a41ffa 6892int float128_unordered(float128 a, float128 b, float_status *status)
67b7861d
AJ
6893{
6894 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6895 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6896 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6897 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6898 ) {
ff32e16e 6899 float_raise(float_flag_invalid, status);
67b7861d
AJ
6900 return 1;
6901 }
6902 return 0;
6903}
6904
158142c2
FB
6905/*----------------------------------------------------------------------------
6906| Returns 1 if the quadruple-precision floating-point value `a' is equal to
f5a64251
AJ
6907| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6908| exception. The comparison is performed according to the IEC/IEEE Standard
6909| for Binary Floating-Point Arithmetic.
158142c2
FB
6910*----------------------------------------------------------------------------*/
6911
e5a41ffa 6912int float128_eq_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6913{
6914
6915 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6916 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6917 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6918 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6919 ) {
af39bc8c
AM
6920 if (float128_is_signaling_nan(a, status)
6921 || float128_is_signaling_nan(b, status)) {
ff32e16e 6922 float_raise(float_flag_invalid, status);
b689362d 6923 }
158142c2
FB
6924 return 0;
6925 }
6926 return
6927 ( a.low == b.low )
6928 && ( ( a.high == b.high )
6929 || ( ( a.low == 0 )
bb98fe42 6930 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6931 );
6932
6933}
6934
6935/*----------------------------------------------------------------------------
6936| Returns 1 if the quadruple-precision floating-point value `a' is less than
6937| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6938| cause an exception. Otherwise, the comparison is performed according to the
6939| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6940*----------------------------------------------------------------------------*/
6941
e5a41ffa 6942int float128_le_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6943{
6944 flag aSign, bSign;
6945
6946 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6947 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6948 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6949 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6950 ) {
af39bc8c
AM
6951 if (float128_is_signaling_nan(a, status)
6952 || float128_is_signaling_nan(b, status)) {
ff32e16e 6953 float_raise(float_flag_invalid, status);
158142c2
FB
6954 }
6955 return 0;
6956 }
6957 aSign = extractFloat128Sign( a );
6958 bSign = extractFloat128Sign( b );
6959 if ( aSign != bSign ) {
6960 return
6961 aSign
bb98fe42 6962 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6963 == 0 );
6964 }
6965 return
6966 aSign ? le128( b.high, b.low, a.high, a.low )
6967 : le128( a.high, a.low, b.high, b.low );
6968
6969}
6970
6971/*----------------------------------------------------------------------------
6972| Returns 1 if the quadruple-precision floating-point value `a' is less than
6973| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6974| exception. Otherwise, the comparison is performed according to the IEC/IEEE
6975| Standard for Binary Floating-Point Arithmetic.
6976*----------------------------------------------------------------------------*/
6977
e5a41ffa 6978int float128_lt_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6979{
6980 flag aSign, bSign;
6981
6982 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6983 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6984 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6985 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6986 ) {
af39bc8c
AM
6987 if (float128_is_signaling_nan(a, status)
6988 || float128_is_signaling_nan(b, status)) {
ff32e16e 6989 float_raise(float_flag_invalid, status);
158142c2
FB
6990 }
6991 return 0;
6992 }
6993 aSign = extractFloat128Sign( a );
6994 bSign = extractFloat128Sign( b );
6995 if ( aSign != bSign ) {
6996 return
6997 aSign
bb98fe42 6998 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6999 != 0 );
7000 }
7001 return
7002 aSign ? lt128( b.high, b.low, a.high, a.low )
7003 : lt128( a.high, a.low, b.high, b.low );
7004
7005}
7006
67b7861d
AJ
7007/*----------------------------------------------------------------------------
7008| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
7009| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
7010| comparison is performed according to the IEC/IEEE Standard for Binary
7011| Floating-Point Arithmetic.
7012*----------------------------------------------------------------------------*/
7013
e5a41ffa 7014int float128_unordered_quiet(float128 a, float128 b, float_status *status)
67b7861d
AJ
7015{
7016 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7017 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7018 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7019 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7020 ) {
af39bc8c
AM
7021 if (float128_is_signaling_nan(a, status)
7022 || float128_is_signaling_nan(b, status)) {
ff32e16e 7023 float_raise(float_flag_invalid, status);
67b7861d
AJ
7024 }
7025 return 1;
7026 }
7027 return 0;
7028}
7029
e5a41ffa
PM
7030static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
7031 int is_quiet, float_status *status)
f6714d36
AJ
7032{
7033 flag aSign, bSign;
7034
d1eb8f2a
AD
7035 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7036 float_raise(float_flag_invalid, status);
7037 return float_relation_unordered;
7038 }
f6714d36
AJ
7039 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7040 ( extractFloatx80Frac( a )<<1 ) ) ||
7041 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7042 ( extractFloatx80Frac( b )<<1 ) )) {
7043 if (!is_quiet ||
af39bc8c
AM
7044 floatx80_is_signaling_nan(a, status) ||
7045 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7046 float_raise(float_flag_invalid, status);
f6714d36
AJ
7047 }
7048 return float_relation_unordered;
7049 }
7050 aSign = extractFloatx80Sign( a );
7051 bSign = extractFloatx80Sign( b );
7052 if ( aSign != bSign ) {
7053
7054 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7055 ( ( a.low | b.low ) == 0 ) ) {
7056 /* zero case */
7057 return float_relation_equal;
7058 } else {
7059 return 1 - (2 * aSign);
7060 }
7061 } else {
7062 if (a.low == b.low && a.high == b.high) {
7063 return float_relation_equal;
7064 } else {
7065 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7066 }
7067 }
7068}
7069
e5a41ffa 7070int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7071{
ff32e16e 7072 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7073}
7074
e5a41ffa 7075int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
f6714d36 7076{
ff32e16e 7077 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7078}
7079
e5a41ffa
PM
7080static inline int float128_compare_internal(float128 a, float128 b,
7081 int is_quiet, float_status *status)
1f587329
BS
7082{
7083 flag aSign, bSign;
7084
7085 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7086 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7087 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7088 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7089 if (!is_quiet ||
af39bc8c
AM
7090 float128_is_signaling_nan(a, status) ||
7091 float128_is_signaling_nan(b, status)) {
ff32e16e 7092 float_raise(float_flag_invalid, status);
1f587329
BS
7093 }
7094 return float_relation_unordered;
7095 }
7096 aSign = extractFloat128Sign( a );
7097 bSign = extractFloat128Sign( b );
7098 if ( aSign != bSign ) {
7099 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7100 /* zero case */
7101 return float_relation_equal;
7102 } else {
7103 return 1 - (2 * aSign);
7104 }
7105 } else {
7106 if (a.low == b.low && a.high == b.high) {
7107 return float_relation_equal;
7108 } else {
7109 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7110 }
7111 }
7112}
7113
e5a41ffa 7114int float128_compare(float128 a, float128 b, float_status *status)
1f587329 7115{
ff32e16e 7116 return float128_compare_internal(a, b, 0, status);
1f587329
BS
7117}
7118
e5a41ffa 7119int float128_compare_quiet(float128 a, float128 b, float_status *status)
1f587329 7120{
ff32e16e 7121 return float128_compare_internal(a, b, 1, status);
1f587329
BS
7122}
7123
e5a41ffa 7124floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb
PB
7125{
7126 flag aSign;
326b9e98 7127 int32_t aExp;
bb98fe42 7128 uint64_t aSig;
9ee6e8bb 7129
d1eb8f2a
AD
7130 if (floatx80_invalid_encoding(a)) {
7131 float_raise(float_flag_invalid, status);
7132 return floatx80_default_nan(status);
7133 }
9ee6e8bb
PB
7134 aSig = extractFloatx80Frac( a );
7135 aExp = extractFloatx80Exp( a );
7136 aSign = extractFloatx80Sign( a );
7137
326b9e98
AJ
7138 if ( aExp == 0x7FFF ) {
7139 if ( aSig<<1 ) {
ff32e16e 7140 return propagateFloatx80NaN(a, a, status);
326b9e98 7141 }
9ee6e8bb
PB
7142 return a;
7143 }
326b9e98 7144
3c85c37f
PM
7145 if (aExp == 0) {
7146 if (aSig == 0) {
7147 return a;
7148 }
7149 aExp++;
7150 }
69397542 7151
326b9e98
AJ
7152 if (n > 0x10000) {
7153 n = 0x10000;
7154 } else if (n < -0x10000) {
7155 n = -0x10000;
7156 }
7157
9ee6e8bb 7158 aExp += n;
a2f2d288
PM
7159 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7160 aSign, aExp, aSig, 0, status);
9ee6e8bb 7161}
9ee6e8bb 7162
e5a41ffa 7163float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb
PB
7164{
7165 flag aSign;
326b9e98 7166 int32_t aExp;
bb98fe42 7167 uint64_t aSig0, aSig1;
9ee6e8bb
PB
7168
7169 aSig1 = extractFloat128Frac1( a );
7170 aSig0 = extractFloat128Frac0( a );
7171 aExp = extractFloat128Exp( a );
7172 aSign = extractFloat128Sign( a );
7173 if ( aExp == 0x7FFF ) {
326b9e98 7174 if ( aSig0 | aSig1 ) {
ff32e16e 7175 return propagateFloat128NaN(a, a, status);
326b9e98 7176 }
9ee6e8bb
PB
7177 return a;
7178 }
3c85c37f 7179 if (aExp != 0) {
69397542 7180 aSig0 |= LIT64( 0x0001000000000000 );
3c85c37f 7181 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 7182 return a;
3c85c37f
PM
7183 } else {
7184 aExp++;
7185 }
69397542 7186
326b9e98
AJ
7187 if (n > 0x10000) {
7188 n = 0x10000;
7189 } else if (n < -0x10000) {
7190 n = -0x10000;
7191 }
7192
69397542
PB
7193 aExp += n - 1;
7194 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 7195 , status);
9ee6e8bb
PB
7196
7197}