]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
fpu/softfloat: Make is_nan et al available to softfloat-specialize.h
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
6fff2167 86#include "qemu/bitops.h"
6b4c305c 87#include "fpu/softfloat.h"
158142c2 88
dc355b76 89/* We only need stdlib for abort() */
dc355b76 90
158142c2
FB
91/*----------------------------------------------------------------------------
92| Primitive arithmetic functions, including multi-word arithmetic, and
93| division and square root approximations. (Can be specialized to target if
94| desired.)
95*----------------------------------------------------------------------------*/
88857aca 96#include "fpu/softfloat-macros.h"
158142c2 97
bb4d4bb3
PM
98/*----------------------------------------------------------------------------
99| Returns the fraction bits of the half-precision floating-point value `a'.
100*----------------------------------------------------------------------------*/
101
a49db98d 102static inline uint32_t extractFloat16Frac(float16 a)
bb4d4bb3
PM
103{
104 return float16_val(a) & 0x3ff;
105}
106
107/*----------------------------------------------------------------------------
108| Returns the exponent bits of the half-precision floating-point value `a'.
109*----------------------------------------------------------------------------*/
110
0c48262d 111static inline int extractFloat16Exp(float16 a)
bb4d4bb3
PM
112{
113 return (float16_val(a) >> 10) & 0x1f;
114}
115
d97544c9
AB
116/*----------------------------------------------------------------------------
117| Returns the fraction bits of the single-precision floating-point value `a'.
118*----------------------------------------------------------------------------*/
119
120static inline uint32_t extractFloat32Frac(float32 a)
121{
122 return float32_val(a) & 0x007FFFFF;
123}
124
125/*----------------------------------------------------------------------------
126| Returns the exponent bits of the single-precision floating-point value `a'.
127*----------------------------------------------------------------------------*/
128
129static inline int extractFloat32Exp(float32 a)
130{
131 return (float32_val(a) >> 23) & 0xFF;
132}
133
134/*----------------------------------------------------------------------------
135| Returns the sign bit of the single-precision floating-point value `a'.
136*----------------------------------------------------------------------------*/
137
138static inline flag extractFloat32Sign(float32 a)
139{
140 return float32_val(a) >> 31;
141}
142
143/*----------------------------------------------------------------------------
144| Returns the fraction bits of the double-precision floating-point value `a'.
145*----------------------------------------------------------------------------*/
146
147static inline uint64_t extractFloat64Frac(float64 a)
148{
149 return float64_val(a) & LIT64(0x000FFFFFFFFFFFFF);
150}
151
152/*----------------------------------------------------------------------------
153| Returns the exponent bits of the double-precision floating-point value `a'.
154*----------------------------------------------------------------------------*/
155
156static inline int extractFloat64Exp(float64 a)
157{
158 return (float64_val(a) >> 52) & 0x7FF;
159}
160
161/*----------------------------------------------------------------------------
162| Returns the sign bit of the double-precision floating-point value `a'.
163*----------------------------------------------------------------------------*/
164
165static inline flag extractFloat64Sign(float64 a)
166{
167 return float64_val(a) >> 63;
168}
169
a90119b5
AB
170/*
171 * Classify a floating point number. Everything above float_class_qnan
172 * is a NaN so cls >= float_class_qnan is any NaN.
173 */
174
/*
 * The enumerator order is significant: is_nan() tests
 * "c >= float_class_qnan", so both NaN classes must stay last.
 */
175typedef enum __attribute__ ((__packed__)) {
/* Value stored by unpack_raw(); canonicalize() replaces it with a real class. */
176 float_class_unclassified,
/* Zero; also assigned to denormal inputs when flush_inputs_to_zero is set. */
177 float_class_zero,
/* Any finite non-zero value; canonicalize() also normalizes denormals to this. */
178 float_class_normal,
/* Infinity: maximum exponent with an all-zero fraction. */
179 float_class_inf,
180 float_class_qnan, /* all NaNs from here */
181 float_class_snan,
a90119b5
AB
182} FloatClass;
183
247d1f21
RH
184/* Simple helpers for checking if, or what kind of, NaN we have */
185static inline __attribute__((unused)) bool is_nan(FloatClass c)
186{
187 return unlikely(c >= float_class_qnan);
188}
189
190static inline __attribute__((unused)) bool is_snan(FloatClass c)
191{
192 return c == float_class_snan;
193}
194
195static inline __attribute__((unused)) bool is_qnan(FloatClass c)
196{
197 return c == float_class_qnan;
198}
199
a90119b5
AB
200/*
201 * Structure holding all of the decomposed parts of a float. The
202 * exponent is unbiased and the fraction is normalized. All
203 * calculations are done with a 64 bit fraction and then rounded as
204 * appropriate for the final format.
205 *
206 * Thanks to the packed FloatClass a decent compiler should be able to
207 * fit the whole structure into registers and avoid using the stack
208 * for parameter passing.
209 */
210
211typedef struct {
/*
 * Fraction. unpack_raw() stores the raw fraction field here; for normal
 * values canonicalize() left-aligns it so that the leading one sits at
 * bit DECOMPOSED_BINARY_POINT.
 */
212 uint64_t frac;
/* Raw exponent field after unpack_raw(); unbiased once canonicalized. */
213 int32_t exp;
/* Classification; float_class_unclassified until canonicalize() runs. */
214 FloatClass cls;
/* Sign bit as extracted from the top bit of the format. */
215 bool sign;
216} FloatParts;
217
/*
 * Fixed-point layout of FloatParts.frac once a value is canonical:
 * the implicit leading one sits at bit DECOMPOSED_BINARY_POINT (62),
 * leaving bit 63 free to catch a carry out of an addition or rounding
 * step (DECOMPOSED_OVERFLOW_BIT).
 */
#define DECOMPOSED_BINARY_POINT (64 - 2)
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1)

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/*
 * Expand fields based on the size of exponent (E) and fraction (F).
 *
 * Expands to a list of designated initializers for a FloatFmt; fields
 * not listed (e.g. arm_althp) are left to the enclosing initializer.
 * Both arguments are parenthesized at every use so that expression
 * arguments (e.g. "5 + 5") expand with the intended precedence.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = (E),                                           \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                         \
    .exp_max        = (1 << (E)) - 1,                                \
    .frac_size      = (F),                                           \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                 \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),       \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1), \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1, \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1
259
/* Half precision: 5 exponent bits, 10 fraction bits. */
260static const FloatFmt float16_params = {
261 FLOAT_PARAMS(5, 10)
262};
263
6fed16b2
AB
/*
 * Same field sizes as float16_params, with arm_althp set: canonicalize()
 * then does not treat the maximum exponent as Inf/NaN, and
 * round_canonical() saturates to the largest value with
 * float_flag_invalid on overflow.
 */
264static const FloatFmt float16_params_ahp = {
265 FLOAT_PARAMS(5, 10),
266 .arm_althp = true
267};
268
a90119b5
AB
/* Single precision: 8 exponent bits, 23 fraction bits. */
269static const FloatFmt float32_params = {
270 FLOAT_PARAMS(8, 23)
271};
272
/* Double precision: 11 exponent bits, 52 fraction bits. */
273static const FloatFmt float64_params = {
274 FLOAT_PARAMS(11, 52)
275};
276
6fff2167
AB
277/* Unpack a float to parts, but do not canonicalize. */
278static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
279{
280 const int sign_pos = fmt.frac_size + fmt.exp_size;
281
282 return (FloatParts) {
283 .cls = float_class_unclassified,
284 .sign = extract64(raw, sign_pos, 1),
285 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
286 .frac = extract64(raw, 0, fmt.frac_size),
287 };
288}
289
290static inline FloatParts float16_unpack_raw(float16 f)
291{
292 return unpack_raw(float16_params, f);
293}
294
295static inline FloatParts float32_unpack_raw(float32 f)
296{
297 return unpack_raw(float32_params, f);
298}
299
300static inline FloatParts float64_unpack_raw(float64 f)
301{
302 return unpack_raw(float64_params, f);
303}
304
305/* Pack a float from parts, but do not canonicalize. */
306static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
307{
308 const int sign_pos = fmt.frac_size + fmt.exp_size;
309 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
310 return deposit64(ret, sign_pos, 1, p.sign);
311}
312
313static inline float16 float16_pack_raw(FloatParts p)
314{
315 return make_float16(pack_raw(float16_params, p));
316}
317
318static inline float32 float32_pack_raw(FloatParts p)
319{
320 return make_float32(pack_raw(float32_params, p));
321}
322
323static inline float64 float64_pack_raw(FloatParts p)
324{
325 return make_float64(pack_raw(float64_params, p));
326}
327
0664335a
RH
328/*----------------------------------------------------------------------------
329| Functions and definitions to determine: (1) whether tininess for underflow
330| is detected before or after rounding by default, (2) what (if anything)
331| happens when exceptions are raised, (3) how signaling NaNs are distinguished
332| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
333| are propagated from function inputs to output. These details are target-
334| specific.
335*----------------------------------------------------------------------------*/
336#include "softfloat-specialize.h"
337
6fff2167
AB
338/* Canonicalize EXP and FRAC, setting CLS. */
339static FloatParts canonicalize(FloatParts part, const FloatFmt *parm,
340 float_status *status)
341{
ca3a3d5a 342 if (part.exp == parm->exp_max && !parm->arm_althp) {
6fff2167
AB
343 if (part.frac == 0) {
344 part.cls = float_class_inf;
345 } else {
94933df0 346 part.frac <<= parm->frac_shift;
298b468e
RH
347 part.cls = (parts_is_snan_frac(part.frac, status)
348 ? float_class_snan : float_class_qnan);
6fff2167
AB
349 }
350 } else if (part.exp == 0) {
351 if (likely(part.frac == 0)) {
352 part.cls = float_class_zero;
353 } else if (status->flush_inputs_to_zero) {
354 float_raise(float_flag_input_denormal, status);
355 part.cls = float_class_zero;
356 part.frac = 0;
357 } else {
358 int shift = clz64(part.frac) - 1;
359 part.cls = float_class_normal;
360 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
361 part.frac <<= shift;
362 }
363 } else {
364 part.cls = float_class_normal;
365 part.exp -= parm->exp_bias;
366 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
367 }
368 return part;
369}
370
371/* Round and uncanonicalize a floating-point number by parts. There
372 * are FRAC_SHIFT bits that may require rounding at the bottom of the
373 * fraction; these bits will be removed. The exponent will be biased
374 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
375 */
376
/*
 * p:    canonical value to round (fraction aligned to
 *       DECOMPOSED_BINARY_POINT, unbiased exponent).
 * s:    supplies the rounding mode, flush_to_zero and tininess settings;
 *       receives the accumulated exception flags via float_raise().
 * parm: target format.
 * Returns p with exp/frac rewritten as raw field values for parm; p.cls
 * may change to float_class_inf or float_class_zero.
 */
377static FloatParts round_canonical(FloatParts p, float_status *s,
378 const FloatFmt *parm)
379{
380 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
381 const uint64_t round_mask = parm->round_mask;
382 const uint64_t roundeven_mask = parm->roundeven_mask;
383 const int exp_max = parm->exp_max;
384 const int frac_shift = parm->frac_shift;
385 uint64_t frac, inc;
386 int exp, flags = 0;
387 bool overflow_norm;
388
389 frac = p.frac;
390 exp = p.exp;
391
392 switch (p.cls) {
393 case float_class_normal:
/*
 * Pick the rounding increment (inc) and whether an overflow saturates
 * to the largest finite value (overflow_norm) instead of infinity.
 */
394 switch (s->float_rounding_mode) {
395 case float_round_nearest_even:
396 overflow_norm = false;
/*
 * An exact tie with a clear lsb leaves frac alone (already even);
 * every other case adds half an lsb.
 */
397 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
398 break;
399 case float_round_ties_away:
400 overflow_norm = false;
401 inc = frac_lsbm1;
402 break;
403 case float_round_to_zero:
404 overflow_norm = true;
405 inc = 0;
406 break;
407 case float_round_up:
408 inc = p.sign ? 0 : round_mask;
409 overflow_norm = p.sign;
410 break;
411 case float_round_down:
412 inc = p.sign ? round_mask : 0;
413 overflow_norm = !p.sign;
414 break;
415 default:
416 g_assert_not_reached();
417 }
418
/* Re-bias the exponent; a result <= 0 cannot be encoded as a normal. */
419 exp += parm->exp_bias;
420 if (likely(exp > 0)) {
/* Any set bit below the target lsb makes the result inexact. */
421 if (frac & round_mask) {
422 flags |= float_flag_inexact;
423 frac += inc;
/* Rounding carried out of the significand: renormalize. */
424 if (frac & DECOMPOSED_OVERFLOW_BIT) {
425 frac >>= 1;
426 exp++;
427 }
428 }
/* Drop the rounding bits; frac is now aligned to the raw field. */
429 frac >>= frac_shift;
430
ca3a3d5a
AB
431 if (parm->arm_althp) {
432 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
433 if (unlikely(exp > exp_max)) {
434 /* Overflow. Return the maximum normal. */
/* Note: this assignment replaces any inexact flag set above. */
435 flags = float_flag_invalid;
436 exp = exp_max;
437 frac = -1;
438 }
439 } else if (unlikely(exp >= exp_max)) {
6fff2167
AB
440 flags |= float_flag_overflow | float_flag_inexact;
441 if (overflow_norm) {
/* Saturate: largest finite exponent with an all-ones fraction. */
442 exp = exp_max - 1;
443 frac = -1;
444 } else {
445 p.cls = float_class_inf;
446 goto do_inf;
447 }
448 }
449 } else if (s->flush_to_zero) {
/* The result would be denormal: replace it with zero and flag it. */
450 flags |= float_flag_output_denormal;
451 p.cls = float_class_zero;
452 goto do_zero;
453 } else {
/*
 * Denormal result. It is tiny if tininess is detected before
 * rounding, if the biased exponent is below zero, or if applying
 * inc does not carry into DECOMPOSED_OVERFLOW_BIT.
 */
454 bool is_tiny = (s->float_detect_tininess
455 == float_tininess_before_rounding)
456 || (exp < 0)
457 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
458
/*
 * Denormalize by shifting right by (1 - exp).
 * NOTE(review): shift64RightJamming is defined in softfloat-macros.h;
 * presumably it ORs shifted-out bits into the lsb - confirm there.
 */
459 shift64RightJamming(frac, 1 - exp, &frac);
460 if (frac & round_mask) {
461 /* Need to recompute round-to-even. */
462 if (s->float_rounding_mode == float_round_nearest_even) {
463 inc = ((frac & roundeven_mask) != frac_lsbm1
464 ? frac_lsbm1 : 0);
465 }
466 flags |= float_flag_inexact;
467 frac += inc;
468 }
469
/* Rounding may have carried up into the implicit bit: exponent 1. */
470 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
471 frac >>= frac_shift;
472
/* Underflow is only signalled when the tiny result is also inexact. */
473 if (is_tiny && (flags & float_flag_inexact)) {
474 flags |= float_flag_underflow;
475 }
476 if (exp == 0 && frac == 0) {
477 p.cls = float_class_zero;
478 }
479 }
480 break;
481
482 case float_class_zero:
483 do_zero:
484 exp = 0;
485 frac = 0;
486 break;
487
488 case float_class_inf:
489 do_inf:
/* Infinity is never produced for a format with arm_althp set. */
ca3a3d5a 490 assert(!parm->arm_althp);
6fff2167
AB
491 exp = exp_max;
492 frac = 0;
493 break;
494
495 case float_class_qnan:
496 case float_class_snan:
ca3a3d5a 497 assert(!parm->arm_althp);
/* NaN: maximum exponent, fraction shifted back down to the raw field. */
6fff2167 498 exp = exp_max;
94933df0 499 frac >>= parm->frac_shift;
6fff2167
AB
500 break;
501
502 default:
503 g_assert_not_reached();
504 }
505
/* Report everything accumulated above in one call. */
506 float_raise(flags, s);
507 p.exp = exp;
508 p.frac = frac;
509 return p;
510}
511
6fed16b2
AB
512/* Explicit FloatFmt version */
513static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
514 const FloatFmt *params)
515{
516 return canonicalize(float16_unpack_raw(f), params, s);
517}
518
6fff2167
AB
519static FloatParts float16_unpack_canonical(float16 f, float_status *s)
520{
6fed16b2
AB
521 return float16a_unpack_canonical(f, s, &float16_params);
522}
523
524static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
525 const FloatFmt *params)
526{
527 return float16_pack_raw(round_canonical(p, s, params));
6fff2167
AB
528}
529
530static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
531{
6fed16b2 532 return float16a_round_pack_canonical(p, s, &float16_params);
6fff2167
AB
533}
534
535static FloatParts float32_unpack_canonical(float32 f, float_status *s)
536{
537 return canonicalize(float32_unpack_raw(f), &float32_params, s);
538}
539
540static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
541{
0bcfbcbe 542 return float32_pack_raw(round_canonical(p, s, &float32_params));
6fff2167
AB
543}
544
545static FloatParts float64_unpack_canonical(float64 f, float_status *s)
546{
547 return canonicalize(float64_unpack_raw(f), &float64_params, s);
548}
549
550static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
551{
0bcfbcbe 552 return float64_pack_raw(round_canonical(p, s, &float64_params));
6fff2167
AB
553}
554
dbe4d53a
AB
555static FloatParts return_nan(FloatParts a, float_status *s)
556{
557 switch (a.cls) {
558 case float_class_snan:
559 s->float_exception_flags |= float_flag_invalid;
0bcfbcbe 560 a = parts_silence_nan(a, s);
dbe4d53a
AB
561 /* fall through */
562 case float_class_qnan:
563 if (s->default_nan_mode) {
f7e598e2 564 return parts_default_nan(s);
dbe4d53a
AB
565 }
566 break;
567
568 default:
569 g_assert_not_reached();
570 }
571 return a;
572}
573
6fff2167
AB
574static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
575{
576 if (is_snan(a.cls) || is_snan(b.cls)) {
577 s->float_exception_flags |= float_flag_invalid;
578 }
579
580 if (s->default_nan_mode) {
f7e598e2 581 return parts_default_nan(s);
6fff2167
AB
582 } else {
583 if (pickNaN(is_qnan(a.cls), is_snan(a.cls),
584 is_qnan(b.cls), is_snan(b.cls),
585 a.frac > b.frac ||
586 (a.frac == b.frac && a.sign < b.sign))) {
587 a = b;
588 }
0bcfbcbe
RH
589 if (is_snan(a.cls)) {
590 return parts_silence_nan(a, s);
591 }
6fff2167
AB
592 }
593 return a;
594}
595
d446830a
AB
596static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
597 bool inf_zero, float_status *s)
598{
1839189b
PM
599 int which;
600
d446830a
AB
601 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
602 s->float_exception_flags |= float_flag_invalid;
603 }
604
1839189b
PM
605 which = pickNaNMulAdd(is_qnan(a.cls), is_snan(a.cls),
606 is_qnan(b.cls), is_snan(b.cls),
607 is_qnan(c.cls), is_snan(c.cls),
608 inf_zero, s);
609
d446830a 610 if (s->default_nan_mode) {
1839189b
PM
611 /* Note that this check is after pickNaNMulAdd so that function
612 * has an opportunity to set the Invalid flag.
613 */
f7e598e2 614 which = 3;
1839189b 615 }
d446830a 616
1839189b
PM
617 switch (which) {
618 case 0:
619 break;
620 case 1:
621 a = b;
622 break;
623 case 2:
624 a = c;
625 break;
626 case 3:
f7e598e2 627 return parts_default_nan(s);
1839189b
PM
628 default:
629 g_assert_not_reached();
d446830a 630 }
1839189b 631
0bcfbcbe
RH
632 if (is_snan(a.cls)) {
633 return parts_silence_nan(a, s);
634 }
d446830a
AB
635 return a;
636}
637
6fff2167
AB
638/*
639 * Returns the result of adding or subtracting the values of the
640 * floating-point values `a' and `b'. The operation is performed
641 * according to the IEC/IEEE Standard for Binary Floating-Point
642 * Arithmetic.
643 */
644
/*
 * a, b:     canonical operands.
 * subtract: when true, b's sign is inverted before the operation.
 * s:        rounding mode source and exception flag sink.
 * Returns the canonical, not yet rounded, result; the callers pass it
 * to the *_round_pack_canonical() helpers.
 */
645static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
646 float_status *s)
647{
648 bool a_sign = a.sign;
/* Effective sign of b once the requested subtraction is folded in. */
649 bool b_sign = b.sign ^ subtract;
650
651 if (a_sign != b_sign) {
652 /* Subtraction */
653
654 if (a.cls == float_class_normal && b.cls == float_class_normal) {
/*
 * Align the smaller magnitude to the larger one's exponent and
 * subtract it; the result sign flips when b is the larger one.
 */
655 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
656 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
657 a.frac = a.frac - b.frac;
658 } else {
659 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
660 a.frac = b.frac - a.frac;
661 a.exp = b.exp;
662 a_sign ^= 1;
663 }
664
665 if (a.frac == 0) {
/* Exact cancellation: the zero is negative only when rounding down. */
666 a.cls = float_class_zero;
667 a.sign = s->float_rounding_mode == float_round_down;
668 } else {
/* Renormalize: bring the leading one back to DECOMPOSED_BINARY_POINT. */
669 int shift = clz64(a.frac) - 1;
670 a.frac = a.frac << shift;
671 a.exp = a.exp - shift;
672 a.sign = a_sign;
673 }
674 return a;
675 }
676 if (is_nan(a.cls) || is_nan(b.cls)) {
677 return pick_nan(a, b, s);
678 }
679 if (a.cls == float_class_inf) {
680 if (b.cls == float_class_inf) {
/* Infinities of opposite effective sign: invalid operation. */
681 float_raise(float_flag_invalid, s);
f7e598e2 682 return parts_default_nan(s);
6fff2167
AB
683 }
684 return a;
685 }
686 if (a.cls == float_class_zero && b.cls == float_class_zero) {
/* Zeros of opposite effective sign: negative only when rounding down. */
687 a.sign = s->float_rounding_mode == float_round_down;
688 return a;
689 }
690 if (a.cls == float_class_zero || b.cls == float_class_inf) {
/* Result is b with its effective sign, which here equals !a_sign. */
691 b.sign = a_sign ^ 1;
692 return b;
693 }
694 if (b.cls == float_class_zero) {
695 return a;
696 }
697 } else {
698 /* Addition */
699 if (a.cls == float_class_normal && b.cls == float_class_normal) {
/* Align the operand with the smaller exponent, then add magnitudes. */
700 if (a.exp > b.exp) {
701 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
702 } else if (a.exp < b.exp) {
703 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
704 a.exp = b.exp;
705 }
706 a.frac += b.frac;
/* A carry into the overflow bit needs a one-bit renormalization. */
707 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
708 a.frac >>= 1;
709 a.exp += 1;
710 }
711 return a;
712 }
713 if (is_nan(a.cls) || is_nan(b.cls)) {
714 return pick_nan(a, b, s);
715 }
716 if (a.cls == float_class_inf || b.cls == float_class_zero) {
717 return a;
718 }
719 if (b.cls == float_class_inf || a.cls == float_class_zero) {
720 b.sign = b_sign;
721 return b;
722 }
723 }
/* Every class combination is expected to have returned above. */
724 g_assert_not_reached();
725}
726
727/*
728 * Returns the result of adding or subtracting the floating-point
729 * values `a' and `b'. The operation is performed according to the
730 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
731 */
732
733float16 __attribute__((flatten)) float16_add(float16 a, float16 b,
734 float_status *status)
735{
736 FloatParts pa = float16_unpack_canonical(a, status);
737 FloatParts pb = float16_unpack_canonical(b, status);
738 FloatParts pr = addsub_floats(pa, pb, false, status);
739
740 return float16_round_pack_canonical(pr, status);
741}
742
743float32 __attribute__((flatten)) float32_add(float32 a, float32 b,
744 float_status *status)
745{
746 FloatParts pa = float32_unpack_canonical(a, status);
747 FloatParts pb = float32_unpack_canonical(b, status);
748 FloatParts pr = addsub_floats(pa, pb, false, status);
749
750 return float32_round_pack_canonical(pr, status);
751}
752
753float64 __attribute__((flatten)) float64_add(float64 a, float64 b,
754 float_status *status)
755{
756 FloatParts pa = float64_unpack_canonical(a, status);
757 FloatParts pb = float64_unpack_canonical(b, status);
758 FloatParts pr = addsub_floats(pa, pb, false, status);
759
760 return float64_round_pack_canonical(pr, status);
761}
762
763float16 __attribute__((flatten)) float16_sub(float16 a, float16 b,
764 float_status *status)
765{
766 FloatParts pa = float16_unpack_canonical(a, status);
767 FloatParts pb = float16_unpack_canonical(b, status);
768 FloatParts pr = addsub_floats(pa, pb, true, status);
769
770 return float16_round_pack_canonical(pr, status);
771}
772
773float32 __attribute__((flatten)) float32_sub(float32 a, float32 b,
774 float_status *status)
775{
776 FloatParts pa = float32_unpack_canonical(a, status);
777 FloatParts pb = float32_unpack_canonical(b, status);
778 FloatParts pr = addsub_floats(pa, pb, true, status);
779
780 return float32_round_pack_canonical(pr, status);
781}
782
783float64 __attribute__((flatten)) float64_sub(float64 a, float64 b,
784 float_status *status)
785{
786 FloatParts pa = float64_unpack_canonical(a, status);
787 FloatParts pb = float64_unpack_canonical(b, status);
788 FloatParts pr = addsub_floats(pa, pb, true, status);
789
790 return float64_round_pack_canonical(pr, status);
791}
792
74d707e2
AB
793/*
794 * Returns the result of multiplying the floating-point values `a' and
795 * `b'. The operation is performed according to the IEC/IEEE Standard
796 * for Binary Floating-Point Arithmetic.
797 */
798
799static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
800{
801 bool sign = a.sign ^ b.sign;
802
803 if (a.cls == float_class_normal && b.cls == float_class_normal) {
804 uint64_t hi, lo;
805 int exp = a.exp + b.exp;
806
807 mul64To128(a.frac, b.frac, &hi, &lo);
808 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
809 if (lo & DECOMPOSED_OVERFLOW_BIT) {
810 shift64RightJamming(lo, 1, &lo);
811 exp += 1;
812 }
813
814 /* Re-use a */
815 a.exp = exp;
816 a.sign = sign;
817 a.frac = lo;
818 return a;
819 }
820 /* handle all the NaN cases */
821 if (is_nan(a.cls) || is_nan(b.cls)) {
822 return pick_nan(a, b, s);
823 }
824 /* Inf * Zero == NaN */
825 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
826 (a.cls == float_class_zero && b.cls == float_class_inf)) {
827 s->float_exception_flags |= float_flag_invalid;
f7e598e2 828 return parts_default_nan(s);
74d707e2
AB
829 }
830 /* Multiply by 0 or Inf */
831 if (a.cls == float_class_inf || a.cls == float_class_zero) {
832 a.sign = sign;
833 return a;
834 }
835 if (b.cls == float_class_inf || b.cls == float_class_zero) {
836 b.sign = sign;
837 return b;
838 }
839 g_assert_not_reached();
840}
841
842float16 __attribute__((flatten)) float16_mul(float16 a, float16 b,
843 float_status *status)
844{
845 FloatParts pa = float16_unpack_canonical(a, status);
846 FloatParts pb = float16_unpack_canonical(b, status);
847 FloatParts pr = mul_floats(pa, pb, status);
848
849 return float16_round_pack_canonical(pr, status);
850}
851
852float32 __attribute__((flatten)) float32_mul(float32 a, float32 b,
853 float_status *status)
854{
855 FloatParts pa = float32_unpack_canonical(a, status);
856 FloatParts pb = float32_unpack_canonical(b, status);
857 FloatParts pr = mul_floats(pa, pb, status);
858
859 return float32_round_pack_canonical(pr, status);
860}
861
862float64 __attribute__((flatten)) float64_mul(float64 a, float64 b,
863 float_status *status)
864{
865 FloatParts pa = float64_unpack_canonical(a, status);
866 FloatParts pb = float64_unpack_canonical(b, status);
867 FloatParts pr = mul_floats(pa, pb, status);
868
869 return float64_round_pack_canonical(pr, status);
870}
871
d446830a
AB
872/*
873 * Returns the result of multiplying the floating-point values `a' and
874 * `b' then adding 'c', with no intermediate rounding step after the
875 * multiplication. The operation is performed according to the
876 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
877 * The flags argument allows the caller to select negation of the
878 * addend, the intermediate product, or the final result. (The
879 * difference between this and having the caller do a separate
880 * negation is that negating externally will flip the sign bit on
881 * NaNs.)
882 */
883
884static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
885 int flags, float_status *s)
886{
887 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
888 ((1 << float_class_inf) | (1 << float_class_zero));
889 bool p_sign;
890 bool sign_flip = flags & float_muladd_negate_result;
891 FloatClass p_class;
892 uint64_t hi, lo;
893 int p_exp;
894
895 /* It is implementation-defined whether the cases of (0,inf,qnan)
896 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
897 * they return if they do), so we have to hand this information
898 * off to the target-specific pick-a-NaN routine.
899 */
900 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
901 return pick_nan_muladd(a, b, c, inf_zero, s);
902 }
903
904 if (inf_zero) {
905 s->float_exception_flags |= float_flag_invalid;
f7e598e2 906 return parts_default_nan(s);
d446830a
AB
907 }
908
909 if (flags & float_muladd_negate_c) {
910 c.sign ^= 1;
911 }
912
913 p_sign = a.sign ^ b.sign;
914
915 if (flags & float_muladd_negate_product) {
916 p_sign ^= 1;
917 }
918
919 if (a.cls == float_class_inf || b.cls == float_class_inf) {
920 p_class = float_class_inf;
921 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
922 p_class = float_class_zero;
923 } else {
924 p_class = float_class_normal;
925 }
926
927 if (c.cls == float_class_inf) {
928 if (p_class == float_class_inf && p_sign != c.sign) {
929 s->float_exception_flags |= float_flag_invalid;
f7e598e2 930 return parts_default_nan(s);
d446830a
AB
931 } else {
932 a.cls = float_class_inf;
933 a.sign = c.sign ^ sign_flip;
f7e598e2 934 return a;
d446830a 935 }
d446830a
AB
936 }
937
938 if (p_class == float_class_inf) {
939 a.cls = float_class_inf;
940 a.sign = p_sign ^ sign_flip;
941 return a;
942 }
943
944 if (p_class == float_class_zero) {
945 if (c.cls == float_class_zero) {
946 if (p_sign != c.sign) {
947 p_sign = s->float_rounding_mode == float_round_down;
948 }
949 c.sign = p_sign;
950 } else if (flags & float_muladd_halve_result) {
951 c.exp -= 1;
952 }
953 c.sign ^= sign_flip;
954 return c;
955 }
956
957 /* a & b should be normals now... */
958 assert(a.cls == float_class_normal &&
959 b.cls == float_class_normal);
960
961 p_exp = a.exp + b.exp;
962
963 /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
964 * result.
965 */
966 mul64To128(a.frac, b.frac, &hi, &lo);
967 /* binary point now at bit 124 */
968
969 /* check for overflow */
970 if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
971 shift128RightJamming(hi, lo, 1, &hi, &lo);
972 p_exp += 1;
973 }
974
975 /* + add/sub */
976 if (c.cls == float_class_zero) {
977 /* move binary point back to 62 */
978 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
979 } else {
980 int exp_diff = p_exp - c.exp;
981 if (p_sign == c.sign) {
982 /* Addition */
983 if (exp_diff <= 0) {
984 shift128RightJamming(hi, lo,
985 DECOMPOSED_BINARY_POINT - exp_diff,
986 &hi, &lo);
987 lo += c.frac;
988 p_exp = c.exp;
989 } else {
990 uint64_t c_hi, c_lo;
991 /* shift c to the same binary point as the product (124) */
992 c_hi = c.frac >> 2;
993 c_lo = 0;
994 shift128RightJamming(c_hi, c_lo,
995 exp_diff,
996 &c_hi, &c_lo);
997 add128(hi, lo, c_hi, c_lo, &hi, &lo);
998 /* move binary point back to 62 */
999 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1000 }
1001
1002 if (lo & DECOMPOSED_OVERFLOW_BIT) {
1003 shift64RightJamming(lo, 1, &lo);
1004 p_exp += 1;
1005 }
1006
1007 } else {
1008 /* Subtraction */
1009 uint64_t c_hi, c_lo;
1010 /* make C binary point match product at bit 124 */
1011 c_hi = c.frac >> 2;
1012 c_lo = 0;
1013
1014 if (exp_diff <= 0) {
1015 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1016 if (exp_diff == 0
1017 &&
1018 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1019 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1020 } else {
1021 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1022 p_sign ^= 1;
1023 p_exp = c.exp;
1024 }
1025 } else {
1026 shift128RightJamming(c_hi, c_lo,
1027 exp_diff,
1028 &c_hi, &c_lo);
1029 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1030 }
1031
1032 if (hi == 0 && lo == 0) {
1033 a.cls = float_class_zero;
1034 a.sign = s->float_rounding_mode == float_round_down;
1035 a.sign ^= sign_flip;
1036 return a;
1037 } else {
1038 int shift;
1039 if (hi != 0) {
1040 shift = clz64(hi);
1041 } else {
1042 shift = clz64(lo) + 64;
1043 }
1044 /* Normalizing to a binary point of 124 is the
1045 correct adjust for the exponent. However since we're
1046 shifting, we might as well put the binary point back
1047 at 62 where we really want it. Therefore shift as
1048 if we're leaving 1 bit at the top of the word, but
1049 adjust the exponent as if we're leaving 3 bits. */
1050 shift -= 1;
1051 if (shift >= 64) {
1052 lo = lo << (shift - 64);
1053 } else {
1054 hi = (hi << shift) | (lo >> (64 - shift));
1055 lo = hi | ((lo << shift) != 0);
1056 }
1057 p_exp -= shift - 2;
1058 }
1059 }
1060 }
1061
1062 if (flags & float_muladd_halve_result) {
1063 p_exp -= 1;
1064 }
1065
1066 /* finally prepare our result */
1067 a.cls = float_class_normal;
1068 a.sign = p_sign ^ sign_flip;
1069 a.exp = p_exp;
1070 a.frac = lo;
1071
1072 return a;
1073}
1074
1075float16 __attribute__((flatten)) float16_muladd(float16 a, float16 b, float16 c,
1076 int flags, float_status *status)
1077{
1078 FloatParts pa = float16_unpack_canonical(a, status);
1079 FloatParts pb = float16_unpack_canonical(b, status);
1080 FloatParts pc = float16_unpack_canonical(c, status);
1081 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1082
1083 return float16_round_pack_canonical(pr, status);
1084}
1085
1086float32 __attribute__((flatten)) float32_muladd(float32 a, float32 b, float32 c,
1087 int flags, float_status *status)
1088{
1089 FloatParts pa = float32_unpack_canonical(a, status);
1090 FloatParts pb = float32_unpack_canonical(b, status);
1091 FloatParts pc = float32_unpack_canonical(c, status);
1092 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1093
1094 return float32_round_pack_canonical(pr, status);
1095}
1096
1097float64 __attribute__((flatten)) float64_muladd(float64 a, float64 b, float64 c,
1098 int flags, float_status *status)
1099{
1100 FloatParts pa = float64_unpack_canonical(a, status);
1101 FloatParts pb = float64_unpack_canonical(b, status);
1102 FloatParts pc = float64_unpack_canonical(c, status);
1103 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1104
1105 return float64_round_pack_canonical(pr, status);
1106}
1107
cf07323d
AB
1108/*
1109 * Returns the result of dividing the floating-point value `a' by the
1110 * corresponding value `b'. The operation is performed according to
1111 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1112 */
1113
1114static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1115{
1116 bool sign = a.sign ^ b.sign;
1117
1118 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1119 uint64_t temp_lo, temp_hi;
1120 int exp = a.exp - b.exp;
1121 if (a.frac < b.frac) {
1122 exp -= 1;
1123 shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1,
1124 &temp_hi, &temp_lo);
1125 } else {
1126 shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT,
1127 &temp_hi, &temp_lo);
1128 }
1129 /* LSB of quot is set if inexact which roundandpack will use
1130 * to set flags. Yet again we re-use a for the result */
1131 a.frac = div128To64(temp_lo, temp_hi, b.frac);
1132 a.sign = sign;
1133 a.exp = exp;
1134 return a;
1135 }
1136 /* handle all the NaN cases */
1137 if (is_nan(a.cls) || is_nan(b.cls)) {
1138 return pick_nan(a, b, s);
1139 }
1140 /* 0/0 or Inf/Inf */
1141 if (a.cls == b.cls
1142 &&
1143 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1144 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1145 return parts_default_nan(s);
cf07323d 1146 }
9cb4e398
AB
1147 /* Inf / x or 0 / x */
1148 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1149 a.sign = sign;
1150 return a;
1151 }
cf07323d
AB
1152 /* Div 0 => Inf */
1153 if (b.cls == float_class_zero) {
1154 s->float_exception_flags |= float_flag_divbyzero;
1155 a.cls = float_class_inf;
1156 a.sign = sign;
1157 return a;
1158 }
cf07323d
AB
1159 /* Div by Inf */
1160 if (b.cls == float_class_inf) {
1161 a.cls = float_class_zero;
1162 a.sign = sign;
1163 return a;
1164 }
1165 g_assert_not_reached();
1166}
1167
1168float16 float16_div(float16 a, float16 b, float_status *status)
1169{
1170 FloatParts pa = float16_unpack_canonical(a, status);
1171 FloatParts pb = float16_unpack_canonical(b, status);
1172 FloatParts pr = div_floats(pa, pb, status);
1173
1174 return float16_round_pack_canonical(pr, status);
1175}
1176
1177float32 float32_div(float32 a, float32 b, float_status *status)
1178{
1179 FloatParts pa = float32_unpack_canonical(a, status);
1180 FloatParts pb = float32_unpack_canonical(b, status);
1181 FloatParts pr = div_floats(pa, pb, status);
1182
1183 return float32_round_pack_canonical(pr, status);
1184}
1185
1186float64 float64_div(float64 a, float64 b, float_status *status)
1187{
1188 FloatParts pa = float64_unpack_canonical(a, status);
1189 FloatParts pb = float64_unpack_canonical(b, status);
1190 FloatParts pr = div_floats(pa, pb, status);
1191
1192 return float64_round_pack_canonical(pr, status);
1193}
1194
6fed16b2
AB
1195/*
1196 * Float to Float conversions
1197 *
1198 * Returns the result of converting one float format to another. The
1199 * conversion is performed according to the IEC/IEEE Standard for
1200 * Binary Floating-Point Arithmetic.
1201 *
1202 * The float_to_float helper only needs to take care of raising
1203 * invalid exceptions and handling the conversion on NaNs.
1204 */
1205
1206static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1207 float_status *s)
1208{
1209 if (dstf->arm_althp) {
1210 switch (a.cls) {
1211 case float_class_qnan:
1212 case float_class_snan:
1213 /* There is no NaN in the destination format. Raise Invalid
1214 * and return a zero with the sign of the input NaN.
1215 */
1216 s->float_exception_flags |= float_flag_invalid;
1217 a.cls = float_class_zero;
1218 a.frac = 0;
1219 a.exp = 0;
1220 break;
1221
1222 case float_class_inf:
1223 /* There is no Inf in the destination format. Raise Invalid
1224 * and return the maximum normal with the correct sign.
1225 */
1226 s->float_exception_flags |= float_flag_invalid;
1227 a.cls = float_class_normal;
1228 a.exp = dstf->exp_max;
1229 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1230 break;
1231
1232 default:
1233 break;
1234 }
1235 } else if (is_nan(a.cls)) {
1236 if (is_snan(a.cls)) {
1237 s->float_exception_flags |= float_flag_invalid;
1238 a = parts_silence_nan(a, s);
1239 }
1240 if (s->default_nan_mode) {
1241 return parts_default_nan(s);
1242 }
1243 }
1244 return a;
1245}
1246
1247float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1248{
1249 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1250 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1251 FloatParts pr = float_to_float(p, &float32_params, s);
1252 return float32_round_pack_canonical(pr, s);
1253}
1254
1255float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1256{
1257 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1258 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1259 FloatParts pr = float_to_float(p, &float64_params, s);
1260 return float64_round_pack_canonical(pr, s);
1261}
1262
1263float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1264{
1265 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1266 FloatParts p = float32_unpack_canonical(a, s);
1267 FloatParts pr = float_to_float(p, fmt16, s);
1268 return float16a_round_pack_canonical(pr, s, fmt16);
1269}
1270
1271float64 float32_to_float64(float32 a, float_status *s)
1272{
1273 FloatParts p = float32_unpack_canonical(a, s);
1274 FloatParts pr = float_to_float(p, &float64_params, s);
1275 return float64_round_pack_canonical(pr, s);
1276}
1277
1278float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1279{
1280 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1281 FloatParts p = float64_unpack_canonical(a, s);
1282 FloatParts pr = float_to_float(p, fmt16, s);
1283 return float16a_round_pack_canonical(pr, s, fmt16);
1284}
1285
1286float32 float64_to_float32(float64 a, float_status *s)
1287{
1288 FloatParts p = float64_unpack_canonical(a, s);
1289 FloatParts pr = float_to_float(p, &float32_params, s);
1290 return float32_round_pack_canonical(pr, s);
1291}
1292
dbe4d53a
AB
1293/*
1294 * Rounds the floating-point value `a' to an integer, and returns the
1295 * result as a floating-point value. The operation is performed
1296 * according to the IEC/IEEE Standard for Binary Floating-Point
1297 * Arithmetic.
1298 */
1299
1300static FloatParts round_to_int(FloatParts a, int rounding_mode, float_status *s)
1301{
1302 if (is_nan(a.cls)) {
1303 return return_nan(a, s);
1304 }
1305
1306 switch (a.cls) {
1307 case float_class_zero:
1308 case float_class_inf:
1309 case float_class_qnan:
1310 /* already "integral" */
1311 break;
1312 case float_class_normal:
1313 if (a.exp >= DECOMPOSED_BINARY_POINT) {
1314 /* already integral */
1315 break;
1316 }
1317 if (a.exp < 0) {
1318 bool one;
1319 /* all fractional */
1320 s->float_exception_flags |= float_flag_inexact;
1321 switch (rounding_mode) {
1322 case float_round_nearest_even:
1323 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
1324 break;
1325 case float_round_ties_away:
1326 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
1327 break;
1328 case float_round_to_zero:
1329 one = false;
1330 break;
1331 case float_round_up:
1332 one = !a.sign;
1333 break;
1334 case float_round_down:
1335 one = a.sign;
1336 break;
1337 default:
1338 g_assert_not_reached();
1339 }
1340
1341 if (one) {
1342 a.frac = DECOMPOSED_IMPLICIT_BIT;
1343 a.exp = 0;
1344 } else {
1345 a.cls = float_class_zero;
1346 }
1347 } else {
1348 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
1349 uint64_t frac_lsbm1 = frac_lsb >> 1;
1350 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
1351 uint64_t rnd_mask = rnd_even_mask >> 1;
1352 uint64_t inc;
1353
1354 switch (rounding_mode) {
1355 case float_round_nearest_even:
1356 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
1357 break;
1358 case float_round_ties_away:
1359 inc = frac_lsbm1;
1360 break;
1361 case float_round_to_zero:
1362 inc = 0;
1363 break;
1364 case float_round_up:
1365 inc = a.sign ? 0 : rnd_mask;
1366 break;
1367 case float_round_down:
1368 inc = a.sign ? rnd_mask : 0;
1369 break;
1370 default:
1371 g_assert_not_reached();
1372 }
1373
1374 if (a.frac & rnd_mask) {
1375 s->float_exception_flags |= float_flag_inexact;
1376 a.frac += inc;
1377 a.frac &= ~rnd_mask;
1378 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
1379 a.frac >>= 1;
1380 a.exp++;
1381 }
1382 }
1383 }
1384 break;
1385 default:
1386 g_assert_not_reached();
1387 }
1388 return a;
1389}
1390
1391float16 float16_round_to_int(float16 a, float_status *s)
1392{
1393 FloatParts pa = float16_unpack_canonical(a, s);
1394 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1395 return float16_round_pack_canonical(pr, s);
1396}
1397
1398float32 float32_round_to_int(float32 a, float_status *s)
1399{
1400 FloatParts pa = float32_unpack_canonical(a, s);
1401 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1402 return float32_round_pack_canonical(pr, s);
1403}
1404
1405float64 float64_round_to_int(float64 a, float_status *s)
1406{
1407 FloatParts pa = float64_unpack_canonical(a, s);
1408 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1409 return float64_round_pack_canonical(pr, s);
1410}
1411
1412float64 float64_trunc_to_int(float64 a, float_status *s)
1413{
1414 FloatParts pa = float64_unpack_canonical(a, s);
1415 FloatParts pr = round_to_int(pa, float_round_to_zero, s);
1416 return float64_round_pack_canonical(pr, s);
1417}
1418
ab52f973
AB
1419/*
1420 * Returns the result of converting the floating-point value `a' to
1421 * the two's complement integer format. The conversion is performed
1422 * according to the IEC/IEEE Standard for Binary Floating-Point
1423 * Arithmetic---which means in particular that the conversion is
1424 * rounded according to the current rounding mode. If `a' is a NaN,
1425 * the largest positive integer is returned. Otherwise, if the
1426 * conversion overflows, the largest integer with the same sign as `a'
1427 * is returned.
1428*/
1429
1430static int64_t round_to_int_and_pack(FloatParts in, int rmode,
1431 int64_t min, int64_t max,
1432 float_status *s)
1433{
1434 uint64_t r;
1435 int orig_flags = get_float_exception_flags(s);
1436 FloatParts p = round_to_int(in, rmode, s);
1437
1438 switch (p.cls) {
1439 case float_class_snan:
1440 case float_class_qnan:
801bc563 1441 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1442 return max;
1443 case float_class_inf:
801bc563 1444 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1445 return p.sign ? min : max;
1446 case float_class_zero:
1447 return 0;
1448 case float_class_normal:
1449 if (p.exp < DECOMPOSED_BINARY_POINT) {
1450 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1451 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1452 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1453 } else {
1454 r = UINT64_MAX;
1455 }
1456 if (p.sign) {
33358375 1457 if (r <= -(uint64_t) min) {
ab52f973
AB
1458 return -r;
1459 } else {
1460 s->float_exception_flags = orig_flags | float_flag_invalid;
1461 return min;
1462 }
1463 } else {
33358375 1464 if (r <= max) {
ab52f973
AB
1465 return r;
1466 } else {
1467 s->float_exception_flags = orig_flags | float_flag_invalid;
1468 return max;
1469 }
1470 }
1471 default:
1472 g_assert_not_reached();
1473 }
1474}
1475
1476#define FLOAT_TO_INT(fsz, isz) \
1477int ## isz ## _t float ## fsz ## _to_int ## isz(float ## fsz a, \
1478 float_status *s) \
1479{ \
1480 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1481 return round_to_int_and_pack(p, s->float_rounding_mode, \
1482 INT ## isz ## _MIN, INT ## isz ## _MAX,\
1483 s); \
1484} \
1485 \
1486int ## isz ## _t float ## fsz ## _to_int ## isz ## _round_to_zero \
1487 (float ## fsz a, float_status *s) \
1488{ \
1489 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1490 return round_to_int_and_pack(p, float_round_to_zero, \
1491 INT ## isz ## _MIN, INT ## isz ## _MAX,\
1492 s); \
1493}
1494
1495FLOAT_TO_INT(16, 16)
1496FLOAT_TO_INT(16, 32)
1497FLOAT_TO_INT(16, 64)
1498
1499FLOAT_TO_INT(32, 16)
1500FLOAT_TO_INT(32, 32)
1501FLOAT_TO_INT(32, 64)
1502
1503FLOAT_TO_INT(64, 16)
1504FLOAT_TO_INT(64, 32)
1505FLOAT_TO_INT(64, 64)
1506
1507#undef FLOAT_TO_INT
1508
/*
 * Returns the result of converting the floating-point value `a' to
 * the unsigned integer format. The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest unsigned integer is returned. Otherwise, if the
 * conversion overflows, the largest unsigned integer is returned. If
 * `a' is negative, the value is rounded and zero is returned;
 * negative values that do not round to zero raise the invalid
 * exception flag.
 */
1521
1522static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, uint64_t max,
1523 float_status *s)
1524{
1525 int orig_flags = get_float_exception_flags(s);
1526 FloatParts p = round_to_int(in, rmode, s);
1527
1528 switch (p.cls) {
1529 case float_class_snan:
1530 case float_class_qnan:
1531 s->float_exception_flags = orig_flags | float_flag_invalid;
1532 return max;
1533 case float_class_inf:
801bc563 1534 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1535 return p.sign ? 0 : max;
1536 case float_class_zero:
1537 return 0;
1538 case float_class_normal:
1539 {
1540 uint64_t r;
1541 if (p.sign) {
1542 s->float_exception_flags = orig_flags | float_flag_invalid;
1543 return 0;
1544 }
1545
1546 if (p.exp < DECOMPOSED_BINARY_POINT) {
1547 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1548 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1549 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1550 } else {
1551 s->float_exception_flags = orig_flags | float_flag_invalid;
1552 return max;
1553 }
1554
1555 /* For uint64 this will never trip, but if p.exp is too large
1556 * to shift a decomposed fraction we shall have exited via the
1557 * 3rd leg above.
1558 */
1559 if (r > max) {
1560 s->float_exception_flags = orig_flags | float_flag_invalid;
1561 return max;
1562 } else {
1563 return r;
1564 }
1565 }
1566 default:
1567 g_assert_not_reached();
1568 }
1569}
1570
1571#define FLOAT_TO_UINT(fsz, isz) \
1572uint ## isz ## _t float ## fsz ## _to_uint ## isz(float ## fsz a, \
1573 float_status *s) \
1574{ \
1575 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1576 return round_to_uint_and_pack(p, s->float_rounding_mode, \
1577 UINT ## isz ## _MAX, s); \
1578} \
1579 \
1580uint ## isz ## _t float ## fsz ## _to_uint ## isz ## _round_to_zero \
1581 (float ## fsz a, float_status *s) \
1582{ \
1583 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
bd49e602
RH
1584 return round_to_uint_and_pack(p, float_round_to_zero, \
1585 UINT ## isz ## _MAX, s); \
ab52f973
AB
1586}
1587
1588FLOAT_TO_UINT(16, 16)
1589FLOAT_TO_UINT(16, 32)
1590FLOAT_TO_UINT(16, 64)
1591
1592FLOAT_TO_UINT(32, 16)
1593FLOAT_TO_UINT(32, 32)
1594FLOAT_TO_UINT(32, 64)
1595
1596FLOAT_TO_UINT(64, 16)
1597FLOAT_TO_UINT(64, 32)
1598FLOAT_TO_UINT(64, 64)
1599
1600#undef FLOAT_TO_UINT
1601
c02e1fb8
AB
1602/*
1603 * Integer to float conversions
1604 *
1605 * Returns the result of converting the two's complement integer `a'
1606 * to the floating-point format. The conversion is performed according
1607 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1608 */
1609
1610static FloatParts int_to_float(int64_t a, float_status *status)
1611{
a5a5f5e2 1612 FloatParts r = {};
c02e1fb8
AB
1613 if (a == 0) {
1614 r.cls = float_class_zero;
1615 r.sign = false;
1616 } else if (a == (1ULL << 63)) {
1617 r.cls = float_class_normal;
1618 r.sign = true;
1619 r.frac = DECOMPOSED_IMPLICIT_BIT;
1620 r.exp = 63;
1621 } else {
1622 uint64_t f;
1623 if (a < 0) {
1624 f = -a;
1625 r.sign = true;
1626 } else {
1627 f = a;
1628 r.sign = false;
1629 }
1630 int shift = clz64(f) - 1;
1631 r.cls = float_class_normal;
1632 r.exp = (DECOMPOSED_BINARY_POINT - shift);
1633 r.frac = f << shift;
1634 }
1635
1636 return r;
1637}
1638
1639float16 int64_to_float16(int64_t a, float_status *status)
1640{
1641 FloatParts pa = int_to_float(a, status);
1642 return float16_round_pack_canonical(pa, status);
1643}
1644
1645float16 int32_to_float16(int32_t a, float_status *status)
1646{
1647 return int64_to_float16(a, status);
1648}
1649
1650float16 int16_to_float16(int16_t a, float_status *status)
1651{
1652 return int64_to_float16(a, status);
1653}
1654
1655float32 int64_to_float32(int64_t a, float_status *status)
1656{
1657 FloatParts pa = int_to_float(a, status);
1658 return float32_round_pack_canonical(pa, status);
1659}
1660
1661float32 int32_to_float32(int32_t a, float_status *status)
1662{
1663 return int64_to_float32(a, status);
1664}
1665
1666float32 int16_to_float32(int16_t a, float_status *status)
1667{
1668 return int64_to_float32(a, status);
1669}
1670
1671float64 int64_to_float64(int64_t a, float_status *status)
1672{
1673 FloatParts pa = int_to_float(a, status);
1674 return float64_round_pack_canonical(pa, status);
1675}
1676
1677float64 int32_to_float64(int32_t a, float_status *status)
1678{
1679 return int64_to_float64(a, status);
1680}
1681
1682float64 int16_to_float64(int16_t a, float_status *status)
1683{
1684 return int64_to_float64(a, status);
1685}
1686
1687
1688/*
1689 * Unsigned Integer to float conversions
1690 *
1691 * Returns the result of converting the unsigned integer `a' to the
1692 * floating-point format. The conversion is performed according to the
1693 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1694 */
1695
1696static FloatParts uint_to_float(uint64_t a, float_status *status)
1697{
1698 FloatParts r = { .sign = false};
1699
1700 if (a == 0) {
1701 r.cls = float_class_zero;
1702 } else {
1703 int spare_bits = clz64(a) - 1;
1704 r.cls = float_class_normal;
1705 r.exp = DECOMPOSED_BINARY_POINT - spare_bits;
1706 if (spare_bits < 0) {
1707 shift64RightJamming(a, -spare_bits, &a);
1708 r.frac = a;
1709 } else {
1710 r.frac = a << spare_bits;
1711 }
1712 }
1713
1714 return r;
1715}
1716
1717float16 uint64_to_float16(uint64_t a, float_status *status)
1718{
1719 FloatParts pa = uint_to_float(a, status);
1720 return float16_round_pack_canonical(pa, status);
1721}
1722
1723float16 uint32_to_float16(uint32_t a, float_status *status)
1724{
1725 return uint64_to_float16(a, status);
1726}
1727
1728float16 uint16_to_float16(uint16_t a, float_status *status)
1729{
1730 return uint64_to_float16(a, status);
1731}
1732
1733float32 uint64_to_float32(uint64_t a, float_status *status)
1734{
1735 FloatParts pa = uint_to_float(a, status);
1736 return float32_round_pack_canonical(pa, status);
1737}
1738
1739float32 uint32_to_float32(uint32_t a, float_status *status)
1740{
1741 return uint64_to_float32(a, status);
1742}
1743
1744float32 uint16_to_float32(uint16_t a, float_status *status)
1745{
1746 return uint64_to_float32(a, status);
1747}
1748
1749float64 uint64_to_float64(uint64_t a, float_status *status)
1750{
1751 FloatParts pa = uint_to_float(a, status);
1752 return float64_round_pack_canonical(pa, status);
1753}
1754
1755float64 uint32_to_float64(uint32_t a, float_status *status)
1756{
1757 return uint64_to_float64(a, status);
1758}
1759
1760float64 uint16_to_float64(uint16_t a, float_status *status)
1761{
1762 return uint64_to_float64(a, status);
1763}
1764
89360067
AB
/* Float Min/Max */
/* min() and max() functions. These can't be implemented as
 * 'compare and pick one input' because that would mishandle
 * NaNs and +0 vs -0.
 *
 * minnum() and maxnum() functions. These are similar to the min()
 * and max() functions but if one of the arguments is a QNaN and
 * the other is numerical then the numerical argument is returned.
 * SNaNs will get quietened before being returned.
 * minnum() and maxnum() correspond to the IEEE 754-2008 minNum()
 * and maxNum() operations. min() and max() are the typical min/max
 * semantics provided by many CPUs which predate that specification.
 *
 * minnummag() and maxnummag() functions correspond to minNumMag()
 * and maxNumMag() from IEEE 754-2008.
 */
1781static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
1782 bool ieee, bool ismag, float_status *s)
1783{
1784 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
1785 if (ieee) {
1786 /* Takes two floating-point values `a' and `b', one of
1787 * which is a NaN, and returns the appropriate NaN
1788 * result. If either `a' or `b' is a signaling NaN,
1789 * the invalid exception is raised.
1790 */
1791 if (is_snan(a.cls) || is_snan(b.cls)) {
1792 return pick_nan(a, b, s);
1793 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
1794 return b;
1795 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
1796 return a;
1797 }
1798 }
1799 return pick_nan(a, b, s);
1800 } else {
1801 int a_exp, b_exp;
89360067
AB
1802
1803 switch (a.cls) {
1804 case float_class_normal:
1805 a_exp = a.exp;
1806 break;
1807 case float_class_inf:
1808 a_exp = INT_MAX;
1809 break;
1810 case float_class_zero:
1811 a_exp = INT_MIN;
1812 break;
1813 default:
1814 g_assert_not_reached();
1815 break;
1816 }
1817 switch (b.cls) {
1818 case float_class_normal:
1819 b_exp = b.exp;
1820 break;
1821 case float_class_inf:
1822 b_exp = INT_MAX;
1823 break;
1824 case float_class_zero:
1825 b_exp = INT_MIN;
1826 break;
1827 default:
1828 g_assert_not_reached();
1829 break;
1830 }
1831
6245327a
EC
1832 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
1833 bool a_less = a_exp < b_exp;
1834 if (a_exp == b_exp) {
1835 a_less = a.frac < b.frac;
1836 }
1837 return a_less ^ ismin ? b : a;
89360067
AB
1838 }
1839
6245327a 1840 if (a.sign == b.sign) {
89360067
AB
1841 bool a_less = a_exp < b_exp;
1842 if (a_exp == b_exp) {
1843 a_less = a.frac < b.frac;
1844 }
6245327a 1845 return a.sign ^ a_less ^ ismin ? b : a;
89360067 1846 } else {
6245327a 1847 return a.sign ^ ismin ? b : a;
89360067
AB
1848 }
1849 }
1850}
1851
1852#define MINMAX(sz, name, ismin, isiee, ismag) \
1853float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
1854 float_status *s) \
1855{ \
1856 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1857 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1858 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
1859 \
1860 return float ## sz ## _round_pack_canonical(pr, s); \
1861}
1862
1863MINMAX(16, min, true, false, false)
1864MINMAX(16, minnum, true, true, false)
1865MINMAX(16, minnummag, true, true, true)
1866MINMAX(16, max, false, false, false)
1867MINMAX(16, maxnum, false, true, false)
1868MINMAX(16, maxnummag, false, true, true)
1869
1870MINMAX(32, min, true, false, false)
1871MINMAX(32, minnum, true, true, false)
1872MINMAX(32, minnummag, true, true, true)
1873MINMAX(32, max, false, false, false)
1874MINMAX(32, maxnum, false, true, false)
1875MINMAX(32, maxnummag, false, true, true)
1876
1877MINMAX(64, min, true, false, false)
1878MINMAX(64, minnum, true, true, false)
1879MINMAX(64, minnummag, true, true, true)
1880MINMAX(64, max, false, false, false)
1881MINMAX(64, maxnum, false, true, false)
1882MINMAX(64, maxnummag, false, true, true)
1883
1884#undef MINMAX
1885
0c4c9092
AB
1886/* Floating point compare */
1887static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
1888 float_status *s)
1889{
1890 if (is_nan(a.cls) || is_nan(b.cls)) {
1891 if (!is_quiet ||
1892 a.cls == float_class_snan ||
1893 b.cls == float_class_snan) {
1894 s->float_exception_flags |= float_flag_invalid;
1895 }
1896 return float_relation_unordered;
1897 }
1898
1899 if (a.cls == float_class_zero) {
1900 if (b.cls == float_class_zero) {
1901 return float_relation_equal;
1902 }
1903 return b.sign ? float_relation_greater : float_relation_less;
1904 } else if (b.cls == float_class_zero) {
1905 return a.sign ? float_relation_less : float_relation_greater;
1906 }
1907
1908 /* The only really important thing about infinity is its sign. If
1909 * both are infinities the sign marks the smallest of the two.
1910 */
1911 if (a.cls == float_class_inf) {
1912 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
1913 return float_relation_equal;
1914 }
1915 return a.sign ? float_relation_less : float_relation_greater;
1916 } else if (b.cls == float_class_inf) {
1917 return b.sign ? float_relation_greater : float_relation_less;
1918 }
1919
1920 if (a.sign != b.sign) {
1921 return a.sign ? float_relation_less : float_relation_greater;
1922 }
1923
1924 if (a.exp == b.exp) {
1925 if (a.frac == b.frac) {
1926 return float_relation_equal;
1927 }
1928 if (a.sign) {
1929 return a.frac > b.frac ?
1930 float_relation_less : float_relation_greater;
1931 } else {
1932 return a.frac > b.frac ?
1933 float_relation_greater : float_relation_less;
1934 }
1935 } else {
1936 if (a.sign) {
1937 return a.exp > b.exp ? float_relation_less : float_relation_greater;
1938 } else {
1939 return a.exp > b.exp ? float_relation_greater : float_relation_less;
1940 }
1941 }
1942}
1943
1944#define COMPARE(sz) \
1945int float ## sz ## _compare(float ## sz a, float ## sz b, \
1946 float_status *s) \
1947{ \
1948 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1949 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1950 return compare_floats(pa, pb, false, s); \
1951} \
1952int float ## sz ## _compare_quiet(float ## sz a, float ## sz b, \
1953 float_status *s) \
1954{ \
1955 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1956 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1957 return compare_floats(pa, pb, true, s); \
1958}
1959
1960COMPARE(16)
1961COMPARE(32)
1962COMPARE(64)
1963
1964#undef COMPARE
1965
0bfc9f19
AB
1966/* Multiply A by 2 raised to the power N. */
1967static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
1968{
1969 if (unlikely(is_nan(a.cls))) {
1970 return return_nan(a, s);
1971 }
1972 if (a.cls == float_class_normal) {
ce8d4082
RH
1973 /* The largest float type (even though not supported by FloatParts)
1974 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
1975 * still allows rounding to infinity, without allowing overflow
1976 * within the int32_t that backs FloatParts.exp.
1977 */
1978 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
1979 a.exp += n;
1980 }
1981 return a;
1982}
1983
1984float16 float16_scalbn(float16 a, int n, float_status *status)
1985{
1986 FloatParts pa = float16_unpack_canonical(a, status);
1987 FloatParts pr = scalbn_decomposed(pa, n, status);
1988 return float16_round_pack_canonical(pr, status);
1989}
1990
1991float32 float32_scalbn(float32 a, int n, float_status *status)
1992{
1993 FloatParts pa = float32_unpack_canonical(a, status);
1994 FloatParts pr = scalbn_decomposed(pa, n, status);
1995 return float32_round_pack_canonical(pr, status);
1996}
1997
1998float64 float64_scalbn(float64 a, int n, float_status *status)
1999{
2000 FloatParts pa = float64_unpack_canonical(a, status);
2001 FloatParts pr = scalbn_decomposed(pa, n, status);
2002 return float64_round_pack_canonical(pr, status);
2003}
2004
c13bb2da
AB
2005/*
2006 * Square Root
2007 *
2008 * The old softfloat code did an approximation step before zeroing in
2009 * on the final result. However for simpleness we just compute the
2010 * square root by iterating down from the implicit bit to enough extra
2011 * bits to ensure we get a correctly rounded result.
2012 *
2013 * This does mean however the calculation is slower than before,
2014 * especially for 64 bit floats.
2015 */
2016
2017static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
2018{
2019 uint64_t a_frac, r_frac, s_frac;
2020 int bit, last_bit;
2021
2022 if (is_nan(a.cls)) {
2023 return return_nan(a, s);
2024 }
2025 if (a.cls == float_class_zero) {
2026 return a; /* sqrt(+-0) = +-0 */
2027 }
2028 if (a.sign) {
2029 s->float_exception_flags |= float_flag_invalid;
f7e598e2 2030 return parts_default_nan(s);
c13bb2da
AB
2031 }
2032 if (a.cls == float_class_inf) {
2033 return a; /* sqrt(+inf) = +inf */
2034 }
2035
2036 assert(a.cls == float_class_normal);
2037
2038 /* We need two overflow bits at the top. Adding room for that is a
2039 * right shift. If the exponent is odd, we can discard the low bit
2040 * by multiplying the fraction by 2; that's a left shift. Combine
2041 * those and we shift right if the exponent is even.
2042 */
2043 a_frac = a.frac;
2044 if (!(a.exp & 1)) {
2045 a_frac >>= 1;
2046 }
2047 a.exp >>= 1;
2048
2049 /* Bit-by-bit computation of sqrt. */
2050 r_frac = 0;
2051 s_frac = 0;
2052
2053 /* Iterate from implicit bit down to the 3 extra bits to compute a
2054 * properly rounded result. Remember we've inserted one more bit
2055 * at the top, so these positions are one less.
2056 */
2057 bit = DECOMPOSED_BINARY_POINT - 1;
2058 last_bit = MAX(p->frac_shift - 4, 0);
2059 do {
2060 uint64_t q = 1ULL << bit;
2061 uint64_t t_frac = s_frac + q;
2062 if (t_frac <= a_frac) {
2063 s_frac = t_frac + q;
2064 a_frac -= t_frac;
2065 r_frac += q;
2066 }
2067 a_frac <<= 1;
2068 } while (--bit >= last_bit);
2069
2070 /* Undo the right shift done above. If there is any remaining
2071 * fraction, the result is inexact. Set the sticky bit.
2072 */
2073 a.frac = (r_frac << 1) + (a_frac != 0);
2074
2075 return a;
2076}
2077
2078float16 __attribute__((flatten)) float16_sqrt(float16 a, float_status *status)
2079{
2080 FloatParts pa = float16_unpack_canonical(a, status);
2081 FloatParts pr = sqrt_float(pa, status, &float16_params);
2082 return float16_round_pack_canonical(pr, status);
2083}
2084
2085float32 __attribute__((flatten)) float32_sqrt(float32 a, float_status *status)
2086{
2087 FloatParts pa = float32_unpack_canonical(a, status);
2088 FloatParts pr = sqrt_float(pa, status, &float32_params);
2089 return float32_round_pack_canonical(pr, status);
2090}
2091
2092float64 __attribute__((flatten)) float64_sqrt(float64 a, float_status *status)
2093{
2094 FloatParts pa = float64_unpack_canonical(a, status);
2095 FloatParts pr = sqrt_float(pa, status, &float64_params);
2096 return float64_round_pack_canonical(pr, status);
2097}
2098
2099
158142c2
FB
2100/*----------------------------------------------------------------------------
2101| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
2102| and 7, and returns the properly rounded 32-bit integer corresponding to the
2103| input. If `zSign' is 1, the input is negated before being converted to an
2104| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
2105| is simply rounded to an integer, with the inexact exception raised if the
2106| input cannot be represented exactly as an integer. However, if the fixed-
2107| point input is too large, the invalid exception is raised and the largest
2108| positive or negative integer is returned.
2109*----------------------------------------------------------------------------*/
2110
f4014512 2111static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
158142c2 2112{
8f506c70 2113 int8_t roundingMode;
158142c2 2114 flag roundNearestEven;
8f506c70 2115 int8_t roundIncrement, roundBits;
760e1416 2116 int32_t z;
158142c2 2117
a2f2d288 2118 roundingMode = status->float_rounding_mode;
158142c2 2119 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2120 switch (roundingMode) {
2121 case float_round_nearest_even:
f9288a76 2122 case float_round_ties_away:
dc355b76
PM
2123 roundIncrement = 0x40;
2124 break;
2125 case float_round_to_zero:
2126 roundIncrement = 0;
2127 break;
2128 case float_round_up:
2129 roundIncrement = zSign ? 0 : 0x7f;
2130 break;
2131 case float_round_down:
2132 roundIncrement = zSign ? 0x7f : 0;
2133 break;
2134 default:
2135 abort();
158142c2
FB
2136 }
2137 roundBits = absZ & 0x7F;
2138 absZ = ( absZ + roundIncrement )>>7;
2139 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2140 z = absZ;
2141 if ( zSign ) z = - z;
2142 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 2143 float_raise(float_flag_invalid, status);
bb98fe42 2144 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2 2145 }
a2f2d288
PM
2146 if (roundBits) {
2147 status->float_exception_flags |= float_flag_inexact;
2148 }
158142c2
FB
2149 return z;
2150
2151}
2152
2153/*----------------------------------------------------------------------------
2154| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2155| `absZ1', with binary point between bits 63 and 64 (between the input words),
2156| and returns the properly rounded 64-bit integer corresponding to the input.
2157| If `zSign' is 1, the input is negated before being converted to an integer.
2158| Ordinarily, the fixed-point input is simply rounded to an integer, with
2159| the inexact exception raised if the input cannot be represented exactly as
2160| an integer. However, if the fixed-point input is too large, the invalid
2161| exception is raised and the largest positive or negative integer is
2162| returned.
2163*----------------------------------------------------------------------------*/
2164
f42c2224 2165static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 2166 float_status *status)
158142c2 2167{
8f506c70 2168 int8_t roundingMode;
158142c2 2169 flag roundNearestEven, increment;
760e1416 2170 int64_t z;
158142c2 2171
a2f2d288 2172 roundingMode = status->float_rounding_mode;
158142c2 2173 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2174 switch (roundingMode) {
2175 case float_round_nearest_even:
f9288a76 2176 case float_round_ties_away:
dc355b76
PM
2177 increment = ((int64_t) absZ1 < 0);
2178 break;
2179 case float_round_to_zero:
2180 increment = 0;
2181 break;
2182 case float_round_up:
2183 increment = !zSign && absZ1;
2184 break;
2185 case float_round_down:
2186 increment = zSign && absZ1;
2187 break;
2188 default:
2189 abort();
158142c2
FB
2190 }
2191 if ( increment ) {
2192 ++absZ0;
2193 if ( absZ0 == 0 ) goto overflow;
bb98fe42 2194 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
2195 }
2196 z = absZ0;
2197 if ( zSign ) z = - z;
2198 if ( z && ( ( z < 0 ) ^ zSign ) ) {
2199 overflow:
ff32e16e 2200 float_raise(float_flag_invalid, status);
158142c2 2201 return
bb98fe42 2202 zSign ? (int64_t) LIT64( 0x8000000000000000 )
158142c2
FB
2203 : LIT64( 0x7FFFFFFFFFFFFFFF );
2204 }
a2f2d288
PM
2205 if (absZ1) {
2206 status->float_exception_flags |= float_flag_inexact;
2207 }
158142c2
FB
2208 return z;
2209
2210}
2211
fb3ea83a
TM
2212/*----------------------------------------------------------------------------
2213| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2214| `absZ1', with binary point between bits 63 and 64 (between the input words),
2215| and returns the properly rounded 64-bit unsigned integer corresponding to the
2216| input. Ordinarily, the fixed-point input is simply rounded to an integer,
2217| with the inexact exception raised if the input cannot be represented exactly
2218| as an integer. However, if the fixed-point input is too large, the invalid
2219| exception is raised and the largest unsigned integer is returned.
2220*----------------------------------------------------------------------------*/
2221
f42c2224 2222static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
e5a41ffa 2223 uint64_t absZ1, float_status *status)
fb3ea83a 2224{
8f506c70 2225 int8_t roundingMode;
fb3ea83a
TM
2226 flag roundNearestEven, increment;
2227
a2f2d288 2228 roundingMode = status->float_rounding_mode;
fb3ea83a 2229 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
2230 switch (roundingMode) {
2231 case float_round_nearest_even:
f9288a76 2232 case float_round_ties_away:
dc355b76
PM
2233 increment = ((int64_t)absZ1 < 0);
2234 break;
2235 case float_round_to_zero:
2236 increment = 0;
2237 break;
2238 case float_round_up:
2239 increment = !zSign && absZ1;
2240 break;
2241 case float_round_down:
2242 increment = zSign && absZ1;
2243 break;
2244 default:
2245 abort();
fb3ea83a
TM
2246 }
2247 if (increment) {
2248 ++absZ0;
2249 if (absZ0 == 0) {
ff32e16e 2250 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2251 return LIT64(0xFFFFFFFFFFFFFFFF);
2252 }
2253 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
2254 }
2255
2256 if (zSign && absZ0) {
ff32e16e 2257 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2258 return 0;
2259 }
2260
2261 if (absZ1) {
a2f2d288 2262 status->float_exception_flags |= float_flag_inexact;
fb3ea83a
TM
2263 }
2264 return absZ0;
2265}
2266
37d18660
PM
2267/*----------------------------------------------------------------------------
2268| If `a' is denormal and we are in flush-to-zero mode then set the
2269| input-denormal exception and return zero. Otherwise just return the value.
2270*----------------------------------------------------------------------------*/
e5a41ffa 2271float32 float32_squash_input_denormal(float32 a, float_status *status)
37d18660 2272{
a2f2d288 2273 if (status->flush_inputs_to_zero) {
37d18660 2274 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
ff32e16e 2275 float_raise(float_flag_input_denormal, status);
37d18660
PM
2276 return make_float32(float32_val(a) & 0x80000000);
2277 }
2278 }
2279 return a;
2280}
2281
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision floating-point value represented
| by the denormalized significand `aSig'.  The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* Left shift is the leading-zero count minus 8. */
    const int8_t lshift = countLeadingZeros32(aSig) - 8;

    *zSigPtr = aSig << lshift;
    *zExpPtr = 1 - lshift;
}
2299
158142c2
FB
2300/*----------------------------------------------------------------------------
2301| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2302| and significand `zSig', and returns the proper single-precision floating-
2303| point value corresponding to the abstract input. Ordinarily, the abstract
2304| value is simply rounded and packed into the single-precision format, with
2305| the inexact exception raised if the abstract input cannot be represented
2306| exactly. However, if the abstract value is too large, the overflow and
2307| inexact exceptions are raised and an infinity or maximal finite value is
2308| returned. If the abstract value is too small, the input value is rounded to
2309| a subnormal number, and the underflow and inexact exceptions are raised if
2310| the abstract input cannot be represented exactly as a subnormal single-
2311| precision floating-point number.
2312| The input significand `zSig' has its binary point between bits 30
2313| and 29, which is 7 bits to the left of the usual location. This shifted
2314| significand must be normalized or smaller. If `zSig' is not normalized,
2315| `zExp' must be 0; in that case, the result returned is a subnormal number,
2316| and it must not require rounding. In the usual case that `zSig' is
2317| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2318| The handling of underflow and overflow follows the IEC/IEEE Standard for
2319| Binary Floating-Point Arithmetic.
2320*----------------------------------------------------------------------------*/
2321
0c48262d 2322static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2323 float_status *status)
158142c2 2324{
8f506c70 2325 int8_t roundingMode;
158142c2 2326 flag roundNearestEven;
8f506c70 2327 int8_t roundIncrement, roundBits;
158142c2
FB
2328 flag isTiny;
2329
a2f2d288 2330 roundingMode = status->float_rounding_mode;
158142c2 2331 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2332 switch (roundingMode) {
2333 case float_round_nearest_even:
f9288a76 2334 case float_round_ties_away:
dc355b76
PM
2335 roundIncrement = 0x40;
2336 break;
2337 case float_round_to_zero:
2338 roundIncrement = 0;
2339 break;
2340 case float_round_up:
2341 roundIncrement = zSign ? 0 : 0x7f;
2342 break;
2343 case float_round_down:
2344 roundIncrement = zSign ? 0x7f : 0;
2345 break;
2346 default:
2347 abort();
2348 break;
158142c2
FB
2349 }
2350 roundBits = zSig & 0x7F;
bb98fe42 2351 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
2352 if ( ( 0xFD < zExp )
2353 || ( ( zExp == 0xFD )
bb98fe42 2354 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2355 ) {
ff32e16e 2356 float_raise(float_flag_overflow | float_flag_inexact, status);
f090c9d4 2357 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
158142c2
FB
2358 }
2359 if ( zExp < 0 ) {
a2f2d288 2360 if (status->flush_to_zero) {
ff32e16e 2361 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2362 return packFloat32(zSign, 0, 0);
2363 }
158142c2 2364 isTiny =
a2f2d288
PM
2365 (status->float_detect_tininess
2366 == float_tininess_before_rounding)
158142c2
FB
2367 || ( zExp < -1 )
2368 || ( zSig + roundIncrement < 0x80000000 );
2369 shift32RightJamming( zSig, - zExp, &zSig );
2370 zExp = 0;
2371 roundBits = zSig & 0x7F;
ff32e16e
PM
2372 if (isTiny && roundBits) {
2373 float_raise(float_flag_underflow, status);
2374 }
158142c2
FB
2375 }
2376 }
a2f2d288
PM
2377 if (roundBits) {
2378 status->float_exception_flags |= float_flag_inexact;
2379 }
158142c2
FB
2380 zSig = ( zSig + roundIncrement )>>7;
2381 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2382 if ( zSig == 0 ) zExp = 0;
2383 return packFloat32( zSign, zExp, zSig );
2384
2385}
2386
2387/*----------------------------------------------------------------------------
2388| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2389| and significand `zSig', and returns the proper single-precision floating-
2390| point value corresponding to the abstract input. This routine is just like
2391| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
2392| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2393| floating-point exponent.
2394*----------------------------------------------------------------------------*/
2395
2396static float32
0c48262d 2397 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2398 float_status *status)
158142c2 2399{
8f506c70 2400 int8_t shiftCount;
158142c2
FB
2401
2402 shiftCount = countLeadingZeros32( zSig ) - 1;
ff32e16e
PM
2403 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
2404 status);
158142c2
FB
2405
2406}
2407
37d18660
PM
2408/*----------------------------------------------------------------------------
2409| If `a' is denormal and we are in flush-to-zero mode then set the
2410| input-denormal exception and return zero. Otherwise just return the value.
2411*----------------------------------------------------------------------------*/
e5a41ffa 2412float64 float64_squash_input_denormal(float64 a, float_status *status)
37d18660 2413{
a2f2d288 2414 if (status->flush_inputs_to_zero) {
37d18660 2415 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
ff32e16e 2416 float_raise(float_flag_input_denormal, status);
37d18660
PM
2417 return make_float64(float64_val(a) & (1ULL << 63));
2418 }
2419 }
2420 return a;
2421}
2422
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision floating-point value represented
| by the denormalized significand `aSig'.  The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* Left shift is the leading-zero count minus 11. */
    const int8_t lshift = countLeadingZeros64(aSig) - 11;

    *zSigPtr = aSig << lshift;
    *zExpPtr = 1 - lshift;
}
2440
2441/*----------------------------------------------------------------------------
2442| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2443| double-precision floating-point value, returning the result. After being
2444| shifted into the proper positions, the three fields are simply added
2445| together to form the result. This means that any integer portion of `zSig'
2446| will be added into the exponent. Since a properly normalized significand
2447| will have an integer portion equal to 1, the `zExp' input should be 1 less
2448| than the desired result exponent whenever `zSig' is a complete, normalized
2449| significand.
2450*----------------------------------------------------------------------------*/
2451
0c48262d 2452static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
158142c2
FB
2453{
2454
f090c9d4 2455 return make_float64(
bb98fe42 2456 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
2457
2458}
2459
2460/*----------------------------------------------------------------------------
2461| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2462| and significand `zSig', and returns the proper double-precision floating-
2463| point value corresponding to the abstract input. Ordinarily, the abstract
2464| value is simply rounded and packed into the double-precision format, with
2465| the inexact exception raised if the abstract input cannot be represented
2466| exactly. However, if the abstract value is too large, the overflow and
2467| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
2468| returned. If the abstract value is too small, the input value is rounded to
2469| a subnormal number, and the underflow and inexact exceptions are raised if
2470| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
2471| precision floating-point number.
2472| The input significand `zSig' has its binary point between bits 62
2473| and 61, which is 10 bits to the left of the usual location. This shifted
2474| significand must be normalized or smaller. If `zSig' is not normalized,
2475| `zExp' must be 0; in that case, the result returned is a subnormal number,
2476| and it must not require rounding. In the usual case that `zSig' is
2477| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2478| The handling of underflow and overflow follows the IEC/IEEE Standard for
2479| Binary Floating-Point Arithmetic.
2480*----------------------------------------------------------------------------*/
2481
0c48262d 2482static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2483 float_status *status)
158142c2 2484{
8f506c70 2485 int8_t roundingMode;
158142c2 2486 flag roundNearestEven;
0c48262d 2487 int roundIncrement, roundBits;
158142c2
FB
2488 flag isTiny;
2489
a2f2d288 2490 roundingMode = status->float_rounding_mode;
158142c2 2491 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2492 switch (roundingMode) {
2493 case float_round_nearest_even:
f9288a76 2494 case float_round_ties_away:
dc355b76
PM
2495 roundIncrement = 0x200;
2496 break;
2497 case float_round_to_zero:
2498 roundIncrement = 0;
2499 break;
2500 case float_round_up:
2501 roundIncrement = zSign ? 0 : 0x3ff;
2502 break;
2503 case float_round_down:
2504 roundIncrement = zSign ? 0x3ff : 0;
2505 break;
9ee6f678
BR
2506 case float_round_to_odd:
2507 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2508 break;
dc355b76
PM
2509 default:
2510 abort();
158142c2
FB
2511 }
2512 roundBits = zSig & 0x3FF;
bb98fe42 2513 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
2514 if ( ( 0x7FD < zExp )
2515 || ( ( zExp == 0x7FD )
bb98fe42 2516 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2517 ) {
9ee6f678
BR
2518 bool overflow_to_inf = roundingMode != float_round_to_odd &&
2519 roundIncrement != 0;
ff32e16e 2520 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 2521 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
2522 }
2523 if ( zExp < 0 ) {
a2f2d288 2524 if (status->flush_to_zero) {
ff32e16e 2525 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2526 return packFloat64(zSign, 0, 0);
2527 }
158142c2 2528 isTiny =
a2f2d288
PM
2529 (status->float_detect_tininess
2530 == float_tininess_before_rounding)
158142c2
FB
2531 || ( zExp < -1 )
2532 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
2533 shift64RightJamming( zSig, - zExp, &zSig );
2534 zExp = 0;
2535 roundBits = zSig & 0x3FF;
ff32e16e
PM
2536 if (isTiny && roundBits) {
2537 float_raise(float_flag_underflow, status);
2538 }
9ee6f678
BR
2539 if (roundingMode == float_round_to_odd) {
2540 /*
2541 * For round-to-odd case, the roundIncrement depends on
2542 * zSig which just changed.
2543 */
2544 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2545 }
158142c2
FB
2546 }
2547 }
a2f2d288
PM
2548 if (roundBits) {
2549 status->float_exception_flags |= float_flag_inexact;
2550 }
158142c2
FB
2551 zSig = ( zSig + roundIncrement )>>10;
2552 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
2553 if ( zSig == 0 ) zExp = 0;
2554 return packFloat64( zSign, zExp, zSig );
2555
2556}
2557
2558/*----------------------------------------------------------------------------
2559| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2560| and significand `zSig', and returns the proper double-precision floating-
2561| point value corresponding to the abstract input. This routine is just like
2562| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
2563| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2564| floating-point exponent.
2565*----------------------------------------------------------------------------*/
2566
2567static float64
0c48262d 2568 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2569 float_status *status)
158142c2 2570{
8f506c70 2571 int8_t shiftCount;
158142c2
FB
2572
2573 shiftCount = countLeadingZeros64( zSig ) - 1;
ff32e16e
PM
2574 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
2575 status);
158142c2
FB
2576
2577}
2578
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision floating-point value
| represented by the denormalized significand `aSig'.  The normalized exponent
| and significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* Shift the leading one all the way up to bit 63. */
    const int8_t lshift = countLeadingZeros64(aSig);

    *zSigPtr = aSig << lshift;
    *zExpPtr = 1 - lshift;
}
2595
2596/*----------------------------------------------------------------------------
2597| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2598| and extended significand formed by the concatenation of `zSig0' and `zSig1',
2599| and returns the proper extended double-precision floating-point value
2600| corresponding to the abstract input. Ordinarily, the abstract value is
2601| rounded and packed into the extended double-precision format, with the
2602| inexact exception raised if the abstract input cannot be represented
2603| exactly. However, if the abstract value is too large, the overflow and
2604| inexact exceptions are raised and an infinity or maximal finite value is
2605| returned. If the abstract value is too small, the input value is rounded to
2606| a subnormal number, and the underflow and inexact exceptions are raised if
2607| the abstract input cannot be represented exactly as a subnormal extended
2608| double-precision floating-point number.
2609| If `roundingPrecision' is 32 or 64, the result is rounded to the same
2610| number of bits as single or double precision, respectively. Otherwise, the
2611| result is rounded to the full precision of the extended double-precision
2612| format.
2613| The input significand must be normalized or smaller. If the input
2614| significand is not normalized, `zExp' must be 0; in that case, the result
2615| returned is a subnormal number, and it must not require rounding. The
2616| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
2617| Floating-Point Arithmetic.
2618*----------------------------------------------------------------------------*/
2619
88857aca
LV
2620floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
2621 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
2622 float_status *status)
158142c2 2623{
8f506c70 2624 int8_t roundingMode;
158142c2 2625 flag roundNearestEven, increment, isTiny;
f42c2224 2626 int64_t roundIncrement, roundMask, roundBits;
158142c2 2627
a2f2d288 2628 roundingMode = status->float_rounding_mode;
158142c2
FB
2629 roundNearestEven = ( roundingMode == float_round_nearest_even );
2630 if ( roundingPrecision == 80 ) goto precision80;
2631 if ( roundingPrecision == 64 ) {
2632 roundIncrement = LIT64( 0x0000000000000400 );
2633 roundMask = LIT64( 0x00000000000007FF );
2634 }
2635 else if ( roundingPrecision == 32 ) {
2636 roundIncrement = LIT64( 0x0000008000000000 );
2637 roundMask = LIT64( 0x000000FFFFFFFFFF );
2638 }
2639 else {
2640 goto precision80;
2641 }
2642 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
2643 switch (roundingMode) {
2644 case float_round_nearest_even:
f9288a76 2645 case float_round_ties_away:
dc355b76
PM
2646 break;
2647 case float_round_to_zero:
2648 roundIncrement = 0;
2649 break;
2650 case float_round_up:
2651 roundIncrement = zSign ? 0 : roundMask;
2652 break;
2653 case float_round_down:
2654 roundIncrement = zSign ? roundMask : 0;
2655 break;
2656 default:
2657 abort();
158142c2
FB
2658 }
2659 roundBits = zSig0 & roundMask;
bb98fe42 2660 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
2661 if ( ( 0x7FFE < zExp )
2662 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
2663 ) {
2664 goto overflow;
2665 }
2666 if ( zExp <= 0 ) {
a2f2d288 2667 if (status->flush_to_zero) {
ff32e16e 2668 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2669 return packFloatx80(zSign, 0, 0);
2670 }
158142c2 2671 isTiny =
a2f2d288
PM
2672 (status->float_detect_tininess
2673 == float_tininess_before_rounding)
158142c2
FB
2674 || ( zExp < 0 )
2675 || ( zSig0 <= zSig0 + roundIncrement );
2676 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
2677 zExp = 0;
2678 roundBits = zSig0 & roundMask;
ff32e16e
PM
2679 if (isTiny && roundBits) {
2680 float_raise(float_flag_underflow, status);
2681 }
a2f2d288
PM
2682 if (roundBits) {
2683 status->float_exception_flags |= float_flag_inexact;
2684 }
158142c2 2685 zSig0 += roundIncrement;
bb98fe42 2686 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
2687 roundIncrement = roundMask + 1;
2688 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
2689 roundMask |= roundIncrement;
2690 }
2691 zSig0 &= ~ roundMask;
2692 return packFloatx80( zSign, zExp, zSig0 );
2693 }
2694 }
a2f2d288
PM
2695 if (roundBits) {
2696 status->float_exception_flags |= float_flag_inexact;
2697 }
158142c2
FB
2698 zSig0 += roundIncrement;
2699 if ( zSig0 < roundIncrement ) {
2700 ++zExp;
2701 zSig0 = LIT64( 0x8000000000000000 );
2702 }
2703 roundIncrement = roundMask + 1;
2704 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
2705 roundMask |= roundIncrement;
2706 }
2707 zSig0 &= ~ roundMask;
2708 if ( zSig0 == 0 ) zExp = 0;
2709 return packFloatx80( zSign, zExp, zSig0 );
2710 precision80:
dc355b76
PM
2711 switch (roundingMode) {
2712 case float_round_nearest_even:
f9288a76 2713 case float_round_ties_away:
dc355b76
PM
2714 increment = ((int64_t)zSig1 < 0);
2715 break;
2716 case float_round_to_zero:
2717 increment = 0;
2718 break;
2719 case float_round_up:
2720 increment = !zSign && zSig1;
2721 break;
2722 case float_round_down:
2723 increment = zSign && zSig1;
2724 break;
2725 default:
2726 abort();
158142c2 2727 }
bb98fe42 2728 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
2729 if ( ( 0x7FFE < zExp )
2730 || ( ( zExp == 0x7FFE )
2731 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
2732 && increment
2733 )
2734 ) {
2735 roundMask = 0;
2736 overflow:
ff32e16e 2737 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
2738 if ( ( roundingMode == float_round_to_zero )
2739 || ( zSign && ( roundingMode == float_round_up ) )
2740 || ( ! zSign && ( roundingMode == float_round_down ) )
2741 ) {
2742 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
2743 }
0f605c88
LV
2744 return packFloatx80(zSign,
2745 floatx80_infinity_high,
2746 floatx80_infinity_low);
158142c2
FB
2747 }
2748 if ( zExp <= 0 ) {
2749 isTiny =
a2f2d288
PM
2750 (status->float_detect_tininess
2751 == float_tininess_before_rounding)
158142c2
FB
2752 || ( zExp < 0 )
2753 || ! increment
2754 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
2755 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
2756 zExp = 0;
ff32e16e
PM
2757 if (isTiny && zSig1) {
2758 float_raise(float_flag_underflow, status);
2759 }
a2f2d288
PM
2760 if (zSig1) {
2761 status->float_exception_flags |= float_flag_inexact;
2762 }
dc355b76
PM
2763 switch (roundingMode) {
2764 case float_round_nearest_even:
f9288a76 2765 case float_round_ties_away:
dc355b76
PM
2766 increment = ((int64_t)zSig1 < 0);
2767 break;
2768 case float_round_to_zero:
2769 increment = 0;
2770 break;
2771 case float_round_up:
2772 increment = !zSign && zSig1;
2773 break;
2774 case float_round_down:
2775 increment = zSign && zSig1;
2776 break;
2777 default:
2778 abort();
158142c2
FB
2779 }
2780 if ( increment ) {
2781 ++zSig0;
2782 zSig0 &=
bb98fe42
AF
2783 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
2784 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
2785 }
2786 return packFloatx80( zSign, zExp, zSig0 );
2787 }
2788 }
a2f2d288
PM
2789 if (zSig1) {
2790 status->float_exception_flags |= float_flag_inexact;
2791 }
158142c2
FB
2792 if ( increment ) {
2793 ++zSig0;
2794 if ( zSig0 == 0 ) {
2795 ++zExp;
2796 zSig0 = LIT64( 0x8000000000000000 );
2797 }
2798 else {
bb98fe42 2799 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
2800 }
2801 }
2802 else {
2803 if ( zSig0 == 0 ) zExp = 0;
2804 }
2805 return packFloatx80( zSign, zExp, zSig0 );
2806
2807}
2808
2809/*----------------------------------------------------------------------------
2810| Takes an abstract floating-point value having sign `zSign', exponent
2811| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
2812| and returns the proper extended double-precision floating-point value
2813| corresponding to the abstract input. This routine is just like
2814| `roundAndPackFloatx80' except that the input significand does not have to be
2815| normalized.
2816*----------------------------------------------------------------------------*/
2817
88857aca
LV
2818floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
2819 flag zSign, int32_t zExp,
2820 uint64_t zSig0, uint64_t zSig1,
2821 float_status *status)
158142c2 2822{
8f506c70 2823 int8_t shiftCount;
158142c2
FB
2824
2825 if ( zSig0 == 0 ) {
2826 zSig0 = zSig1;
2827 zSig1 = 0;
2828 zExp -= 64;
2829 }
2830 shiftCount = countLeadingZeros64( zSig0 );
2831 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
2832 zExp -= shiftCount;
ff32e16e
PM
2833 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
2834 zSig0, zSig1, status);
158142c2
FB
2835
2836}
2837
158142c2
FB
2838/*----------------------------------------------------------------------------
2839| Returns the least-significant 64 fraction bits of the quadruple-precision
2840| floating-point value `a'.
2841*----------------------------------------------------------------------------*/
2842
a49db98d 2843static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
2844{
2845
2846 return a.low;
2847
2848}
2849
2850/*----------------------------------------------------------------------------
2851| Returns the most-significant 48 fraction bits of the quadruple-precision
2852| floating-point value `a'.
2853*----------------------------------------------------------------------------*/
2854
a49db98d 2855static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
2856{
2857
2858 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
2859
2860}
2861
2862/*----------------------------------------------------------------------------
2863| Returns the exponent bits of the quadruple-precision floating-point value
2864| `a'.
2865*----------------------------------------------------------------------------*/
2866
f4014512 2867static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
2868{
2869
2870 return ( a.high>>48 ) & 0x7FFF;
2871
2872}
2873
2874/*----------------------------------------------------------------------------
2875| Returns the sign bit of the quadruple-precision floating-point value `a'.
2876*----------------------------------------------------------------------------*/
2877
a49db98d 2878static inline flag extractFloat128Sign( float128 a )
158142c2
FB
2879{
2880
2881 return a.high>>63;
2882
2883}
2884
2885/*----------------------------------------------------------------------------
2886| Normalizes the subnormal quadruple-precision floating-point value
2887| represented by the denormalized significand formed by the concatenation of
2888| `aSig0' and `aSig1'. The normalized exponent is stored at the location
2889| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
2890| significand are stored at the location pointed to by `zSig0Ptr', and the
2891| least significant 64 bits of the normalized significand are stored at the
2892| location pointed to by `zSig1Ptr'.
2893*----------------------------------------------------------------------------*/
2894
static void normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                                       int32_t *zExpPtr,
                                       uint64_t *zSig0Ptr,
                                       uint64_t *zSig1Ptr)
{
    int8_t shift;

    /* Common case: the upper word already holds significant bits. */
    if (aSig0 != 0) {
        shift = countLeadingZeros64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /* Upper word is empty: everything comes from aSig1.  The target is
     * 15 leading zeros in the upper output word, so depending on where the
     * top set bit of aSig1 sits the value is shifted left or split. */
    shift = countLeadingZeros64(aSig1) - 15;
    if (shift >= 0) {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
2925
2926/*----------------------------------------------------------------------------
2927| Packs the sign `zSign', the exponent `zExp', and the significand formed
2928| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
2929| floating-point value, returning the result. After being shifted into the
2930| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
2931| added together to form the most significant 64 bits of the result. This
2932| means that any integer portion of `zSig0' will be added into the exponent.
2933| Since a properly normalized significand will have an integer portion equal
2934| to 1, the `zExp' input should be 1 less than the desired result exponent
2935| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
2936| significand.
2937*----------------------------------------------------------------------------*/
2938
a49db98d 2939static inline float128
f4014512 2940 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
158142c2
FB
2941{
2942 float128 z;
2943
2944 z.low = zSig1;
bb98fe42 2945 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
158142c2
FB
2946 return z;
2947
2948}
2949
2950/*----------------------------------------------------------------------------
2951| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2952| and extended significand formed by the concatenation of `zSig0', `zSig1',
2953| and `zSig2', and returns the proper quadruple-precision floating-point value
2954| corresponding to the abstract input. Ordinarily, the abstract value is
2955| simply rounded and packed into the quadruple-precision format, with the
2956| inexact exception raised if the abstract input cannot be represented
2957| exactly. However, if the abstract value is too large, the overflow and
2958| inexact exceptions are raised and an infinity or maximal finite value is
2959| returned. If the abstract value is too small, the input value is rounded to
2960| a subnormal number, and the underflow and inexact exceptions are raised if
2961| the abstract input cannot be represented exactly as a subnormal quadruple-
2962| precision floating-point number.
2963| The input significand must be normalized or smaller. If the input
2964| significand is not normalized, `zExp' must be 0; in that case, the result
2965| returned is a subnormal number, and it must not require rounding. In the
2966| usual case that the input significand is normalized, `zExp' must be 1 less
2967| than the ``true'' floating-point exponent. The handling of underflow and
2968| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2969*----------------------------------------------------------------------------*/
2970
f4014512 2971static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
2972 uint64_t zSig0, uint64_t zSig1,
2973 uint64_t zSig2, float_status *status)
158142c2 2974{
8f506c70 2975 int8_t roundingMode;
158142c2
FB
2976 flag roundNearestEven, increment, isTiny;
2977
a2f2d288 2978 roundingMode = status->float_rounding_mode;
158142c2 2979 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2980 switch (roundingMode) {
2981 case float_round_nearest_even:
f9288a76 2982 case float_round_ties_away:
dc355b76
PM
2983 increment = ((int64_t)zSig2 < 0);
2984 break;
2985 case float_round_to_zero:
2986 increment = 0;
2987 break;
2988 case float_round_up:
2989 increment = !zSign && zSig2;
2990 break;
2991 case float_round_down:
2992 increment = zSign && zSig2;
2993 break;
9ee6f678
BR
2994 case float_round_to_odd:
2995 increment = !(zSig1 & 0x1) && zSig2;
2996 break;
dc355b76
PM
2997 default:
2998 abort();
158142c2 2999 }
bb98fe42 3000 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
3001 if ( ( 0x7FFD < zExp )
3002 || ( ( zExp == 0x7FFD )
3003 && eq128(
3004 LIT64( 0x0001FFFFFFFFFFFF ),
3005 LIT64( 0xFFFFFFFFFFFFFFFF ),
3006 zSig0,
3007 zSig1
3008 )
3009 && increment
3010 )
3011 ) {
ff32e16e 3012 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
3013 if ( ( roundingMode == float_round_to_zero )
3014 || ( zSign && ( roundingMode == float_round_up ) )
3015 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 3016 || (roundingMode == float_round_to_odd)
158142c2
FB
3017 ) {
3018 return
3019 packFloat128(
3020 zSign,
3021 0x7FFE,
3022 LIT64( 0x0000FFFFFFFFFFFF ),
3023 LIT64( 0xFFFFFFFFFFFFFFFF )
3024 );
3025 }
3026 return packFloat128( zSign, 0x7FFF, 0, 0 );
3027 }
3028 if ( zExp < 0 ) {
a2f2d288 3029 if (status->flush_to_zero) {
ff32e16e 3030 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3031 return packFloat128(zSign, 0, 0, 0);
3032 }
158142c2 3033 isTiny =
a2f2d288
PM
3034 (status->float_detect_tininess
3035 == float_tininess_before_rounding)
158142c2
FB
3036 || ( zExp < -1 )
3037 || ! increment
3038 || lt128(
3039 zSig0,
3040 zSig1,
3041 LIT64( 0x0001FFFFFFFFFFFF ),
3042 LIT64( 0xFFFFFFFFFFFFFFFF )
3043 );
3044 shift128ExtraRightJamming(
3045 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
3046 zExp = 0;
ff32e16e
PM
3047 if (isTiny && zSig2) {
3048 float_raise(float_flag_underflow, status);
3049 }
dc355b76
PM
3050 switch (roundingMode) {
3051 case float_round_nearest_even:
f9288a76 3052 case float_round_ties_away:
dc355b76
PM
3053 increment = ((int64_t)zSig2 < 0);
3054 break;
3055 case float_round_to_zero:
3056 increment = 0;
3057 break;
3058 case float_round_up:
3059 increment = !zSign && zSig2;
3060 break;
3061 case float_round_down:
3062 increment = zSign && zSig2;
3063 break;
9ee6f678
BR
3064 case float_round_to_odd:
3065 increment = !(zSig1 & 0x1) && zSig2;
3066 break;
dc355b76
PM
3067 default:
3068 abort();
158142c2
FB
3069 }
3070 }
3071 }
a2f2d288
PM
3072 if (zSig2) {
3073 status->float_exception_flags |= float_flag_inexact;
3074 }
158142c2
FB
3075 if ( increment ) {
3076 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
3077 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
3078 }
3079 else {
3080 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
3081 }
3082 return packFloat128( zSign, zExp, zSig0, zSig1 );
3083
3084}
3085
3086/*----------------------------------------------------------------------------
3087| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3088| and significand formed by the concatenation of `zSig0' and `zSig1', and
3089| returns the proper quadruple-precision floating-point value corresponding
3090| to the abstract input. This routine is just like `roundAndPackFloat128'
3091| except that the input significand has fewer bits and does not have to be
3092| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
3093| point exponent.
3094*----------------------------------------------------------------------------*/
3095
f4014512 3096static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
3097 uint64_t zSig0, uint64_t zSig1,
3098 float_status *status)
158142c2 3099{
8f506c70 3100 int8_t shiftCount;
bb98fe42 3101 uint64_t zSig2;
158142c2
FB
3102
3103 if ( zSig0 == 0 ) {
3104 zSig0 = zSig1;
3105 zSig1 = 0;
3106 zExp -= 64;
3107 }
3108 shiftCount = countLeadingZeros64( zSig0 ) - 15;
3109 if ( 0 <= shiftCount ) {
3110 zSig2 = 0;
3111 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3112 }
3113 else {
3114 shift128ExtraRightJamming(
3115 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
3116 }
3117 zExp -= shiftCount;
ff32e16e 3118 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
3119
3120}
3121
158142c2 3122
158142c2
FB
3123/*----------------------------------------------------------------------------
3124| Returns the result of converting the 32-bit two's complement integer `a'
3125| to the extended double-precision floating-point format. The conversion
3126| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3127| Arithmetic.
3128*----------------------------------------------------------------------------*/
3129
e5a41ffa 3130floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2
FB
3131{
3132 flag zSign;
3a87d009 3133 uint32_t absA;
8f506c70 3134 int8_t shiftCount;
bb98fe42 3135 uint64_t zSig;
158142c2
FB
3136
3137 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3138 zSign = ( a < 0 );
3139 absA = zSign ? - a : a;
3140 shiftCount = countLeadingZeros32( absA ) + 32;
3141 zSig = absA;
3142 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
3143
3144}
3145
158142c2
FB
3146/*----------------------------------------------------------------------------
3147| Returns the result of converting the 32-bit two's complement integer `a' to
3148| the quadruple-precision floating-point format. The conversion is performed
3149| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3150*----------------------------------------------------------------------------*/
3151
e5a41ffa 3152float128 int32_to_float128(int32_t a, float_status *status)
158142c2
FB
3153{
3154 flag zSign;
3a87d009 3155 uint32_t absA;
8f506c70 3156 int8_t shiftCount;
bb98fe42 3157 uint64_t zSig0;
158142c2
FB
3158
3159 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3160 zSign = ( a < 0 );
3161 absA = zSign ? - a : a;
3162 shiftCount = countLeadingZeros32( absA ) + 17;
3163 zSig0 = absA;
3164 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
3165
3166}
3167
158142c2
FB
3168/*----------------------------------------------------------------------------
3169| Returns the result of converting the 64-bit two's complement integer `a'
3170| to the extended double-precision floating-point format. The conversion
3171| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3172| Arithmetic.
3173*----------------------------------------------------------------------------*/
3174
e5a41ffa 3175floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2
FB
3176{
3177 flag zSign;
182f42fd 3178 uint64_t absA;
8f506c70 3179 int8_t shiftCount;
158142c2
FB
3180
3181 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3182 zSign = ( a < 0 );
3183 absA = zSign ? - a : a;
3184 shiftCount = countLeadingZeros64( absA );
3185 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
3186
3187}
3188
158142c2
FB
3189/*----------------------------------------------------------------------------
3190| Returns the result of converting the 64-bit two's complement integer `a' to
3191| the quadruple-precision floating-point format. The conversion is performed
3192| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3193*----------------------------------------------------------------------------*/
3194
e5a41ffa 3195float128 int64_to_float128(int64_t a, float_status *status)
158142c2
FB
3196{
3197 flag zSign;
182f42fd 3198 uint64_t absA;
8f506c70 3199 int8_t shiftCount;
f4014512 3200 int32_t zExp;
bb98fe42 3201 uint64_t zSig0, zSig1;
158142c2
FB
3202
3203 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3204 zSign = ( a < 0 );
3205 absA = zSign ? - a : a;
3206 shiftCount = countLeadingZeros64( absA ) + 49;
3207 zExp = 0x406E - shiftCount;
3208 if ( 64 <= shiftCount ) {
3209 zSig1 = 0;
3210 zSig0 = absA;
3211 shiftCount -= 64;
3212 }
3213 else {
3214 zSig1 = absA;
3215 zSig0 = 0;
3216 }
3217 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3218 return packFloat128( zSign, zExp, zSig0, zSig1 );
3219
3220}
3221
6bb8e0f1
PM
3222/*----------------------------------------------------------------------------
3223| Returns the result of converting the 64-bit unsigned integer `a'
3224| to the quadruple-precision floating-point format. The conversion is performed
3225| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3226*----------------------------------------------------------------------------*/
3227
e5a41ffa 3228float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
3229{
3230 if (a == 0) {
3231 return float128_zero;
3232 }
6603d506 3233 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
3234}
3235
158142c2
FB
3236/*----------------------------------------------------------------------------
3237| Returns the result of converting the single-precision floating-point value
3238| `a' to the extended double-precision floating-point format. The conversion
3239| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3240| Arithmetic.
3241*----------------------------------------------------------------------------*/
3242
e5a41ffa 3243floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2
FB
3244{
3245 flag aSign;
0c48262d 3246 int aExp;
bb98fe42 3247 uint32_t aSig;
158142c2 3248
ff32e16e 3249 a = float32_squash_input_denormal(a, status);
158142c2
FB
3250 aSig = extractFloat32Frac( a );
3251 aExp = extractFloat32Exp( a );
3252 aSign = extractFloat32Sign( a );
3253 if ( aExp == 0xFF ) {
ff32e16e
PM
3254 if (aSig) {
3255 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
3256 }
0f605c88
LV
3257 return packFloatx80(aSign,
3258 floatx80_infinity_high,
3259 floatx80_infinity_low);
158142c2
FB
3260 }
3261 if ( aExp == 0 ) {
3262 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3263 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3264 }
3265 aSig |= 0x00800000;
bb98fe42 3266 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
3267
3268}
3269
158142c2
FB
3270/*----------------------------------------------------------------------------
3271| Returns the result of converting the single-precision floating-point value
3272| `a' to the quadruple-precision floating-point format. The conversion is
3273| performed according to the IEC/IEEE Standard for Binary Floating-Point
3274| Arithmetic.
3275*----------------------------------------------------------------------------*/
3276
e5a41ffa 3277float128 float32_to_float128(float32 a, float_status *status)
158142c2
FB
3278{
3279 flag aSign;
0c48262d 3280 int aExp;
bb98fe42 3281 uint32_t aSig;
158142c2 3282
ff32e16e 3283 a = float32_squash_input_denormal(a, status);
158142c2
FB
3284 aSig = extractFloat32Frac( a );
3285 aExp = extractFloat32Exp( a );
3286 aSign = extractFloat32Sign( a );
3287 if ( aExp == 0xFF ) {
ff32e16e
PM
3288 if (aSig) {
3289 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
3290 }
158142c2
FB
3291 return packFloat128( aSign, 0x7FFF, 0, 0 );
3292 }
3293 if ( aExp == 0 ) {
3294 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3295 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3296 --aExp;
3297 }
bb98fe42 3298 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
3299
3300}
3301
158142c2
FB
3302/*----------------------------------------------------------------------------
3303| Returns the remainder of the single-precision floating-point value `a'
3304| with respect to the corresponding value `b'. The operation is performed
3305| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3306*----------------------------------------------------------------------------*/
3307
e5a41ffa 3308float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 3309{
ed086f3d 3310 flag aSign, zSign;
0c48262d 3311 int aExp, bExp, expDiff;
bb98fe42
AF
3312 uint32_t aSig, bSig;
3313 uint32_t q;
3314 uint64_t aSig64, bSig64, q64;
3315 uint32_t alternateASig;
3316 int32_t sigMean;
ff32e16e
PM
3317 a = float32_squash_input_denormal(a, status);
3318 b = float32_squash_input_denormal(b, status);
158142c2
FB
3319
3320 aSig = extractFloat32Frac( a );
3321 aExp = extractFloat32Exp( a );
3322 aSign = extractFloat32Sign( a );
3323 bSig = extractFloat32Frac( b );
3324 bExp = extractFloat32Exp( b );
158142c2
FB
3325 if ( aExp == 0xFF ) {
3326 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 3327 return propagateFloat32NaN(a, b, status);
158142c2 3328 }
ff32e16e 3329 float_raise(float_flag_invalid, status);
af39bc8c 3330 return float32_default_nan(status);
158142c2
FB
3331 }
3332 if ( bExp == 0xFF ) {
ff32e16e
PM
3333 if (bSig) {
3334 return propagateFloat32NaN(a, b, status);
3335 }
158142c2
FB
3336 return a;
3337 }
3338 if ( bExp == 0 ) {
3339 if ( bSig == 0 ) {
ff32e16e 3340 float_raise(float_flag_invalid, status);
af39bc8c 3341 return float32_default_nan(status);
158142c2
FB
3342 }
3343 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
3344 }
3345 if ( aExp == 0 ) {
3346 if ( aSig == 0 ) return a;
3347 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3348 }
3349 expDiff = aExp - bExp;
3350 aSig |= 0x00800000;
3351 bSig |= 0x00800000;
3352 if ( expDiff < 32 ) {
3353 aSig <<= 8;
3354 bSig <<= 8;
3355 if ( expDiff < 0 ) {
3356 if ( expDiff < -1 ) return a;
3357 aSig >>= 1;
3358 }
3359 q = ( bSig <= aSig );
3360 if ( q ) aSig -= bSig;
3361 if ( 0 < expDiff ) {
bb98fe42 3362 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
3363 q >>= 32 - expDiff;
3364 bSig >>= 2;
3365 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3366 }
3367 else {
3368 aSig >>= 2;
3369 bSig >>= 2;
3370 }
3371 }
3372 else {
3373 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
3374 aSig64 = ( (uint64_t) aSig )<<40;
3375 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
3376 expDiff -= 64;
3377 while ( 0 < expDiff ) {
3378 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3379 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3380 aSig64 = - ( ( bSig * q64 )<<38 );
3381 expDiff -= 62;
3382 }
3383 expDiff += 64;
3384 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3385 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3386 q = q64>>( 64 - expDiff );
3387 bSig <<= 6;
3388 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
3389 }
3390 do {
3391 alternateASig = aSig;
3392 ++q;
3393 aSig -= bSig;
bb98fe42 3394 } while ( 0 <= (int32_t) aSig );
158142c2
FB
3395 sigMean = aSig + alternateASig;
3396 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3397 aSig = alternateASig;
3398 }
bb98fe42 3399 zSign = ( (int32_t) aSig < 0 );
158142c2 3400 if ( zSign ) aSig = - aSig;
ff32e16e 3401 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
3402}
3403
369be8f6 3404
158142c2 3405
8229c991
AJ
3406/*----------------------------------------------------------------------------
3407| Returns the binary exponential of the single-precision floating-point value
3408| `a'. The operation is performed according to the IEC/IEEE Standard for
3409| Binary Floating-Point Arithmetic.
3410|
3411| Uses the following identities:
3412|
3413| 1. -------------------------------------------------------------------------
3414| x x*ln(2)
3415| 2 = e
3416|
3417| 2. -------------------------------------------------------------------------
3418| 2 3 4 5 n
3419| x x x x x x x
3420| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
3421| 1! 2! 3! 4! 5! n!
3422*----------------------------------------------------------------------------*/
3423
3424static const float64 float32_exp2_coefficients[15] =
3425{
d5138cf4
PM
3426 const_float64( 0x3ff0000000000000ll ), /* 1 */
3427 const_float64( 0x3fe0000000000000ll ), /* 2 */
3428 const_float64( 0x3fc5555555555555ll ), /* 3 */
3429 const_float64( 0x3fa5555555555555ll ), /* 4 */
3430 const_float64( 0x3f81111111111111ll ), /* 5 */
3431 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
3432 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
3433 const_float64( 0x3efa01a01a01a01all ), /* 8 */
3434 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
3435 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
3436 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
3437 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
3438 const_float64( 0x3de6124613a86d09ll ), /* 13 */
3439 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
3440 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
3441};
3442
e5a41ffa 3443float32 float32_exp2(float32 a, float_status *status)
8229c991
AJ
3444{
3445 flag aSign;
0c48262d 3446 int aExp;
bb98fe42 3447 uint32_t aSig;
8229c991
AJ
3448 float64 r, x, xn;
3449 int i;
ff32e16e 3450 a = float32_squash_input_denormal(a, status);
8229c991
AJ
3451
3452 aSig = extractFloat32Frac( a );
3453 aExp = extractFloat32Exp( a );
3454 aSign = extractFloat32Sign( a );
3455
3456 if ( aExp == 0xFF) {
ff32e16e
PM
3457 if (aSig) {
3458 return propagateFloat32NaN(a, float32_zero, status);
3459 }
8229c991
AJ
3460 return (aSign) ? float32_zero : a;
3461 }
3462 if (aExp == 0) {
3463 if (aSig == 0) return float32_one;
3464 }
3465
ff32e16e 3466 float_raise(float_flag_inexact, status);
8229c991
AJ
3467
3468 /* ******************************* */
3469 /* using float64 for approximation */
3470 /* ******************************* */
ff32e16e
PM
3471 x = float32_to_float64(a, status);
3472 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
3473
3474 xn = x;
3475 r = float64_one;
3476 for (i = 0 ; i < 15 ; i++) {
3477 float64 f;
3478
ff32e16e
PM
3479 f = float64_mul(xn, float32_exp2_coefficients[i], status);
3480 r = float64_add(r, f, status);
8229c991 3481
ff32e16e 3482 xn = float64_mul(xn, x, status);
8229c991
AJ
3483 }
3484
3485 return float64_to_float32(r, status);
3486}
3487
374dfc33
AJ
3488/*----------------------------------------------------------------------------
3489| Returns the binary log of the single-precision floating-point value `a'.
3490| The operation is performed according to the IEC/IEEE Standard for Binary
3491| Floating-Point Arithmetic.
3492*----------------------------------------------------------------------------*/
e5a41ffa 3493float32 float32_log2(float32 a, float_status *status)
374dfc33
AJ
3494{
3495 flag aSign, zSign;
0c48262d 3496 int aExp;
bb98fe42 3497 uint32_t aSig, zSig, i;
374dfc33 3498
ff32e16e 3499 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
3500 aSig = extractFloat32Frac( a );
3501 aExp = extractFloat32Exp( a );
3502 aSign = extractFloat32Sign( a );
3503
3504 if ( aExp == 0 ) {
3505 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
3506 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3507 }
3508 if ( aSign ) {
ff32e16e 3509 float_raise(float_flag_invalid, status);
af39bc8c 3510 return float32_default_nan(status);
374dfc33
AJ
3511 }
3512 if ( aExp == 0xFF ) {
ff32e16e
PM
3513 if (aSig) {
3514 return propagateFloat32NaN(a, float32_zero, status);
3515 }
374dfc33
AJ
3516 return a;
3517 }
3518
3519 aExp -= 0x7F;
3520 aSig |= 0x00800000;
3521 zSign = aExp < 0;
3522 zSig = aExp << 23;
3523
3524 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 3525 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
3526 if ( aSig & 0x01000000 ) {
3527 aSig >>= 1;
3528 zSig |= i;
3529 }
3530 }
3531
3532 if ( zSign )
3533 zSig = -zSig;
3534
ff32e16e 3535 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
3536}
3537
158142c2
FB
3538/*----------------------------------------------------------------------------
3539| Returns 1 if the single-precision floating-point value `a' is equal to
b689362d
AJ
3540| the corresponding value `b', and 0 otherwise. The invalid exception is
3541| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
3542| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3543*----------------------------------------------------------------------------*/
3544
e5a41ffa 3545int float32_eq(float32 a, float32 b, float_status *status)
158142c2 3546{
b689362d 3547 uint32_t av, bv;
ff32e16e
PM
3548 a = float32_squash_input_denormal(a, status);
3549 b = float32_squash_input_denormal(b, status);
158142c2
FB
3550
3551 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3552 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3553 ) {
ff32e16e 3554 float_raise(float_flag_invalid, status);
158142c2
FB
3555 return 0;
3556 }
b689362d
AJ
3557 av = float32_val(a);
3558 bv = float32_val(b);
3559 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
3560}
3561
3562/*----------------------------------------------------------------------------
3563| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
3564| or equal to the corresponding value `b', and 0 otherwise. The invalid
3565| exception is raised if either operand is a NaN. The comparison is performed
3566| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3567*----------------------------------------------------------------------------*/
3568
e5a41ffa 3569int float32_le(float32 a, float32 b, float_status *status)
158142c2
FB
3570{
3571 flag aSign, bSign;
bb98fe42 3572 uint32_t av, bv;
ff32e16e
PM
3573 a = float32_squash_input_denormal(a, status);
3574 b = float32_squash_input_denormal(b, status);
158142c2
FB
3575
3576 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3577 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3578 ) {
ff32e16e 3579 float_raise(float_flag_invalid, status);
158142c2
FB
3580 return 0;
3581 }
3582 aSign = extractFloat32Sign( a );
3583 bSign = extractFloat32Sign( b );
f090c9d4
PB
3584 av = float32_val(a);
3585 bv = float32_val(b);
bb98fe42 3586 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 3587 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
3588
3589}
3590
3591/*----------------------------------------------------------------------------
3592| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
3593| the corresponding value `b', and 0 otherwise. The invalid exception is
3594| raised if either operand is a NaN. The comparison is performed according
3595| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3596*----------------------------------------------------------------------------*/
3597
e5a41ffa 3598int float32_lt(float32 a, float32 b, float_status *status)
158142c2
FB
3599{
3600 flag aSign, bSign;
bb98fe42 3601 uint32_t av, bv;
ff32e16e
PM
3602 a = float32_squash_input_denormal(a, status);
3603 b = float32_squash_input_denormal(b, status);
158142c2
FB
3604
3605 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3606 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3607 ) {
ff32e16e 3608 float_raise(float_flag_invalid, status);
158142c2
FB
3609 return 0;
3610 }
3611 aSign = extractFloat32Sign( a );
3612 bSign = extractFloat32Sign( b );
f090c9d4
PB
3613 av = float32_val(a);
3614 bv = float32_val(b);
bb98fe42 3615 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 3616 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
3617
3618}
3619
67b7861d
AJ
3620/*----------------------------------------------------------------------------
3621| Returns 1 if the single-precision floating-point values `a' and `b' cannot
f5a64251
AJ
3622| be compared, and 0 otherwise. The invalid exception is raised if either
3623| operand is a NaN. The comparison is performed according to the IEC/IEEE
3624| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
3625*----------------------------------------------------------------------------*/
3626
e5a41ffa 3627int float32_unordered(float32 a, float32 b, float_status *status)
67b7861d 3628{
ff32e16e
PM
3629 a = float32_squash_input_denormal(a, status);
3630 b = float32_squash_input_denormal(b, status);
67b7861d
AJ
3631
3632 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3633 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3634 ) {
ff32e16e 3635 float_raise(float_flag_invalid, status);
67b7861d
AJ
3636 return 1;
3637 }
3638 return 0;
3639}
b689362d 3640
158142c2
FB
3641/*----------------------------------------------------------------------------
3642| Returns 1 if the single-precision floating-point value `a' is equal to
f5a64251
AJ
3643| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
3644| exception. The comparison is performed according to the IEC/IEEE Standard
3645| for Binary Floating-Point Arithmetic.
158142c2
FB
3646*----------------------------------------------------------------------------*/
3647
e5a41ffa 3648int float32_eq_quiet(float32 a, float32 b, float_status *status)
158142c2 3649{
ff32e16e
PM
3650 a = float32_squash_input_denormal(a, status);
3651 b = float32_squash_input_denormal(b, status);
158142c2
FB
3652
3653 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3654 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3655 ) {
af39bc8c
AM
3656 if (float32_is_signaling_nan(a, status)
3657 || float32_is_signaling_nan(b, status)) {
ff32e16e 3658 float_raise(float_flag_invalid, status);
b689362d 3659 }
158142c2
FB
3660 return 0;
3661 }
b689362d
AJ
3662 return ( float32_val(a) == float32_val(b) ) ||
3663 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
158142c2
FB
3664}
3665
3666/*----------------------------------------------------------------------------
3667| Returns 1 if the single-precision floating-point value `a' is less than or
3668| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
3669| cause an exception. Otherwise, the comparison is performed according to the
3670| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3671*----------------------------------------------------------------------------*/
3672
e5a41ffa 3673int float32_le_quiet(float32 a, float32 b, float_status *status)
158142c2
FB
3674{
3675 flag aSign, bSign;
bb98fe42 3676 uint32_t av, bv;
ff32e16e
PM
3677 a = float32_squash_input_denormal(a, status);
3678 b = float32_squash_input_denormal(b, status);
158142c2
FB
3679
3680 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3681 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3682 ) {
af39bc8c
AM
3683 if (float32_is_signaling_nan(a, status)
3684 || float32_is_signaling_nan(b, status)) {
ff32e16e 3685 float_raise(float_flag_invalid, status);
158142c2
FB
3686 }
3687 return 0;
3688 }
3689 aSign = extractFloat32Sign( a );
3690 bSign = extractFloat32Sign( b );
f090c9d4
PB
3691 av = float32_val(a);
3692 bv = float32_val(b);
bb98fe42 3693 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 3694 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
3695
3696}
3697
3698/*----------------------------------------------------------------------------
3699| Returns 1 if the single-precision floating-point value `a' is less than
3700| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
3701| exception. Otherwise, the comparison is performed according to the IEC/IEEE
ab52f973 3702| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3703*----------------------------------------------------------------------------*/
3704
ab52f973 3705int float32_lt_quiet(float32 a, float32 b, float_status *status)
158142c2 3706{
ab52f973
AB
3707 flag aSign, bSign;
3708 uint32_t av, bv;
3709 a = float32_squash_input_denormal(a, status);
3710 b = float32_squash_input_denormal(b, status);
158142c2 3711
ab52f973
AB
3712 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3713 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3714 ) {
3715 if (float32_is_signaling_nan(a, status)
3716 || float32_is_signaling_nan(b, status)) {
ff32e16e 3717 float_raise(float_flag_invalid, status);
158142c2 3718 }
ab52f973 3719 return 0;
158142c2 3720 }
ab52f973
AB
3721 aSign = extractFloat32Sign( a );
3722 bSign = extractFloat32Sign( b );
3723 av = float32_val(a);
3724 bv = float32_val(b);
3725 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
3726 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
3727
3728}
3729
3730/*----------------------------------------------------------------------------
ab52f973
AB
3731| Returns 1 if the single-precision floating-point values `a' and `b' cannot
3732| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
3733| comparison is performed according to the IEC/IEEE Standard for Binary
3734| Floating-Point Arithmetic.
158142c2
FB
3735*----------------------------------------------------------------------------*/
3736
ab52f973 3737int float32_unordered_quiet(float32 a, float32 b, float_status *status)
158142c2 3738{
ab52f973
AB
3739 a = float32_squash_input_denormal(a, status);
3740 b = float32_squash_input_denormal(b, status);
158142c2 3741
ab52f973
AB
3742 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3743 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3744 ) {
3745 if (float32_is_signaling_nan(a, status)
3746 || float32_is_signaling_nan(b, status)) {
3747 float_raise(float_flag_invalid, status);
158142c2 3748 }
ab52f973 3749 return 1;
158142c2 3750 }
ab52f973 3751 return 0;
158142c2
FB
3752}
3753
210cbd49
AB
3754/*----------------------------------------------------------------------------
3755| If `a' is denormal and we are in flush-to-zero mode then set the
3756| input-denormal exception and return zero. Otherwise just return the value.
3757*----------------------------------------------------------------------------*/
3758float16 float16_squash_input_denormal(float16 a, float_status *status)
3759{
3760 if (status->flush_inputs_to_zero) {
3761 if (extractFloat16Exp(a) == 0 && extractFloat16Frac(a) != 0) {
3762 float_raise(float_flag_input_denormal, status);
3763 return make_float16(float16_val(a) & 0x8000);
3764 }
3765 }
3766 return a;
3767}
3768
158142c2
FB
3769/*----------------------------------------------------------------------------
3770| Returns the result of converting the double-precision floating-point value
3771| `a' to the extended double-precision floating-point format. The conversion
3772| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3773| Arithmetic.
3774*----------------------------------------------------------------------------*/
3775
e5a41ffa 3776floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2
FB
3777{
3778 flag aSign;
0c48262d 3779 int aExp;
bb98fe42 3780 uint64_t aSig;
158142c2 3781
ff32e16e 3782 a = float64_squash_input_denormal(a, status);
158142c2
FB
3783 aSig = extractFloat64Frac( a );
3784 aExp = extractFloat64Exp( a );
3785 aSign = extractFloat64Sign( a );
3786 if ( aExp == 0x7FF ) {
ff32e16e
PM
3787 if (aSig) {
3788 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
3789 }
0f605c88
LV
3790 return packFloatx80(aSign,
3791 floatx80_infinity_high,
3792 floatx80_infinity_low);
158142c2
FB
3793 }
3794 if ( aExp == 0 ) {
3795 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3796 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3797 }
3798 return
3799 packFloatx80(
3800 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
3801
3802}
3803
158142c2
FB
3804/*----------------------------------------------------------------------------
3805| Returns the result of converting the double-precision floating-point value
3806| `a' to the quadruple-precision floating-point format. The conversion is
3807| performed according to the IEC/IEEE Standard for Binary Floating-Point
3808| Arithmetic.
3809*----------------------------------------------------------------------------*/
3810
e5a41ffa 3811float128 float64_to_float128(float64 a, float_status *status)
158142c2
FB
3812{
3813 flag aSign;
0c48262d 3814 int aExp;
bb98fe42 3815 uint64_t aSig, zSig0, zSig1;
158142c2 3816
ff32e16e 3817 a = float64_squash_input_denormal(a, status);
158142c2
FB
3818 aSig = extractFloat64Frac( a );
3819 aExp = extractFloat64Exp( a );
3820 aSign = extractFloat64Sign( a );
3821 if ( aExp == 0x7FF ) {
ff32e16e
PM
3822 if (aSig) {
3823 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
3824 }
158142c2
FB
3825 return packFloat128( aSign, 0x7FFF, 0, 0 );
3826 }
3827 if ( aExp == 0 ) {
3828 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3829 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3830 --aExp;
3831 }
3832 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
3833 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
3834
3835}
3836
158142c2
FB
3837
3838/*----------------------------------------------------------------------------
3839| Returns the remainder of the double-precision floating-point value `a'
3840| with respect to the corresponding value `b'. The operation is performed
3841| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3842*----------------------------------------------------------------------------*/
3843
e5a41ffa 3844float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 3845{
ed086f3d 3846 flag aSign, zSign;
0c48262d 3847 int aExp, bExp, expDiff;
bb98fe42
AF
3848 uint64_t aSig, bSig;
3849 uint64_t q, alternateASig;
3850 int64_t sigMean;
158142c2 3851
ff32e16e
PM
3852 a = float64_squash_input_denormal(a, status);
3853 b = float64_squash_input_denormal(b, status);
158142c2
FB
3854 aSig = extractFloat64Frac( a );
3855 aExp = extractFloat64Exp( a );
3856 aSign = extractFloat64Sign( a );
3857 bSig = extractFloat64Frac( b );
3858 bExp = extractFloat64Exp( b );
158142c2
FB
3859 if ( aExp == 0x7FF ) {
3860 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 3861 return propagateFloat64NaN(a, b, status);
158142c2 3862 }
ff32e16e 3863 float_raise(float_flag_invalid, status);
af39bc8c 3864 return float64_default_nan(status);
158142c2
FB
3865 }
3866 if ( bExp == 0x7FF ) {
ff32e16e
PM
3867 if (bSig) {
3868 return propagateFloat64NaN(a, b, status);
3869 }
158142c2
FB
3870 return a;
3871 }
3872 if ( bExp == 0 ) {
3873 if ( bSig == 0 ) {
ff32e16e 3874 float_raise(float_flag_invalid, status);
af39bc8c 3875 return float64_default_nan(status);
158142c2
FB
3876 }
3877 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3878 }
3879 if ( aExp == 0 ) {
3880 if ( aSig == 0 ) return a;
3881 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3882 }
3883 expDiff = aExp - bExp;
3884 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3885 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3886 if ( expDiff < 0 ) {
3887 if ( expDiff < -1 ) return a;
3888 aSig >>= 1;
3889 }
3890 q = ( bSig <= aSig );
3891 if ( q ) aSig -= bSig;
3892 expDiff -= 64;
3893 while ( 0 < expDiff ) {
3894 q = estimateDiv128To64( aSig, 0, bSig );
3895 q = ( 2 < q ) ? q - 2 : 0;
3896 aSig = - ( ( bSig>>2 ) * q );
3897 expDiff -= 62;
3898 }
3899 expDiff += 64;
3900 if ( 0 < expDiff ) {
3901 q = estimateDiv128To64( aSig, 0, bSig );
3902 q = ( 2 < q ) ? q - 2 : 0;
3903 q >>= 64 - expDiff;
3904 bSig >>= 2;
3905 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3906 }
3907 else {
3908 aSig >>= 2;
3909 bSig >>= 2;
3910 }
3911 do {
3912 alternateASig = aSig;
3913 ++q;
3914 aSig -= bSig;
bb98fe42 3915 } while ( 0 <= (int64_t) aSig );
158142c2
FB
3916 sigMean = aSig + alternateASig;
3917 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3918 aSig = alternateASig;
3919 }
bb98fe42 3920 zSign = ( (int64_t) aSig < 0 );
158142c2 3921 if ( zSign ) aSig = - aSig;
ff32e16e 3922 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
3923
3924}
3925
374dfc33
AJ
3926/*----------------------------------------------------------------------------
3927| Returns the binary log of the double-precision floating-point value `a'.
3928| The operation is performed according to the IEC/IEEE Standard for Binary
3929| Floating-Point Arithmetic.
3930*----------------------------------------------------------------------------*/
e5a41ffa 3931float64 float64_log2(float64 a, float_status *status)
374dfc33
AJ
3932{
3933 flag aSign, zSign;
0c48262d 3934 int aExp;
bb98fe42 3935 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 3936 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
3937
3938 aSig = extractFloat64Frac( a );
3939 aExp = extractFloat64Exp( a );
3940 aSign = extractFloat64Sign( a );
3941
3942 if ( aExp == 0 ) {
3943 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
3944 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3945 }
3946 if ( aSign ) {
ff32e16e 3947 float_raise(float_flag_invalid, status);
af39bc8c 3948 return float64_default_nan(status);
374dfc33
AJ
3949 }
3950 if ( aExp == 0x7FF ) {
ff32e16e
PM
3951 if (aSig) {
3952 return propagateFloat64NaN(a, float64_zero, status);
3953 }
374dfc33
AJ
3954 return a;
3955 }
3956
3957 aExp -= 0x3FF;
3958 aSig |= LIT64( 0x0010000000000000 );
3959 zSign = aExp < 0;
bb98fe42 3960 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
3961 for (i = 1LL << 51; i > 0; i >>= 1) {
3962 mul64To128( aSig, aSig, &aSig0, &aSig1 );
3963 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
3964 if ( aSig & LIT64( 0x0020000000000000 ) ) {
3965 aSig >>= 1;
3966 zSig |= i;
3967 }
3968 }
3969
3970 if ( zSign )
3971 zSig = -zSig;
ff32e16e 3972 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
3973}
3974
158142c2
FB
3975/*----------------------------------------------------------------------------
3976| Returns 1 if the double-precision floating-point value `a' is equal to the
b689362d
AJ
3977| corresponding value `b', and 0 otherwise. The invalid exception is raised
3978| if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
3979| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3980*----------------------------------------------------------------------------*/
3981
e5a41ffa 3982int float64_eq(float64 a, float64 b, float_status *status)
158142c2 3983{
bb98fe42 3984 uint64_t av, bv;
ff32e16e
PM
3985 a = float64_squash_input_denormal(a, status);
3986 b = float64_squash_input_denormal(b, status);
158142c2
FB
3987
3988 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3989 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3990 ) {
ff32e16e 3991 float_raise(float_flag_invalid, status);
158142c2
FB
3992 return 0;
3993 }
f090c9d4 3994 av = float64_val(a);
a1b91bb4 3995 bv = float64_val(b);
bb98fe42 3996 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
3997
3998}
3999
4000/*----------------------------------------------------------------------------
4001| Returns 1 if the double-precision floating-point value `a' is less than or
f5a64251
AJ
4002| equal to the corresponding value `b', and 0 otherwise. The invalid
4003| exception is raised if either operand is a NaN. The comparison is performed
4004| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4005*----------------------------------------------------------------------------*/
4006
e5a41ffa 4007int float64_le(float64 a, float64 b, float_status *status)
158142c2
FB
4008{
4009 flag aSign, bSign;
bb98fe42 4010 uint64_t av, bv;
ff32e16e
PM
4011 a = float64_squash_input_denormal(a, status);
4012 b = float64_squash_input_denormal(b, status);
158142c2
FB
4013
4014 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4015 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4016 ) {
ff32e16e 4017 float_raise(float_flag_invalid, status);
158142c2
FB
4018 return 0;
4019 }
4020 aSign = extractFloat64Sign( a );
4021 bSign = extractFloat64Sign( b );
f090c9d4 4022 av = float64_val(a);
a1b91bb4 4023 bv = float64_val(b);
bb98fe42 4024 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4025 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4026
4027}
4028
4029/*----------------------------------------------------------------------------
4030| Returns 1 if the double-precision floating-point value `a' is less than
f5a64251
AJ
4031| the corresponding value `b', and 0 otherwise. The invalid exception is
4032| raised if either operand is a NaN. The comparison is performed according
4033| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4034*----------------------------------------------------------------------------*/
4035
e5a41ffa 4036int float64_lt(float64 a, float64 b, float_status *status)
158142c2
FB
4037{
4038 flag aSign, bSign;
bb98fe42 4039 uint64_t av, bv;
158142c2 4040
ff32e16e
PM
4041 a = float64_squash_input_denormal(a, status);
4042 b = float64_squash_input_denormal(b, status);
158142c2
FB
4043 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4044 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4045 ) {
ff32e16e 4046 float_raise(float_flag_invalid, status);
158142c2
FB
4047 return 0;
4048 }
4049 aSign = extractFloat64Sign( a );
4050 bSign = extractFloat64Sign( b );
f090c9d4 4051 av = float64_val(a);
a1b91bb4 4052 bv = float64_val(b);
bb98fe42 4053 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4054 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4055
4056}
4057
67b7861d
AJ
4058/*----------------------------------------------------------------------------
4059| Returns 1 if the double-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4060| be compared, and 0 otherwise. The invalid exception is raised if either
4061| operand is a NaN. The comparison is performed according to the IEC/IEEE
4062| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4063*----------------------------------------------------------------------------*/
4064
e5a41ffa 4065int float64_unordered(float64 a, float64 b, float_status *status)
67b7861d 4066{
ff32e16e
PM
4067 a = float64_squash_input_denormal(a, status);
4068 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4069
4070 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4071 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4072 ) {
ff32e16e 4073 float_raise(float_flag_invalid, status);
67b7861d
AJ
4074 return 1;
4075 }
4076 return 0;
4077}
4078
158142c2
FB
4079/*----------------------------------------------------------------------------
4080| Returns 1 if the double-precision floating-point value `a' is equal to the
f5a64251
AJ
4081| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4082| exception. The comparison is performed according to the IEC/IEEE Standard
4083| for Binary Floating-Point Arithmetic.
158142c2
FB
4084*----------------------------------------------------------------------------*/
4085
e5a41ffa 4086int float64_eq_quiet(float64 a, float64 b, float_status *status)
158142c2 4087{
bb98fe42 4088 uint64_t av, bv;
ff32e16e
PM
4089 a = float64_squash_input_denormal(a, status);
4090 b = float64_squash_input_denormal(b, status);
158142c2
FB
4091
4092 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4093 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4094 ) {
af39bc8c
AM
4095 if (float64_is_signaling_nan(a, status)
4096 || float64_is_signaling_nan(b, status)) {
ff32e16e 4097 float_raise(float_flag_invalid, status);
b689362d 4098 }
158142c2
FB
4099 return 0;
4100 }
f090c9d4 4101 av = float64_val(a);
a1b91bb4 4102 bv = float64_val(b);
bb98fe42 4103 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4104
4105}
4106
4107/*----------------------------------------------------------------------------
4108| Returns 1 if the double-precision floating-point value `a' is less than or
4109| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4110| cause an exception. Otherwise, the comparison is performed according to the
4111| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4112*----------------------------------------------------------------------------*/
4113
e5a41ffa 4114int float64_le_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4115{
4116 flag aSign, bSign;
bb98fe42 4117 uint64_t av, bv;
ff32e16e
PM
4118 a = float64_squash_input_denormal(a, status);
4119 b = float64_squash_input_denormal(b, status);
158142c2
FB
4120
4121 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4122 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4123 ) {
af39bc8c
AM
4124 if (float64_is_signaling_nan(a, status)
4125 || float64_is_signaling_nan(b, status)) {
ff32e16e 4126 float_raise(float_flag_invalid, status);
158142c2
FB
4127 }
4128 return 0;
4129 }
4130 aSign = extractFloat64Sign( a );
4131 bSign = extractFloat64Sign( b );
f090c9d4 4132 av = float64_val(a);
a1b91bb4 4133 bv = float64_val(b);
bb98fe42 4134 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4135 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4136
4137}
4138
4139/*----------------------------------------------------------------------------
4140| Returns 1 if the double-precision floating-point value `a' is less than
4141| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4142| exception. Otherwise, the comparison is performed according to the IEC/IEEE
4143| Standard for Binary Floating-Point Arithmetic.
4144*----------------------------------------------------------------------------*/
4145
e5a41ffa 4146int float64_lt_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4147{
4148 flag aSign, bSign;
bb98fe42 4149 uint64_t av, bv;
ff32e16e
PM
4150 a = float64_squash_input_denormal(a, status);
4151 b = float64_squash_input_denormal(b, status);
158142c2
FB
4152
4153 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4154 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4155 ) {
af39bc8c
AM
4156 if (float64_is_signaling_nan(a, status)
4157 || float64_is_signaling_nan(b, status)) {
ff32e16e 4158 float_raise(float_flag_invalid, status);
158142c2
FB
4159 }
4160 return 0;
4161 }
4162 aSign = extractFloat64Sign( a );
4163 bSign = extractFloat64Sign( b );
f090c9d4 4164 av = float64_val(a);
a1b91bb4 4165 bv = float64_val(b);
bb98fe42 4166 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4167 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4168
4169}
4170
67b7861d
AJ
4171/*----------------------------------------------------------------------------
4172| Returns 1 if the double-precision floating-point values `a' and `b' cannot
4173| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4174| comparison is performed according to the IEC/IEEE Standard for Binary
4175| Floating-Point Arithmetic.
4176*----------------------------------------------------------------------------*/
4177
e5a41ffa 4178int float64_unordered_quiet(float64 a, float64 b, float_status *status)
67b7861d 4179{
ff32e16e
PM
4180 a = float64_squash_input_denormal(a, status);
4181 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4182
4183 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4184 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4185 ) {
af39bc8c
AM
4186 if (float64_is_signaling_nan(a, status)
4187 || float64_is_signaling_nan(b, status)) {
ff32e16e 4188 float_raise(float_flag_invalid, status);
67b7861d
AJ
4189 }
4190 return 1;
4191 }
4192 return 0;
4193}
4194
158142c2
FB
4195/*----------------------------------------------------------------------------
4196| Returns the result of converting the extended double-precision floating-
4197| point value `a' to the 32-bit two's complement integer format. The
4198| conversion is performed according to the IEC/IEEE Standard for Binary
4199| Floating-Point Arithmetic---which means in particular that the conversion
4200| is rounded according to the current rounding mode. If `a' is a NaN, the
4201| largest positive integer is returned. Otherwise, if the conversion
4202| overflows, the largest integer with the same sign as `a' is returned.
4203*----------------------------------------------------------------------------*/
4204
f4014512 4205int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2
FB
4206{
4207 flag aSign;
f4014512 4208 int32_t aExp, shiftCount;
bb98fe42 4209 uint64_t aSig;
158142c2 4210
d1eb8f2a
AD
4211 if (floatx80_invalid_encoding(a)) {
4212 float_raise(float_flag_invalid, status);
4213 return 1 << 31;
4214 }
158142c2
FB
4215 aSig = extractFloatx80Frac( a );
4216 aExp = extractFloatx80Exp( a );
4217 aSign = extractFloatx80Sign( a );
bb98fe42 4218 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4219 shiftCount = 0x4037 - aExp;
4220 if ( shiftCount <= 0 ) shiftCount = 1;
4221 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 4222 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
4223
4224}
4225
4226/*----------------------------------------------------------------------------
4227| Returns the result of converting the extended double-precision floating-
4228| point value `a' to the 32-bit two's complement integer format. The
4229| conversion is performed according to the IEC/IEEE Standard for Binary
4230| Floating-Point Arithmetic, except that the conversion is always rounded
4231| toward zero. If `a' is a NaN, the largest positive integer is returned.
4232| Otherwise, if the conversion overflows, the largest integer with the same
4233| sign as `a' is returned.
4234*----------------------------------------------------------------------------*/
4235
f4014512 4236int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4237{
4238 flag aSign;
f4014512 4239 int32_t aExp, shiftCount;
bb98fe42 4240 uint64_t aSig, savedASig;
b3a6a2e0 4241 int32_t z;
158142c2 4242
d1eb8f2a
AD
4243 if (floatx80_invalid_encoding(a)) {
4244 float_raise(float_flag_invalid, status);
4245 return 1 << 31;
4246 }
158142c2
FB
4247 aSig = extractFloatx80Frac( a );
4248 aExp = extractFloatx80Exp( a );
4249 aSign = extractFloatx80Sign( a );
4250 if ( 0x401E < aExp ) {
bb98fe42 4251 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4252 goto invalid;
4253 }
4254 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4255 if (aExp || aSig) {
4256 status->float_exception_flags |= float_flag_inexact;
4257 }
158142c2
FB
4258 return 0;
4259 }
4260 shiftCount = 0x403E - aExp;
4261 savedASig = aSig;
4262 aSig >>= shiftCount;
4263 z = aSig;
4264 if ( aSign ) z = - z;
4265 if ( ( z < 0 ) ^ aSign ) {
4266 invalid:
ff32e16e 4267 float_raise(float_flag_invalid, status);
bb98fe42 4268 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
4269 }
4270 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 4271 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4272 }
4273 return z;
4274
4275}
4276
4277/*----------------------------------------------------------------------------
4278| Returns the result of converting the extended double-precision floating-
4279| point value `a' to the 64-bit two's complement integer format. The
4280| conversion is performed according to the IEC/IEEE Standard for Binary
4281| Floating-Point Arithmetic---which means in particular that the conversion
4282| is rounded according to the current rounding mode. If `a' is a NaN,
4283| the largest positive integer is returned. Otherwise, if the conversion
4284| overflows, the largest integer with the same sign as `a' is returned.
4285*----------------------------------------------------------------------------*/
4286
f42c2224 4287int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2
FB
4288{
4289 flag aSign;
f4014512 4290 int32_t aExp, shiftCount;
bb98fe42 4291 uint64_t aSig, aSigExtra;
158142c2 4292
d1eb8f2a
AD
4293 if (floatx80_invalid_encoding(a)) {
4294 float_raise(float_flag_invalid, status);
4295 return 1ULL << 63;
4296 }
158142c2
FB
4297 aSig = extractFloatx80Frac( a );
4298 aExp = extractFloatx80Exp( a );
4299 aSign = extractFloatx80Sign( a );
4300 shiftCount = 0x403E - aExp;
4301 if ( shiftCount <= 0 ) {
4302 if ( shiftCount ) {
ff32e16e 4303 float_raise(float_flag_invalid, status);
0f605c88 4304 if (!aSign || floatx80_is_any_nan(a)) {
158142c2
FB
4305 return LIT64( 0x7FFFFFFFFFFFFFFF );
4306 }
bb98fe42 4307 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4308 }
4309 aSigExtra = 0;
4310 }
4311 else {
4312 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4313 }
ff32e16e 4314 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
4315
4316}
4317
4318/*----------------------------------------------------------------------------
4319| Returns the result of converting the extended double-precision floating-
4320| point value `a' to the 64-bit two's complement integer format. The
4321| conversion is performed according to the IEC/IEEE Standard for Binary
4322| Floating-Point Arithmetic, except that the conversion is always rounded
4323| toward zero. If `a' is a NaN, the largest positive integer is returned.
4324| Otherwise, if the conversion overflows, the largest integer with the same
4325| sign as `a' is returned.
4326*----------------------------------------------------------------------------*/
4327
f42c2224 4328int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4329{
4330 flag aSign;
f4014512 4331 int32_t aExp, shiftCount;
bb98fe42 4332 uint64_t aSig;
f42c2224 4333 int64_t z;
158142c2 4334
d1eb8f2a
AD
4335 if (floatx80_invalid_encoding(a)) {
4336 float_raise(float_flag_invalid, status);
4337 return 1ULL << 63;
4338 }
158142c2
FB
4339 aSig = extractFloatx80Frac( a );
4340 aExp = extractFloatx80Exp( a );
4341 aSign = extractFloatx80Sign( a );
4342 shiftCount = aExp - 0x403E;
4343 if ( 0 <= shiftCount ) {
4344 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4345 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 4346 float_raise(float_flag_invalid, status);
158142c2
FB
4347 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4348 return LIT64( 0x7FFFFFFFFFFFFFFF );
4349 }
4350 }
bb98fe42 4351 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4352 }
4353 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4354 if (aExp | aSig) {
4355 status->float_exception_flags |= float_flag_inexact;
4356 }
158142c2
FB
4357 return 0;
4358 }
4359 z = aSig>>( - shiftCount );
bb98fe42 4360 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 4361 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4362 }
4363 if ( aSign ) z = - z;
4364 return z;
4365
4366}
4367
4368/*----------------------------------------------------------------------------
4369| Returns the result of converting the extended double-precision floating-
4370| point value `a' to the single-precision floating-point format. The
4371| conversion is performed according to the IEC/IEEE Standard for Binary
4372| Floating-Point Arithmetic.
4373*----------------------------------------------------------------------------*/
4374
e5a41ffa 4375float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2
FB
4376{
4377 flag aSign;
f4014512 4378 int32_t aExp;
bb98fe42 4379 uint64_t aSig;
158142c2 4380
d1eb8f2a
AD
4381 if (floatx80_invalid_encoding(a)) {
4382 float_raise(float_flag_invalid, status);
4383 return float32_default_nan(status);
4384 }
158142c2
FB
4385 aSig = extractFloatx80Frac( a );
4386 aExp = extractFloatx80Exp( a );
4387 aSign = extractFloatx80Sign( a );
4388 if ( aExp == 0x7FFF ) {
bb98fe42 4389 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4390 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4391 }
4392 return packFloat32( aSign, 0xFF, 0 );
4393 }
4394 shift64RightJamming( aSig, 33, &aSig );
4395 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 4396 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
4397
4398}
4399
4400/*----------------------------------------------------------------------------
4401| Returns the result of converting the extended double-precision floating-
4402| point value `a' to the double-precision floating-point format. The
4403| conversion is performed according to the IEC/IEEE Standard for Binary
4404| Floating-Point Arithmetic.
4405*----------------------------------------------------------------------------*/
4406
e5a41ffa 4407float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2
FB
4408{
4409 flag aSign;
f4014512 4410 int32_t aExp;
bb98fe42 4411 uint64_t aSig, zSig;
158142c2 4412
d1eb8f2a
AD
4413 if (floatx80_invalid_encoding(a)) {
4414 float_raise(float_flag_invalid, status);
4415 return float64_default_nan(status);
4416 }
158142c2
FB
4417 aSig = extractFloatx80Frac( a );
4418 aExp = extractFloatx80Exp( a );
4419 aSign = extractFloatx80Sign( a );
4420 if ( aExp == 0x7FFF ) {
bb98fe42 4421 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4422 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4423 }
4424 return packFloat64( aSign, 0x7FF, 0 );
4425 }
4426 shift64RightJamming( aSig, 1, &zSig );
4427 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 4428 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
4429
4430}
4431
158142c2
FB
4432/*----------------------------------------------------------------------------
4433| Returns the result of converting the extended double-precision floating-
4434| point value `a' to the quadruple-precision floating-point format. The
4435| conversion is performed according to the IEC/IEEE Standard for Binary
4436| Floating-Point Arithmetic.
4437*----------------------------------------------------------------------------*/
4438
e5a41ffa 4439float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2
FB
4440{
4441 flag aSign;
0c48262d 4442 int aExp;
bb98fe42 4443 uint64_t aSig, zSig0, zSig1;
158142c2 4444
d1eb8f2a
AD
4445 if (floatx80_invalid_encoding(a)) {
4446 float_raise(float_flag_invalid, status);
4447 return float128_default_nan(status);
4448 }
158142c2
FB
4449 aSig = extractFloatx80Frac( a );
4450 aExp = extractFloatx80Exp( a );
4451 aSign = extractFloatx80Sign( a );
bb98fe42 4452 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4453 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4454 }
4455 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
4456 return packFloat128( aSign, aExp, zSig0, zSig1 );
4457
4458}
4459
0f721292
LV
4460/*----------------------------------------------------------------------------
4461| Rounds the extended double-precision floating-point value `a'
4462| to the precision provided by floatx80_rounding_precision and returns the
4463| result as an extended double-precision floating-point value.
4464| The operation is performed according to the IEC/IEEE Standard for Binary
4465| Floating-Point Arithmetic.
4466*----------------------------------------------------------------------------*/
4467
4468floatx80 floatx80_round(floatx80 a, float_status *status)
4469{
4470 return roundAndPackFloatx80(status->floatx80_rounding_precision,
4471 extractFloatx80Sign(a),
4472 extractFloatx80Exp(a),
4473 extractFloatx80Frac(a), 0, status);
4474}
4475
158142c2
FB
4476/*----------------------------------------------------------------------------
4477| Rounds the extended double-precision floating-point value `a' to an integer,
4478| and returns the result as an extended quadruple-precision floating-point
4479| value. The operation is performed according to the IEC/IEEE Standard for
4480| Binary Floating-Point Arithmetic.
4481*----------------------------------------------------------------------------*/
4482
e5a41ffa 4483floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2
FB
4484{
4485 flag aSign;
f4014512 4486 int32_t aExp;
bb98fe42 4487 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
4488 floatx80 z;
4489
d1eb8f2a
AD
4490 if (floatx80_invalid_encoding(a)) {
4491 float_raise(float_flag_invalid, status);
4492 return floatx80_default_nan(status);
4493 }
158142c2
FB
4494 aExp = extractFloatx80Exp( a );
4495 if ( 0x403E <= aExp ) {
bb98fe42 4496 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 4497 return propagateFloatx80NaN(a, a, status);
158142c2
FB
4498 }
4499 return a;
4500 }
4501 if ( aExp < 0x3FFF ) {
4502 if ( ( aExp == 0 )
bb98fe42 4503 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
158142c2
FB
4504 return a;
4505 }
a2f2d288 4506 status->float_exception_flags |= float_flag_inexact;
158142c2 4507 aSign = extractFloatx80Sign( a );
a2f2d288 4508 switch (status->float_rounding_mode) {
158142c2 4509 case float_round_nearest_even:
bb98fe42 4510 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
4511 ) {
4512 return
4513 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
4514 }
4515 break;
f9288a76
PM
4516 case float_round_ties_away:
4517 if (aExp == 0x3FFE) {
4518 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
4519 }
4520 break;
158142c2
FB
4521 case float_round_down:
4522 return
4523 aSign ?
4524 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
4525 : packFloatx80( 0, 0, 0 );
4526 case float_round_up:
4527 return
4528 aSign ? packFloatx80( 1, 0, 0 )
4529 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
4530 }
4531 return packFloatx80( aSign, 0, 0 );
4532 }
4533 lastBitMask = 1;
4534 lastBitMask <<= 0x403E - aExp;
4535 roundBitsMask = lastBitMask - 1;
4536 z = a;
a2f2d288 4537 switch (status->float_rounding_mode) {
dc355b76 4538 case float_round_nearest_even:
158142c2 4539 z.low += lastBitMask>>1;
dc355b76
PM
4540 if ((z.low & roundBitsMask) == 0) {
4541 z.low &= ~lastBitMask;
4542 }
4543 break;
f9288a76
PM
4544 case float_round_ties_away:
4545 z.low += lastBitMask >> 1;
4546 break;
dc355b76
PM
4547 case float_round_to_zero:
4548 break;
4549 case float_round_up:
4550 if (!extractFloatx80Sign(z)) {
4551 z.low += roundBitsMask;
4552 }
4553 break;
4554 case float_round_down:
4555 if (extractFloatx80Sign(z)) {
158142c2
FB
4556 z.low += roundBitsMask;
4557 }
dc355b76
PM
4558 break;
4559 default:
4560 abort();
158142c2
FB
4561 }
4562 z.low &= ~ roundBitsMask;
4563 if ( z.low == 0 ) {
4564 ++z.high;
4565 z.low = LIT64( 0x8000000000000000 );
4566 }
a2f2d288
PM
4567 if (z.low != a.low) {
4568 status->float_exception_flags |= float_flag_inexact;
4569 }
158142c2
FB
4570 return z;
4571
4572}
4573
4574/*----------------------------------------------------------------------------
4575| Returns the result of adding the absolute values of the extended double-
4576| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
4577| negated before being returned. `zSign' is ignored if the result is a NaN.
4578| The addition is performed according to the IEC/IEEE Standard for Binary
4579| Floating-Point Arithmetic.
4580*----------------------------------------------------------------------------*/
4581
e5a41ffa
PM
4582static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
4583 float_status *status)
158142c2 4584{
f4014512 4585 int32_t aExp, bExp, zExp;
bb98fe42 4586 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 4587 int32_t expDiff;
158142c2
FB
4588
4589 aSig = extractFloatx80Frac( a );
4590 aExp = extractFloatx80Exp( a );
4591 bSig = extractFloatx80Frac( b );
4592 bExp = extractFloatx80Exp( b );
4593 expDiff = aExp - bExp;
4594 if ( 0 < expDiff ) {
4595 if ( aExp == 0x7FFF ) {
ff32e16e
PM
4596 if ((uint64_t)(aSig << 1)) {
4597 return propagateFloatx80NaN(a, b, status);
4598 }
158142c2
FB
4599 return a;
4600 }
4601 if ( bExp == 0 ) --expDiff;
4602 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4603 zExp = aExp;
4604 }
4605 else if ( expDiff < 0 ) {
4606 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4607 if ((uint64_t)(bSig << 1)) {
4608 return propagateFloatx80NaN(a, b, status);
4609 }
0f605c88
LV
4610 return packFloatx80(zSign,
4611 floatx80_infinity_high,
4612 floatx80_infinity_low);
158142c2
FB
4613 }
4614 if ( aExp == 0 ) ++expDiff;
4615 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4616 zExp = bExp;
4617 }
4618 else {
4619 if ( aExp == 0x7FFF ) {
bb98fe42 4620 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 4621 return propagateFloatx80NaN(a, b, status);
158142c2
FB
4622 }
4623 return a;
4624 }
4625 zSig1 = 0;
4626 zSig0 = aSig + bSig;
4627 if ( aExp == 0 ) {
4628 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
4629 goto roundAndPack;
4630 }
4631 zExp = aExp;
4632 goto shiftRight1;
4633 }
4634 zSig0 = aSig + bSig;
bb98fe42 4635 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
4636 shiftRight1:
4637 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
4638 zSig0 |= LIT64( 0x8000000000000000 );
4639 ++zExp;
4640 roundAndPack:
a2f2d288 4641 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 4642 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
4643}
4644
4645/*----------------------------------------------------------------------------
4646| Returns the result of subtracting the absolute values of the extended
4647| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
4648| difference is negated before being returned. `zSign' is ignored if the
4649| result is a NaN. The subtraction is performed according to the IEC/IEEE
4650| Standard for Binary Floating-Point Arithmetic.
4651*----------------------------------------------------------------------------*/
4652
e5a41ffa
PM
4653static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
4654 float_status *status)
158142c2 4655{
f4014512 4656 int32_t aExp, bExp, zExp;
bb98fe42 4657 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 4658 int32_t expDiff;
158142c2
FB
4659
4660 aSig = extractFloatx80Frac( a );
4661 aExp = extractFloatx80Exp( a );
4662 bSig = extractFloatx80Frac( b );
4663 bExp = extractFloatx80Exp( b );
4664 expDiff = aExp - bExp;
4665 if ( 0 < expDiff ) goto aExpBigger;
4666 if ( expDiff < 0 ) goto bExpBigger;
4667 if ( aExp == 0x7FFF ) {
bb98fe42 4668 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 4669 return propagateFloatx80NaN(a, b, status);
158142c2 4670 }
ff32e16e 4671 float_raise(float_flag_invalid, status);
af39bc8c 4672 return floatx80_default_nan(status);
158142c2
FB
4673 }
4674 if ( aExp == 0 ) {
4675 aExp = 1;
4676 bExp = 1;
4677 }
4678 zSig1 = 0;
4679 if ( bSig < aSig ) goto aBigger;
4680 if ( aSig < bSig ) goto bBigger;
a2f2d288 4681 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
4682 bExpBigger:
4683 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4684 if ((uint64_t)(bSig << 1)) {
4685 return propagateFloatx80NaN(a, b, status);
4686 }
0f605c88
LV
4687 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
4688 floatx80_infinity_low);
158142c2
FB
4689 }
4690 if ( aExp == 0 ) ++expDiff;
4691 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4692 bBigger:
4693 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
4694 zExp = bExp;
4695 zSign ^= 1;
4696 goto normalizeRoundAndPack;
4697 aExpBigger:
4698 if ( aExp == 0x7FFF ) {
ff32e16e
PM
4699 if ((uint64_t)(aSig << 1)) {
4700 return propagateFloatx80NaN(a, b, status);
4701 }
158142c2
FB
4702 return a;
4703 }
4704 if ( bExp == 0 ) --expDiff;
4705 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4706 aBigger:
4707 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
4708 zExp = aExp;
4709 normalizeRoundAndPack:
a2f2d288 4710 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 4711 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
4712}
4713
4714/*----------------------------------------------------------------------------
4715| Returns the result of adding the extended double-precision floating-point
4716| values `a' and `b'. The operation is performed according to the IEC/IEEE
4717| Standard for Binary Floating-Point Arithmetic.
4718*----------------------------------------------------------------------------*/
4719
e5a41ffa 4720floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
4721{
4722 flag aSign, bSign;
4723
d1eb8f2a
AD
4724 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4725 float_raise(float_flag_invalid, status);
4726 return floatx80_default_nan(status);
4727 }
158142c2
FB
4728 aSign = extractFloatx80Sign( a );
4729 bSign = extractFloatx80Sign( b );
4730 if ( aSign == bSign ) {
ff32e16e 4731 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4732 }
4733 else {
ff32e16e 4734 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4735 }
4736
4737}
4738
4739/*----------------------------------------------------------------------------
4740| Returns the result of subtracting the extended double-precision floating-
4741| point values `a' and `b'. The operation is performed according to the
4742| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4743*----------------------------------------------------------------------------*/
4744
e5a41ffa 4745floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
4746{
4747 flag aSign, bSign;
4748
d1eb8f2a
AD
4749 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4750 float_raise(float_flag_invalid, status);
4751 return floatx80_default_nan(status);
4752 }
158142c2
FB
4753 aSign = extractFloatx80Sign( a );
4754 bSign = extractFloatx80Sign( b );
4755 if ( aSign == bSign ) {
ff32e16e 4756 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4757 }
4758 else {
ff32e16e 4759 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4760 }
4761
4762}
4763
4764/*----------------------------------------------------------------------------
4765| Returns the result of multiplying the extended double-precision floating-
4766| point values `a' and `b'. The operation is performed according to the
4767| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4768*----------------------------------------------------------------------------*/
4769
e5a41ffa 4770floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
4771{
4772 flag aSign, bSign, zSign;
f4014512 4773 int32_t aExp, bExp, zExp;
bb98fe42 4774 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 4775
d1eb8f2a
AD
4776 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4777 float_raise(float_flag_invalid, status);
4778 return floatx80_default_nan(status);
4779 }
158142c2
FB
4780 aSig = extractFloatx80Frac( a );
4781 aExp = extractFloatx80Exp( a );
4782 aSign = extractFloatx80Sign( a );
4783 bSig = extractFloatx80Frac( b );
4784 bExp = extractFloatx80Exp( b );
4785 bSign = extractFloatx80Sign( b );
4786 zSign = aSign ^ bSign;
4787 if ( aExp == 0x7FFF ) {
bb98fe42
AF
4788 if ( (uint64_t) ( aSig<<1 )
4789 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 4790 return propagateFloatx80NaN(a, b, status);
158142c2
FB
4791 }
4792 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
4793 return packFloatx80(zSign, floatx80_infinity_high,
4794 floatx80_infinity_low);
158142c2
FB
4795 }
4796 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4797 if ((uint64_t)(bSig << 1)) {
4798 return propagateFloatx80NaN(a, b, status);
4799 }
158142c2
FB
4800 if ( ( aExp | aSig ) == 0 ) {
4801 invalid:
ff32e16e 4802 float_raise(float_flag_invalid, status);
af39bc8c 4803 return floatx80_default_nan(status);
158142c2 4804 }
0f605c88
LV
4805 return packFloatx80(zSign, floatx80_infinity_high,
4806 floatx80_infinity_low);
158142c2
FB
4807 }
4808 if ( aExp == 0 ) {
4809 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4810 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4811 }
4812 if ( bExp == 0 ) {
4813 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
4814 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4815 }
4816 zExp = aExp + bExp - 0x3FFE;
4817 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 4818 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
4819 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
4820 --zExp;
4821 }
a2f2d288 4822 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 4823 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
4824}
4825
4826/*----------------------------------------------------------------------------
4827| Returns the result of dividing the extended double-precision floating-point
4828| value `a' by the corresponding value `b'. The operation is performed
4829| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4830*----------------------------------------------------------------------------*/
4831
e5a41ffa 4832floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
4833{
4834 flag aSign, bSign, zSign;
f4014512 4835 int32_t aExp, bExp, zExp;
bb98fe42
AF
4836 uint64_t aSig, bSig, zSig0, zSig1;
4837 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 4838
d1eb8f2a
AD
4839 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4840 float_raise(float_flag_invalid, status);
4841 return floatx80_default_nan(status);
4842 }
158142c2
FB
4843 aSig = extractFloatx80Frac( a );
4844 aExp = extractFloatx80Exp( a );
4845 aSign = extractFloatx80Sign( a );
4846 bSig = extractFloatx80Frac( b );
4847 bExp = extractFloatx80Exp( b );
4848 bSign = extractFloatx80Sign( b );
4849 zSign = aSign ^ bSign;
4850 if ( aExp == 0x7FFF ) {
ff32e16e
PM
4851 if ((uint64_t)(aSig << 1)) {
4852 return propagateFloatx80NaN(a, b, status);
4853 }
158142c2 4854 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4855 if ((uint64_t)(bSig << 1)) {
4856 return propagateFloatx80NaN(a, b, status);
4857 }
158142c2
FB
4858 goto invalid;
4859 }
0f605c88
LV
4860 return packFloatx80(zSign, floatx80_infinity_high,
4861 floatx80_infinity_low);
158142c2
FB
4862 }
4863 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4864 if ((uint64_t)(bSig << 1)) {
4865 return propagateFloatx80NaN(a, b, status);
4866 }
158142c2
FB
4867 return packFloatx80( zSign, 0, 0 );
4868 }
4869 if ( bExp == 0 ) {
4870 if ( bSig == 0 ) {
4871 if ( ( aExp | aSig ) == 0 ) {
4872 invalid:
ff32e16e 4873 float_raise(float_flag_invalid, status);
af39bc8c 4874 return floatx80_default_nan(status);
158142c2 4875 }
ff32e16e 4876 float_raise(float_flag_divbyzero, status);
0f605c88
LV
4877 return packFloatx80(zSign, floatx80_infinity_high,
4878 floatx80_infinity_low);
158142c2
FB
4879 }
4880 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4881 }
4882 if ( aExp == 0 ) {
4883 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4884 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4885 }
4886 zExp = aExp - bExp + 0x3FFE;
4887 rem1 = 0;
4888 if ( bSig <= aSig ) {
4889 shift128Right( aSig, 0, 1, &aSig, &rem1 );
4890 ++zExp;
4891 }
4892 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
4893 mul64To128( bSig, zSig0, &term0, &term1 );
4894 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 4895 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
4896 --zSig0;
4897 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4898 }
4899 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 4900 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
4901 mul64To128( bSig, zSig1, &term1, &term2 );
4902 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 4903 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
4904 --zSig1;
4905 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
4906 }
4907 zSig1 |= ( ( rem1 | rem2 ) != 0 );
4908 }
a2f2d288 4909 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 4910 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
4911}
4912
4913/*----------------------------------------------------------------------------
4914| Returns the remainder of the extended double-precision floating-point value
4915| `a' with respect to the corresponding value `b'. The operation is performed
4916| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4917*----------------------------------------------------------------------------*/
4918
e5a41ffa 4919floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
158142c2 4920{
ed086f3d 4921 flag aSign, zSign;
f4014512 4922 int32_t aExp, bExp, expDiff;
bb98fe42
AF
4923 uint64_t aSig0, aSig1, bSig;
4924 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 4925
d1eb8f2a
AD
4926 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4927 float_raise(float_flag_invalid, status);
4928 return floatx80_default_nan(status);
4929 }
158142c2
FB
4930 aSig0 = extractFloatx80Frac( a );
4931 aExp = extractFloatx80Exp( a );
4932 aSign = extractFloatx80Sign( a );
4933 bSig = extractFloatx80Frac( b );
4934 bExp = extractFloatx80Exp( b );
158142c2 4935 if ( aExp == 0x7FFF ) {
bb98fe42
AF
4936 if ( (uint64_t) ( aSig0<<1 )
4937 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 4938 return propagateFloatx80NaN(a, b, status);
158142c2
FB
4939 }
4940 goto invalid;
4941 }
4942 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4943 if ((uint64_t)(bSig << 1)) {
4944 return propagateFloatx80NaN(a, b, status);
4945 }
158142c2
FB
4946 return a;
4947 }
4948 if ( bExp == 0 ) {
4949 if ( bSig == 0 ) {
4950 invalid:
ff32e16e 4951 float_raise(float_flag_invalid, status);
af39bc8c 4952 return floatx80_default_nan(status);
158142c2
FB
4953 }
4954 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4955 }
4956 if ( aExp == 0 ) {
bb98fe42 4957 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
158142c2
FB
4958 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4959 }
4960 bSig |= LIT64( 0x8000000000000000 );
4961 zSign = aSign;
4962 expDiff = aExp - bExp;
4963 aSig1 = 0;
4964 if ( expDiff < 0 ) {
4965 if ( expDiff < -1 ) return a;
4966 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
4967 expDiff = 0;
4968 }
4969 q = ( bSig <= aSig0 );
4970 if ( q ) aSig0 -= bSig;
4971 expDiff -= 64;
4972 while ( 0 < expDiff ) {
4973 q = estimateDiv128To64( aSig0, aSig1, bSig );
4974 q = ( 2 < q ) ? q - 2 : 0;
4975 mul64To128( bSig, q, &term0, &term1 );
4976 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4977 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
4978 expDiff -= 62;
4979 }
4980 expDiff += 64;
4981 if ( 0 < expDiff ) {
4982 q = estimateDiv128To64( aSig0, aSig1, bSig );
4983 q = ( 2 < q ) ? q - 2 : 0;
4984 q >>= 64 - expDiff;
4985 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
4986 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4987 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
4988 while ( le128( term0, term1, aSig0, aSig1 ) ) {
4989 ++q;
4990 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4991 }
4992 }
4993 else {
4994 term1 = 0;
4995 term0 = bSig;
4996 }
4997 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
4998 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
4999 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
5000 && ( q & 1 ) )
5001 ) {
5002 aSig0 = alternateASig0;
5003 aSig1 = alternateASig1;
5004 zSign = ! zSign;
5005 }
5006 return
5007 normalizeRoundAndPackFloatx80(
ff32e16e 5008 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
5009
5010}
5011
5012/*----------------------------------------------------------------------------
5013| Returns the square root of the extended double-precision floating-point
5014| value `a'. The operation is performed according to the IEC/IEEE Standard
5015| for Binary Floating-Point Arithmetic.
5016*----------------------------------------------------------------------------*/
5017
e5a41ffa 5018floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2
FB
5019{
5020 flag aSign;
f4014512 5021 int32_t aExp, zExp;
bb98fe42
AF
5022 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5023 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 5024
d1eb8f2a
AD
5025 if (floatx80_invalid_encoding(a)) {
5026 float_raise(float_flag_invalid, status);
5027 return floatx80_default_nan(status);
5028 }
158142c2
FB
5029 aSig0 = extractFloatx80Frac( a );
5030 aExp = extractFloatx80Exp( a );
5031 aSign = extractFloatx80Sign( a );
5032 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5033 if ((uint64_t)(aSig0 << 1)) {
5034 return propagateFloatx80NaN(a, a, status);
5035 }
158142c2
FB
5036 if ( ! aSign ) return a;
5037 goto invalid;
5038 }
5039 if ( aSign ) {
5040 if ( ( aExp | aSig0 ) == 0 ) return a;
5041 invalid:
ff32e16e 5042 float_raise(float_flag_invalid, status);
af39bc8c 5043 return floatx80_default_nan(status);
158142c2
FB
5044 }
5045 if ( aExp == 0 ) {
5046 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5047 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5048 }
5049 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5050 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5051 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5052 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5053 doubleZSig0 = zSig0<<1;
5054 mul64To128( zSig0, zSig0, &term0, &term1 );
5055 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 5056 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5057 --zSig0;
5058 doubleZSig0 -= 2;
5059 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5060 }
5061 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5062 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5063 if ( zSig1 == 0 ) zSig1 = 1;
5064 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5065 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5066 mul64To128( zSig1, zSig1, &term2, &term3 );
5067 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 5068 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5069 --zSig1;
5070 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5071 term3 |= 1;
5072 term2 |= doubleZSig0;
5073 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5074 }
5075 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5076 }
5077 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5078 zSig0 |= doubleZSig0;
a2f2d288
PM
5079 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5080 0, zExp, zSig0, zSig1, status);
158142c2
FB
5081}
5082
5083/*----------------------------------------------------------------------------
b689362d
AJ
5084| Returns 1 if the extended double-precision floating-point value `a' is equal
5085| to the corresponding value `b', and 0 otherwise. The invalid exception is
5086| raised if either operand is a NaN. Otherwise, the comparison is performed
5087| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5088*----------------------------------------------------------------------------*/
5089
e5a41ffa 5090int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5091{
5092
d1eb8f2a
AD
5093 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5094 || (extractFloatx80Exp(a) == 0x7FFF
5095 && (uint64_t) (extractFloatx80Frac(a) << 1))
5096 || (extractFloatx80Exp(b) == 0x7FFF
5097 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5098 ) {
ff32e16e 5099 float_raise(float_flag_invalid, status);
158142c2
FB
5100 return 0;
5101 }
5102 return
5103 ( a.low == b.low )
5104 && ( ( a.high == b.high )
5105 || ( ( a.low == 0 )
bb98fe42 5106 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5107 );
5108
5109}
5110
5111/*----------------------------------------------------------------------------
5112| Returns 1 if the extended double-precision floating-point value `a' is
5113| less than or equal to the corresponding value `b', and 0 otherwise. The
f5a64251
AJ
5114| invalid exception is raised if either operand is a NaN. The comparison is
5115| performed according to the IEC/IEEE Standard for Binary Floating-Point
5116| Arithmetic.
158142c2
FB
5117*----------------------------------------------------------------------------*/
5118
e5a41ffa 5119int floatx80_le(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5120{
5121 flag aSign, bSign;
5122
d1eb8f2a
AD
5123 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5124 || (extractFloatx80Exp(a) == 0x7FFF
5125 && (uint64_t) (extractFloatx80Frac(a) << 1))
5126 || (extractFloatx80Exp(b) == 0x7FFF
5127 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5128 ) {
ff32e16e 5129 float_raise(float_flag_invalid, status);
158142c2
FB
5130 return 0;
5131 }
5132 aSign = extractFloatx80Sign( a );
5133 bSign = extractFloatx80Sign( b );
5134 if ( aSign != bSign ) {
5135 return
5136 aSign
bb98fe42 5137 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5138 == 0 );
5139 }
5140 return
5141 aSign ? le128( b.high, b.low, a.high, a.low )
5142 : le128( a.high, a.low, b.high, b.low );
5143
5144}
5145
5146/*----------------------------------------------------------------------------
5147| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5148| less than the corresponding value `b', and 0 otherwise. The invalid
5149| exception is raised if either operand is a NaN. The comparison is performed
5150| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5151*----------------------------------------------------------------------------*/
5152
e5a41ffa 5153int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5154{
5155 flag aSign, bSign;
5156
d1eb8f2a
AD
5157 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5158 || (extractFloatx80Exp(a) == 0x7FFF
5159 && (uint64_t) (extractFloatx80Frac(a) << 1))
5160 || (extractFloatx80Exp(b) == 0x7FFF
5161 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5162 ) {
ff32e16e 5163 float_raise(float_flag_invalid, status);
158142c2
FB
5164 return 0;
5165 }
5166 aSign = extractFloatx80Sign( a );
5167 bSign = extractFloatx80Sign( b );
5168 if ( aSign != bSign ) {
5169 return
5170 aSign
bb98fe42 5171 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5172 != 0 );
5173 }
5174 return
5175 aSign ? lt128( b.high, b.low, a.high, a.low )
5176 : lt128( a.high, a.low, b.high, b.low );
5177
5178}
5179
67b7861d
AJ
5180/*----------------------------------------------------------------------------
5181| Returns 1 if the extended double-precision floating-point values `a' and `b'
f5a64251
AJ
5182| cannot be compared, and 0 otherwise. The invalid exception is raised if
5183| either operand is a NaN. The comparison is performed according to the
5184| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
67b7861d 5185*----------------------------------------------------------------------------*/
e5a41ffa 5186int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
67b7861d 5187{
d1eb8f2a
AD
5188 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5189 || (extractFloatx80Exp(a) == 0x7FFF
5190 && (uint64_t) (extractFloatx80Frac(a) << 1))
5191 || (extractFloatx80Exp(b) == 0x7FFF
5192 && (uint64_t) (extractFloatx80Frac(b) << 1))
67b7861d 5193 ) {
ff32e16e 5194 float_raise(float_flag_invalid, status);
67b7861d
AJ
5195 return 1;
5196 }
5197 return 0;
5198}
5199
158142c2 5200/*----------------------------------------------------------------------------
b689362d 5201| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5202| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5203| cause an exception. The comparison is performed according to the IEC/IEEE
5204| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5205*----------------------------------------------------------------------------*/
5206
e5a41ffa 5207int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5208{
5209
d1eb8f2a
AD
5210 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5211 float_raise(float_flag_invalid, status);
5212 return 0;
5213 }
158142c2 5214 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5215 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5216 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5217 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5218 ) {
af39bc8c
AM
5219 if (floatx80_is_signaling_nan(a, status)
5220 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5221 float_raise(float_flag_invalid, status);
b689362d 5222 }
158142c2
FB
5223 return 0;
5224 }
5225 return
5226 ( a.low == b.low )
5227 && ( ( a.high == b.high )
5228 || ( ( a.low == 0 )
bb98fe42 5229 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5230 );
5231
5232}
5233
5234/*----------------------------------------------------------------------------
5235| Returns 1 if the extended double-precision floating-point value `a' is less
5236| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
5237| do not cause an exception. Otherwise, the comparison is performed according
5238| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5239*----------------------------------------------------------------------------*/
5240
e5a41ffa 5241int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5242{
5243 flag aSign, bSign;
5244
d1eb8f2a
AD
5245 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5246 float_raise(float_flag_invalid, status);
5247 return 0;
5248 }
158142c2 5249 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5250 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5251 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5252 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5253 ) {
af39bc8c
AM
5254 if (floatx80_is_signaling_nan(a, status)
5255 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5256 float_raise(float_flag_invalid, status);
158142c2
FB
5257 }
5258 return 0;
5259 }
5260 aSign = extractFloatx80Sign( a );
5261 bSign = extractFloatx80Sign( b );
5262 if ( aSign != bSign ) {
5263 return
5264 aSign
bb98fe42 5265 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5266 == 0 );
5267 }
5268 return
5269 aSign ? le128( b.high, b.low, a.high, a.low )
5270 : le128( a.high, a.low, b.high, b.low );
5271
5272}
5273
5274/*----------------------------------------------------------------------------
5275| Returns 1 if the extended double-precision floating-point value `a' is less
5276| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
5277| an exception. Otherwise, the comparison is performed according to the
5278| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5279*----------------------------------------------------------------------------*/
5280
e5a41ffa 5281int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5282{
5283 flag aSign, bSign;
5284
d1eb8f2a
AD
5285 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5286 float_raise(float_flag_invalid, status);
5287 return 0;
5288 }
158142c2 5289 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5290 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5291 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5292 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5293 ) {
af39bc8c
AM
5294 if (floatx80_is_signaling_nan(a, status)
5295 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5296 float_raise(float_flag_invalid, status);
158142c2
FB
5297 }
5298 return 0;
5299 }
5300 aSign = extractFloatx80Sign( a );
5301 bSign = extractFloatx80Sign( b );
5302 if ( aSign != bSign ) {
5303 return
5304 aSign
bb98fe42 5305 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5306 != 0 );
5307 }
5308 return
5309 aSign ? lt128( b.high, b.low, a.high, a.low )
5310 : lt128( a.high, a.low, b.high, b.low );
5311
5312}
5313
67b7861d
AJ
5314/*----------------------------------------------------------------------------
5315| Returns 1 if the extended double-precision floating-point values `a' and `b'
5316| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
5317| The comparison is performed according to the IEC/IEEE Standard for Binary
5318| Floating-Point Arithmetic.
5319*----------------------------------------------------------------------------*/
e5a41ffa 5320int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
67b7861d 5321{
d1eb8f2a
AD
5322 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5323 float_raise(float_flag_invalid, status);
5324 return 1;
5325 }
67b7861d
AJ
5326 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5327 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5328 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5329 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5330 ) {
af39bc8c
AM
5331 if (floatx80_is_signaling_nan(a, status)
5332 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5333 float_raise(float_flag_invalid, status);
67b7861d
AJ
5334 }
5335 return 1;
5336 }
5337 return 0;
5338}
5339
158142c2
FB
5340/*----------------------------------------------------------------------------
5341| Returns the result of converting the quadruple-precision floating-point
5342| value `a' to the 32-bit two's complement integer format. The conversion
5343| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5344| Arithmetic---which means in particular that the conversion is rounded
5345| according to the current rounding mode. If `a' is a NaN, the largest
5346| positive integer is returned. Otherwise, if the conversion overflows, the
5347| largest integer with the same sign as `a' is returned.
5348*----------------------------------------------------------------------------*/
5349
f4014512 5350int32_t float128_to_int32(float128 a, float_status *status)
158142c2
FB
5351{
5352 flag aSign;
f4014512 5353 int32_t aExp, shiftCount;
bb98fe42 5354 uint64_t aSig0, aSig1;
158142c2
FB
5355
5356 aSig1 = extractFloat128Frac1( a );
5357 aSig0 = extractFloat128Frac0( a );
5358 aExp = extractFloat128Exp( a );
5359 aSign = extractFloat128Sign( a );
5360 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5361 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5362 aSig0 |= ( aSig1 != 0 );
5363 shiftCount = 0x4028 - aExp;
5364 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 5365 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
5366
5367}
5368
5369/*----------------------------------------------------------------------------
5370| Returns the result of converting the quadruple-precision floating-point
5371| value `a' to the 32-bit two's complement integer format. The conversion
5372| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5373| Arithmetic, except that the conversion is always rounded toward zero. If
5374| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
5375| conversion overflows, the largest integer with the same sign as `a' is
5376| returned.
5377*----------------------------------------------------------------------------*/
5378
f4014512 5379int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2
FB
5380{
5381 flag aSign;
f4014512 5382 int32_t aExp, shiftCount;
bb98fe42 5383 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 5384 int32_t z;
158142c2
FB
5385
5386 aSig1 = extractFloat128Frac1( a );
5387 aSig0 = extractFloat128Frac0( a );
5388 aExp = extractFloat128Exp( a );
5389 aSign = extractFloat128Sign( a );
5390 aSig0 |= ( aSig1 != 0 );
5391 if ( 0x401E < aExp ) {
5392 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5393 goto invalid;
5394 }
5395 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5396 if (aExp || aSig0) {
5397 status->float_exception_flags |= float_flag_inexact;
5398 }
158142c2
FB
5399 return 0;
5400 }
5401 aSig0 |= LIT64( 0x0001000000000000 );
5402 shiftCount = 0x402F - aExp;
5403 savedASig = aSig0;
5404 aSig0 >>= shiftCount;
5405 z = aSig0;
5406 if ( aSign ) z = - z;
5407 if ( ( z < 0 ) ^ aSign ) {
5408 invalid:
ff32e16e 5409 float_raise(float_flag_invalid, status);
bb98fe42 5410 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5411 }
5412 if ( ( aSig0<<shiftCount ) != savedASig ) {
a2f2d288 5413 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5414 }
5415 return z;
5416
5417}
5418
5419/*----------------------------------------------------------------------------
5420| Returns the result of converting the quadruple-precision floating-point
5421| value `a' to the 64-bit two's complement integer format. The conversion
5422| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5423| Arithmetic---which means in particular that the conversion is rounded
5424| according to the current rounding mode. If `a' is a NaN, the largest
5425| positive integer is returned. Otherwise, if the conversion overflows, the
5426| largest integer with the same sign as `a' is returned.
5427*----------------------------------------------------------------------------*/
5428
f42c2224 5429int64_t float128_to_int64(float128 a, float_status *status)
158142c2
FB
5430{
5431 flag aSign;
f4014512 5432 int32_t aExp, shiftCount;
bb98fe42 5433 uint64_t aSig0, aSig1;
158142c2
FB
5434
5435 aSig1 = extractFloat128Frac1( a );
5436 aSig0 = extractFloat128Frac0( a );
5437 aExp = extractFloat128Exp( a );
5438 aSign = extractFloat128Sign( a );
5439 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5440 shiftCount = 0x402F - aExp;
5441 if ( shiftCount <= 0 ) {
5442 if ( 0x403E < aExp ) {
ff32e16e 5443 float_raise(float_flag_invalid, status);
158142c2
FB
5444 if ( ! aSign
5445 || ( ( aExp == 0x7FFF )
5446 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5447 )
5448 ) {
5449 return LIT64( 0x7FFFFFFFFFFFFFFF );
5450 }
bb98fe42 5451 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5452 }
5453 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5454 }
5455 else {
5456 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5457 }
ff32e16e 5458 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
5459
5460}
5461
5462/*----------------------------------------------------------------------------
5463| Returns the result of converting the quadruple-precision floating-point
5464| value `a' to the 64-bit two's complement integer format. The conversion
5465| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5466| Arithmetic, except that the conversion is always rounded toward zero.
5467| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
5468| the conversion overflows, the largest integer with the same sign as `a' is
5469| returned.
5470*----------------------------------------------------------------------------*/
5471
f42c2224 5472int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2
FB
5473{
5474 flag aSign;
f4014512 5475 int32_t aExp, shiftCount;
bb98fe42 5476 uint64_t aSig0, aSig1;
f42c2224 5477 int64_t z;
158142c2
FB
5478
5479 aSig1 = extractFloat128Frac1( a );
5480 aSig0 = extractFloat128Frac0( a );
5481 aExp = extractFloat128Exp( a );
5482 aSign = extractFloat128Sign( a );
5483 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5484 shiftCount = aExp - 0x402F;
5485 if ( 0 < shiftCount ) {
5486 if ( 0x403E <= aExp ) {
5487 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5488 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
5489 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
a2f2d288
PM
5490 if (aSig1) {
5491 status->float_exception_flags |= float_flag_inexact;
5492 }
158142c2
FB
5493 }
5494 else {
ff32e16e 5495 float_raise(float_flag_invalid, status);
158142c2
FB
5496 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
5497 return LIT64( 0x7FFFFFFFFFFFFFFF );
5498 }
5499 }
bb98fe42 5500 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5501 }
5502 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 5503 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
a2f2d288 5504 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5505 }
5506 }
5507 else {
5508 if ( aExp < 0x3FFF ) {
5509 if ( aExp | aSig0 | aSig1 ) {
a2f2d288 5510 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5511 }
5512 return 0;
5513 }
5514 z = aSig0>>( - shiftCount );
5515 if ( aSig1
bb98fe42 5516 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
a2f2d288 5517 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5518 }
5519 }
5520 if ( aSign ) z = - z;
5521 return z;
5522
5523}
5524
2e6d8568
BR
5525/*----------------------------------------------------------------------------
5526| Returns the result of converting the quadruple-precision floating-point value
5527| `a' to the 64-bit unsigned integer format. The conversion is
5528| performed according to the IEC/IEEE Standard for Binary Floating-Point
5529| Arithmetic---which means in particular that the conversion is rounded
5530| according to the current rounding mode. If `a' is a NaN, the largest
5531| positive integer is returned. If the conversion overflows, the
5532| largest unsigned integer is returned. If 'a' is negative, the value is
5533| rounded and zero is returned; negative values that do not round to zero
5534| will raise the inexact exception.
5535*----------------------------------------------------------------------------*/
5536
5537uint64_t float128_to_uint64(float128 a, float_status *status)
5538{
5539 flag aSign;
5540 int aExp;
5541 int shiftCount;
5542 uint64_t aSig0, aSig1;
5543
5544 aSig0 = extractFloat128Frac0(a);
5545 aSig1 = extractFloat128Frac1(a);
5546 aExp = extractFloat128Exp(a);
5547 aSign = extractFloat128Sign(a);
5548 if (aSign && (aExp > 0x3FFE)) {
5549 float_raise(float_flag_invalid, status);
5550 if (float128_is_any_nan(a)) {
5551 return LIT64(0xFFFFFFFFFFFFFFFF);
5552 } else {
5553 return 0;
5554 }
5555 }
5556 if (aExp) {
5557 aSig0 |= LIT64(0x0001000000000000);
5558 }
5559 shiftCount = 0x402F - aExp;
5560 if (shiftCount <= 0) {
5561 if (0x403E < aExp) {
5562 float_raise(float_flag_invalid, status);
5563 return LIT64(0xFFFFFFFFFFFFFFFF);
5564 }
5565 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
5566 } else {
5567 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
5568 }
5569 return roundAndPackUint64(aSign, aSig0, aSig1, status);
5570}
5571
5572uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
5573{
5574 uint64_t v;
5575 signed char current_rounding_mode = status->float_rounding_mode;
5576
5577 set_float_rounding_mode(float_round_to_zero, status);
5578 v = float128_to_uint64(a, status);
5579 set_float_rounding_mode(current_rounding_mode, status);
5580
5581 return v;
5582}
5583
158142c2
FB
5584/*----------------------------------------------------------------------------
5585| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
5586| value `a' to the 32-bit unsigned integer format. The conversion
5587| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5588| Arithmetic except that the conversion is always rounded toward zero.
5589| If `a' is a NaN, the largest positive integer is returned. Otherwise,
5590| if the conversion overflows, the largest unsigned integer is returned.
5591| If 'a' is negative, the value is rounded and zero is returned; negative
5592| values that do not round to zero will raise the inexact exception.
5593*----------------------------------------------------------------------------*/
5594
5595uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
5596{
5597 uint64_t v;
5598 uint32_t res;
5599 int old_exc_flags = get_float_exception_flags(status);
5600
5601 v = float128_to_uint64_round_to_zero(a, status);
5602 if (v > 0xffffffff) {
5603 res = 0xffffffff;
5604 } else {
5605 return v;
5606 }
5607 set_float_exception_flags(old_exc_flags, status);
5608 float_raise(float_flag_invalid, status);
5609 return res;
5610}
5611
5612/*----------------------------------------------------------------------------
5613| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
5614| value `a' to the single-precision floating-point format. The conversion
5615| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5616| Arithmetic.
5617*----------------------------------------------------------------------------*/
5618
e5a41ffa 5619float32 float128_to_float32(float128 a, float_status *status)
158142c2
FB
5620{
5621 flag aSign;
f4014512 5622 int32_t aExp;
bb98fe42
AF
5623 uint64_t aSig0, aSig1;
5624 uint32_t zSig;
158142c2
FB
5625
5626 aSig1 = extractFloat128Frac1( a );
5627 aSig0 = extractFloat128Frac0( a );
5628 aExp = extractFloat128Exp( a );
5629 aSign = extractFloat128Sign( a );
5630 if ( aExp == 0x7FFF ) {
5631 if ( aSig0 | aSig1 ) {
ff32e16e 5632 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
5633 }
5634 return packFloat32( aSign, 0xFF, 0 );
5635 }
5636 aSig0 |= ( aSig1 != 0 );
5637 shift64RightJamming( aSig0, 18, &aSig0 );
5638 zSig = aSig0;
5639 if ( aExp || zSig ) {
5640 zSig |= 0x40000000;
5641 aExp -= 0x3F81;
5642 }
ff32e16e 5643 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
5644
5645}
5646
5647/*----------------------------------------------------------------------------
5648| Returns the result of converting the quadruple-precision floating-point
5649| value `a' to the double-precision floating-point format. The conversion
5650| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5651| Arithmetic.
5652*----------------------------------------------------------------------------*/
5653
e5a41ffa 5654float64 float128_to_float64(float128 a, float_status *status)
158142c2
FB
5655{
5656 flag aSign;
f4014512 5657 int32_t aExp;
bb98fe42 5658 uint64_t aSig0, aSig1;
158142c2
FB
5659
5660 aSig1 = extractFloat128Frac1( a );
5661 aSig0 = extractFloat128Frac0( a );
5662 aExp = extractFloat128Exp( a );
5663 aSign = extractFloat128Sign( a );
5664 if ( aExp == 0x7FFF ) {
5665 if ( aSig0 | aSig1 ) {
ff32e16e 5666 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
5667 }
5668 return packFloat64( aSign, 0x7FF, 0 );
5669 }
5670 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5671 aSig0 |= ( aSig1 != 0 );
5672 if ( aExp || aSig0 ) {
5673 aSig0 |= LIT64( 0x4000000000000000 );
5674 aExp -= 0x3C01;
5675 }
ff32e16e 5676 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
5677
5678}
5679
158142c2
FB
5680/*----------------------------------------------------------------------------
5681| Returns the result of converting the quadruple-precision floating-point
5682| value `a' to the extended double-precision floating-point format. The
5683| conversion is performed according to the IEC/IEEE Standard for Binary
5684| Floating-Point Arithmetic.
5685*----------------------------------------------------------------------------*/
5686
e5a41ffa 5687floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2
FB
5688{
5689 flag aSign;
f4014512 5690 int32_t aExp;
bb98fe42 5691 uint64_t aSig0, aSig1;
158142c2
FB
5692
5693 aSig1 = extractFloat128Frac1( a );
5694 aSig0 = extractFloat128Frac0( a );
5695 aExp = extractFloat128Exp( a );
5696 aSign = extractFloat128Sign( a );
5697 if ( aExp == 0x7FFF ) {
5698 if ( aSig0 | aSig1 ) {
ff32e16e 5699 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
158142c2 5700 }
0f605c88
LV
5701 return packFloatx80(aSign, floatx80_infinity_high,
5702 floatx80_infinity_low);
158142c2
FB
5703 }
5704 if ( aExp == 0 ) {
5705 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
5706 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5707 }
5708 else {
5709 aSig0 |= LIT64( 0x0001000000000000 );
5710 }
5711 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 5712 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
5713
5714}
5715
158142c2
FB
5716/*----------------------------------------------------------------------------
5717| Rounds the quadruple-precision floating-point value `a' to an integer, and
5718| returns the result as a quadruple-precision floating-point value. The
5719| operation is performed according to the IEC/IEEE Standard for Binary
5720| Floating-Point Arithmetic.
5721*----------------------------------------------------------------------------*/
5722
e5a41ffa 5723float128 float128_round_to_int(float128 a, float_status *status)
158142c2
FB
5724{
5725 flag aSign;
f4014512 5726 int32_t aExp;
bb98fe42 5727 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5728 float128 z;
5729
5730 aExp = extractFloat128Exp( a );
5731 if ( 0x402F <= aExp ) {
5732 if ( 0x406F <= aExp ) {
5733 if ( ( aExp == 0x7FFF )
5734 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
5735 ) {
ff32e16e 5736 return propagateFloat128NaN(a, a, status);
158142c2
FB
5737 }
5738 return a;
5739 }
5740 lastBitMask = 1;
5741 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
5742 roundBitsMask = lastBitMask - 1;
5743 z = a;
a2f2d288 5744 switch (status->float_rounding_mode) {
dc355b76 5745 case float_round_nearest_even:
158142c2
FB
5746 if ( lastBitMask ) {
5747 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
5748 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
5749 }
5750 else {
bb98fe42 5751 if ( (int64_t) z.low < 0 ) {
158142c2 5752 ++z.high;
bb98fe42 5753 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
5754 }
5755 }
dc355b76 5756 break;
f9288a76
PM
5757 case float_round_ties_away:
5758 if (lastBitMask) {
5759 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
5760 } else {
5761 if ((int64_t) z.low < 0) {
5762 ++z.high;
5763 }
5764 }
5765 break;
dc355b76
PM
5766 case float_round_to_zero:
5767 break;
5768 case float_round_up:
5769 if (!extractFloat128Sign(z)) {
5770 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
5771 }
5772 break;
5773 case float_round_down:
5774 if (extractFloat128Sign(z)) {
5775 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 5776 }
dc355b76
PM
5777 break;
5778 default:
5779 abort();
158142c2
FB
5780 }
5781 z.low &= ~ roundBitsMask;
5782 }
5783 else {
5784 if ( aExp < 0x3FFF ) {
bb98fe42 5785 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
a2f2d288 5786 status->float_exception_flags |= float_flag_inexact;
158142c2 5787 aSign = extractFloat128Sign( a );
a2f2d288 5788 switch (status->float_rounding_mode) {
158142c2
FB
5789 case float_round_nearest_even:
5790 if ( ( aExp == 0x3FFE )
5791 && ( extractFloat128Frac0( a )
5792 | extractFloat128Frac1( a ) )
5793 ) {
5794 return packFloat128( aSign, 0x3FFF, 0, 0 );
5795 }
5796 break;
f9288a76
PM
5797 case float_round_ties_away:
5798 if (aExp == 0x3FFE) {
5799 return packFloat128(aSign, 0x3FFF, 0, 0);
5800 }
5801 break;
158142c2
FB
5802 case float_round_down:
5803 return
5804 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
5805 : packFloat128( 0, 0, 0, 0 );
5806 case float_round_up:
5807 return
5808 aSign ? packFloat128( 1, 0, 0, 0 )
5809 : packFloat128( 0, 0x3FFF, 0, 0 );
5810 }
5811 return packFloat128( aSign, 0, 0, 0 );
5812 }
5813 lastBitMask = 1;
5814 lastBitMask <<= 0x402F - aExp;
5815 roundBitsMask = lastBitMask - 1;
5816 z.low = 0;
5817 z.high = a.high;
a2f2d288 5818 switch (status->float_rounding_mode) {
dc355b76 5819 case float_round_nearest_even:
158142c2
FB
5820 z.high += lastBitMask>>1;
5821 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
5822 z.high &= ~ lastBitMask;
5823 }
dc355b76 5824 break;
f9288a76
PM
5825 case float_round_ties_away:
5826 z.high += lastBitMask>>1;
5827 break;
dc355b76
PM
5828 case float_round_to_zero:
5829 break;
5830 case float_round_up:
5831 if (!extractFloat128Sign(z)) {
158142c2
FB
5832 z.high |= ( a.low != 0 );
5833 z.high += roundBitsMask;
5834 }
dc355b76
PM
5835 break;
5836 case float_round_down:
5837 if (extractFloat128Sign(z)) {
5838 z.high |= (a.low != 0);
5839 z.high += roundBitsMask;
5840 }
5841 break;
5842 default:
5843 abort();
158142c2
FB
5844 }
5845 z.high &= ~ roundBitsMask;
5846 }
5847 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
a2f2d288 5848 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5849 }
5850 return z;
5851
5852}
5853
5854/*----------------------------------------------------------------------------
5855| Returns the result of adding the absolute values of the quadruple-precision
5856| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
5857| before being returned. `zSign' is ignored if the result is a NaN.
5858| The addition is performed according to the IEC/IEEE Standard for Binary
5859| Floating-Point Arithmetic.
5860*----------------------------------------------------------------------------*/
5861
e5a41ffa
PM
5862static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
5863 float_status *status)
158142c2 5864{
f4014512 5865 int32_t aExp, bExp, zExp;
bb98fe42 5866 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 5867 int32_t expDiff;
158142c2
FB
5868
5869 aSig1 = extractFloat128Frac1( a );
5870 aSig0 = extractFloat128Frac0( a );
5871 aExp = extractFloat128Exp( a );
5872 bSig1 = extractFloat128Frac1( b );
5873 bSig0 = extractFloat128Frac0( b );
5874 bExp = extractFloat128Exp( b );
5875 expDiff = aExp - bExp;
5876 if ( 0 < expDiff ) {
5877 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5878 if (aSig0 | aSig1) {
5879 return propagateFloat128NaN(a, b, status);
5880 }
158142c2
FB
5881 return a;
5882 }
5883 if ( bExp == 0 ) {
5884 --expDiff;
5885 }
5886 else {
5887 bSig0 |= LIT64( 0x0001000000000000 );
5888 }
5889 shift128ExtraRightJamming(
5890 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
5891 zExp = aExp;
5892 }
5893 else if ( expDiff < 0 ) {
5894 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5895 if (bSig0 | bSig1) {
5896 return propagateFloat128NaN(a, b, status);
5897 }
158142c2
FB
5898 return packFloat128( zSign, 0x7FFF, 0, 0 );
5899 }
5900 if ( aExp == 0 ) {
5901 ++expDiff;
5902 }
5903 else {
5904 aSig0 |= LIT64( 0x0001000000000000 );
5905 }
5906 shift128ExtraRightJamming(
5907 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
5908 zExp = bExp;
5909 }
5910 else {
5911 if ( aExp == 0x7FFF ) {
5912 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 5913 return propagateFloat128NaN(a, b, status);
158142c2
FB
5914 }
5915 return a;
5916 }
5917 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 5918 if ( aExp == 0 ) {
a2f2d288 5919 if (status->flush_to_zero) {
e6afc87f 5920 if (zSig0 | zSig1) {
ff32e16e 5921 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
5922 }
5923 return packFloat128(zSign, 0, 0, 0);
5924 }
fe76d976
PB
5925 return packFloat128( zSign, 0, zSig0, zSig1 );
5926 }
158142c2
FB
5927 zSig2 = 0;
5928 zSig0 |= LIT64( 0x0002000000000000 );
5929 zExp = aExp;
5930 goto shiftRight1;
5931 }
5932 aSig0 |= LIT64( 0x0001000000000000 );
5933 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5934 --zExp;
5935 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
5936 ++zExp;
5937 shiftRight1:
5938 shift128ExtraRightJamming(
5939 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5940 roundAndPack:
ff32e16e 5941 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
5942
5943}
5944
5945/*----------------------------------------------------------------------------
5946| Returns the result of subtracting the absolute values of the quadruple-
5947| precision floating-point values `a' and `b'. If `zSign' is 1, the
5948| difference is negated before being returned. `zSign' is ignored if the
5949| result is a NaN. The subtraction is performed according to the IEC/IEEE
5950| Standard for Binary Floating-Point Arithmetic.
5951*----------------------------------------------------------------------------*/
5952
e5a41ffa
PM
5953static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
5954 float_status *status)
158142c2 5955{
f4014512 5956 int32_t aExp, bExp, zExp;
bb98fe42 5957 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 5958 int32_t expDiff;
158142c2
FB
5959
5960 aSig1 = extractFloat128Frac1( a );
5961 aSig0 = extractFloat128Frac0( a );
5962 aExp = extractFloat128Exp( a );
5963 bSig1 = extractFloat128Frac1( b );
5964 bSig0 = extractFloat128Frac0( b );
5965 bExp = extractFloat128Exp( b );
5966 expDiff = aExp - bExp;
5967 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5968 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
5969 if ( 0 < expDiff ) goto aExpBigger;
5970 if ( expDiff < 0 ) goto bExpBigger;
5971 if ( aExp == 0x7FFF ) {
5972 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 5973 return propagateFloat128NaN(a, b, status);
158142c2 5974 }
ff32e16e 5975 float_raise(float_flag_invalid, status);
af39bc8c 5976 return float128_default_nan(status);
158142c2
FB
5977 }
5978 if ( aExp == 0 ) {
5979 aExp = 1;
5980 bExp = 1;
5981 }
5982 if ( bSig0 < aSig0 ) goto aBigger;
5983 if ( aSig0 < bSig0 ) goto bBigger;
5984 if ( bSig1 < aSig1 ) goto aBigger;
5985 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
5986 return packFloat128(status->float_rounding_mode == float_round_down,
5987 0, 0, 0);
158142c2
FB
5988 bExpBigger:
5989 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5990 if (bSig0 | bSig1) {
5991 return propagateFloat128NaN(a, b, status);
5992 }
158142c2
FB
5993 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
5994 }
5995 if ( aExp == 0 ) {
5996 ++expDiff;
5997 }
5998 else {
5999 aSig0 |= LIT64( 0x4000000000000000 );
6000 }
6001 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6002 bSig0 |= LIT64( 0x4000000000000000 );
6003 bBigger:
6004 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
6005 zExp = bExp;
6006 zSign ^= 1;
6007 goto normalizeRoundAndPack;
6008 aExpBigger:
6009 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6010 if (aSig0 | aSig1) {
6011 return propagateFloat128NaN(a, b, status);
6012 }
158142c2
FB
6013 return a;
6014 }
6015 if ( bExp == 0 ) {
6016 --expDiff;
6017 }
6018 else {
6019 bSig0 |= LIT64( 0x4000000000000000 );
6020 }
6021 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
6022 aSig0 |= LIT64( 0x4000000000000000 );
6023 aBigger:
6024 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6025 zExp = aExp;
6026 normalizeRoundAndPack:
6027 --zExp;
ff32e16e
PM
6028 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
6029 status);
158142c2
FB
6030
6031}
6032
6033/*----------------------------------------------------------------------------
6034| Returns the result of adding the quadruple-precision floating-point values
6035| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
6036| for Binary Floating-Point Arithmetic.
6037*----------------------------------------------------------------------------*/
6038
e5a41ffa 6039float128 float128_add(float128 a, float128 b, float_status *status)
158142c2
FB
6040{
6041 flag aSign, bSign;
6042
6043 aSign = extractFloat128Sign( a );
6044 bSign = extractFloat128Sign( b );
6045 if ( aSign == bSign ) {
ff32e16e 6046 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6047 }
6048 else {
ff32e16e 6049 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6050 }
6051
6052}
6053
6054/*----------------------------------------------------------------------------
6055| Returns the result of subtracting the quadruple-precision floating-point
6056| values `a' and `b'. The operation is performed according to the IEC/IEEE
6057| Standard for Binary Floating-Point Arithmetic.
6058*----------------------------------------------------------------------------*/
6059
e5a41ffa 6060float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2
FB
6061{
6062 flag aSign, bSign;
6063
6064 aSign = extractFloat128Sign( a );
6065 bSign = extractFloat128Sign( b );
6066 if ( aSign == bSign ) {
ff32e16e 6067 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6068 }
6069 else {
ff32e16e 6070 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6071 }
6072
6073}
6074
6075/*----------------------------------------------------------------------------
6076| Returns the result of multiplying the quadruple-precision floating-point
6077| values `a' and `b'. The operation is performed according to the IEC/IEEE
6078| Standard for Binary Floating-Point Arithmetic.
6079*----------------------------------------------------------------------------*/
6080
e5a41ffa 6081float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2
FB
6082{
6083 flag aSign, bSign, zSign;
f4014512 6084 int32_t aExp, bExp, zExp;
bb98fe42 6085 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
6086
6087 aSig1 = extractFloat128Frac1( a );
6088 aSig0 = extractFloat128Frac0( a );
6089 aExp = extractFloat128Exp( a );
6090 aSign = extractFloat128Sign( a );
6091 bSig1 = extractFloat128Frac1( b );
6092 bSig0 = extractFloat128Frac0( b );
6093 bExp = extractFloat128Exp( b );
6094 bSign = extractFloat128Sign( b );
6095 zSign = aSign ^ bSign;
6096 if ( aExp == 0x7FFF ) {
6097 if ( ( aSig0 | aSig1 )
6098 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6099 return propagateFloat128NaN(a, b, status);
158142c2
FB
6100 }
6101 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6102 return packFloat128( zSign, 0x7FFF, 0, 0 );
6103 }
6104 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6105 if (bSig0 | bSig1) {
6106 return propagateFloat128NaN(a, b, status);
6107 }
158142c2
FB
6108 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6109 invalid:
ff32e16e 6110 float_raise(float_flag_invalid, status);
af39bc8c 6111 return float128_default_nan(status);
158142c2
FB
6112 }
6113 return packFloat128( zSign, 0x7FFF, 0, 0 );
6114 }
6115 if ( aExp == 0 ) {
6116 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6117 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6118 }
6119 if ( bExp == 0 ) {
6120 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6121 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6122 }
6123 zExp = aExp + bExp - 0x4000;
6124 aSig0 |= LIT64( 0x0001000000000000 );
6125 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6126 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6127 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6128 zSig2 |= ( zSig3 != 0 );
6129 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6130 shift128ExtraRightJamming(
6131 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6132 ++zExp;
6133 }
ff32e16e 6134 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6135
6136}
6137
6138/*----------------------------------------------------------------------------
6139| Returns the result of dividing the quadruple-precision floating-point value
6140| `a' by the corresponding value `b'. The operation is performed according to
6141| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6142*----------------------------------------------------------------------------*/
6143
e5a41ffa 6144float128 float128_div(float128 a, float128 b, float_status *status)
158142c2
FB
6145{
6146 flag aSign, bSign, zSign;
f4014512 6147 int32_t aExp, bExp, zExp;
bb98fe42
AF
6148 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6149 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6150
6151 aSig1 = extractFloat128Frac1( a );
6152 aSig0 = extractFloat128Frac0( a );
6153 aExp = extractFloat128Exp( a );
6154 aSign = extractFloat128Sign( a );
6155 bSig1 = extractFloat128Frac1( b );
6156 bSig0 = extractFloat128Frac0( b );
6157 bExp = extractFloat128Exp( b );
6158 bSign = extractFloat128Sign( b );
6159 zSign = aSign ^ bSign;
6160 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6161 if (aSig0 | aSig1) {
6162 return propagateFloat128NaN(a, b, status);
6163 }
158142c2 6164 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6165 if (bSig0 | bSig1) {
6166 return propagateFloat128NaN(a, b, status);
6167 }
158142c2
FB
6168 goto invalid;
6169 }
6170 return packFloat128( zSign, 0x7FFF, 0, 0 );
6171 }
6172 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6173 if (bSig0 | bSig1) {
6174 return propagateFloat128NaN(a, b, status);
6175 }
158142c2
FB
6176 return packFloat128( zSign, 0, 0, 0 );
6177 }
6178 if ( bExp == 0 ) {
6179 if ( ( bSig0 | bSig1 ) == 0 ) {
6180 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6181 invalid:
ff32e16e 6182 float_raise(float_flag_invalid, status);
af39bc8c 6183 return float128_default_nan(status);
158142c2 6184 }
ff32e16e 6185 float_raise(float_flag_divbyzero, status);
158142c2
FB
6186 return packFloat128( zSign, 0x7FFF, 0, 0 );
6187 }
6188 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6189 }
6190 if ( aExp == 0 ) {
6191 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6192 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6193 }
6194 zExp = aExp - bExp + 0x3FFD;
6195 shortShift128Left(
6196 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6197 shortShift128Left(
6198 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6199 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6200 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6201 ++zExp;
6202 }
6203 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6204 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6205 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 6206 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6207 --zSig0;
6208 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6209 }
6210 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6211 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6212 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6213 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6214 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6215 --zSig1;
6216 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6217 }
6218 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6219 }
6220 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 6221 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6222
6223}
6224
6225/*----------------------------------------------------------------------------
6226| Returns the remainder of the quadruple-precision floating-point value `a'
6227| with respect to the corresponding value `b'. The operation is performed
6228| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6229*----------------------------------------------------------------------------*/
6230
e5a41ffa 6231float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 6232{
ed086f3d 6233 flag aSign, zSign;
f4014512 6234 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6235 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6236 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6237 int64_t sigMean0;
158142c2
FB
6238
6239 aSig1 = extractFloat128Frac1( a );
6240 aSig0 = extractFloat128Frac0( a );
6241 aExp = extractFloat128Exp( a );
6242 aSign = extractFloat128Sign( a );
6243 bSig1 = extractFloat128Frac1( b );
6244 bSig0 = extractFloat128Frac0( b );
6245 bExp = extractFloat128Exp( b );
158142c2
FB
6246 if ( aExp == 0x7FFF ) {
6247 if ( ( aSig0 | aSig1 )
6248 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6249 return propagateFloat128NaN(a, b, status);
158142c2
FB
6250 }
6251 goto invalid;
6252 }
6253 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6254 if (bSig0 | bSig1) {
6255 return propagateFloat128NaN(a, b, status);
6256 }
158142c2
FB
6257 return a;
6258 }
6259 if ( bExp == 0 ) {
6260 if ( ( bSig0 | bSig1 ) == 0 ) {
6261 invalid:
ff32e16e 6262 float_raise(float_flag_invalid, status);
af39bc8c 6263 return float128_default_nan(status);
158142c2
FB
6264 }
6265 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6266 }
6267 if ( aExp == 0 ) {
6268 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6269 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6270 }
6271 expDiff = aExp - bExp;
6272 if ( expDiff < -1 ) return a;
6273 shortShift128Left(
6274 aSig0 | LIT64( 0x0001000000000000 ),
6275 aSig1,
6276 15 - ( expDiff < 0 ),
6277 &aSig0,
6278 &aSig1
6279 );
6280 shortShift128Left(
6281 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6282 q = le128( bSig0, bSig1, aSig0, aSig1 );
6283 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6284 expDiff -= 64;
6285 while ( 0 < expDiff ) {
6286 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6287 q = ( 4 < q ) ? q - 4 : 0;
6288 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6289 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6290 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6291 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6292 expDiff -= 61;
6293 }
6294 if ( -64 < expDiff ) {
6295 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6296 q = ( 4 < q ) ? q - 4 : 0;
6297 q >>= - expDiff;
6298 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6299 expDiff += 52;
6300 if ( expDiff < 0 ) {
6301 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6302 }
6303 else {
6304 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6305 }
6306 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6307 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6308 }
6309 else {
6310 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6311 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6312 }
6313 do {
6314 alternateASig0 = aSig0;
6315 alternateASig1 = aSig1;
6316 ++q;
6317 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 6318 } while ( 0 <= (int64_t) aSig0 );
158142c2 6319 add128(
bb98fe42 6320 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
6321 if ( ( sigMean0 < 0 )
6322 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6323 aSig0 = alternateASig0;
6324 aSig1 = alternateASig1;
6325 }
bb98fe42 6326 zSign = ( (int64_t) aSig0 < 0 );
158142c2 6327 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
6328 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6329 status);
158142c2
FB
6330}
6331
6332/*----------------------------------------------------------------------------
6333| Returns the square root of the quadruple-precision floating-point value `a'.
6334| The operation is performed according to the IEC/IEEE Standard for Binary
6335| Floating-Point Arithmetic.
6336*----------------------------------------------------------------------------*/
6337
e5a41ffa 6338float128 float128_sqrt(float128 a, float_status *status)
158142c2
FB
6339{
6340 flag aSign;
f4014512 6341 int32_t aExp, zExp;
bb98fe42
AF
6342 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6343 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6344
6345 aSig1 = extractFloat128Frac1( a );
6346 aSig0 = extractFloat128Frac0( a );
6347 aExp = extractFloat128Exp( a );
6348 aSign = extractFloat128Sign( a );
6349 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6350 if (aSig0 | aSig1) {
6351 return propagateFloat128NaN(a, a, status);
6352 }
158142c2
FB
6353 if ( ! aSign ) return a;
6354 goto invalid;
6355 }
6356 if ( aSign ) {
6357 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6358 invalid:
ff32e16e 6359 float_raise(float_flag_invalid, status);
af39bc8c 6360 return float128_default_nan(status);
158142c2
FB
6361 }
6362 if ( aExp == 0 ) {
6363 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6364 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6365 }
6366 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6367 aSig0 |= LIT64( 0x0001000000000000 );
6368 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6369 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6370 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6371 doubleZSig0 = zSig0<<1;
6372 mul64To128( zSig0, zSig0, &term0, &term1 );
6373 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6374 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6375 --zSig0;
6376 doubleZSig0 -= 2;
6377 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6378 }
6379 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6380 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6381 if ( zSig1 == 0 ) zSig1 = 1;
6382 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6383 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6384 mul64To128( zSig1, zSig1, &term2, &term3 );
6385 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6386 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6387 --zSig1;
6388 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6389 term3 |= 1;
6390 term2 |= doubleZSig0;
6391 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6392 }
6393 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6394 }
6395 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 6396 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6397
6398}
6399
6400/*----------------------------------------------------------------------------
6401| Returns 1 if the quadruple-precision floating-point value `a' is equal to
b689362d
AJ
6402| the corresponding value `b', and 0 otherwise. The invalid exception is
6403| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
6404| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6405*----------------------------------------------------------------------------*/
6406
e5a41ffa 6407int float128_eq(float128 a, float128 b, float_status *status)
158142c2
FB
6408{
6409
6410 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6411 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6412 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6413 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6414 ) {
ff32e16e 6415 float_raise(float_flag_invalid, status);
158142c2
FB
6416 return 0;
6417 }
6418 return
6419 ( a.low == b.low )
6420 && ( ( a.high == b.high )
6421 || ( ( a.low == 0 )
bb98fe42 6422 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6423 );
6424
6425}
6426
6427/*----------------------------------------------------------------------------
6428| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6429| or equal to the corresponding value `b', and 0 otherwise. The invalid
6430| exception is raised if either operand is a NaN. The comparison is performed
6431| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6432*----------------------------------------------------------------------------*/
6433
e5a41ffa 6434int float128_le(float128 a, float128 b, float_status *status)
158142c2
FB
6435{
6436 flag aSign, bSign;
6437
6438 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6439 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6440 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6441 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6442 ) {
ff32e16e 6443 float_raise(float_flag_invalid, status);
158142c2
FB
6444 return 0;
6445 }
6446 aSign = extractFloat128Sign( a );
6447 bSign = extractFloat128Sign( b );
6448 if ( aSign != bSign ) {
6449 return
6450 aSign
bb98fe42 6451 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6452 == 0 );
6453 }
6454 return
6455 aSign ? le128( b.high, b.low, a.high, a.low )
6456 : le128( a.high, a.low, b.high, b.low );
6457
6458}
6459
6460/*----------------------------------------------------------------------------
6461| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6462| the corresponding value `b', and 0 otherwise. The invalid exception is
6463| raised if either operand is a NaN. The comparison is performed according
6464| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6465*----------------------------------------------------------------------------*/
6466
e5a41ffa 6467int float128_lt(float128 a, float128 b, float_status *status)
158142c2
FB
6468{
6469 flag aSign, bSign;
6470
6471 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6472 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6473 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6474 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6475 ) {
ff32e16e 6476 float_raise(float_flag_invalid, status);
158142c2
FB
6477 return 0;
6478 }
6479 aSign = extractFloat128Sign( a );
6480 bSign = extractFloat128Sign( b );
6481 if ( aSign != bSign ) {
6482 return
6483 aSign
bb98fe42 6484 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6485 != 0 );
6486 }
6487 return
6488 aSign ? lt128( b.high, b.low, a.high, a.low )
6489 : lt128( a.high, a.low, b.high, b.low );
6490
6491}
6492
67b7861d
AJ
6493/*----------------------------------------------------------------------------
6494| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
f5a64251
AJ
6495| be compared, and 0 otherwise. The invalid exception is raised if either
6496| operand is a NaN. The comparison is performed according to the IEC/IEEE
6497| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
6498*----------------------------------------------------------------------------*/
6499
e5a41ffa 6500int float128_unordered(float128 a, float128 b, float_status *status)
67b7861d
AJ
6501{
6502 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6503 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6504 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6505 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6506 ) {
ff32e16e 6507 float_raise(float_flag_invalid, status);
67b7861d
AJ
6508 return 1;
6509 }
6510 return 0;
6511}
6512
158142c2
FB
6513/*----------------------------------------------------------------------------
6514| Returns 1 if the quadruple-precision floating-point value `a' is equal to
f5a64251
AJ
6515| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6516| exception. The comparison is performed according to the IEC/IEEE Standard
6517| for Binary Floating-Point Arithmetic.
158142c2
FB
6518*----------------------------------------------------------------------------*/
6519
e5a41ffa 6520int float128_eq_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6521{
6522
6523 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6524 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6525 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6526 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6527 ) {
af39bc8c
AM
6528 if (float128_is_signaling_nan(a, status)
6529 || float128_is_signaling_nan(b, status)) {
ff32e16e 6530 float_raise(float_flag_invalid, status);
b689362d 6531 }
158142c2
FB
6532 return 0;
6533 }
6534 return
6535 ( a.low == b.low )
6536 && ( ( a.high == b.high )
6537 || ( ( a.low == 0 )
bb98fe42 6538 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6539 );
6540
6541}
6542
6543/*----------------------------------------------------------------------------
6544| Returns 1 if the quadruple-precision floating-point value `a' is less than
6545| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6546| cause an exception. Otherwise, the comparison is performed according to the
6547| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6548*----------------------------------------------------------------------------*/
6549
e5a41ffa 6550int float128_le_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6551{
6552 flag aSign, bSign;
6553
6554 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6555 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6556 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6557 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6558 ) {
af39bc8c
AM
6559 if (float128_is_signaling_nan(a, status)
6560 || float128_is_signaling_nan(b, status)) {
ff32e16e 6561 float_raise(float_flag_invalid, status);
158142c2
FB
6562 }
6563 return 0;
6564 }
6565 aSign = extractFloat128Sign( a );
6566 bSign = extractFloat128Sign( b );
6567 if ( aSign != bSign ) {
6568 return
6569 aSign
bb98fe42 6570 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6571 == 0 );
6572 }
6573 return
6574 aSign ? le128( b.high, b.low, a.high, a.low )
6575 : le128( a.high, a.low, b.high, b.low );
6576
6577}
6578
6579/*----------------------------------------------------------------------------
6580| Returns 1 if the quadruple-precision floating-point value `a' is less than
6581| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6582| exception. Otherwise, the comparison is performed according to the IEC/IEEE
6583| Standard for Binary Floating-Point Arithmetic.
6584*----------------------------------------------------------------------------*/
6585
e5a41ffa 6586int float128_lt_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6587{
6588 flag aSign, bSign;
6589
6590 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6591 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6592 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6593 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6594 ) {
af39bc8c
AM
6595 if (float128_is_signaling_nan(a, status)
6596 || float128_is_signaling_nan(b, status)) {
ff32e16e 6597 float_raise(float_flag_invalid, status);
158142c2
FB
6598 }
6599 return 0;
6600 }
6601 aSign = extractFloat128Sign( a );
6602 bSign = extractFloat128Sign( b );
6603 if ( aSign != bSign ) {
6604 return
6605 aSign
bb98fe42 6606 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6607 != 0 );
6608 }
6609 return
6610 aSign ? lt128( b.high, b.low, a.high, a.low )
6611 : lt128( a.high, a.low, b.high, b.low );
6612
6613}
6614
67b7861d
AJ
6615/*----------------------------------------------------------------------------
6616| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
6617| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
6618| comparison is performed according to the IEC/IEEE Standard for Binary
6619| Floating-Point Arithmetic.
6620*----------------------------------------------------------------------------*/
6621
e5a41ffa 6622int float128_unordered_quiet(float128 a, float128 b, float_status *status)
67b7861d
AJ
6623{
6624 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6625 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6626 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6627 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6628 ) {
af39bc8c
AM
6629 if (float128_is_signaling_nan(a, status)
6630 || float128_is_signaling_nan(b, status)) {
ff32e16e 6631 float_raise(float_flag_invalid, status);
67b7861d
AJ
6632 }
6633 return 1;
6634 }
6635 return 0;
6636}
6637
e5a41ffa
PM
6638static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
6639 int is_quiet, float_status *status)
f6714d36
AJ
6640{
6641 flag aSign, bSign;
6642
d1eb8f2a
AD
6643 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6644 float_raise(float_flag_invalid, status);
6645 return float_relation_unordered;
6646 }
f6714d36
AJ
6647 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6648 ( extractFloatx80Frac( a )<<1 ) ) ||
6649 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6650 ( extractFloatx80Frac( b )<<1 ) )) {
6651 if (!is_quiet ||
af39bc8c
AM
6652 floatx80_is_signaling_nan(a, status) ||
6653 floatx80_is_signaling_nan(b, status)) {
ff32e16e 6654 float_raise(float_flag_invalid, status);
f6714d36
AJ
6655 }
6656 return float_relation_unordered;
6657 }
6658 aSign = extractFloatx80Sign( a );
6659 bSign = extractFloatx80Sign( b );
6660 if ( aSign != bSign ) {
6661
6662 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6663 ( ( a.low | b.low ) == 0 ) ) {
6664 /* zero case */
6665 return float_relation_equal;
6666 } else {
6667 return 1 - (2 * aSign);
6668 }
6669 } else {
6670 if (a.low == b.low && a.high == b.high) {
6671 return float_relation_equal;
6672 } else {
6673 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6674 }
6675 }
6676}
6677
e5a41ffa 6678int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 6679{
ff32e16e 6680 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
6681}
6682
e5a41ffa 6683int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
f6714d36 6684{
ff32e16e 6685 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
6686}
6687
e5a41ffa
PM
6688static inline int float128_compare_internal(float128 a, float128 b,
6689 int is_quiet, float_status *status)
1f587329
BS
6690{
6691 flag aSign, bSign;
6692
6693 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6694 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
6695 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
6696 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
6697 if (!is_quiet ||
af39bc8c
AM
6698 float128_is_signaling_nan(a, status) ||
6699 float128_is_signaling_nan(b, status)) {
ff32e16e 6700 float_raise(float_flag_invalid, status);
1f587329
BS
6701 }
6702 return float_relation_unordered;
6703 }
6704 aSign = extractFloat128Sign( a );
6705 bSign = extractFloat128Sign( b );
6706 if ( aSign != bSign ) {
6707 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
6708 /* zero case */
6709 return float_relation_equal;
6710 } else {
6711 return 1 - (2 * aSign);
6712 }
6713 } else {
6714 if (a.low == b.low && a.high == b.high) {
6715 return float_relation_equal;
6716 } else {
6717 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6718 }
6719 }
6720}
6721
e5a41ffa 6722int float128_compare(float128 a, float128 b, float_status *status)
1f587329 6723{
ff32e16e 6724 return float128_compare_internal(a, b, 0, status);
1f587329
BS
6725}
6726
e5a41ffa 6727int float128_compare_quiet(float128 a, float128 b, float_status *status)
1f587329 6728{
ff32e16e 6729 return float128_compare_internal(a, b, 1, status);
1f587329
BS
6730}
6731
e5a41ffa 6732floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb
PB
6733{
6734 flag aSign;
326b9e98 6735 int32_t aExp;
bb98fe42 6736 uint64_t aSig;
9ee6e8bb 6737
d1eb8f2a
AD
6738 if (floatx80_invalid_encoding(a)) {
6739 float_raise(float_flag_invalid, status);
6740 return floatx80_default_nan(status);
6741 }
9ee6e8bb
PB
6742 aSig = extractFloatx80Frac( a );
6743 aExp = extractFloatx80Exp( a );
6744 aSign = extractFloatx80Sign( a );
6745
326b9e98
AJ
6746 if ( aExp == 0x7FFF ) {
6747 if ( aSig<<1 ) {
ff32e16e 6748 return propagateFloatx80NaN(a, a, status);
326b9e98 6749 }
9ee6e8bb
PB
6750 return a;
6751 }
326b9e98 6752
3c85c37f
PM
6753 if (aExp == 0) {
6754 if (aSig == 0) {
6755 return a;
6756 }
6757 aExp++;
6758 }
69397542 6759
326b9e98
AJ
6760 if (n > 0x10000) {
6761 n = 0x10000;
6762 } else if (n < -0x10000) {
6763 n = -0x10000;
6764 }
6765
9ee6e8bb 6766 aExp += n;
a2f2d288
PM
6767 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6768 aSign, aExp, aSig, 0, status);
9ee6e8bb 6769}
9ee6e8bb 6770
e5a41ffa 6771float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb
PB
6772{
6773 flag aSign;
326b9e98 6774 int32_t aExp;
bb98fe42 6775 uint64_t aSig0, aSig1;
9ee6e8bb
PB
6776
6777 aSig1 = extractFloat128Frac1( a );
6778 aSig0 = extractFloat128Frac0( a );
6779 aExp = extractFloat128Exp( a );
6780 aSign = extractFloat128Sign( a );
6781 if ( aExp == 0x7FFF ) {
326b9e98 6782 if ( aSig0 | aSig1 ) {
ff32e16e 6783 return propagateFloat128NaN(a, a, status);
326b9e98 6784 }
9ee6e8bb
PB
6785 return a;
6786 }
3c85c37f 6787 if (aExp != 0) {
69397542 6788 aSig0 |= LIT64( 0x0001000000000000 );
3c85c37f 6789 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 6790 return a;
3c85c37f
PM
6791 } else {
6792 aExp++;
6793 }
69397542 6794
326b9e98
AJ
6795 if (n > 0x10000) {
6796 n = 0x10000;
6797 } else if (n < -0x10000) {
6798 n = -0x10000;
6799 }
6800
69397542
PB
6801 aExp += n - 1;
6802 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 6803 , status);
9ee6e8bb
PB
6804
6805}