]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
fpu/softfloat: Specialize on snan_bit_is_one
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
6fff2167 86#include "qemu/bitops.h"
6b4c305c 87#include "fpu/softfloat.h"
158142c2 88
dc355b76 89/* We only need stdlib for abort() */
dc355b76 90
158142c2
FB
91/*----------------------------------------------------------------------------
92| Primitive arithmetic functions, including multi-word arithmetic, and
93| division and square root approximations. (Can be specialized to target if
94| desired.)
95*----------------------------------------------------------------------------*/
88857aca 96#include "fpu/softfloat-macros.h"
158142c2 97
bb4d4bb3
PM
98/*----------------------------------------------------------------------------
99| Returns the fraction bits of the half-precision floating-point value `a'.
100*----------------------------------------------------------------------------*/
101
a49db98d 102static inline uint32_t extractFloat16Frac(float16 a)
bb4d4bb3
PM
103{
104 return float16_val(a) & 0x3ff;
105}
106
107/*----------------------------------------------------------------------------
108| Returns the exponent bits of the half-precision floating-point value `a'.
109*----------------------------------------------------------------------------*/
110
0c48262d 111static inline int extractFloat16Exp(float16 a)
bb4d4bb3
PM
112{
113 return (float16_val(a) >> 10) & 0x1f;
114}
115
d97544c9
AB
116/*----------------------------------------------------------------------------
117| Returns the fraction bits of the single-precision floating-point value `a'.
118*----------------------------------------------------------------------------*/
119
120static inline uint32_t extractFloat32Frac(float32 a)
121{
122 return float32_val(a) & 0x007FFFFF;
123}
124
125/*----------------------------------------------------------------------------
126| Returns the exponent bits of the single-precision floating-point value `a'.
127*----------------------------------------------------------------------------*/
128
129static inline int extractFloat32Exp(float32 a)
130{
131 return (float32_val(a) >> 23) & 0xFF;
132}
133
134/*----------------------------------------------------------------------------
135| Returns the sign bit of the single-precision floating-point value `a'.
136*----------------------------------------------------------------------------*/
137
138static inline flag extractFloat32Sign(float32 a)
139{
140 return float32_val(a) >> 31;
141}
142
143/*----------------------------------------------------------------------------
144| Returns the fraction bits of the double-precision floating-point value `a'.
145*----------------------------------------------------------------------------*/
146
147static inline uint64_t extractFloat64Frac(float64 a)
148{
149 return float64_val(a) & LIT64(0x000FFFFFFFFFFFFF);
150}
151
152/*----------------------------------------------------------------------------
153| Returns the exponent bits of the double-precision floating-point value `a'.
154*----------------------------------------------------------------------------*/
155
156static inline int extractFloat64Exp(float64 a)
157{
158 return (float64_val(a) >> 52) & 0x7FF;
159}
160
161/*----------------------------------------------------------------------------
162| Returns the sign bit of the double-precision floating-point value `a'.
163*----------------------------------------------------------------------------*/
164
165static inline flag extractFloat64Sign(float64 a)
166{
167 return float64_val(a) >> 63;
168}
169
a90119b5
AB
/*
 * Classification of a floating point number.  The enumerators are
 * ordered so that both NaN kinds come last: testing
 * cls >= float_class_qnan is therefore true for any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified = 0,
    float_class_zero         = 1,
    float_class_normal       = 2,
    float_class_inf          = 3,
    float_class_qnan         = 4,  /* all NaNs from here */
    float_class_snan         = 5,
} FloatClass;
183
184/*
185 * Structure holding all of the decomposed parts of a float. The
186 * exponent is unbiased and the fraction is normalized. All
187 * calculations are done with a 64 bit fraction and then rounded as
188 * appropriate for the final format.
189 *
190 * Thanks to the packed FloatClass a decent compiler should be able to
191 * fit the whole structure into registers and avoid using the stack
192 * for parameter passing.
193 */
194
195typedef struct {
196 uint64_t frac;
197 int32_t exp;
198 FloatClass cls;
199 bool sign;
200} FloatParts;
201
/*
 * Position of the binary point inside a decomposed 64-bit fraction,
 * the implicit leading one that sits on it, and the bit directly above
 * that, which signals that a fraction has carried out of range.
 */
#define DECOMPOSED_BINARY_POINT (64 - 2)
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1)

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/*
 * Expand the FloatFmt fields that follow from the exponent width E and
 * the fraction width F.  Every use of E and F is parenthesized so that
 * expression arguments (e.g. "4 + 6") expand with the intended
 * precedence instead of silently producing a wrong shift count.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = (E),                                           \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                         \
    .exp_max        = (1 << (E)) - 1,                                \
    .frac_size      = (F),                                           \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                 \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),       \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1), \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1, \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1

/* Half precision: 5 exponent bits, 10 fraction bits. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* Same field widths as float16_params, with the ARM alternative flag set. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* Single precision: 8 exponent bits, 23 fraction bits. */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* Double precision: 11 exponent bits, 52 fraction bits. */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};
260
6fff2167
AB
261/* Unpack a float to parts, but do not canonicalize. */
262static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
263{
264 const int sign_pos = fmt.frac_size + fmt.exp_size;
265
266 return (FloatParts) {
267 .cls = float_class_unclassified,
268 .sign = extract64(raw, sign_pos, 1),
269 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
270 .frac = extract64(raw, 0, fmt.frac_size),
271 };
272}
273
274static inline FloatParts float16_unpack_raw(float16 f)
275{
276 return unpack_raw(float16_params, f);
277}
278
279static inline FloatParts float32_unpack_raw(float32 f)
280{
281 return unpack_raw(float32_params, f);
282}
283
284static inline FloatParts float64_unpack_raw(float64 f)
285{
286 return unpack_raw(float64_params, f);
287}
288
289/* Pack a float from parts, but do not canonicalize. */
290static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
291{
292 const int sign_pos = fmt.frac_size + fmt.exp_size;
293 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
294 return deposit64(ret, sign_pos, 1, p.sign);
295}
296
297static inline float16 float16_pack_raw(FloatParts p)
298{
299 return make_float16(pack_raw(float16_params, p));
300}
301
302static inline float32 float32_pack_raw(FloatParts p)
303{
304 return make_float32(pack_raw(float32_params, p));
305}
306
307static inline float64 float64_pack_raw(FloatParts p)
308{
309 return make_float64(pack_raw(float64_params, p));
310}
311
0664335a
RH
312/*----------------------------------------------------------------------------
313| Functions and definitions to determine: (1) whether tininess for underflow
314| is detected before or after rounding by default, (2) what (if anything)
315| happens when exceptions are raised, (3) how signaling NaNs are distinguished
316| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
317| are propagated from function inputs to output. These details are target-
318| specific.
319*----------------------------------------------------------------------------*/
320#include "softfloat-specialize.h"
321
6fff2167
AB
322/* Canonicalize EXP and FRAC, setting CLS. */
323static FloatParts canonicalize(FloatParts part, const FloatFmt *parm,
324 float_status *status)
325{
ca3a3d5a 326 if (part.exp == parm->exp_max && !parm->arm_althp) {
6fff2167
AB
327 if (part.frac == 0) {
328 part.cls = float_class_inf;
329 } else {
94933df0 330 part.frac <<= parm->frac_shift;
298b468e
RH
331 part.cls = (parts_is_snan_frac(part.frac, status)
332 ? float_class_snan : float_class_qnan);
6fff2167
AB
333 }
334 } else if (part.exp == 0) {
335 if (likely(part.frac == 0)) {
336 part.cls = float_class_zero;
337 } else if (status->flush_inputs_to_zero) {
338 float_raise(float_flag_input_denormal, status);
339 part.cls = float_class_zero;
340 part.frac = 0;
341 } else {
342 int shift = clz64(part.frac) - 1;
343 part.cls = float_class_normal;
344 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
345 part.frac <<= shift;
346 }
347 } else {
348 part.cls = float_class_normal;
349 part.exp -= parm->exp_bias;
350 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
351 }
352 return part;
353}
354
355/* Round and uncanonicalize a floating-point number by parts. There
356 * are FRAC_SHIFT bits that may require rounding at the bottom of the
357 * fraction; these bits will be removed. The exponent will be biased
358 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
359 */
360
361static FloatParts round_canonical(FloatParts p, float_status *s,
362 const FloatFmt *parm)
363{
364 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
365 const uint64_t round_mask = parm->round_mask;
366 const uint64_t roundeven_mask = parm->roundeven_mask;
367 const int exp_max = parm->exp_max;
368 const int frac_shift = parm->frac_shift;
369 uint64_t frac, inc;
370 int exp, flags = 0;
371 bool overflow_norm;
372
373 frac = p.frac;
374 exp = p.exp;
375
376 switch (p.cls) {
377 case float_class_normal:
378 switch (s->float_rounding_mode) {
379 case float_round_nearest_even:
380 overflow_norm = false;
381 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
382 break;
383 case float_round_ties_away:
384 overflow_norm = false;
385 inc = frac_lsbm1;
386 break;
387 case float_round_to_zero:
388 overflow_norm = true;
389 inc = 0;
390 break;
391 case float_round_up:
392 inc = p.sign ? 0 : round_mask;
393 overflow_norm = p.sign;
394 break;
395 case float_round_down:
396 inc = p.sign ? round_mask : 0;
397 overflow_norm = !p.sign;
398 break;
399 default:
400 g_assert_not_reached();
401 }
402
403 exp += parm->exp_bias;
404 if (likely(exp > 0)) {
405 if (frac & round_mask) {
406 flags |= float_flag_inexact;
407 frac += inc;
408 if (frac & DECOMPOSED_OVERFLOW_BIT) {
409 frac >>= 1;
410 exp++;
411 }
412 }
413 frac >>= frac_shift;
414
ca3a3d5a
AB
415 if (parm->arm_althp) {
416 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
417 if (unlikely(exp > exp_max)) {
418 /* Overflow. Return the maximum normal. */
419 flags = float_flag_invalid;
420 exp = exp_max;
421 frac = -1;
422 }
423 } else if (unlikely(exp >= exp_max)) {
6fff2167
AB
424 flags |= float_flag_overflow | float_flag_inexact;
425 if (overflow_norm) {
426 exp = exp_max - 1;
427 frac = -1;
428 } else {
429 p.cls = float_class_inf;
430 goto do_inf;
431 }
432 }
433 } else if (s->flush_to_zero) {
434 flags |= float_flag_output_denormal;
435 p.cls = float_class_zero;
436 goto do_zero;
437 } else {
438 bool is_tiny = (s->float_detect_tininess
439 == float_tininess_before_rounding)
440 || (exp < 0)
441 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
442
443 shift64RightJamming(frac, 1 - exp, &frac);
444 if (frac & round_mask) {
445 /* Need to recompute round-to-even. */
446 if (s->float_rounding_mode == float_round_nearest_even) {
447 inc = ((frac & roundeven_mask) != frac_lsbm1
448 ? frac_lsbm1 : 0);
449 }
450 flags |= float_flag_inexact;
451 frac += inc;
452 }
453
454 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
455 frac >>= frac_shift;
456
457 if (is_tiny && (flags & float_flag_inexact)) {
458 flags |= float_flag_underflow;
459 }
460 if (exp == 0 && frac == 0) {
461 p.cls = float_class_zero;
462 }
463 }
464 break;
465
466 case float_class_zero:
467 do_zero:
468 exp = 0;
469 frac = 0;
470 break;
471
472 case float_class_inf:
473 do_inf:
ca3a3d5a 474 assert(!parm->arm_althp);
6fff2167
AB
475 exp = exp_max;
476 frac = 0;
477 break;
478
479 case float_class_qnan:
480 case float_class_snan:
ca3a3d5a 481 assert(!parm->arm_althp);
6fff2167 482 exp = exp_max;
94933df0 483 frac >>= parm->frac_shift;
6fff2167
AB
484 break;
485
486 default:
487 g_assert_not_reached();
488 }
489
490 float_raise(flags, s);
491 p.exp = exp;
492 p.frac = frac;
493 return p;
494}
495
6fed16b2
AB
496/* Explicit FloatFmt version */
497static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
498 const FloatFmt *params)
499{
500 return canonicalize(float16_unpack_raw(f), params, s);
501}
502
6fff2167
AB
503static FloatParts float16_unpack_canonical(float16 f, float_status *s)
504{
6fed16b2
AB
505 return float16a_unpack_canonical(f, s, &float16_params);
506}
507
508static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
509 const FloatFmt *params)
510{
511 return float16_pack_raw(round_canonical(p, s, params));
6fff2167
AB
512}
513
514static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
515{
6fed16b2 516 return float16a_round_pack_canonical(p, s, &float16_params);
6fff2167
AB
517}
518
519static FloatParts float32_unpack_canonical(float32 f, float_status *s)
520{
521 return canonicalize(float32_unpack_raw(f), &float32_params, s);
522}
523
524static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
525{
0bcfbcbe 526 return float32_pack_raw(round_canonical(p, s, &float32_params));
6fff2167
AB
527}
528
529static FloatParts float64_unpack_canonical(float64 f, float_status *s)
530{
531 return canonicalize(float64_unpack_raw(f), &float64_params, s);
532}
533
534static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
535{
0bcfbcbe 536 return float64_pack_raw(round_canonical(p, s, &float64_params));
6fff2167
AB
537}
538
539/* Simple helpers for checking if what NaN we have */
540static bool is_nan(FloatClass c)
541{
542 return unlikely(c >= float_class_qnan);
543}
544static bool is_snan(FloatClass c)
545{
546 return c == float_class_snan;
547}
548static bool is_qnan(FloatClass c)
549{
550 return c == float_class_qnan;
551}
552
dbe4d53a
AB
553static FloatParts return_nan(FloatParts a, float_status *s)
554{
555 switch (a.cls) {
556 case float_class_snan:
557 s->float_exception_flags |= float_flag_invalid;
0bcfbcbe 558 a = parts_silence_nan(a, s);
dbe4d53a
AB
559 /* fall through */
560 case float_class_qnan:
561 if (s->default_nan_mode) {
f7e598e2 562 return parts_default_nan(s);
dbe4d53a
AB
563 }
564 break;
565
566 default:
567 g_assert_not_reached();
568 }
569 return a;
570}
571
6fff2167
AB
572static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
573{
574 if (is_snan(a.cls) || is_snan(b.cls)) {
575 s->float_exception_flags |= float_flag_invalid;
576 }
577
578 if (s->default_nan_mode) {
f7e598e2 579 return parts_default_nan(s);
6fff2167
AB
580 } else {
581 if (pickNaN(is_qnan(a.cls), is_snan(a.cls),
582 is_qnan(b.cls), is_snan(b.cls),
583 a.frac > b.frac ||
584 (a.frac == b.frac && a.sign < b.sign))) {
585 a = b;
586 }
0bcfbcbe
RH
587 if (is_snan(a.cls)) {
588 return parts_silence_nan(a, s);
589 }
6fff2167
AB
590 }
591 return a;
592}
593
d446830a
AB
594static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
595 bool inf_zero, float_status *s)
596{
1839189b
PM
597 int which;
598
d446830a
AB
599 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
600 s->float_exception_flags |= float_flag_invalid;
601 }
602
1839189b
PM
603 which = pickNaNMulAdd(is_qnan(a.cls), is_snan(a.cls),
604 is_qnan(b.cls), is_snan(b.cls),
605 is_qnan(c.cls), is_snan(c.cls),
606 inf_zero, s);
607
d446830a 608 if (s->default_nan_mode) {
1839189b
PM
609 /* Note that this check is after pickNaNMulAdd so that function
610 * has an opportunity to set the Invalid flag.
611 */
f7e598e2 612 which = 3;
1839189b 613 }
d446830a 614
1839189b
PM
615 switch (which) {
616 case 0:
617 break;
618 case 1:
619 a = b;
620 break;
621 case 2:
622 a = c;
623 break;
624 case 3:
f7e598e2 625 return parts_default_nan(s);
1839189b
PM
626 default:
627 g_assert_not_reached();
d446830a 628 }
1839189b 629
0bcfbcbe
RH
630 if (is_snan(a.cls)) {
631 return parts_silence_nan(a, s);
632 }
d446830a
AB
633 return a;
634}
635
6fff2167
AB
636/*
637 * Returns the result of adding or subtracting the values of the
638 * floating-point values `a' and `b'. The operation is performed
639 * according to the IEC/IEEE Standard for Binary Floating-Point
640 * Arithmetic.
641 */
642
643static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
644 float_status *s)
645{
646 bool a_sign = a.sign;
647 bool b_sign = b.sign ^ subtract;
648
649 if (a_sign != b_sign) {
650 /* Subtraction */
651
652 if (a.cls == float_class_normal && b.cls == float_class_normal) {
653 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
654 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
655 a.frac = a.frac - b.frac;
656 } else {
657 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
658 a.frac = b.frac - a.frac;
659 a.exp = b.exp;
660 a_sign ^= 1;
661 }
662
663 if (a.frac == 0) {
664 a.cls = float_class_zero;
665 a.sign = s->float_rounding_mode == float_round_down;
666 } else {
667 int shift = clz64(a.frac) - 1;
668 a.frac = a.frac << shift;
669 a.exp = a.exp - shift;
670 a.sign = a_sign;
671 }
672 return a;
673 }
674 if (is_nan(a.cls) || is_nan(b.cls)) {
675 return pick_nan(a, b, s);
676 }
677 if (a.cls == float_class_inf) {
678 if (b.cls == float_class_inf) {
679 float_raise(float_flag_invalid, s);
f7e598e2 680 return parts_default_nan(s);
6fff2167
AB
681 }
682 return a;
683 }
684 if (a.cls == float_class_zero && b.cls == float_class_zero) {
685 a.sign = s->float_rounding_mode == float_round_down;
686 return a;
687 }
688 if (a.cls == float_class_zero || b.cls == float_class_inf) {
689 b.sign = a_sign ^ 1;
690 return b;
691 }
692 if (b.cls == float_class_zero) {
693 return a;
694 }
695 } else {
696 /* Addition */
697 if (a.cls == float_class_normal && b.cls == float_class_normal) {
698 if (a.exp > b.exp) {
699 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
700 } else if (a.exp < b.exp) {
701 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
702 a.exp = b.exp;
703 }
704 a.frac += b.frac;
705 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
706 a.frac >>= 1;
707 a.exp += 1;
708 }
709 return a;
710 }
711 if (is_nan(a.cls) || is_nan(b.cls)) {
712 return pick_nan(a, b, s);
713 }
714 if (a.cls == float_class_inf || b.cls == float_class_zero) {
715 return a;
716 }
717 if (b.cls == float_class_inf || a.cls == float_class_zero) {
718 b.sign = b_sign;
719 return b;
720 }
721 }
722 g_assert_not_reached();
723}
724
725/*
726 * Returns the result of adding or subtracting the floating-point
727 * values `a' and `b'. The operation is performed according to the
728 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
729 */
730
731float16 __attribute__((flatten)) float16_add(float16 a, float16 b,
732 float_status *status)
733{
734 FloatParts pa = float16_unpack_canonical(a, status);
735 FloatParts pb = float16_unpack_canonical(b, status);
736 FloatParts pr = addsub_floats(pa, pb, false, status);
737
738 return float16_round_pack_canonical(pr, status);
739}
740
741float32 __attribute__((flatten)) float32_add(float32 a, float32 b,
742 float_status *status)
743{
744 FloatParts pa = float32_unpack_canonical(a, status);
745 FloatParts pb = float32_unpack_canonical(b, status);
746 FloatParts pr = addsub_floats(pa, pb, false, status);
747
748 return float32_round_pack_canonical(pr, status);
749}
750
751float64 __attribute__((flatten)) float64_add(float64 a, float64 b,
752 float_status *status)
753{
754 FloatParts pa = float64_unpack_canonical(a, status);
755 FloatParts pb = float64_unpack_canonical(b, status);
756 FloatParts pr = addsub_floats(pa, pb, false, status);
757
758 return float64_round_pack_canonical(pr, status);
759}
760
761float16 __attribute__((flatten)) float16_sub(float16 a, float16 b,
762 float_status *status)
763{
764 FloatParts pa = float16_unpack_canonical(a, status);
765 FloatParts pb = float16_unpack_canonical(b, status);
766 FloatParts pr = addsub_floats(pa, pb, true, status);
767
768 return float16_round_pack_canonical(pr, status);
769}
770
771float32 __attribute__((flatten)) float32_sub(float32 a, float32 b,
772 float_status *status)
773{
774 FloatParts pa = float32_unpack_canonical(a, status);
775 FloatParts pb = float32_unpack_canonical(b, status);
776 FloatParts pr = addsub_floats(pa, pb, true, status);
777
778 return float32_round_pack_canonical(pr, status);
779}
780
781float64 __attribute__((flatten)) float64_sub(float64 a, float64 b,
782 float_status *status)
783{
784 FloatParts pa = float64_unpack_canonical(a, status);
785 FloatParts pb = float64_unpack_canonical(b, status);
786 FloatParts pr = addsub_floats(pa, pb, true, status);
787
788 return float64_round_pack_canonical(pr, status);
789}
790
74d707e2
AB
791/*
792 * Returns the result of multiplying the floating-point values `a' and
793 * `b'. The operation is performed according to the IEC/IEEE Standard
794 * for Binary Floating-Point Arithmetic.
795 */
796
797static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
798{
799 bool sign = a.sign ^ b.sign;
800
801 if (a.cls == float_class_normal && b.cls == float_class_normal) {
802 uint64_t hi, lo;
803 int exp = a.exp + b.exp;
804
805 mul64To128(a.frac, b.frac, &hi, &lo);
806 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
807 if (lo & DECOMPOSED_OVERFLOW_BIT) {
808 shift64RightJamming(lo, 1, &lo);
809 exp += 1;
810 }
811
812 /* Re-use a */
813 a.exp = exp;
814 a.sign = sign;
815 a.frac = lo;
816 return a;
817 }
818 /* handle all the NaN cases */
819 if (is_nan(a.cls) || is_nan(b.cls)) {
820 return pick_nan(a, b, s);
821 }
822 /* Inf * Zero == NaN */
823 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
824 (a.cls == float_class_zero && b.cls == float_class_inf)) {
825 s->float_exception_flags |= float_flag_invalid;
f7e598e2 826 return parts_default_nan(s);
74d707e2
AB
827 }
828 /* Multiply by 0 or Inf */
829 if (a.cls == float_class_inf || a.cls == float_class_zero) {
830 a.sign = sign;
831 return a;
832 }
833 if (b.cls == float_class_inf || b.cls == float_class_zero) {
834 b.sign = sign;
835 return b;
836 }
837 g_assert_not_reached();
838}
839
840float16 __attribute__((flatten)) float16_mul(float16 a, float16 b,
841 float_status *status)
842{
843 FloatParts pa = float16_unpack_canonical(a, status);
844 FloatParts pb = float16_unpack_canonical(b, status);
845 FloatParts pr = mul_floats(pa, pb, status);
846
847 return float16_round_pack_canonical(pr, status);
848}
849
850float32 __attribute__((flatten)) float32_mul(float32 a, float32 b,
851 float_status *status)
852{
853 FloatParts pa = float32_unpack_canonical(a, status);
854 FloatParts pb = float32_unpack_canonical(b, status);
855 FloatParts pr = mul_floats(pa, pb, status);
856
857 return float32_round_pack_canonical(pr, status);
858}
859
860float64 __attribute__((flatten)) float64_mul(float64 a, float64 b,
861 float_status *status)
862{
863 FloatParts pa = float64_unpack_canonical(a, status);
864 FloatParts pb = float64_unpack_canonical(b, status);
865 FloatParts pr = mul_floats(pa, pb, status);
866
867 return float64_round_pack_canonical(pr, status);
868}
869
d446830a
AB
870/*
871 * Returns the result of multiplying the floating-point values `a' and
872 * `b' then adding 'c', with no intermediate rounding step after the
873 * multiplication. The operation is performed according to the
874 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
875 * The flags argument allows the caller to select negation of the
876 * addend, the intermediate product, or the final result. (The
877 * difference between this and having the caller do a separate
878 * negation is that negating externally will flip the sign bit on
879 * NaNs.)
880 */
881
882static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
883 int flags, float_status *s)
884{
885 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
886 ((1 << float_class_inf) | (1 << float_class_zero));
887 bool p_sign;
888 bool sign_flip = flags & float_muladd_negate_result;
889 FloatClass p_class;
890 uint64_t hi, lo;
891 int p_exp;
892
893 /* It is implementation-defined whether the cases of (0,inf,qnan)
894 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
895 * they return if they do), so we have to hand this information
896 * off to the target-specific pick-a-NaN routine.
897 */
898 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
899 return pick_nan_muladd(a, b, c, inf_zero, s);
900 }
901
902 if (inf_zero) {
903 s->float_exception_flags |= float_flag_invalid;
f7e598e2 904 return parts_default_nan(s);
d446830a
AB
905 }
906
907 if (flags & float_muladd_negate_c) {
908 c.sign ^= 1;
909 }
910
911 p_sign = a.sign ^ b.sign;
912
913 if (flags & float_muladd_negate_product) {
914 p_sign ^= 1;
915 }
916
917 if (a.cls == float_class_inf || b.cls == float_class_inf) {
918 p_class = float_class_inf;
919 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
920 p_class = float_class_zero;
921 } else {
922 p_class = float_class_normal;
923 }
924
925 if (c.cls == float_class_inf) {
926 if (p_class == float_class_inf && p_sign != c.sign) {
927 s->float_exception_flags |= float_flag_invalid;
f7e598e2 928 return parts_default_nan(s);
d446830a
AB
929 } else {
930 a.cls = float_class_inf;
931 a.sign = c.sign ^ sign_flip;
f7e598e2 932 return a;
d446830a 933 }
d446830a
AB
934 }
935
936 if (p_class == float_class_inf) {
937 a.cls = float_class_inf;
938 a.sign = p_sign ^ sign_flip;
939 return a;
940 }
941
942 if (p_class == float_class_zero) {
943 if (c.cls == float_class_zero) {
944 if (p_sign != c.sign) {
945 p_sign = s->float_rounding_mode == float_round_down;
946 }
947 c.sign = p_sign;
948 } else if (flags & float_muladd_halve_result) {
949 c.exp -= 1;
950 }
951 c.sign ^= sign_flip;
952 return c;
953 }
954
955 /* a & b should be normals now... */
956 assert(a.cls == float_class_normal &&
957 b.cls == float_class_normal);
958
959 p_exp = a.exp + b.exp;
960
961 /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
962 * result.
963 */
964 mul64To128(a.frac, b.frac, &hi, &lo);
965 /* binary point now at bit 124 */
966
967 /* check for overflow */
968 if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
969 shift128RightJamming(hi, lo, 1, &hi, &lo);
970 p_exp += 1;
971 }
972
973 /* + add/sub */
974 if (c.cls == float_class_zero) {
975 /* move binary point back to 62 */
976 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
977 } else {
978 int exp_diff = p_exp - c.exp;
979 if (p_sign == c.sign) {
980 /* Addition */
981 if (exp_diff <= 0) {
982 shift128RightJamming(hi, lo,
983 DECOMPOSED_BINARY_POINT - exp_diff,
984 &hi, &lo);
985 lo += c.frac;
986 p_exp = c.exp;
987 } else {
988 uint64_t c_hi, c_lo;
989 /* shift c to the same binary point as the product (124) */
990 c_hi = c.frac >> 2;
991 c_lo = 0;
992 shift128RightJamming(c_hi, c_lo,
993 exp_diff,
994 &c_hi, &c_lo);
995 add128(hi, lo, c_hi, c_lo, &hi, &lo);
996 /* move binary point back to 62 */
997 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
998 }
999
1000 if (lo & DECOMPOSED_OVERFLOW_BIT) {
1001 shift64RightJamming(lo, 1, &lo);
1002 p_exp += 1;
1003 }
1004
1005 } else {
1006 /* Subtraction */
1007 uint64_t c_hi, c_lo;
1008 /* make C binary point match product at bit 124 */
1009 c_hi = c.frac >> 2;
1010 c_lo = 0;
1011
1012 if (exp_diff <= 0) {
1013 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1014 if (exp_diff == 0
1015 &&
1016 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1017 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1018 } else {
1019 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1020 p_sign ^= 1;
1021 p_exp = c.exp;
1022 }
1023 } else {
1024 shift128RightJamming(c_hi, c_lo,
1025 exp_diff,
1026 &c_hi, &c_lo);
1027 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1028 }
1029
1030 if (hi == 0 && lo == 0) {
1031 a.cls = float_class_zero;
1032 a.sign = s->float_rounding_mode == float_round_down;
1033 a.sign ^= sign_flip;
1034 return a;
1035 } else {
1036 int shift;
1037 if (hi != 0) {
1038 shift = clz64(hi);
1039 } else {
1040 shift = clz64(lo) + 64;
1041 }
1042 /* Normalizing to a binary point of 124 is the
1043 correct adjust for the exponent. However since we're
1044 shifting, we might as well put the binary point back
1045 at 62 where we really want it. Therefore shift as
1046 if we're leaving 1 bit at the top of the word, but
1047 adjust the exponent as if we're leaving 3 bits. */
1048 shift -= 1;
1049 if (shift >= 64) {
1050 lo = lo << (shift - 64);
1051 } else {
1052 hi = (hi << shift) | (lo >> (64 - shift));
1053 lo = hi | ((lo << shift) != 0);
1054 }
1055 p_exp -= shift - 2;
1056 }
1057 }
1058 }
1059
1060 if (flags & float_muladd_halve_result) {
1061 p_exp -= 1;
1062 }
1063
1064 /* finally prepare our result */
1065 a.cls = float_class_normal;
1066 a.sign = p_sign ^ sign_flip;
1067 a.exp = p_exp;
1068 a.frac = lo;
1069
1070 return a;
1071}
1072
1073float16 __attribute__((flatten)) float16_muladd(float16 a, float16 b, float16 c,
1074 int flags, float_status *status)
1075{
1076 FloatParts pa = float16_unpack_canonical(a, status);
1077 FloatParts pb = float16_unpack_canonical(b, status);
1078 FloatParts pc = float16_unpack_canonical(c, status);
1079 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1080
1081 return float16_round_pack_canonical(pr, status);
1082}
1083
1084float32 __attribute__((flatten)) float32_muladd(float32 a, float32 b, float32 c,
1085 int flags, float_status *status)
1086{
1087 FloatParts pa = float32_unpack_canonical(a, status);
1088 FloatParts pb = float32_unpack_canonical(b, status);
1089 FloatParts pc = float32_unpack_canonical(c, status);
1090 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1091
1092 return float32_round_pack_canonical(pr, status);
1093}
1094
1095float64 __attribute__((flatten)) float64_muladd(float64 a, float64 b, float64 c,
1096 int flags, float_status *status)
1097{
1098 FloatParts pa = float64_unpack_canonical(a, status);
1099 FloatParts pb = float64_unpack_canonical(b, status);
1100 FloatParts pc = float64_unpack_canonical(c, status);
1101 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1102
1103 return float64_round_pack_canonical(pr, status);
1104}
1105
cf07323d
AB
1106/*
1107 * Returns the result of dividing the floating-point value `a' by the
1108 * corresponding value `b'. The operation is performed according to
1109 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1110 */
1111
1112static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1113{
1114 bool sign = a.sign ^ b.sign;
1115
1116 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1117 uint64_t temp_lo, temp_hi;
1118 int exp = a.exp - b.exp;
1119 if (a.frac < b.frac) {
1120 exp -= 1;
1121 shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1,
1122 &temp_hi, &temp_lo);
1123 } else {
1124 shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT,
1125 &temp_hi, &temp_lo);
1126 }
1127 /* LSB of quot is set if inexact which roundandpack will use
1128 * to set flags. Yet again we re-use a for the result */
1129 a.frac = div128To64(temp_lo, temp_hi, b.frac);
1130 a.sign = sign;
1131 a.exp = exp;
1132 return a;
1133 }
1134 /* handle all the NaN cases */
1135 if (is_nan(a.cls) || is_nan(b.cls)) {
1136 return pick_nan(a, b, s);
1137 }
1138 /* 0/0 or Inf/Inf */
1139 if (a.cls == b.cls
1140 &&
1141 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1142 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1143 return parts_default_nan(s);
cf07323d 1144 }
9cb4e398
AB
1145 /* Inf / x or 0 / x */
1146 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1147 a.sign = sign;
1148 return a;
1149 }
cf07323d
AB
1150 /* Div 0 => Inf */
1151 if (b.cls == float_class_zero) {
1152 s->float_exception_flags |= float_flag_divbyzero;
1153 a.cls = float_class_inf;
1154 a.sign = sign;
1155 return a;
1156 }
cf07323d
AB
1157 /* Div by Inf */
1158 if (b.cls == float_class_inf) {
1159 a.cls = float_class_zero;
1160 a.sign = sign;
1161 return a;
1162 }
1163 g_assert_not_reached();
1164}
1165
1166float16 float16_div(float16 a, float16 b, float_status *status)
1167{
1168 FloatParts pa = float16_unpack_canonical(a, status);
1169 FloatParts pb = float16_unpack_canonical(b, status);
1170 FloatParts pr = div_floats(pa, pb, status);
1171
1172 return float16_round_pack_canonical(pr, status);
1173}
1174
1175float32 float32_div(float32 a, float32 b, float_status *status)
1176{
1177 FloatParts pa = float32_unpack_canonical(a, status);
1178 FloatParts pb = float32_unpack_canonical(b, status);
1179 FloatParts pr = div_floats(pa, pb, status);
1180
1181 return float32_round_pack_canonical(pr, status);
1182}
1183
1184float64 float64_div(float64 a, float64 b, float_status *status)
1185{
1186 FloatParts pa = float64_unpack_canonical(a, status);
1187 FloatParts pb = float64_unpack_canonical(b, status);
1188 FloatParts pr = div_floats(pa, pb, status);
1189
1190 return float64_round_pack_canonical(pr, status);
1191}
1192
6fed16b2
AB
1193/*
1194 * Float to Float conversions
1195 *
1196 * Returns the result of converting one float format to another. The
1197 * conversion is performed according to the IEC/IEEE Standard for
1198 * Binary Floating-Point Arithmetic.
1199 *
1200 * The float_to_float helper only needs to take care of raising
1201 * invalid exceptions and handling the conversion on NaNs.
1202 */
1203
1204static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1205 float_status *s)
1206{
1207 if (dstf->arm_althp) {
1208 switch (a.cls) {
1209 case float_class_qnan:
1210 case float_class_snan:
1211 /* There is no NaN in the destination format. Raise Invalid
1212 * and return a zero with the sign of the input NaN.
1213 */
1214 s->float_exception_flags |= float_flag_invalid;
1215 a.cls = float_class_zero;
1216 a.frac = 0;
1217 a.exp = 0;
1218 break;
1219
1220 case float_class_inf:
1221 /* There is no Inf in the destination format. Raise Invalid
1222 * and return the maximum normal with the correct sign.
1223 */
1224 s->float_exception_flags |= float_flag_invalid;
1225 a.cls = float_class_normal;
1226 a.exp = dstf->exp_max;
1227 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1228 break;
1229
1230 default:
1231 break;
1232 }
1233 } else if (is_nan(a.cls)) {
1234 if (is_snan(a.cls)) {
1235 s->float_exception_flags |= float_flag_invalid;
1236 a = parts_silence_nan(a, s);
1237 }
1238 if (s->default_nan_mode) {
1239 return parts_default_nan(s);
1240 }
1241 }
1242 return a;
1243}
1244
1245float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1246{
1247 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1248 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1249 FloatParts pr = float_to_float(p, &float32_params, s);
1250 return float32_round_pack_canonical(pr, s);
1251}
1252
1253float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1254{
1255 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1256 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1257 FloatParts pr = float_to_float(p, &float64_params, s);
1258 return float64_round_pack_canonical(pr, s);
1259}
1260
1261float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1262{
1263 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1264 FloatParts p = float32_unpack_canonical(a, s);
1265 FloatParts pr = float_to_float(p, fmt16, s);
1266 return float16a_round_pack_canonical(pr, s, fmt16);
1267}
1268
1269float64 float32_to_float64(float32 a, float_status *s)
1270{
1271 FloatParts p = float32_unpack_canonical(a, s);
1272 FloatParts pr = float_to_float(p, &float64_params, s);
1273 return float64_round_pack_canonical(pr, s);
1274}
1275
1276float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1277{
1278 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1279 FloatParts p = float64_unpack_canonical(a, s);
1280 FloatParts pr = float_to_float(p, fmt16, s);
1281 return float16a_round_pack_canonical(pr, s, fmt16);
1282}
1283
1284float32 float64_to_float32(float64 a, float_status *s)
1285{
1286 FloatParts p = float64_unpack_canonical(a, s);
1287 FloatParts pr = float_to_float(p, &float32_params, s);
1288 return float32_round_pack_canonical(pr, s);
1289}
1290
dbe4d53a
AB
1291/*
1292 * Rounds the floating-point value `a' to an integer, and returns the
1293 * result as a floating-point value. The operation is performed
1294 * according to the IEC/IEEE Standard for Binary Floating-Point
1295 * Arithmetic.
1296 */
1297
1298static FloatParts round_to_int(FloatParts a, int rounding_mode, float_status *s)
1299{
1300 if (is_nan(a.cls)) {
1301 return return_nan(a, s);
1302 }
1303
1304 switch (a.cls) {
1305 case float_class_zero:
1306 case float_class_inf:
1307 case float_class_qnan:
1308 /* already "integral" */
1309 break;
1310 case float_class_normal:
1311 if (a.exp >= DECOMPOSED_BINARY_POINT) {
1312 /* already integral */
1313 break;
1314 }
1315 if (a.exp < 0) {
1316 bool one;
1317 /* all fractional */
1318 s->float_exception_flags |= float_flag_inexact;
1319 switch (rounding_mode) {
1320 case float_round_nearest_even:
1321 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
1322 break;
1323 case float_round_ties_away:
1324 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
1325 break;
1326 case float_round_to_zero:
1327 one = false;
1328 break;
1329 case float_round_up:
1330 one = !a.sign;
1331 break;
1332 case float_round_down:
1333 one = a.sign;
1334 break;
1335 default:
1336 g_assert_not_reached();
1337 }
1338
1339 if (one) {
1340 a.frac = DECOMPOSED_IMPLICIT_BIT;
1341 a.exp = 0;
1342 } else {
1343 a.cls = float_class_zero;
1344 }
1345 } else {
1346 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
1347 uint64_t frac_lsbm1 = frac_lsb >> 1;
1348 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
1349 uint64_t rnd_mask = rnd_even_mask >> 1;
1350 uint64_t inc;
1351
1352 switch (rounding_mode) {
1353 case float_round_nearest_even:
1354 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
1355 break;
1356 case float_round_ties_away:
1357 inc = frac_lsbm1;
1358 break;
1359 case float_round_to_zero:
1360 inc = 0;
1361 break;
1362 case float_round_up:
1363 inc = a.sign ? 0 : rnd_mask;
1364 break;
1365 case float_round_down:
1366 inc = a.sign ? rnd_mask : 0;
1367 break;
1368 default:
1369 g_assert_not_reached();
1370 }
1371
1372 if (a.frac & rnd_mask) {
1373 s->float_exception_flags |= float_flag_inexact;
1374 a.frac += inc;
1375 a.frac &= ~rnd_mask;
1376 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
1377 a.frac >>= 1;
1378 a.exp++;
1379 }
1380 }
1381 }
1382 break;
1383 default:
1384 g_assert_not_reached();
1385 }
1386 return a;
1387}
1388
1389float16 float16_round_to_int(float16 a, float_status *s)
1390{
1391 FloatParts pa = float16_unpack_canonical(a, s);
1392 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1393 return float16_round_pack_canonical(pr, s);
1394}
1395
1396float32 float32_round_to_int(float32 a, float_status *s)
1397{
1398 FloatParts pa = float32_unpack_canonical(a, s);
1399 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1400 return float32_round_pack_canonical(pr, s);
1401}
1402
1403float64 float64_round_to_int(float64 a, float_status *s)
1404{
1405 FloatParts pa = float64_unpack_canonical(a, s);
1406 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1407 return float64_round_pack_canonical(pr, s);
1408}
1409
1410float64 float64_trunc_to_int(float64 a, float_status *s)
1411{
1412 FloatParts pa = float64_unpack_canonical(a, s);
1413 FloatParts pr = round_to_int(pa, float_round_to_zero, s);
1414 return float64_round_pack_canonical(pr, s);
1415}
1416
ab52f973
AB
1417/*
1418 * Returns the result of converting the floating-point value `a' to
1419 * the two's complement integer format. The conversion is performed
1420 * according to the IEC/IEEE Standard for Binary Floating-Point
1421 * Arithmetic---which means in particular that the conversion is
1422 * rounded according to the current rounding mode. If `a' is a NaN,
1423 * the largest positive integer is returned. Otherwise, if the
1424 * conversion overflows, the largest integer with the same sign as `a'
1425 * is returned.
1426*/
1427
1428static int64_t round_to_int_and_pack(FloatParts in, int rmode,
1429 int64_t min, int64_t max,
1430 float_status *s)
1431{
1432 uint64_t r;
1433 int orig_flags = get_float_exception_flags(s);
1434 FloatParts p = round_to_int(in, rmode, s);
1435
1436 switch (p.cls) {
1437 case float_class_snan:
1438 case float_class_qnan:
801bc563 1439 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1440 return max;
1441 case float_class_inf:
801bc563 1442 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1443 return p.sign ? min : max;
1444 case float_class_zero:
1445 return 0;
1446 case float_class_normal:
1447 if (p.exp < DECOMPOSED_BINARY_POINT) {
1448 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1449 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1450 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1451 } else {
1452 r = UINT64_MAX;
1453 }
1454 if (p.sign) {
33358375 1455 if (r <= -(uint64_t) min) {
ab52f973
AB
1456 return -r;
1457 } else {
1458 s->float_exception_flags = orig_flags | float_flag_invalid;
1459 return min;
1460 }
1461 } else {
33358375 1462 if (r <= max) {
ab52f973
AB
1463 return r;
1464 } else {
1465 s->float_exception_flags = orig_flags | float_flag_invalid;
1466 return max;
1467 }
1468 }
1469 default:
1470 g_assert_not_reached();
1471 }
1472}
1473
1474#define FLOAT_TO_INT(fsz, isz) \
1475int ## isz ## _t float ## fsz ## _to_int ## isz(float ## fsz a, \
1476 float_status *s) \
1477{ \
1478 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1479 return round_to_int_and_pack(p, s->float_rounding_mode, \
1480 INT ## isz ## _MIN, INT ## isz ## _MAX,\
1481 s); \
1482} \
1483 \
1484int ## isz ## _t float ## fsz ## _to_int ## isz ## _round_to_zero \
1485 (float ## fsz a, float_status *s) \
1486{ \
1487 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1488 return round_to_int_and_pack(p, float_round_to_zero, \
1489 INT ## isz ## _MIN, INT ## isz ## _MAX,\
1490 s); \
1491}
1492
1493FLOAT_TO_INT(16, 16)
1494FLOAT_TO_INT(16, 32)
1495FLOAT_TO_INT(16, 64)
1496
1497FLOAT_TO_INT(32, 16)
1498FLOAT_TO_INT(32, 32)
1499FLOAT_TO_INT(32, 64)
1500
1501FLOAT_TO_INT(64, 16)
1502FLOAT_TO_INT(64, 32)
1503FLOAT_TO_INT(64, 64)
1504
1505#undef FLOAT_TO_INT
1506
1507/*
1508 * Returns the result of converting the floating-point value `a' to
1509 * the unsigned integer format. The conversion is performed according
1510 * to the IEC/IEEE Standard for Binary Floating-Point
1511 * Arithmetic---which means in particular that the conversion is
1512 * rounded according to the current rounding mode. If `a' is a NaN,
1513 * the largest unsigned integer is returned. Otherwise, if the
1514 * conversion overflows, the largest unsigned integer is returned. If
1515 * the 'a' is negative, the result is rounded and zero is returned;
1516 * values that do not round to zero will raise the inexact exception
1517 * flag.
1518 */
1519
1520static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, uint64_t max,
1521 float_status *s)
1522{
1523 int orig_flags = get_float_exception_flags(s);
1524 FloatParts p = round_to_int(in, rmode, s);
1525
1526 switch (p.cls) {
1527 case float_class_snan:
1528 case float_class_qnan:
1529 s->float_exception_flags = orig_flags | float_flag_invalid;
1530 return max;
1531 case float_class_inf:
801bc563 1532 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1533 return p.sign ? 0 : max;
1534 case float_class_zero:
1535 return 0;
1536 case float_class_normal:
1537 {
1538 uint64_t r;
1539 if (p.sign) {
1540 s->float_exception_flags = orig_flags | float_flag_invalid;
1541 return 0;
1542 }
1543
1544 if (p.exp < DECOMPOSED_BINARY_POINT) {
1545 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1546 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1547 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1548 } else {
1549 s->float_exception_flags = orig_flags | float_flag_invalid;
1550 return max;
1551 }
1552
1553 /* For uint64 this will never trip, but if p.exp is too large
1554 * to shift a decomposed fraction we shall have exited via the
1555 * 3rd leg above.
1556 */
1557 if (r > max) {
1558 s->float_exception_flags = orig_flags | float_flag_invalid;
1559 return max;
1560 } else {
1561 return r;
1562 }
1563 }
1564 default:
1565 g_assert_not_reached();
1566 }
1567}
1568
1569#define FLOAT_TO_UINT(fsz, isz) \
1570uint ## isz ## _t float ## fsz ## _to_uint ## isz(float ## fsz a, \
1571 float_status *s) \
1572{ \
1573 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1574 return round_to_uint_and_pack(p, s->float_rounding_mode, \
1575 UINT ## isz ## _MAX, s); \
1576} \
1577 \
1578uint ## isz ## _t float ## fsz ## _to_uint ## isz ## _round_to_zero \
1579 (float ## fsz a, float_status *s) \
1580{ \
1581 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
bd49e602
RH
1582 return round_to_uint_and_pack(p, float_round_to_zero, \
1583 UINT ## isz ## _MAX, s); \
ab52f973
AB
1584}
1585
1586FLOAT_TO_UINT(16, 16)
1587FLOAT_TO_UINT(16, 32)
1588FLOAT_TO_UINT(16, 64)
1589
1590FLOAT_TO_UINT(32, 16)
1591FLOAT_TO_UINT(32, 32)
1592FLOAT_TO_UINT(32, 64)
1593
1594FLOAT_TO_UINT(64, 16)
1595FLOAT_TO_UINT(64, 32)
1596FLOAT_TO_UINT(64, 64)
1597
1598#undef FLOAT_TO_UINT
1599
c02e1fb8
AB
1600/*
1601 * Integer to float conversions
1602 *
1603 * Returns the result of converting the two's complement integer `a'
1604 * to the floating-point format. The conversion is performed according
1605 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1606 */
1607
1608static FloatParts int_to_float(int64_t a, float_status *status)
1609{
a5a5f5e2 1610 FloatParts r = {};
c02e1fb8
AB
1611 if (a == 0) {
1612 r.cls = float_class_zero;
1613 r.sign = false;
1614 } else if (a == (1ULL << 63)) {
1615 r.cls = float_class_normal;
1616 r.sign = true;
1617 r.frac = DECOMPOSED_IMPLICIT_BIT;
1618 r.exp = 63;
1619 } else {
1620 uint64_t f;
1621 if (a < 0) {
1622 f = -a;
1623 r.sign = true;
1624 } else {
1625 f = a;
1626 r.sign = false;
1627 }
1628 int shift = clz64(f) - 1;
1629 r.cls = float_class_normal;
1630 r.exp = (DECOMPOSED_BINARY_POINT - shift);
1631 r.frac = f << shift;
1632 }
1633
1634 return r;
1635}
1636
1637float16 int64_to_float16(int64_t a, float_status *status)
1638{
1639 FloatParts pa = int_to_float(a, status);
1640 return float16_round_pack_canonical(pa, status);
1641}
1642
1643float16 int32_to_float16(int32_t a, float_status *status)
1644{
1645 return int64_to_float16(a, status);
1646}
1647
1648float16 int16_to_float16(int16_t a, float_status *status)
1649{
1650 return int64_to_float16(a, status);
1651}
1652
1653float32 int64_to_float32(int64_t a, float_status *status)
1654{
1655 FloatParts pa = int_to_float(a, status);
1656 return float32_round_pack_canonical(pa, status);
1657}
1658
1659float32 int32_to_float32(int32_t a, float_status *status)
1660{
1661 return int64_to_float32(a, status);
1662}
1663
1664float32 int16_to_float32(int16_t a, float_status *status)
1665{
1666 return int64_to_float32(a, status);
1667}
1668
1669float64 int64_to_float64(int64_t a, float_status *status)
1670{
1671 FloatParts pa = int_to_float(a, status);
1672 return float64_round_pack_canonical(pa, status);
1673}
1674
1675float64 int32_to_float64(int32_t a, float_status *status)
1676{
1677 return int64_to_float64(a, status);
1678}
1679
1680float64 int16_to_float64(int16_t a, float_status *status)
1681{
1682 return int64_to_float64(a, status);
1683}
1684
1685
1686/*
1687 * Unsigned Integer to float conversions
1688 *
1689 * Returns the result of converting the unsigned integer `a' to the
1690 * floating-point format. The conversion is performed according to the
1691 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1692 */
1693
1694static FloatParts uint_to_float(uint64_t a, float_status *status)
1695{
1696 FloatParts r = { .sign = false};
1697
1698 if (a == 0) {
1699 r.cls = float_class_zero;
1700 } else {
1701 int spare_bits = clz64(a) - 1;
1702 r.cls = float_class_normal;
1703 r.exp = DECOMPOSED_BINARY_POINT - spare_bits;
1704 if (spare_bits < 0) {
1705 shift64RightJamming(a, -spare_bits, &a);
1706 r.frac = a;
1707 } else {
1708 r.frac = a << spare_bits;
1709 }
1710 }
1711
1712 return r;
1713}
1714
1715float16 uint64_to_float16(uint64_t a, float_status *status)
1716{
1717 FloatParts pa = uint_to_float(a, status);
1718 return float16_round_pack_canonical(pa, status);
1719}
1720
1721float16 uint32_to_float16(uint32_t a, float_status *status)
1722{
1723 return uint64_to_float16(a, status);
1724}
1725
1726float16 uint16_to_float16(uint16_t a, float_status *status)
1727{
1728 return uint64_to_float16(a, status);
1729}
1730
1731float32 uint64_to_float32(uint64_t a, float_status *status)
1732{
1733 FloatParts pa = uint_to_float(a, status);
1734 return float32_round_pack_canonical(pa, status);
1735}
1736
1737float32 uint32_to_float32(uint32_t a, float_status *status)
1738{
1739 return uint64_to_float32(a, status);
1740}
1741
1742float32 uint16_to_float32(uint16_t a, float_status *status)
1743{
1744 return uint64_to_float32(a, status);
1745}
1746
1747float64 uint64_to_float64(uint64_t a, float_status *status)
1748{
1749 FloatParts pa = uint_to_float(a, status);
1750 return float64_round_pack_canonical(pa, status);
1751}
1752
1753float64 uint32_to_float64(uint32_t a, float_status *status)
1754{
1755 return uint64_to_float64(a, status);
1756}
1757
1758float64 uint16_to_float64(uint16_t a, float_status *status)
1759{
1760 return uint64_to_float64(a, status);
1761}
1762
89360067
AB
1763/* Float Min/Max */
1764/* min() and max() functions. These can't be implemented as
1765 * 'compare and pick one input' because that would mishandle
1766 * NaNs and +0 vs -0.
1767 *
1768 * minnum() and maxnum() functions. These are similar to the min()
1769 * and max() functions but if one of the arguments is a QNaN and
1770 * the other is numerical then the numerical argument is returned.
1771 * SNaNs will get quietened before being returned.
1772 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
1773 * and maxNum() operations. min() and max() are the typical min/max
1774 * semantics provided by many CPUs which predate that specification.
1775 *
1776 * minnummag() and maxnummag() functions correspond to minNumMag()
1777 * and minNumMag() from the IEEE-754 2008.
1778 */
1779static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
1780 bool ieee, bool ismag, float_status *s)
1781{
1782 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
1783 if (ieee) {
1784 /* Takes two floating-point values `a' and `b', one of
1785 * which is a NaN, and returns the appropriate NaN
1786 * result. If either `a' or `b' is a signaling NaN,
1787 * the invalid exception is raised.
1788 */
1789 if (is_snan(a.cls) || is_snan(b.cls)) {
1790 return pick_nan(a, b, s);
1791 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
1792 return b;
1793 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
1794 return a;
1795 }
1796 }
1797 return pick_nan(a, b, s);
1798 } else {
1799 int a_exp, b_exp;
89360067
AB
1800
1801 switch (a.cls) {
1802 case float_class_normal:
1803 a_exp = a.exp;
1804 break;
1805 case float_class_inf:
1806 a_exp = INT_MAX;
1807 break;
1808 case float_class_zero:
1809 a_exp = INT_MIN;
1810 break;
1811 default:
1812 g_assert_not_reached();
1813 break;
1814 }
1815 switch (b.cls) {
1816 case float_class_normal:
1817 b_exp = b.exp;
1818 break;
1819 case float_class_inf:
1820 b_exp = INT_MAX;
1821 break;
1822 case float_class_zero:
1823 b_exp = INT_MIN;
1824 break;
1825 default:
1826 g_assert_not_reached();
1827 break;
1828 }
1829
6245327a
EC
1830 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
1831 bool a_less = a_exp < b_exp;
1832 if (a_exp == b_exp) {
1833 a_less = a.frac < b.frac;
1834 }
1835 return a_less ^ ismin ? b : a;
89360067
AB
1836 }
1837
6245327a 1838 if (a.sign == b.sign) {
89360067
AB
1839 bool a_less = a_exp < b_exp;
1840 if (a_exp == b_exp) {
1841 a_less = a.frac < b.frac;
1842 }
6245327a 1843 return a.sign ^ a_less ^ ismin ? b : a;
89360067 1844 } else {
6245327a 1845 return a.sign ^ ismin ? b : a;
89360067
AB
1846 }
1847 }
1848}
1849
1850#define MINMAX(sz, name, ismin, isiee, ismag) \
1851float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
1852 float_status *s) \
1853{ \
1854 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1855 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1856 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
1857 \
1858 return float ## sz ## _round_pack_canonical(pr, s); \
1859}
1860
1861MINMAX(16, min, true, false, false)
1862MINMAX(16, minnum, true, true, false)
1863MINMAX(16, minnummag, true, true, true)
1864MINMAX(16, max, false, false, false)
1865MINMAX(16, maxnum, false, true, false)
1866MINMAX(16, maxnummag, false, true, true)
1867
1868MINMAX(32, min, true, false, false)
1869MINMAX(32, minnum, true, true, false)
1870MINMAX(32, minnummag, true, true, true)
1871MINMAX(32, max, false, false, false)
1872MINMAX(32, maxnum, false, true, false)
1873MINMAX(32, maxnummag, false, true, true)
1874
1875MINMAX(64, min, true, false, false)
1876MINMAX(64, minnum, true, true, false)
1877MINMAX(64, minnummag, true, true, true)
1878MINMAX(64, max, false, false, false)
1879MINMAX(64, maxnum, false, true, false)
1880MINMAX(64, maxnummag, false, true, true)
1881
1882#undef MINMAX
1883
0c4c9092
AB
1884/* Floating point compare */
1885static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
1886 float_status *s)
1887{
1888 if (is_nan(a.cls) || is_nan(b.cls)) {
1889 if (!is_quiet ||
1890 a.cls == float_class_snan ||
1891 b.cls == float_class_snan) {
1892 s->float_exception_flags |= float_flag_invalid;
1893 }
1894 return float_relation_unordered;
1895 }
1896
1897 if (a.cls == float_class_zero) {
1898 if (b.cls == float_class_zero) {
1899 return float_relation_equal;
1900 }
1901 return b.sign ? float_relation_greater : float_relation_less;
1902 } else if (b.cls == float_class_zero) {
1903 return a.sign ? float_relation_less : float_relation_greater;
1904 }
1905
1906 /* The only really important thing about infinity is its sign. If
1907 * both are infinities the sign marks the smallest of the two.
1908 */
1909 if (a.cls == float_class_inf) {
1910 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
1911 return float_relation_equal;
1912 }
1913 return a.sign ? float_relation_less : float_relation_greater;
1914 } else if (b.cls == float_class_inf) {
1915 return b.sign ? float_relation_greater : float_relation_less;
1916 }
1917
1918 if (a.sign != b.sign) {
1919 return a.sign ? float_relation_less : float_relation_greater;
1920 }
1921
1922 if (a.exp == b.exp) {
1923 if (a.frac == b.frac) {
1924 return float_relation_equal;
1925 }
1926 if (a.sign) {
1927 return a.frac > b.frac ?
1928 float_relation_less : float_relation_greater;
1929 } else {
1930 return a.frac > b.frac ?
1931 float_relation_greater : float_relation_less;
1932 }
1933 } else {
1934 if (a.sign) {
1935 return a.exp > b.exp ? float_relation_less : float_relation_greater;
1936 } else {
1937 return a.exp > b.exp ? float_relation_greater : float_relation_less;
1938 }
1939 }
1940}
1941
1942#define COMPARE(sz) \
1943int float ## sz ## _compare(float ## sz a, float ## sz b, \
1944 float_status *s) \
1945{ \
1946 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1947 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1948 return compare_floats(pa, pb, false, s); \
1949} \
1950int float ## sz ## _compare_quiet(float ## sz a, float ## sz b, \
1951 float_status *s) \
1952{ \
1953 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1954 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1955 return compare_floats(pa, pb, true, s); \
1956}
1957
1958COMPARE(16)
1959COMPARE(32)
1960COMPARE(64)
1961
1962#undef COMPARE
1963
0bfc9f19
AB
1964/* Multiply A by 2 raised to the power N. */
1965static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
1966{
1967 if (unlikely(is_nan(a.cls))) {
1968 return return_nan(a, s);
1969 }
1970 if (a.cls == float_class_normal) {
ce8d4082
RH
1971 /* The largest float type (even though not supported by FloatParts)
1972 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
1973 * still allows rounding to infinity, without allowing overflow
1974 * within the int32_t that backs FloatParts.exp.
1975 */
1976 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
1977 a.exp += n;
1978 }
1979 return a;
1980}
1981
1982float16 float16_scalbn(float16 a, int n, float_status *status)
1983{
1984 FloatParts pa = float16_unpack_canonical(a, status);
1985 FloatParts pr = scalbn_decomposed(pa, n, status);
1986 return float16_round_pack_canonical(pr, status);
1987}
1988
1989float32 float32_scalbn(float32 a, int n, float_status *status)
1990{
1991 FloatParts pa = float32_unpack_canonical(a, status);
1992 FloatParts pr = scalbn_decomposed(pa, n, status);
1993 return float32_round_pack_canonical(pr, status);
1994}
1995
1996float64 float64_scalbn(float64 a, int n, float_status *status)
1997{
1998 FloatParts pa = float64_unpack_canonical(a, status);
1999 FloatParts pr = scalbn_decomposed(pa, n, status);
2000 return float64_round_pack_canonical(pr, status);
2001}
2002
c13bb2da
AB
2003/*
2004 * Square Root
2005 *
2006 * The old softfloat code did an approximation step before zeroing in
2007 * on the final result. However for simpleness we just compute the
2008 * square root by iterating down from the implicit bit to enough extra
2009 * bits to ensure we get a correctly rounded result.
2010 *
2011 * This does mean however the calculation is slower than before,
2012 * especially for 64 bit floats.
2013 */
2014
2015static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
2016{
2017 uint64_t a_frac, r_frac, s_frac;
2018 int bit, last_bit;
2019
2020 if (is_nan(a.cls)) {
2021 return return_nan(a, s);
2022 }
2023 if (a.cls == float_class_zero) {
2024 return a; /* sqrt(+-0) = +-0 */
2025 }
2026 if (a.sign) {
2027 s->float_exception_flags |= float_flag_invalid;
f7e598e2 2028 return parts_default_nan(s);
c13bb2da
AB
2029 }
2030 if (a.cls == float_class_inf) {
2031 return a; /* sqrt(+inf) = +inf */
2032 }
2033
2034 assert(a.cls == float_class_normal);
2035
2036 /* We need two overflow bits at the top. Adding room for that is a
2037 * right shift. If the exponent is odd, we can discard the low bit
2038 * by multiplying the fraction by 2; that's a left shift. Combine
2039 * those and we shift right if the exponent is even.
2040 */
2041 a_frac = a.frac;
2042 if (!(a.exp & 1)) {
2043 a_frac >>= 1;
2044 }
2045 a.exp >>= 1;
2046
2047 /* Bit-by-bit computation of sqrt. */
2048 r_frac = 0;
2049 s_frac = 0;
2050
2051 /* Iterate from implicit bit down to the 3 extra bits to compute a
2052 * properly rounded result. Remember we've inserted one more bit
2053 * at the top, so these positions are one less.
2054 */
2055 bit = DECOMPOSED_BINARY_POINT - 1;
2056 last_bit = MAX(p->frac_shift - 4, 0);
2057 do {
2058 uint64_t q = 1ULL << bit;
2059 uint64_t t_frac = s_frac + q;
2060 if (t_frac <= a_frac) {
2061 s_frac = t_frac + q;
2062 a_frac -= t_frac;
2063 r_frac += q;
2064 }
2065 a_frac <<= 1;
2066 } while (--bit >= last_bit);
2067
2068 /* Undo the right shift done above. If there is any remaining
2069 * fraction, the result is inexact. Set the sticky bit.
2070 */
2071 a.frac = (r_frac << 1) + (a_frac != 0);
2072
2073 return a;
2074}
2075
2076float16 __attribute__((flatten)) float16_sqrt(float16 a, float_status *status)
2077{
2078 FloatParts pa = float16_unpack_canonical(a, status);
2079 FloatParts pr = sqrt_float(pa, status, &float16_params);
2080 return float16_round_pack_canonical(pr, status);
2081}
2082
2083float32 __attribute__((flatten)) float32_sqrt(float32 a, float_status *status)
2084{
2085 FloatParts pa = float32_unpack_canonical(a, status);
2086 FloatParts pr = sqrt_float(pa, status, &float32_params);
2087 return float32_round_pack_canonical(pr, status);
2088}
2089
2090float64 __attribute__((flatten)) float64_sqrt(float64 a, float_status *status)
2091{
2092 FloatParts pa = float64_unpack_canonical(a, status);
2093 FloatParts pr = sqrt_float(pa, status, &float64_params);
2094 return float64_round_pack_canonical(pr, status);
2095}
2096
2097
158142c2
FB
2098/*----------------------------------------------------------------------------
2099| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
2100| and 7, and returns the properly rounded 32-bit integer corresponding to the
2101| input. If `zSign' is 1, the input is negated before being converted to an
2102| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
2103| is simply rounded to an integer, with the inexact exception raised if the
2104| input cannot be represented exactly as an integer. However, if the fixed-
2105| point input is too large, the invalid exception is raised and the largest
2106| positive or negative integer is returned.
2107*----------------------------------------------------------------------------*/
2108
f4014512 2109static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
158142c2 2110{
8f506c70 2111 int8_t roundingMode;
158142c2 2112 flag roundNearestEven;
8f506c70 2113 int8_t roundIncrement, roundBits;
760e1416 2114 int32_t z;
158142c2 2115
a2f2d288 2116 roundingMode = status->float_rounding_mode;
158142c2 2117 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2118 switch (roundingMode) {
2119 case float_round_nearest_even:
f9288a76 2120 case float_round_ties_away:
dc355b76
PM
2121 roundIncrement = 0x40;
2122 break;
2123 case float_round_to_zero:
2124 roundIncrement = 0;
2125 break;
2126 case float_round_up:
2127 roundIncrement = zSign ? 0 : 0x7f;
2128 break;
2129 case float_round_down:
2130 roundIncrement = zSign ? 0x7f : 0;
2131 break;
2132 default:
2133 abort();
158142c2
FB
2134 }
2135 roundBits = absZ & 0x7F;
2136 absZ = ( absZ + roundIncrement )>>7;
2137 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2138 z = absZ;
2139 if ( zSign ) z = - z;
2140 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 2141 float_raise(float_flag_invalid, status);
bb98fe42 2142 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2 2143 }
a2f2d288
PM
2144 if (roundBits) {
2145 status->float_exception_flags |= float_flag_inexact;
2146 }
158142c2
FB
2147 return z;
2148
2149}
2150
2151/*----------------------------------------------------------------------------
2152| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2153| `absZ1', with binary point between bits 63 and 64 (between the input words),
2154| and returns the properly rounded 64-bit integer corresponding to the input.
2155| If `zSign' is 1, the input is negated before being converted to an integer.
2156| Ordinarily, the fixed-point input is simply rounded to an integer, with
2157| the inexact exception raised if the input cannot be represented exactly as
2158| an integer. However, if the fixed-point input is too large, the invalid
2159| exception is raised and the largest positive or negative integer is
2160| returned.
2161*----------------------------------------------------------------------------*/
2162
f42c2224 2163static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 2164 float_status *status)
158142c2 2165{
8f506c70 2166 int8_t roundingMode;
158142c2 2167 flag roundNearestEven, increment;
760e1416 2168 int64_t z;
158142c2 2169
a2f2d288 2170 roundingMode = status->float_rounding_mode;
158142c2 2171 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2172 switch (roundingMode) {
2173 case float_round_nearest_even:
f9288a76 2174 case float_round_ties_away:
dc355b76
PM
2175 increment = ((int64_t) absZ1 < 0);
2176 break;
2177 case float_round_to_zero:
2178 increment = 0;
2179 break;
2180 case float_round_up:
2181 increment = !zSign && absZ1;
2182 break;
2183 case float_round_down:
2184 increment = zSign && absZ1;
2185 break;
2186 default:
2187 abort();
158142c2
FB
2188 }
2189 if ( increment ) {
2190 ++absZ0;
2191 if ( absZ0 == 0 ) goto overflow;
bb98fe42 2192 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
2193 }
2194 z = absZ0;
2195 if ( zSign ) z = - z;
2196 if ( z && ( ( z < 0 ) ^ zSign ) ) {
2197 overflow:
ff32e16e 2198 float_raise(float_flag_invalid, status);
158142c2 2199 return
bb98fe42 2200 zSign ? (int64_t) LIT64( 0x8000000000000000 )
158142c2
FB
2201 : LIT64( 0x7FFFFFFFFFFFFFFF );
2202 }
a2f2d288
PM
2203 if (absZ1) {
2204 status->float_exception_flags |= float_flag_inexact;
2205 }
158142c2
FB
2206 return z;
2207
2208}
2209
fb3ea83a
TM
2210/*----------------------------------------------------------------------------
2211| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2212| `absZ1', with binary point between bits 63 and 64 (between the input words),
2213| and returns the properly rounded 64-bit unsigned integer corresponding to the
2214| input. Ordinarily, the fixed-point input is simply rounded to an integer,
2215| with the inexact exception raised if the input cannot be represented exactly
2216| as an integer. However, if the fixed-point input is too large, the invalid
2217| exception is raised and the largest unsigned integer is returned.
2218*----------------------------------------------------------------------------*/
2219
f42c2224 2220static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
e5a41ffa 2221 uint64_t absZ1, float_status *status)
fb3ea83a 2222{
8f506c70 2223 int8_t roundingMode;
fb3ea83a
TM
2224 flag roundNearestEven, increment;
2225
a2f2d288 2226 roundingMode = status->float_rounding_mode;
fb3ea83a 2227 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
2228 switch (roundingMode) {
2229 case float_round_nearest_even:
f9288a76 2230 case float_round_ties_away:
dc355b76
PM
2231 increment = ((int64_t)absZ1 < 0);
2232 break;
2233 case float_round_to_zero:
2234 increment = 0;
2235 break;
2236 case float_round_up:
2237 increment = !zSign && absZ1;
2238 break;
2239 case float_round_down:
2240 increment = zSign && absZ1;
2241 break;
2242 default:
2243 abort();
fb3ea83a
TM
2244 }
2245 if (increment) {
2246 ++absZ0;
2247 if (absZ0 == 0) {
ff32e16e 2248 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2249 return LIT64(0xFFFFFFFFFFFFFFFF);
2250 }
2251 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
2252 }
2253
2254 if (zSign && absZ0) {
ff32e16e 2255 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2256 return 0;
2257 }
2258
2259 if (absZ1) {
a2f2d288 2260 status->float_exception_flags |= float_flag_inexact;
fb3ea83a
TM
2261 }
2262 return absZ0;
2263}
2264
37d18660
PM
2265/*----------------------------------------------------------------------------
2266| If `a' is denormal and we are in flush-to-zero mode then set the
2267| input-denormal exception and return zero. Otherwise just return the value.
2268*----------------------------------------------------------------------------*/
e5a41ffa 2269float32 float32_squash_input_denormal(float32 a, float_status *status)
37d18660 2270{
a2f2d288 2271 if (status->flush_inputs_to_zero) {
37d18660 2272 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
ff32e16e 2273 float_raise(float_flag_input_denormal, status);
37d18660
PM
2274 return make_float32(float32_val(a) & 0x80000000);
2275 }
2276 }
2277 return a;
2278}
2279
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision significand `aSig'.  The
| significand is shifted left by (leading zero count - 8) bits; the shifted
| significand is written to `*zSigPtr' and the matching exponent, one minus
| the shift distance, to `*zExpPtr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    int8_t shift = countLeadingZeros32(aSig) - 8;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
2297
158142c2
FB
2298/*----------------------------------------------------------------------------
2299| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2300| and significand `zSig', and returns the proper single-precision floating-
2301| point value corresponding to the abstract input. Ordinarily, the abstract
2302| value is simply rounded and packed into the single-precision format, with
2303| the inexact exception raised if the abstract input cannot be represented
2304| exactly. However, if the abstract value is too large, the overflow and
2305| inexact exceptions are raised and an infinity or maximal finite value is
2306| returned. If the abstract value is too small, the input value is rounded to
2307| a subnormal number, and the underflow and inexact exceptions are raised if
2308| the abstract input cannot be represented exactly as a subnormal single-
2309| precision floating-point number.
2310| The input significand `zSig' has its binary point between bits 30
2311| and 29, which is 7 bits to the left of the usual location. This shifted
2312| significand must be normalized or smaller. If `zSig' is not normalized,
2313| `zExp' must be 0; in that case, the result returned is a subnormal number,
2314| and it must not require rounding. In the usual case that `zSig' is
2315| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2316| The handling of underflow and overflow follows the IEC/IEEE Standard for
2317| Binary Floating-Point Arithmetic.
2318*----------------------------------------------------------------------------*/
2319
0c48262d 2320static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2321 float_status *status)
158142c2 2322{
8f506c70 2323 int8_t roundingMode;
158142c2 2324 flag roundNearestEven;
8f506c70 2325 int8_t roundIncrement, roundBits;
158142c2
FB
2326 flag isTiny;
2327
a2f2d288 2328 roundingMode = status->float_rounding_mode;
158142c2 2329 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2330 switch (roundingMode) {
2331 case float_round_nearest_even:
f9288a76 2332 case float_round_ties_away:
dc355b76
PM
2333 roundIncrement = 0x40;
2334 break;
2335 case float_round_to_zero:
2336 roundIncrement = 0;
2337 break;
2338 case float_round_up:
2339 roundIncrement = zSign ? 0 : 0x7f;
2340 break;
2341 case float_round_down:
2342 roundIncrement = zSign ? 0x7f : 0;
2343 break;
2344 default:
2345 abort();
2346 break;
158142c2
FB
2347 }
2348 roundBits = zSig & 0x7F;
bb98fe42 2349 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
2350 if ( ( 0xFD < zExp )
2351 || ( ( zExp == 0xFD )
bb98fe42 2352 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2353 ) {
ff32e16e 2354 float_raise(float_flag_overflow | float_flag_inexact, status);
f090c9d4 2355 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
158142c2
FB
2356 }
2357 if ( zExp < 0 ) {
a2f2d288 2358 if (status->flush_to_zero) {
ff32e16e 2359 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2360 return packFloat32(zSign, 0, 0);
2361 }
158142c2 2362 isTiny =
a2f2d288
PM
2363 (status->float_detect_tininess
2364 == float_tininess_before_rounding)
158142c2
FB
2365 || ( zExp < -1 )
2366 || ( zSig + roundIncrement < 0x80000000 );
2367 shift32RightJamming( zSig, - zExp, &zSig );
2368 zExp = 0;
2369 roundBits = zSig & 0x7F;
ff32e16e
PM
2370 if (isTiny && roundBits) {
2371 float_raise(float_flag_underflow, status);
2372 }
158142c2
FB
2373 }
2374 }
a2f2d288
PM
2375 if (roundBits) {
2376 status->float_exception_flags |= float_flag_inexact;
2377 }
158142c2
FB
2378 zSig = ( zSig + roundIncrement )>>7;
2379 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2380 if ( zSig == 0 ) zExp = 0;
2381 return packFloat32( zSign, zExp, zSig );
2382
2383}
2384
2385/*----------------------------------------------------------------------------
2386| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2387| and significand `zSig', and returns the proper single-precision floating-
2388| point value corresponding to the abstract input. This routine is just like
2389| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
2390| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2391| floating-point exponent.
2392*----------------------------------------------------------------------------*/
2393
2394static float32
0c48262d 2395 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2396 float_status *status)
158142c2 2397{
8f506c70 2398 int8_t shiftCount;
158142c2
FB
2399
2400 shiftCount = countLeadingZeros32( zSig ) - 1;
ff32e16e
PM
2401 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
2402 status);
158142c2
FB
2403
2404}
2405
37d18660
PM
2406/*----------------------------------------------------------------------------
2407| If `a' is denormal and we are in flush-to-zero mode then set the
2408| input-denormal exception and return zero. Otherwise just return the value.
2409*----------------------------------------------------------------------------*/
e5a41ffa 2410float64 float64_squash_input_denormal(float64 a, float_status *status)
37d18660 2411{
a2f2d288 2412 if (status->flush_inputs_to_zero) {
37d18660 2413 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
ff32e16e 2414 float_raise(float_flag_input_denormal, status);
37d18660
PM
2415 return make_float64(float64_val(a) & (1ULL << 63));
2416 }
2417 }
2418 return a;
2419}
2420
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision significand `aSig'.  The
| significand is shifted left by (leading zero count - 11) bits; the shifted
| significand is written to `*zSigPtr' and the matching exponent, one minus
| the shift distance, to `*zExpPtr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    int8_t shift = countLeadingZeros64(aSig) - 11;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
2438
2439/*----------------------------------------------------------------------------
2440| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2441| double-precision floating-point value, returning the result. After being
2442| shifted into the proper positions, the three fields are simply added
2443| together to form the result. This means that any integer portion of `zSig'
2444| will be added into the exponent. Since a properly normalized significand
2445| will have an integer portion equal to 1, the `zExp' input should be 1 less
2446| than the desired result exponent whenever `zSig' is a complete, normalized
2447| significand.
2448*----------------------------------------------------------------------------*/
2449
0c48262d 2450static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
158142c2
FB
2451{
2452
f090c9d4 2453 return make_float64(
bb98fe42 2454 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
2455
2456}
2457
2458/*----------------------------------------------------------------------------
2459| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2460| and significand `zSig', and returns the proper double-precision floating-
2461| point value corresponding to the abstract input. Ordinarily, the abstract
2462| value is simply rounded and packed into the double-precision format, with
2463| the inexact exception raised if the abstract input cannot be represented
2464| exactly. However, if the abstract value is too large, the overflow and
2465| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
2466| returned. If the abstract value is too small, the input value is rounded to
2467| a subnormal number, and the underflow and inexact exceptions are raised if
2468| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
2469| precision floating-point number.
2470| The input significand `zSig' has its binary point between bits 62
2471| and 61, which is 10 bits to the left of the usual location. This shifted
2472| significand must be normalized or smaller. If `zSig' is not normalized,
2473| `zExp' must be 0; in that case, the result returned is a subnormal number,
2474| and it must not require rounding. In the usual case that `zSig' is
2475| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2476| The handling of underflow and overflow follows the IEC/IEEE Standard for
2477| Binary Floating-Point Arithmetic.
2478*----------------------------------------------------------------------------*/
2479
0c48262d 2480static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2481 float_status *status)
158142c2 2482{
8f506c70 2483 int8_t roundingMode;
158142c2 2484 flag roundNearestEven;
0c48262d 2485 int roundIncrement, roundBits;
158142c2
FB
2486 flag isTiny;
2487
a2f2d288 2488 roundingMode = status->float_rounding_mode;
158142c2 2489 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2490 switch (roundingMode) {
2491 case float_round_nearest_even:
f9288a76 2492 case float_round_ties_away:
dc355b76
PM
2493 roundIncrement = 0x200;
2494 break;
2495 case float_round_to_zero:
2496 roundIncrement = 0;
2497 break;
2498 case float_round_up:
2499 roundIncrement = zSign ? 0 : 0x3ff;
2500 break;
2501 case float_round_down:
2502 roundIncrement = zSign ? 0x3ff : 0;
2503 break;
9ee6f678
BR
2504 case float_round_to_odd:
2505 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2506 break;
dc355b76
PM
2507 default:
2508 abort();
158142c2
FB
2509 }
2510 roundBits = zSig & 0x3FF;
bb98fe42 2511 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
2512 if ( ( 0x7FD < zExp )
2513 || ( ( zExp == 0x7FD )
bb98fe42 2514 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2515 ) {
9ee6f678
BR
2516 bool overflow_to_inf = roundingMode != float_round_to_odd &&
2517 roundIncrement != 0;
ff32e16e 2518 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 2519 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
2520 }
2521 if ( zExp < 0 ) {
a2f2d288 2522 if (status->flush_to_zero) {
ff32e16e 2523 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2524 return packFloat64(zSign, 0, 0);
2525 }
158142c2 2526 isTiny =
a2f2d288
PM
2527 (status->float_detect_tininess
2528 == float_tininess_before_rounding)
158142c2
FB
2529 || ( zExp < -1 )
2530 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
2531 shift64RightJamming( zSig, - zExp, &zSig );
2532 zExp = 0;
2533 roundBits = zSig & 0x3FF;
ff32e16e
PM
2534 if (isTiny && roundBits) {
2535 float_raise(float_flag_underflow, status);
2536 }
9ee6f678
BR
2537 if (roundingMode == float_round_to_odd) {
2538 /*
2539 * For round-to-odd case, the roundIncrement depends on
2540 * zSig which just changed.
2541 */
2542 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2543 }
158142c2
FB
2544 }
2545 }
a2f2d288
PM
2546 if (roundBits) {
2547 status->float_exception_flags |= float_flag_inexact;
2548 }
158142c2
FB
2549 zSig = ( zSig + roundIncrement )>>10;
2550 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
2551 if ( zSig == 0 ) zExp = 0;
2552 return packFloat64( zSign, zExp, zSig );
2553
2554}
2555
2556/*----------------------------------------------------------------------------
2557| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2558| and significand `zSig', and returns the proper double-precision floating-
2559| point value corresponding to the abstract input. This routine is just like
2560| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
2561| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2562| floating-point exponent.
2563*----------------------------------------------------------------------------*/
2564
2565static float64
0c48262d 2566 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2567 float_status *status)
158142c2 2568{
8f506c70 2569 int8_t shiftCount;
158142c2
FB
2570
2571 shiftCount = countLeadingZeros64( zSig ) - 1;
ff32e16e
PM
2572 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
2573 status);
158142c2
FB
2574
2575}
2576
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision significand `aSig'.
| The significand is shifted left by its leading zero count; the shifted
| significand is written to `*zSigPtr' and the matching exponent, one minus
| the shift distance, to `*zExpPtr'.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shift = countLeadingZeros64(aSig);

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
2593
2594/*----------------------------------------------------------------------------
2595| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2596| and extended significand formed by the concatenation of `zSig0' and `zSig1',
2597| and returns the proper extended double-precision floating-point value
2598| corresponding to the abstract input. Ordinarily, the abstract value is
2599| rounded and packed into the extended double-precision format, with the
2600| inexact exception raised if the abstract input cannot be represented
2601| exactly. However, if the abstract value is too large, the overflow and
2602| inexact exceptions are raised and an infinity or maximal finite value is
2603| returned. If the abstract value is too small, the input value is rounded to
2604| a subnormal number, and the underflow and inexact exceptions are raised if
2605| the abstract input cannot be represented exactly as a subnormal extended
2606| double-precision floating-point number.
2607| If `roundingPrecision' is 32 or 64, the result is rounded to the same
2608| number of bits as single or double precision, respectively. Otherwise, the
2609| result is rounded to the full precision of the extended double-precision
2610| format.
2611| The input significand must be normalized or smaller. If the input
2612| significand is not normalized, `zExp' must be 0; in that case, the result
2613| returned is a subnormal number, and it must not require rounding. The
2614| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
2615| Floating-Point Arithmetic.
2616*----------------------------------------------------------------------------*/
2617
88857aca
LV
2618floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
2619 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
2620 float_status *status)
158142c2 2621{
8f506c70 2622 int8_t roundingMode;
158142c2 2623 flag roundNearestEven, increment, isTiny;
f42c2224 2624 int64_t roundIncrement, roundMask, roundBits;
158142c2 2625
a2f2d288 2626 roundingMode = status->float_rounding_mode;
158142c2
FB
2627 roundNearestEven = ( roundingMode == float_round_nearest_even );
2628 if ( roundingPrecision == 80 ) goto precision80;
2629 if ( roundingPrecision == 64 ) {
2630 roundIncrement = LIT64( 0x0000000000000400 );
2631 roundMask = LIT64( 0x00000000000007FF );
2632 }
2633 else if ( roundingPrecision == 32 ) {
2634 roundIncrement = LIT64( 0x0000008000000000 );
2635 roundMask = LIT64( 0x000000FFFFFFFFFF );
2636 }
2637 else {
2638 goto precision80;
2639 }
2640 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
2641 switch (roundingMode) {
2642 case float_round_nearest_even:
f9288a76 2643 case float_round_ties_away:
dc355b76
PM
2644 break;
2645 case float_round_to_zero:
2646 roundIncrement = 0;
2647 break;
2648 case float_round_up:
2649 roundIncrement = zSign ? 0 : roundMask;
2650 break;
2651 case float_round_down:
2652 roundIncrement = zSign ? roundMask : 0;
2653 break;
2654 default:
2655 abort();
158142c2
FB
2656 }
2657 roundBits = zSig0 & roundMask;
bb98fe42 2658 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
2659 if ( ( 0x7FFE < zExp )
2660 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
2661 ) {
2662 goto overflow;
2663 }
2664 if ( zExp <= 0 ) {
a2f2d288 2665 if (status->flush_to_zero) {
ff32e16e 2666 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2667 return packFloatx80(zSign, 0, 0);
2668 }
158142c2 2669 isTiny =
a2f2d288
PM
2670 (status->float_detect_tininess
2671 == float_tininess_before_rounding)
158142c2
FB
2672 || ( zExp < 0 )
2673 || ( zSig0 <= zSig0 + roundIncrement );
2674 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
2675 zExp = 0;
2676 roundBits = zSig0 & roundMask;
ff32e16e
PM
2677 if (isTiny && roundBits) {
2678 float_raise(float_flag_underflow, status);
2679 }
a2f2d288
PM
2680 if (roundBits) {
2681 status->float_exception_flags |= float_flag_inexact;
2682 }
158142c2 2683 zSig0 += roundIncrement;
bb98fe42 2684 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
2685 roundIncrement = roundMask + 1;
2686 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
2687 roundMask |= roundIncrement;
2688 }
2689 zSig0 &= ~ roundMask;
2690 return packFloatx80( zSign, zExp, zSig0 );
2691 }
2692 }
a2f2d288
PM
2693 if (roundBits) {
2694 status->float_exception_flags |= float_flag_inexact;
2695 }
158142c2
FB
2696 zSig0 += roundIncrement;
2697 if ( zSig0 < roundIncrement ) {
2698 ++zExp;
2699 zSig0 = LIT64( 0x8000000000000000 );
2700 }
2701 roundIncrement = roundMask + 1;
2702 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
2703 roundMask |= roundIncrement;
2704 }
2705 zSig0 &= ~ roundMask;
2706 if ( zSig0 == 0 ) zExp = 0;
2707 return packFloatx80( zSign, zExp, zSig0 );
2708 precision80:
dc355b76
PM
2709 switch (roundingMode) {
2710 case float_round_nearest_even:
f9288a76 2711 case float_round_ties_away:
dc355b76
PM
2712 increment = ((int64_t)zSig1 < 0);
2713 break;
2714 case float_round_to_zero:
2715 increment = 0;
2716 break;
2717 case float_round_up:
2718 increment = !zSign && zSig1;
2719 break;
2720 case float_round_down:
2721 increment = zSign && zSig1;
2722 break;
2723 default:
2724 abort();
158142c2 2725 }
bb98fe42 2726 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
2727 if ( ( 0x7FFE < zExp )
2728 || ( ( zExp == 0x7FFE )
2729 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
2730 && increment
2731 )
2732 ) {
2733 roundMask = 0;
2734 overflow:
ff32e16e 2735 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
2736 if ( ( roundingMode == float_round_to_zero )
2737 || ( zSign && ( roundingMode == float_round_up ) )
2738 || ( ! zSign && ( roundingMode == float_round_down ) )
2739 ) {
2740 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
2741 }
0f605c88
LV
2742 return packFloatx80(zSign,
2743 floatx80_infinity_high,
2744 floatx80_infinity_low);
158142c2
FB
2745 }
2746 if ( zExp <= 0 ) {
2747 isTiny =
a2f2d288
PM
2748 (status->float_detect_tininess
2749 == float_tininess_before_rounding)
158142c2
FB
2750 || ( zExp < 0 )
2751 || ! increment
2752 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
2753 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
2754 zExp = 0;
ff32e16e
PM
2755 if (isTiny && zSig1) {
2756 float_raise(float_flag_underflow, status);
2757 }
a2f2d288
PM
2758 if (zSig1) {
2759 status->float_exception_flags |= float_flag_inexact;
2760 }
dc355b76
PM
2761 switch (roundingMode) {
2762 case float_round_nearest_even:
f9288a76 2763 case float_round_ties_away:
dc355b76
PM
2764 increment = ((int64_t)zSig1 < 0);
2765 break;
2766 case float_round_to_zero:
2767 increment = 0;
2768 break;
2769 case float_round_up:
2770 increment = !zSign && zSig1;
2771 break;
2772 case float_round_down:
2773 increment = zSign && zSig1;
2774 break;
2775 default:
2776 abort();
158142c2
FB
2777 }
2778 if ( increment ) {
2779 ++zSig0;
2780 zSig0 &=
bb98fe42
AF
2781 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
2782 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
2783 }
2784 return packFloatx80( zSign, zExp, zSig0 );
2785 }
2786 }
a2f2d288
PM
2787 if (zSig1) {
2788 status->float_exception_flags |= float_flag_inexact;
2789 }
158142c2
FB
2790 if ( increment ) {
2791 ++zSig0;
2792 if ( zSig0 == 0 ) {
2793 ++zExp;
2794 zSig0 = LIT64( 0x8000000000000000 );
2795 }
2796 else {
bb98fe42 2797 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
2798 }
2799 }
2800 else {
2801 if ( zSig0 == 0 ) zExp = 0;
2802 }
2803 return packFloatx80( zSign, zExp, zSig0 );
2804
2805}
2806
2807/*----------------------------------------------------------------------------
2808| Takes an abstract floating-point value having sign `zSign', exponent
2809| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
2810| and returns the proper extended double-precision floating-point value
2811| corresponding to the abstract input. This routine is just like
2812| `roundAndPackFloatx80' except that the input significand does not have to be
2813| normalized.
2814*----------------------------------------------------------------------------*/
2815
88857aca
LV
2816floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
2817 flag zSign, int32_t zExp,
2818 uint64_t zSig0, uint64_t zSig1,
2819 float_status *status)
158142c2 2820{
8f506c70 2821 int8_t shiftCount;
158142c2
FB
2822
2823 if ( zSig0 == 0 ) {
2824 zSig0 = zSig1;
2825 zSig1 = 0;
2826 zExp -= 64;
2827 }
2828 shiftCount = countLeadingZeros64( zSig0 );
2829 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
2830 zExp -= shiftCount;
ff32e16e
PM
2831 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
2832 zSig0, zSig1, status);
158142c2
FB
2833
2834}
2835
158142c2
FB
2836/*----------------------------------------------------------------------------
2837| Returns the least-significant 64 fraction bits of the quadruple-precision
2838| floating-point value `a'.
2839*----------------------------------------------------------------------------*/
2840
a49db98d 2841static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
2842{
2843
2844 return a.low;
2845
2846}
2847
2848/*----------------------------------------------------------------------------
2849| Returns the most-significant 48 fraction bits of the quadruple-precision
2850| floating-point value `a'.
2851*----------------------------------------------------------------------------*/
2852
a49db98d 2853static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
2854{
2855
2856 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
2857
2858}
2859
2860/*----------------------------------------------------------------------------
2861| Returns the exponent bits of the quadruple-precision floating-point value
2862| `a'.
2863*----------------------------------------------------------------------------*/
2864
f4014512 2865static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
2866{
2867
2868 return ( a.high>>48 ) & 0x7FFF;
2869
2870}
2871
2872/*----------------------------------------------------------------------------
2873| Returns the sign bit of the quadruple-precision floating-point value `a'.
2874*----------------------------------------------------------------------------*/
2875
a49db98d 2876static inline flag extractFloat128Sign( float128 a )
158142c2
FB
2877{
2878
2879 return a.high>>63;
2880
2881}
2882
2883/*----------------------------------------------------------------------------
2884| Normalizes the subnormal quadruple-precision floating-point value
2885| represented by the denormalized significand formed by the concatenation of
2886| `aSig0' and `aSig1'. The normalized exponent is stored at the location
2887| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
2888| significand are stored at the location pointed to by `zSig0Ptr', and the
2889| least significant 64 bits of the normalized significand are stored at the
2890| location pointed to by `zSig1Ptr'.
2891*----------------------------------------------------------------------------*/
2892
static void
normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                           int32_t *zExpPtr,
                           uint64_t *zSig0Ptr, uint64_t *zSig1Ptr)
{
    int8_t shift;

    if (aSig0 != 0) {
        /* The leading one sits in the high word. */
        shift = countLeadingZeros64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /* High word is empty: the result is built from aSig1 alone. */
    shift = countLeadingZeros64(aSig1) - 15;
    if (shift >= 0) {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        /* Fewer than 15 leading zeros: split aSig1 across both words. */
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
2923
2924/*----------------------------------------------------------------------------
2925| Packs the sign `zSign', the exponent `zExp', and the significand formed
2926| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
2927| floating-point value, returning the result. After being shifted into the
2928| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
2929| added together to form the most significant 64 bits of the result. This
2930| means that any integer portion of `zSig0' will be added into the exponent.
2931| Since a properly normalized significand will have an integer portion equal
2932| to 1, the `zExp' input should be 1 less than the desired result exponent
2933| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
2934| significand.
2935*----------------------------------------------------------------------------*/
2936
a49db98d 2937static inline float128
f4014512 2938 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
158142c2
FB
2939{
2940 float128 z;
2941
2942 z.low = zSig1;
bb98fe42 2943 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
158142c2
FB
2944 return z;
2945
2946}
2947
2948/*----------------------------------------------------------------------------
2949| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2950| and extended significand formed by the concatenation of `zSig0', `zSig1',
2951| and `zSig2', and returns the proper quadruple-precision floating-point value
2952| corresponding to the abstract input. Ordinarily, the abstract value is
2953| simply rounded and packed into the quadruple-precision format, with the
2954| inexact exception raised if the abstract input cannot be represented
2955| exactly. However, if the abstract value is too large, the overflow and
2956| inexact exceptions are raised and an infinity or maximal finite value is
2957| returned. If the abstract value is too small, the input value is rounded to
2958| a subnormal number, and the underflow and inexact exceptions are raised if
2959| the abstract input cannot be represented exactly as a subnormal quadruple-
2960| precision floating-point number.
2961| The input significand must be normalized or smaller. If the input
2962| significand is not normalized, `zExp' must be 0; in that case, the result
2963| returned is a subnormal number, and it must not require rounding. In the
2964| usual case that the input significand is normalized, `zExp' must be 1 less
2965| than the ``true'' floating-point exponent. The handling of underflow and
2966| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2967*----------------------------------------------------------------------------*/
2968
f4014512 2969static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
2970 uint64_t zSig0, uint64_t zSig1,
2971 uint64_t zSig2, float_status *status)
158142c2 2972{
8f506c70 2973 int8_t roundingMode;
158142c2
FB
2974 flag roundNearestEven, increment, isTiny;
2975
a2f2d288 2976 roundingMode = status->float_rounding_mode;
158142c2 2977 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2978 switch (roundingMode) {
2979 case float_round_nearest_even:
f9288a76 2980 case float_round_ties_away:
dc355b76
PM
2981 increment = ((int64_t)zSig2 < 0);
2982 break;
2983 case float_round_to_zero:
2984 increment = 0;
2985 break;
2986 case float_round_up:
2987 increment = !zSign && zSig2;
2988 break;
2989 case float_round_down:
2990 increment = zSign && zSig2;
2991 break;
9ee6f678
BR
2992 case float_round_to_odd:
2993 increment = !(zSig1 & 0x1) && zSig2;
2994 break;
dc355b76
PM
2995 default:
2996 abort();
158142c2 2997 }
bb98fe42 2998 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
2999 if ( ( 0x7FFD < zExp )
3000 || ( ( zExp == 0x7FFD )
3001 && eq128(
3002 LIT64( 0x0001FFFFFFFFFFFF ),
3003 LIT64( 0xFFFFFFFFFFFFFFFF ),
3004 zSig0,
3005 zSig1
3006 )
3007 && increment
3008 )
3009 ) {
ff32e16e 3010 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
3011 if ( ( roundingMode == float_round_to_zero )
3012 || ( zSign && ( roundingMode == float_round_up ) )
3013 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 3014 || (roundingMode == float_round_to_odd)
158142c2
FB
3015 ) {
3016 return
3017 packFloat128(
3018 zSign,
3019 0x7FFE,
3020 LIT64( 0x0000FFFFFFFFFFFF ),
3021 LIT64( 0xFFFFFFFFFFFFFFFF )
3022 );
3023 }
3024 return packFloat128( zSign, 0x7FFF, 0, 0 );
3025 }
3026 if ( zExp < 0 ) {
a2f2d288 3027 if (status->flush_to_zero) {
ff32e16e 3028 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3029 return packFloat128(zSign, 0, 0, 0);
3030 }
158142c2 3031 isTiny =
a2f2d288
PM
3032 (status->float_detect_tininess
3033 == float_tininess_before_rounding)
158142c2
FB
3034 || ( zExp < -1 )
3035 || ! increment
3036 || lt128(
3037 zSig0,
3038 zSig1,
3039 LIT64( 0x0001FFFFFFFFFFFF ),
3040 LIT64( 0xFFFFFFFFFFFFFFFF )
3041 );
3042 shift128ExtraRightJamming(
3043 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
3044 zExp = 0;
ff32e16e
PM
3045 if (isTiny && zSig2) {
3046 float_raise(float_flag_underflow, status);
3047 }
dc355b76
PM
3048 switch (roundingMode) {
3049 case float_round_nearest_even:
f9288a76 3050 case float_round_ties_away:
dc355b76
PM
3051 increment = ((int64_t)zSig2 < 0);
3052 break;
3053 case float_round_to_zero:
3054 increment = 0;
3055 break;
3056 case float_round_up:
3057 increment = !zSign && zSig2;
3058 break;
3059 case float_round_down:
3060 increment = zSign && zSig2;
3061 break;
9ee6f678
BR
3062 case float_round_to_odd:
3063 increment = !(zSig1 & 0x1) && zSig2;
3064 break;
dc355b76
PM
3065 default:
3066 abort();
158142c2
FB
3067 }
3068 }
3069 }
a2f2d288
PM
3070 if (zSig2) {
3071 status->float_exception_flags |= float_flag_inexact;
3072 }
158142c2
FB
3073 if ( increment ) {
3074 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
3075 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
3076 }
3077 else {
3078 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
3079 }
3080 return packFloat128( zSign, zExp, zSig0, zSig1 );
3081
3082}
3083
3084/*----------------------------------------------------------------------------
3085| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3086| and significand formed by the concatenation of `zSig0' and `zSig1', and
3087| returns the proper quadruple-precision floating-point value corresponding
3088| to the abstract input. This routine is just like `roundAndPackFloat128'
3089| except that the input significand has fewer bits and does not have to be
3090| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
3091| point exponent.
3092*----------------------------------------------------------------------------*/
3093
f4014512 3094static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
3095 uint64_t zSig0, uint64_t zSig1,
3096 float_status *status)
158142c2 3097{
8f506c70 3098 int8_t shiftCount;
bb98fe42 3099 uint64_t zSig2;
158142c2
FB
3100
3101 if ( zSig0 == 0 ) {
3102 zSig0 = zSig1;
3103 zSig1 = 0;
3104 zExp -= 64;
3105 }
3106 shiftCount = countLeadingZeros64( zSig0 ) - 15;
3107 if ( 0 <= shiftCount ) {
3108 zSig2 = 0;
3109 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3110 }
3111 else {
3112 shift128ExtraRightJamming(
3113 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
3114 }
3115 zExp -= shiftCount;
ff32e16e 3116 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
3117
3118}
3119
158142c2 3120
158142c2
FB
3121/*----------------------------------------------------------------------------
3122| Returns the result of converting the 32-bit two's complement integer `a'
3123| to the extended double-precision floating-point format. The conversion
3124| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3125| Arithmetic.
3126*----------------------------------------------------------------------------*/
3127
e5a41ffa 3128floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2
FB
3129{
3130 flag zSign;
3a87d009 3131 uint32_t absA;
8f506c70 3132 int8_t shiftCount;
bb98fe42 3133 uint64_t zSig;
158142c2
FB
3134
3135 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3136 zSign = ( a < 0 );
3137 absA = zSign ? - a : a;
3138 shiftCount = countLeadingZeros32( absA ) + 32;
3139 zSig = absA;
3140 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
3141
3142}
3143
158142c2
FB
3144/*----------------------------------------------------------------------------
3145| Returns the result of converting the 32-bit two's complement integer `a' to
3146| the quadruple-precision floating-point format. The conversion is performed
3147| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3148*----------------------------------------------------------------------------*/
3149
e5a41ffa 3150float128 int32_to_float128(int32_t a, float_status *status)
158142c2
FB
3151{
3152 flag zSign;
3a87d009 3153 uint32_t absA;
8f506c70 3154 int8_t shiftCount;
bb98fe42 3155 uint64_t zSig0;
158142c2
FB
3156
3157 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3158 zSign = ( a < 0 );
3159 absA = zSign ? - a : a;
3160 shiftCount = countLeadingZeros32( absA ) + 17;
3161 zSig0 = absA;
3162 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
3163
3164}
3165
158142c2
FB
3166/*----------------------------------------------------------------------------
3167| Returns the result of converting the 64-bit two's complement integer `a'
3168| to the extended double-precision floating-point format. The conversion
3169| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3170| Arithmetic.
3171*----------------------------------------------------------------------------*/
3172
e5a41ffa 3173floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2
FB
3174{
3175 flag zSign;
182f42fd 3176 uint64_t absA;
8f506c70 3177 int8_t shiftCount;
158142c2
FB
3178
3179 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3180 zSign = ( a < 0 );
3181 absA = zSign ? - a : a;
3182 shiftCount = countLeadingZeros64( absA );
3183 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
3184
3185}
3186
158142c2
FB
3187/*----------------------------------------------------------------------------
3188| Returns the result of converting the 64-bit two's complement integer `a' to
3189| the quadruple-precision floating-point format. The conversion is performed
3190| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3191*----------------------------------------------------------------------------*/
3192
e5a41ffa 3193float128 int64_to_float128(int64_t a, float_status *status)
158142c2
FB
3194{
3195 flag zSign;
182f42fd 3196 uint64_t absA;
8f506c70 3197 int8_t shiftCount;
f4014512 3198 int32_t zExp;
bb98fe42 3199 uint64_t zSig0, zSig1;
158142c2
FB
3200
3201 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3202 zSign = ( a < 0 );
3203 absA = zSign ? - a : a;
3204 shiftCount = countLeadingZeros64( absA ) + 49;
3205 zExp = 0x406E - shiftCount;
3206 if ( 64 <= shiftCount ) {
3207 zSig1 = 0;
3208 zSig0 = absA;
3209 shiftCount -= 64;
3210 }
3211 else {
3212 zSig1 = absA;
3213 zSig0 = 0;
3214 }
3215 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3216 return packFloat128( zSign, zExp, zSig0, zSig1 );
3217
3218}
3219
6bb8e0f1
PM
3220/*----------------------------------------------------------------------------
3221| Returns the result of converting the 64-bit unsigned integer `a'
3222| to the quadruple-precision floating-point format. The conversion is performed
3223| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3224*----------------------------------------------------------------------------*/
3225
e5a41ffa 3226float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
3227{
3228 if (a == 0) {
3229 return float128_zero;
3230 }
6603d506 3231 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
3232}
3233
158142c2
FB
3234/*----------------------------------------------------------------------------
3235| Returns the result of converting the single-precision floating-point value
3236| `a' to the extended double-precision floating-point format. The conversion
3237| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3238| Arithmetic.
3239*----------------------------------------------------------------------------*/
3240
e5a41ffa 3241floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2
FB
3242{
3243 flag aSign;
0c48262d 3244 int aExp;
bb98fe42 3245 uint32_t aSig;
158142c2 3246
ff32e16e 3247 a = float32_squash_input_denormal(a, status);
158142c2
FB
3248 aSig = extractFloat32Frac( a );
3249 aExp = extractFloat32Exp( a );
3250 aSign = extractFloat32Sign( a );
3251 if ( aExp == 0xFF ) {
ff32e16e
PM
3252 if (aSig) {
3253 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
3254 }
0f605c88
LV
3255 return packFloatx80(aSign,
3256 floatx80_infinity_high,
3257 floatx80_infinity_low);
158142c2
FB
3258 }
3259 if ( aExp == 0 ) {
3260 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3261 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3262 }
3263 aSig |= 0x00800000;
bb98fe42 3264 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
3265
3266}
3267
158142c2
FB
3268/*----------------------------------------------------------------------------
3269| Returns the result of converting the single-precision floating-point value
3270| `a' to the quadruple-precision floating-point format. The conversion is
3271| performed according to the IEC/IEEE Standard for Binary Floating-Point
3272| Arithmetic.
3273*----------------------------------------------------------------------------*/
3274
e5a41ffa 3275float128 float32_to_float128(float32 a, float_status *status)
158142c2
FB
3276{
3277 flag aSign;
0c48262d 3278 int aExp;
bb98fe42 3279 uint32_t aSig;
158142c2 3280
ff32e16e 3281 a = float32_squash_input_denormal(a, status);
158142c2
FB
3282 aSig = extractFloat32Frac( a );
3283 aExp = extractFloat32Exp( a );
3284 aSign = extractFloat32Sign( a );
3285 if ( aExp == 0xFF ) {
ff32e16e
PM
3286 if (aSig) {
3287 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
3288 }
158142c2
FB
3289 return packFloat128( aSign, 0x7FFF, 0, 0 );
3290 }
3291 if ( aExp == 0 ) {
3292 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3293 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3294 --aExp;
3295 }
bb98fe42 3296 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
3297
3298}
3299
158142c2
FB
3300/*----------------------------------------------------------------------------
3301| Returns the remainder of the single-precision floating-point value `a'
3302| with respect to the corresponding value `b'. The operation is performed
3303| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3304*----------------------------------------------------------------------------*/
3305
e5a41ffa 3306float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 3307{
ed086f3d 3308 flag aSign, zSign;
0c48262d 3309 int aExp, bExp, expDiff;
bb98fe42
AF
3310 uint32_t aSig, bSig;
3311 uint32_t q;
3312 uint64_t aSig64, bSig64, q64;
3313 uint32_t alternateASig;
3314 int32_t sigMean;
ff32e16e
PM
3315 a = float32_squash_input_denormal(a, status);
3316 b = float32_squash_input_denormal(b, status);
158142c2
FB
3317
3318 aSig = extractFloat32Frac( a );
3319 aExp = extractFloat32Exp( a );
3320 aSign = extractFloat32Sign( a );
3321 bSig = extractFloat32Frac( b );
3322 bExp = extractFloat32Exp( b );
158142c2
FB
3323 if ( aExp == 0xFF ) {
3324 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 3325 return propagateFloat32NaN(a, b, status);
158142c2 3326 }
ff32e16e 3327 float_raise(float_flag_invalid, status);
af39bc8c 3328 return float32_default_nan(status);
158142c2
FB
3329 }
3330 if ( bExp == 0xFF ) {
ff32e16e
PM
3331 if (bSig) {
3332 return propagateFloat32NaN(a, b, status);
3333 }
158142c2
FB
3334 return a;
3335 }
3336 if ( bExp == 0 ) {
3337 if ( bSig == 0 ) {
ff32e16e 3338 float_raise(float_flag_invalid, status);
af39bc8c 3339 return float32_default_nan(status);
158142c2
FB
3340 }
3341 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
3342 }
3343 if ( aExp == 0 ) {
3344 if ( aSig == 0 ) return a;
3345 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3346 }
3347 expDiff = aExp - bExp;
3348 aSig |= 0x00800000;
3349 bSig |= 0x00800000;
3350 if ( expDiff < 32 ) {
3351 aSig <<= 8;
3352 bSig <<= 8;
3353 if ( expDiff < 0 ) {
3354 if ( expDiff < -1 ) return a;
3355 aSig >>= 1;
3356 }
3357 q = ( bSig <= aSig );
3358 if ( q ) aSig -= bSig;
3359 if ( 0 < expDiff ) {
bb98fe42 3360 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
3361 q >>= 32 - expDiff;
3362 bSig >>= 2;
3363 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3364 }
3365 else {
3366 aSig >>= 2;
3367 bSig >>= 2;
3368 }
3369 }
3370 else {
3371 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
3372 aSig64 = ( (uint64_t) aSig )<<40;
3373 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
3374 expDiff -= 64;
3375 while ( 0 < expDiff ) {
3376 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3377 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3378 aSig64 = - ( ( bSig * q64 )<<38 );
3379 expDiff -= 62;
3380 }
3381 expDiff += 64;
3382 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3383 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3384 q = q64>>( 64 - expDiff );
3385 bSig <<= 6;
3386 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
3387 }
3388 do {
3389 alternateASig = aSig;
3390 ++q;
3391 aSig -= bSig;
bb98fe42 3392 } while ( 0 <= (int32_t) aSig );
158142c2
FB
3393 sigMean = aSig + alternateASig;
3394 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3395 aSig = alternateASig;
3396 }
bb98fe42 3397 zSign = ( (int32_t) aSig < 0 );
158142c2 3398 if ( zSign ) aSig = - aSig;
ff32e16e 3399 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
3400}
3401
369be8f6 3402
158142c2 3403
8229c991
AJ
3404/*----------------------------------------------------------------------------
3405| Returns the binary exponential of the single-precision floating-point value
3406| `a'. The operation is performed according to the IEC/IEEE Standard for
3407| Binary Floating-Point Arithmetic.
3408|
3409| Uses the following identities:
3410|
3411| 1. -------------------------------------------------------------------------
3412| 2^x = e^(x*ln(2))
3413|
3414|
3415| 2. -------------------------------------------------------------------------
3416| e^x = 1 + x/1! + x^2/2! + x^3/3! + x^4/4! + x^5/5! + ... + x^n/n! + ...
3420*----------------------------------------------------------------------------*/
3421
3422static const float64 float32_exp2_coefficients[15] =
3423{
d5138cf4
PM
3424 const_float64( 0x3ff0000000000000ll ), /* 1 */
3425 const_float64( 0x3fe0000000000000ll ), /* 2 */
3426 const_float64( 0x3fc5555555555555ll ), /* 3 */
3427 const_float64( 0x3fa5555555555555ll ), /* 4 */
3428 const_float64( 0x3f81111111111111ll ), /* 5 */
3429 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
3430 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
3431 const_float64( 0x3efa01a01a01a01all ), /* 8 */
3432 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
3433 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
3434 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
3435 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
3436 const_float64( 0x3de6124613a86d09ll ), /* 13 */
3437 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
3438 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
3439};
3440
e5a41ffa 3441float32 float32_exp2(float32 a, float_status *status)
8229c991
AJ
3442{
3443 flag aSign;
0c48262d 3444 int aExp;
bb98fe42 3445 uint32_t aSig;
8229c991
AJ
3446 float64 r, x, xn;
3447 int i;
ff32e16e 3448 a = float32_squash_input_denormal(a, status);
8229c991
AJ
3449
3450 aSig = extractFloat32Frac( a );
3451 aExp = extractFloat32Exp( a );
3452 aSign = extractFloat32Sign( a );
3453
3454 if ( aExp == 0xFF) {
ff32e16e
PM
3455 if (aSig) {
3456 return propagateFloat32NaN(a, float32_zero, status);
3457 }
8229c991
AJ
3458 return (aSign) ? float32_zero : a;
3459 }
3460 if (aExp == 0) {
3461 if (aSig == 0) return float32_one;
3462 }
3463
ff32e16e 3464 float_raise(float_flag_inexact, status);
8229c991
AJ
3465
3466 /* ******************************* */
3467 /* using float64 for approximation */
3468 /* ******************************* */
ff32e16e
PM
3469 x = float32_to_float64(a, status);
3470 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
3471
3472 xn = x;
3473 r = float64_one;
3474 for (i = 0 ; i < 15 ; i++) {
3475 float64 f;
3476
ff32e16e
PM
3477 f = float64_mul(xn, float32_exp2_coefficients[i], status);
3478 r = float64_add(r, f, status);
8229c991 3479
ff32e16e 3480 xn = float64_mul(xn, x, status);
8229c991
AJ
3481 }
3482
3483 return float64_to_float32(r, status);
3484}
3485
374dfc33
AJ
3486/*----------------------------------------------------------------------------
3487| Returns the binary log of the single-precision floating-point value `a'.
3488| The operation is performed according to the IEC/IEEE Standard for Binary
3489| Floating-Point Arithmetic.
3490*----------------------------------------------------------------------------*/
e5a41ffa 3491float32 float32_log2(float32 a, float_status *status)
374dfc33
AJ
3492{
3493 flag aSign, zSign;
0c48262d 3494 int aExp;
bb98fe42 3495 uint32_t aSig, zSig, i;
374dfc33 3496
ff32e16e 3497 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
3498 aSig = extractFloat32Frac( a );
3499 aExp = extractFloat32Exp( a );
3500 aSign = extractFloat32Sign( a );
3501
3502 if ( aExp == 0 ) {
3503 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
3504 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3505 }
3506 if ( aSign ) {
ff32e16e 3507 float_raise(float_flag_invalid, status);
af39bc8c 3508 return float32_default_nan(status);
374dfc33
AJ
3509 }
3510 if ( aExp == 0xFF ) {
ff32e16e
PM
3511 if (aSig) {
3512 return propagateFloat32NaN(a, float32_zero, status);
3513 }
374dfc33
AJ
3514 return a;
3515 }
3516
3517 aExp -= 0x7F;
3518 aSig |= 0x00800000;
3519 zSign = aExp < 0;
3520 zSig = aExp << 23;
3521
3522 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 3523 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
3524 if ( aSig & 0x01000000 ) {
3525 aSig >>= 1;
3526 zSig |= i;
3527 }
3528 }
3529
3530 if ( zSign )
3531 zSig = -zSig;
3532
ff32e16e 3533 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
3534}
3535
158142c2
FB
3536/*----------------------------------------------------------------------------
3537| Returns 1 if the single-precision floating-point value `a' is equal to
b689362d
AJ
3538| the corresponding value `b', and 0 otherwise. The invalid exception is
3539| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
3540| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3541*----------------------------------------------------------------------------*/
3542
e5a41ffa 3543int float32_eq(float32 a, float32 b, float_status *status)
158142c2 3544{
b689362d 3545 uint32_t av, bv;
ff32e16e
PM
3546 a = float32_squash_input_denormal(a, status);
3547 b = float32_squash_input_denormal(b, status);
158142c2
FB
3548
3549 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3550 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3551 ) {
ff32e16e 3552 float_raise(float_flag_invalid, status);
158142c2
FB
3553 return 0;
3554 }
b689362d
AJ
3555 av = float32_val(a);
3556 bv = float32_val(b);
3557 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
3558}
3559
3560/*----------------------------------------------------------------------------
3561| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
3562| or equal to the corresponding value `b', and 0 otherwise. The invalid
3563| exception is raised if either operand is a NaN. The comparison is performed
3564| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3565*----------------------------------------------------------------------------*/
3566
e5a41ffa 3567int float32_le(float32 a, float32 b, float_status *status)
158142c2
FB
3568{
3569 flag aSign, bSign;
bb98fe42 3570 uint32_t av, bv;
ff32e16e
PM
3571 a = float32_squash_input_denormal(a, status);
3572 b = float32_squash_input_denormal(b, status);
158142c2
FB
3573
3574 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3575 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3576 ) {
ff32e16e 3577 float_raise(float_flag_invalid, status);
158142c2
FB
3578 return 0;
3579 }
3580 aSign = extractFloat32Sign( a );
3581 bSign = extractFloat32Sign( b );
f090c9d4
PB
3582 av = float32_val(a);
3583 bv = float32_val(b);
bb98fe42 3584 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 3585 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
3586
3587}
3588
3589/*----------------------------------------------------------------------------
3590| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
3591| the corresponding value `b', and 0 otherwise. The invalid exception is
3592| raised if either operand is a NaN. The comparison is performed according
3593| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3594*----------------------------------------------------------------------------*/
3595
e5a41ffa 3596int float32_lt(float32 a, float32 b, float_status *status)
158142c2
FB
3597{
3598 flag aSign, bSign;
bb98fe42 3599 uint32_t av, bv;
ff32e16e
PM
3600 a = float32_squash_input_denormal(a, status);
3601 b = float32_squash_input_denormal(b, status);
158142c2
FB
3602
3603 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3604 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3605 ) {
ff32e16e 3606 float_raise(float_flag_invalid, status);
158142c2
FB
3607 return 0;
3608 }
3609 aSign = extractFloat32Sign( a );
3610 bSign = extractFloat32Sign( b );
f090c9d4
PB
3611 av = float32_val(a);
3612 bv = float32_val(b);
bb98fe42 3613 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 3614 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
3615
3616}
3617
67b7861d
AJ
3618/*----------------------------------------------------------------------------
3619| Returns 1 if the single-precision floating-point values `a' and `b' cannot
f5a64251
AJ
3620| be compared, and 0 otherwise. The invalid exception is raised if either
3621| operand is a NaN. The comparison is performed according to the IEC/IEEE
3622| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
3623*----------------------------------------------------------------------------*/
3624
e5a41ffa 3625int float32_unordered(float32 a, float32 b, float_status *status)
67b7861d 3626{
ff32e16e
PM
3627 a = float32_squash_input_denormal(a, status);
3628 b = float32_squash_input_denormal(b, status);
67b7861d
AJ
3629
3630 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3631 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3632 ) {
ff32e16e 3633 float_raise(float_flag_invalid, status);
67b7861d
AJ
3634 return 1;
3635 }
3636 return 0;
3637}
b689362d 3638
158142c2
FB
3639/*----------------------------------------------------------------------------
3640| Returns 1 if the single-precision floating-point value `a' is equal to
f5a64251
AJ
3641| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
3642| exception. The comparison is performed according to the IEC/IEEE Standard
3643| for Binary Floating-Point Arithmetic.
158142c2
FB
3644*----------------------------------------------------------------------------*/
3645
e5a41ffa 3646int float32_eq_quiet(float32 a, float32 b, float_status *status)
158142c2 3647{
ff32e16e
PM
3648 a = float32_squash_input_denormal(a, status);
3649 b = float32_squash_input_denormal(b, status);
158142c2
FB
3650
3651 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3652 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3653 ) {
af39bc8c
AM
3654 if (float32_is_signaling_nan(a, status)
3655 || float32_is_signaling_nan(b, status)) {
ff32e16e 3656 float_raise(float_flag_invalid, status);
b689362d 3657 }
158142c2
FB
3658 return 0;
3659 }
b689362d
AJ
3660 return ( float32_val(a) == float32_val(b) ) ||
3661 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
158142c2
FB
3662}
3663
3664/*----------------------------------------------------------------------------
3665| Returns 1 if the single-precision floating-point value `a' is less than or
3666| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
3667| cause an exception. Otherwise, the comparison is performed according to the
3668| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3669*----------------------------------------------------------------------------*/
3670
e5a41ffa 3671int float32_le_quiet(float32 a, float32 b, float_status *status)
158142c2
FB
3672{
3673 flag aSign, bSign;
bb98fe42 3674 uint32_t av, bv;
ff32e16e
PM
3675 a = float32_squash_input_denormal(a, status);
3676 b = float32_squash_input_denormal(b, status);
158142c2
FB
3677
3678 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3679 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3680 ) {
af39bc8c
AM
3681 if (float32_is_signaling_nan(a, status)
3682 || float32_is_signaling_nan(b, status)) {
ff32e16e 3683 float_raise(float_flag_invalid, status);
158142c2
FB
3684 }
3685 return 0;
3686 }
3687 aSign = extractFloat32Sign( a );
3688 bSign = extractFloat32Sign( b );
f090c9d4
PB
3689 av = float32_val(a);
3690 bv = float32_val(b);
bb98fe42 3691 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 3692 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
3693
3694}
3695
3696/*----------------------------------------------------------------------------
3697| Returns 1 if the single-precision floating-point value `a' is less than
3698| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
3699| exception. Otherwise, the comparison is performed according to the IEC/IEEE
ab52f973 3700| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3701*----------------------------------------------------------------------------*/
3702
ab52f973 3703int float32_lt_quiet(float32 a, float32 b, float_status *status)
158142c2 3704{
ab52f973
AB
3705 flag aSign, bSign;
3706 uint32_t av, bv;
3707 a = float32_squash_input_denormal(a, status);
3708 b = float32_squash_input_denormal(b, status);
158142c2 3709
ab52f973
AB
3710 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3711 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3712 ) {
3713 if (float32_is_signaling_nan(a, status)
3714 || float32_is_signaling_nan(b, status)) {
ff32e16e 3715 float_raise(float_flag_invalid, status);
158142c2 3716 }
ab52f973 3717 return 0;
158142c2 3718 }
ab52f973
AB
3719 aSign = extractFloat32Sign( a );
3720 bSign = extractFloat32Sign( b );
3721 av = float32_val(a);
3722 bv = float32_val(b);
3723 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
3724 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
3725
3726}
3727
3728/*----------------------------------------------------------------------------
ab52f973
AB
3729| Returns 1 if the single-precision floating-point values `a' and `b' cannot
3730| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
3731| comparison is performed according to the IEC/IEEE Standard for Binary
3732| Floating-Point Arithmetic.
158142c2
FB
3733*----------------------------------------------------------------------------*/
3734
ab52f973 3735int float32_unordered_quiet(float32 a, float32 b, float_status *status)
158142c2 3736{
ab52f973
AB
3737 a = float32_squash_input_denormal(a, status);
3738 b = float32_squash_input_denormal(b, status);
158142c2 3739
ab52f973
AB
3740 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3741 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3742 ) {
3743 if (float32_is_signaling_nan(a, status)
3744 || float32_is_signaling_nan(b, status)) {
3745 float_raise(float_flag_invalid, status);
158142c2 3746 }
ab52f973 3747 return 1;
158142c2 3748 }
ab52f973 3749 return 0;
158142c2
FB
3750}
3751
210cbd49
AB
3752/*----------------------------------------------------------------------------
3753| If `a' is denormal and we are in flush-to-zero mode then set the
3754| input-denormal exception and return zero. Otherwise just return the value.
3755*----------------------------------------------------------------------------*/
3756float16 float16_squash_input_denormal(float16 a, float_status *status)
3757{
3758 if (status->flush_inputs_to_zero) {
3759 if (extractFloat16Exp(a) == 0 && extractFloat16Frac(a) != 0) {
3760 float_raise(float_flag_input_denormal, status);
3761 return make_float16(float16_val(a) & 0x8000);
3762 }
3763 }
3764 return a;
3765}
3766
158142c2
FB
3767/*----------------------------------------------------------------------------
3768| Returns the result of converting the double-precision floating-point value
3769| `a' to the extended double-precision floating-point format. The conversion
3770| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3771| Arithmetic.
3772*----------------------------------------------------------------------------*/
3773
e5a41ffa 3774floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2
FB
3775{
3776 flag aSign;
0c48262d 3777 int aExp;
bb98fe42 3778 uint64_t aSig;
158142c2 3779
ff32e16e 3780 a = float64_squash_input_denormal(a, status);
158142c2
FB
3781 aSig = extractFloat64Frac( a );
3782 aExp = extractFloat64Exp( a );
3783 aSign = extractFloat64Sign( a );
3784 if ( aExp == 0x7FF ) {
ff32e16e
PM
3785 if (aSig) {
3786 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
3787 }
0f605c88
LV
3788 return packFloatx80(aSign,
3789 floatx80_infinity_high,
3790 floatx80_infinity_low);
158142c2
FB
3791 }
3792 if ( aExp == 0 ) {
3793 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3794 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3795 }
3796 return
3797 packFloatx80(
3798 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
3799
3800}
3801
158142c2
FB
3802/*----------------------------------------------------------------------------
3803| Returns the result of converting the double-precision floating-point value
3804| `a' to the quadruple-precision floating-point format. The conversion is
3805| performed according to the IEC/IEEE Standard for Binary Floating-Point
3806| Arithmetic.
3807*----------------------------------------------------------------------------*/
3808
e5a41ffa 3809float128 float64_to_float128(float64 a, float_status *status)
158142c2
FB
3810{
3811 flag aSign;
0c48262d 3812 int aExp;
bb98fe42 3813 uint64_t aSig, zSig0, zSig1;
158142c2 3814
ff32e16e 3815 a = float64_squash_input_denormal(a, status);
158142c2
FB
3816 aSig = extractFloat64Frac( a );
3817 aExp = extractFloat64Exp( a );
3818 aSign = extractFloat64Sign( a );
3819 if ( aExp == 0x7FF ) {
ff32e16e
PM
3820 if (aSig) {
3821 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
3822 }
158142c2
FB
3823 return packFloat128( aSign, 0x7FFF, 0, 0 );
3824 }
3825 if ( aExp == 0 ) {
3826 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3827 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3828 --aExp;
3829 }
3830 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
3831 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
3832
3833}
3834
158142c2
FB
3835
3836/*----------------------------------------------------------------------------
3837| Returns the remainder of the double-precision floating-point value `a'
3838| with respect to the corresponding value `b'. The operation is performed
3839| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3840*----------------------------------------------------------------------------*/
3841
e5a41ffa 3842float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 3843{
ed086f3d 3844 flag aSign, zSign;
0c48262d 3845 int aExp, bExp, expDiff;
bb98fe42
AF
3846 uint64_t aSig, bSig;
3847 uint64_t q, alternateASig;
3848 int64_t sigMean;
158142c2 3849
ff32e16e
PM
3850 a = float64_squash_input_denormal(a, status);
3851 b = float64_squash_input_denormal(b, status);
158142c2
FB
3852 aSig = extractFloat64Frac( a );
3853 aExp = extractFloat64Exp( a );
3854 aSign = extractFloat64Sign( a );
3855 bSig = extractFloat64Frac( b );
3856 bExp = extractFloat64Exp( b );
158142c2
FB
3857 if ( aExp == 0x7FF ) {
3858 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 3859 return propagateFloat64NaN(a, b, status);
158142c2 3860 }
ff32e16e 3861 float_raise(float_flag_invalid, status);
af39bc8c 3862 return float64_default_nan(status);
158142c2
FB
3863 }
3864 if ( bExp == 0x7FF ) {
ff32e16e
PM
3865 if (bSig) {
3866 return propagateFloat64NaN(a, b, status);
3867 }
158142c2
FB
3868 return a;
3869 }
3870 if ( bExp == 0 ) {
3871 if ( bSig == 0 ) {
ff32e16e 3872 float_raise(float_flag_invalid, status);
af39bc8c 3873 return float64_default_nan(status);
158142c2
FB
3874 }
3875 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3876 }
3877 if ( aExp == 0 ) {
3878 if ( aSig == 0 ) return a;
3879 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3880 }
3881 expDiff = aExp - bExp;
3882 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3883 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3884 if ( expDiff < 0 ) {
3885 if ( expDiff < -1 ) return a;
3886 aSig >>= 1;
3887 }
3888 q = ( bSig <= aSig );
3889 if ( q ) aSig -= bSig;
3890 expDiff -= 64;
3891 while ( 0 < expDiff ) {
3892 q = estimateDiv128To64( aSig, 0, bSig );
3893 q = ( 2 < q ) ? q - 2 : 0;
3894 aSig = - ( ( bSig>>2 ) * q );
3895 expDiff -= 62;
3896 }
3897 expDiff += 64;
3898 if ( 0 < expDiff ) {
3899 q = estimateDiv128To64( aSig, 0, bSig );
3900 q = ( 2 < q ) ? q - 2 : 0;
3901 q >>= 64 - expDiff;
3902 bSig >>= 2;
3903 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3904 }
3905 else {
3906 aSig >>= 2;
3907 bSig >>= 2;
3908 }
3909 do {
3910 alternateASig = aSig;
3911 ++q;
3912 aSig -= bSig;
bb98fe42 3913 } while ( 0 <= (int64_t) aSig );
158142c2
FB
3914 sigMean = aSig + alternateASig;
3915 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3916 aSig = alternateASig;
3917 }
bb98fe42 3918 zSign = ( (int64_t) aSig < 0 );
158142c2 3919 if ( zSign ) aSig = - aSig;
ff32e16e 3920 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
3921
3922}
3923
374dfc33
AJ
3924/*----------------------------------------------------------------------------
3925| Returns the binary log of the double-precision floating-point value `a'.
3926| The operation is performed according to the IEC/IEEE Standard for Binary
3927| Floating-Point Arithmetic.
3928*----------------------------------------------------------------------------*/
e5a41ffa 3929float64 float64_log2(float64 a, float_status *status)
374dfc33
AJ
3930{
3931 flag aSign, zSign;
0c48262d 3932 int aExp;
bb98fe42 3933 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 3934 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
3935
3936 aSig = extractFloat64Frac( a );
3937 aExp = extractFloat64Exp( a );
3938 aSign = extractFloat64Sign( a );
3939
3940 if ( aExp == 0 ) {
3941 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
3942 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3943 }
3944 if ( aSign ) {
ff32e16e 3945 float_raise(float_flag_invalid, status);
af39bc8c 3946 return float64_default_nan(status);
374dfc33
AJ
3947 }
3948 if ( aExp == 0x7FF ) {
ff32e16e
PM
3949 if (aSig) {
3950 return propagateFloat64NaN(a, float64_zero, status);
3951 }
374dfc33
AJ
3952 return a;
3953 }
3954
3955 aExp -= 0x3FF;
3956 aSig |= LIT64( 0x0010000000000000 );
3957 zSign = aExp < 0;
bb98fe42 3958 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
3959 for (i = 1LL << 51; i > 0; i >>= 1) {
3960 mul64To128( aSig, aSig, &aSig0, &aSig1 );
3961 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
3962 if ( aSig & LIT64( 0x0020000000000000 ) ) {
3963 aSig >>= 1;
3964 zSig |= i;
3965 }
3966 }
3967
3968 if ( zSign )
3969 zSig = -zSig;
ff32e16e 3970 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
3971}
3972
158142c2
FB
3973/*----------------------------------------------------------------------------
3974| Returns 1 if the double-precision floating-point value `a' is equal to the
b689362d
AJ
3975| corresponding value `b', and 0 otherwise. The invalid exception is raised
3976| if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
3977| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3978*----------------------------------------------------------------------------*/
3979
e5a41ffa 3980int float64_eq(float64 a, float64 b, float_status *status)
158142c2 3981{
bb98fe42 3982 uint64_t av, bv;
ff32e16e
PM
3983 a = float64_squash_input_denormal(a, status);
3984 b = float64_squash_input_denormal(b, status);
158142c2
FB
3985
3986 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3987 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3988 ) {
ff32e16e 3989 float_raise(float_flag_invalid, status);
158142c2
FB
3990 return 0;
3991 }
f090c9d4 3992 av = float64_val(a);
a1b91bb4 3993 bv = float64_val(b);
bb98fe42 3994 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
3995
3996}
3997
3998/*----------------------------------------------------------------------------
3999| Returns 1 if the double-precision floating-point value `a' is less than or
f5a64251
AJ
4000| equal to the corresponding value `b', and 0 otherwise. The invalid
4001| exception is raised if either operand is a NaN. The comparison is performed
4002| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4003*----------------------------------------------------------------------------*/
4004
e5a41ffa 4005int float64_le(float64 a, float64 b, float_status *status)
158142c2
FB
4006{
4007 flag aSign, bSign;
bb98fe42 4008 uint64_t av, bv;
ff32e16e
PM
4009 a = float64_squash_input_denormal(a, status);
4010 b = float64_squash_input_denormal(b, status);
158142c2
FB
4011
4012 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4013 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4014 ) {
ff32e16e 4015 float_raise(float_flag_invalid, status);
158142c2
FB
4016 return 0;
4017 }
4018 aSign = extractFloat64Sign( a );
4019 bSign = extractFloat64Sign( b );
f090c9d4 4020 av = float64_val(a);
a1b91bb4 4021 bv = float64_val(b);
bb98fe42 4022 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4023 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4024
4025}
4026
4027/*----------------------------------------------------------------------------
4028| Returns 1 if the double-precision floating-point value `a' is less than
f5a64251
AJ
4029| the corresponding value `b', and 0 otherwise. The invalid exception is
4030| raised if either operand is a NaN. The comparison is performed according
4031| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4032*----------------------------------------------------------------------------*/
4033
e5a41ffa 4034int float64_lt(float64 a, float64 b, float_status *status)
158142c2
FB
4035{
4036 flag aSign, bSign;
bb98fe42 4037 uint64_t av, bv;
158142c2 4038
ff32e16e
PM
4039 a = float64_squash_input_denormal(a, status);
4040 b = float64_squash_input_denormal(b, status);
158142c2
FB
4041 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4042 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4043 ) {
ff32e16e 4044 float_raise(float_flag_invalid, status);
158142c2
FB
4045 return 0;
4046 }
4047 aSign = extractFloat64Sign( a );
4048 bSign = extractFloat64Sign( b );
f090c9d4 4049 av = float64_val(a);
a1b91bb4 4050 bv = float64_val(b);
bb98fe42 4051 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4052 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4053
4054}
4055
67b7861d
AJ
4056/*----------------------------------------------------------------------------
4057| Returns 1 if the double-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4058| be compared, and 0 otherwise. The invalid exception is raised if either
4059| operand is a NaN. The comparison is performed according to the IEC/IEEE
4060| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4061*----------------------------------------------------------------------------*/
4062
e5a41ffa 4063int float64_unordered(float64 a, float64 b, float_status *status)
67b7861d 4064{
ff32e16e
PM
4065 a = float64_squash_input_denormal(a, status);
4066 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4067
4068 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4069 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4070 ) {
ff32e16e 4071 float_raise(float_flag_invalid, status);
67b7861d
AJ
4072 return 1;
4073 }
4074 return 0;
4075}
4076
158142c2
FB
4077/*----------------------------------------------------------------------------
4078| Returns 1 if the double-precision floating-point value `a' is equal to the
f5a64251
AJ
4079| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4080| exception.The comparison is performed according to the IEC/IEEE Standard
4081| for Binary Floating-Point Arithmetic.
158142c2
FB
4082*----------------------------------------------------------------------------*/
4083
e5a41ffa 4084int float64_eq_quiet(float64 a, float64 b, float_status *status)
158142c2 4085{
bb98fe42 4086 uint64_t av, bv;
ff32e16e
PM
4087 a = float64_squash_input_denormal(a, status);
4088 b = float64_squash_input_denormal(b, status);
158142c2
FB
4089
4090 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4091 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4092 ) {
af39bc8c
AM
4093 if (float64_is_signaling_nan(a, status)
4094 || float64_is_signaling_nan(b, status)) {
ff32e16e 4095 float_raise(float_flag_invalid, status);
b689362d 4096 }
158142c2
FB
4097 return 0;
4098 }
f090c9d4 4099 av = float64_val(a);
a1b91bb4 4100 bv = float64_val(b);
bb98fe42 4101 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4102
4103}
4104
4105/*----------------------------------------------------------------------------
4106| Returns 1 if the double-precision floating-point value `a' is less than or
4107| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4108| cause an exception. Otherwise, the comparison is performed according to the
4109| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4110*----------------------------------------------------------------------------*/
4111
e5a41ffa 4112int float64_le_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4113{
4114 flag aSign, bSign;
bb98fe42 4115 uint64_t av, bv;
ff32e16e
PM
4116 a = float64_squash_input_denormal(a, status);
4117 b = float64_squash_input_denormal(b, status);
158142c2
FB
4118
4119 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4120 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4121 ) {
af39bc8c
AM
4122 if (float64_is_signaling_nan(a, status)
4123 || float64_is_signaling_nan(b, status)) {
ff32e16e 4124 float_raise(float_flag_invalid, status);
158142c2
FB
4125 }
4126 return 0;
4127 }
4128 aSign = extractFloat64Sign( a );
4129 bSign = extractFloat64Sign( b );
f090c9d4 4130 av = float64_val(a);
a1b91bb4 4131 bv = float64_val(b);
bb98fe42 4132 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4133 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4134
4135}
4136
4137/*----------------------------------------------------------------------------
4138| Returns 1 if the double-precision floating-point value `a' is less than
4139| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4140| exception. Otherwise, the comparison is performed according to the IEC/IEEE
4141| Standard for Binary Floating-Point Arithmetic.
4142*----------------------------------------------------------------------------*/
4143
e5a41ffa 4144int float64_lt_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4145{
4146 flag aSign, bSign;
bb98fe42 4147 uint64_t av, bv;
ff32e16e
PM
4148 a = float64_squash_input_denormal(a, status);
4149 b = float64_squash_input_denormal(b, status);
158142c2
FB
4150
4151 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4152 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4153 ) {
af39bc8c
AM
4154 if (float64_is_signaling_nan(a, status)
4155 || float64_is_signaling_nan(b, status)) {
ff32e16e 4156 float_raise(float_flag_invalid, status);
158142c2
FB
4157 }
4158 return 0;
4159 }
4160 aSign = extractFloat64Sign( a );
4161 bSign = extractFloat64Sign( b );
f090c9d4 4162 av = float64_val(a);
a1b91bb4 4163 bv = float64_val(b);
bb98fe42 4164 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4165 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4166
4167}
4168
67b7861d
AJ
4169/*----------------------------------------------------------------------------
4170| Returns 1 if the double-precision floating-point values `a' and `b' cannot
4171| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4172| comparison is performed according to the IEC/IEEE Standard for Binary
4173| Floating-Point Arithmetic.
4174*----------------------------------------------------------------------------*/
4175
e5a41ffa 4176int float64_unordered_quiet(float64 a, float64 b, float_status *status)
67b7861d 4177{
ff32e16e
PM
4178 a = float64_squash_input_denormal(a, status);
4179 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4180
4181 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4182 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4183 ) {
af39bc8c
AM
4184 if (float64_is_signaling_nan(a, status)
4185 || float64_is_signaling_nan(b, status)) {
ff32e16e 4186 float_raise(float_flag_invalid, status);
67b7861d
AJ
4187 }
4188 return 1;
4189 }
4190 return 0;
4191}
4192
158142c2
FB
4193/*----------------------------------------------------------------------------
4194| Returns the result of converting the extended double-precision floating-
4195| point value `a' to the 32-bit two's complement integer format. The
4196| conversion is performed according to the IEC/IEEE Standard for Binary
4197| Floating-Point Arithmetic---which means in particular that the conversion
4198| is rounded according to the current rounding mode. If `a' is a NaN, the
4199| largest positive integer is returned. Otherwise, if the conversion
4200| overflows, the largest integer with the same sign as `a' is returned.
4201*----------------------------------------------------------------------------*/
4202
f4014512 4203int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2
FB
4204{
4205 flag aSign;
f4014512 4206 int32_t aExp, shiftCount;
bb98fe42 4207 uint64_t aSig;
158142c2 4208
d1eb8f2a
AD
4209 if (floatx80_invalid_encoding(a)) {
4210 float_raise(float_flag_invalid, status);
4211 return 1 << 31;
4212 }
158142c2
FB
4213 aSig = extractFloatx80Frac( a );
4214 aExp = extractFloatx80Exp( a );
4215 aSign = extractFloatx80Sign( a );
bb98fe42 4216 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4217 shiftCount = 0x4037 - aExp;
4218 if ( shiftCount <= 0 ) shiftCount = 1;
4219 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 4220 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
4221
4222}
4223
4224/*----------------------------------------------------------------------------
4225| Returns the result of converting the extended double-precision floating-
4226| point value `a' to the 32-bit two's complement integer format. The
4227| conversion is performed according to the IEC/IEEE Standard for Binary
4228| Floating-Point Arithmetic, except that the conversion is always rounded
4229| toward zero. If `a' is a NaN, the largest positive integer is returned.
4230| Otherwise, if the conversion overflows, the largest integer with the same
4231| sign as `a' is returned.
4232*----------------------------------------------------------------------------*/
4233
f4014512 4234int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4235{
4236 flag aSign;
f4014512 4237 int32_t aExp, shiftCount;
bb98fe42 4238 uint64_t aSig, savedASig;
b3a6a2e0 4239 int32_t z;
158142c2 4240
d1eb8f2a
AD
4241 if (floatx80_invalid_encoding(a)) {
4242 float_raise(float_flag_invalid, status);
4243 return 1 << 31;
4244 }
158142c2
FB
4245 aSig = extractFloatx80Frac( a );
4246 aExp = extractFloatx80Exp( a );
4247 aSign = extractFloatx80Sign( a );
4248 if ( 0x401E < aExp ) {
bb98fe42 4249 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4250 goto invalid;
4251 }
4252 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4253 if (aExp || aSig) {
4254 status->float_exception_flags |= float_flag_inexact;
4255 }
158142c2
FB
4256 return 0;
4257 }
4258 shiftCount = 0x403E - aExp;
4259 savedASig = aSig;
4260 aSig >>= shiftCount;
4261 z = aSig;
4262 if ( aSign ) z = - z;
4263 if ( ( z < 0 ) ^ aSign ) {
4264 invalid:
ff32e16e 4265 float_raise(float_flag_invalid, status);
bb98fe42 4266 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
4267 }
4268 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 4269 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4270 }
4271 return z;
4272
4273}
4274
4275/*----------------------------------------------------------------------------
4276| Returns the result of converting the extended double-precision floating-
4277| point value `a' to the 64-bit two's complement integer format. The
4278| conversion is performed according to the IEC/IEEE Standard for Binary
4279| Floating-Point Arithmetic---which means in particular that the conversion
4280| is rounded according to the current rounding mode. If `a' is a NaN,
4281| the largest positive integer is returned. Otherwise, if the conversion
4282| overflows, the largest integer with the same sign as `a' is returned.
4283*----------------------------------------------------------------------------*/
4284
f42c2224 4285int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2
FB
4286{
4287 flag aSign;
f4014512 4288 int32_t aExp, shiftCount;
bb98fe42 4289 uint64_t aSig, aSigExtra;
158142c2 4290
d1eb8f2a
AD
4291 if (floatx80_invalid_encoding(a)) {
4292 float_raise(float_flag_invalid, status);
4293 return 1ULL << 63;
4294 }
158142c2
FB
4295 aSig = extractFloatx80Frac( a );
4296 aExp = extractFloatx80Exp( a );
4297 aSign = extractFloatx80Sign( a );
4298 shiftCount = 0x403E - aExp;
4299 if ( shiftCount <= 0 ) {
4300 if ( shiftCount ) {
ff32e16e 4301 float_raise(float_flag_invalid, status);
0f605c88 4302 if (!aSign || floatx80_is_any_nan(a)) {
158142c2
FB
4303 return LIT64( 0x7FFFFFFFFFFFFFFF );
4304 }
bb98fe42 4305 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4306 }
4307 aSigExtra = 0;
4308 }
4309 else {
4310 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4311 }
ff32e16e 4312 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
4313
4314}
4315
4316/*----------------------------------------------------------------------------
4317| Returns the result of converting the extended double-precision floating-
4318| point value `a' to the 64-bit two's complement integer format. The
4319| conversion is performed according to the IEC/IEEE Standard for Binary
4320| Floating-Point Arithmetic, except that the conversion is always rounded
4321| toward zero. If `a' is a NaN, the largest positive integer is returned.
4322| Otherwise, if the conversion overflows, the largest integer with the same
4323| sign as `a' is returned.
4324*----------------------------------------------------------------------------*/
4325
f42c2224 4326int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4327{
4328 flag aSign;
f4014512 4329 int32_t aExp, shiftCount;
bb98fe42 4330 uint64_t aSig;
f42c2224 4331 int64_t z;
158142c2 4332
d1eb8f2a
AD
4333 if (floatx80_invalid_encoding(a)) {
4334 float_raise(float_flag_invalid, status);
4335 return 1ULL << 63;
4336 }
158142c2
FB
4337 aSig = extractFloatx80Frac( a );
4338 aExp = extractFloatx80Exp( a );
4339 aSign = extractFloatx80Sign( a );
4340 shiftCount = aExp - 0x403E;
4341 if ( 0 <= shiftCount ) {
4342 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4343 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 4344 float_raise(float_flag_invalid, status);
158142c2
FB
4345 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4346 return LIT64( 0x7FFFFFFFFFFFFFFF );
4347 }
4348 }
bb98fe42 4349 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4350 }
4351 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4352 if (aExp | aSig) {
4353 status->float_exception_flags |= float_flag_inexact;
4354 }
158142c2
FB
4355 return 0;
4356 }
4357 z = aSig>>( - shiftCount );
bb98fe42 4358 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 4359 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4360 }
4361 if ( aSign ) z = - z;
4362 return z;
4363
4364}
4365
4366/*----------------------------------------------------------------------------
4367| Returns the result of converting the extended double-precision floating-
4368| point value `a' to the single-precision floating-point format. The
4369| conversion is performed according to the IEC/IEEE Standard for Binary
4370| Floating-Point Arithmetic.
4371*----------------------------------------------------------------------------*/
4372
e5a41ffa 4373float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2
FB
4374{
4375 flag aSign;
f4014512 4376 int32_t aExp;
bb98fe42 4377 uint64_t aSig;
158142c2 4378
d1eb8f2a
AD
4379 if (floatx80_invalid_encoding(a)) {
4380 float_raise(float_flag_invalid, status);
4381 return float32_default_nan(status);
4382 }
158142c2
FB
4383 aSig = extractFloatx80Frac( a );
4384 aExp = extractFloatx80Exp( a );
4385 aSign = extractFloatx80Sign( a );
4386 if ( aExp == 0x7FFF ) {
bb98fe42 4387 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4388 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4389 }
4390 return packFloat32( aSign, 0xFF, 0 );
4391 }
4392 shift64RightJamming( aSig, 33, &aSig );
4393 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 4394 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
4395
4396}
4397
4398/*----------------------------------------------------------------------------
4399| Returns the result of converting the extended double-precision floating-
4400| point value `a' to the double-precision floating-point format. The
4401| conversion is performed according to the IEC/IEEE Standard for Binary
4402| Floating-Point Arithmetic.
4403*----------------------------------------------------------------------------*/
4404
e5a41ffa 4405float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2
FB
4406{
4407 flag aSign;
f4014512 4408 int32_t aExp;
bb98fe42 4409 uint64_t aSig, zSig;
158142c2 4410
d1eb8f2a
AD
4411 if (floatx80_invalid_encoding(a)) {
4412 float_raise(float_flag_invalid, status);
4413 return float64_default_nan(status);
4414 }
158142c2
FB
4415 aSig = extractFloatx80Frac( a );
4416 aExp = extractFloatx80Exp( a );
4417 aSign = extractFloatx80Sign( a );
4418 if ( aExp == 0x7FFF ) {
bb98fe42 4419 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4420 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4421 }
4422 return packFloat64( aSign, 0x7FF, 0 );
4423 }
4424 shift64RightJamming( aSig, 1, &zSig );
4425 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 4426 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
4427
4428}
4429
158142c2
FB
4430/*----------------------------------------------------------------------------
4431| Returns the result of converting the extended double-precision floating-
4432| point value `a' to the quadruple-precision floating-point format. The
4433| conversion is performed according to the IEC/IEEE Standard for Binary
4434| Floating-Point Arithmetic.
4435*----------------------------------------------------------------------------*/
4436
e5a41ffa 4437float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2
FB
4438{
4439 flag aSign;
0c48262d 4440 int aExp;
bb98fe42 4441 uint64_t aSig, zSig0, zSig1;
158142c2 4442
d1eb8f2a
AD
4443 if (floatx80_invalid_encoding(a)) {
4444 float_raise(float_flag_invalid, status);
4445 return float128_default_nan(status);
4446 }
158142c2
FB
4447 aSig = extractFloatx80Frac( a );
4448 aExp = extractFloatx80Exp( a );
4449 aSign = extractFloatx80Sign( a );
bb98fe42 4450 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4451 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4452 }
4453 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
4454 return packFloat128( aSign, aExp, zSig0, zSig1 );
4455
4456}
4457
0f721292
LV
4458/*----------------------------------------------------------------------------
4459| Rounds the extended double-precision floating-point value `a'
4460| to the precision provided by floatx80_rounding_precision and returns the
4461| result as an extended double-precision floating-point value.
4462| The operation is performed according to the IEC/IEEE Standard for Binary
4463| Floating-Point Arithmetic.
4464*----------------------------------------------------------------------------*/
4465
4466floatx80 floatx80_round(floatx80 a, float_status *status)
4467{
4468 return roundAndPackFloatx80(status->floatx80_rounding_precision,
4469 extractFloatx80Sign(a),
4470 extractFloatx80Exp(a),
4471 extractFloatx80Frac(a), 0, status);
4472}
4473
158142c2
FB
4474/*----------------------------------------------------------------------------
4475| Rounds the extended double-precision floating-point value `a' to an integer,
4476| and returns the result as an extended quadruple-precision floating-point
4477| value. The operation is performed according to the IEC/IEEE Standard for
4478| Binary Floating-Point Arithmetic.
4479*----------------------------------------------------------------------------*/
4480
e5a41ffa 4481floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2
FB
4482{
4483 flag aSign;
f4014512 4484 int32_t aExp;
bb98fe42 4485 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
4486 floatx80 z;
4487
d1eb8f2a
AD
4488 if (floatx80_invalid_encoding(a)) {
4489 float_raise(float_flag_invalid, status);
4490 return floatx80_default_nan(status);
4491 }
158142c2
FB
4492 aExp = extractFloatx80Exp( a );
4493 if ( 0x403E <= aExp ) {
bb98fe42 4494 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 4495 return propagateFloatx80NaN(a, a, status);
158142c2
FB
4496 }
4497 return a;
4498 }
4499 if ( aExp < 0x3FFF ) {
4500 if ( ( aExp == 0 )
bb98fe42 4501 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
158142c2
FB
4502 return a;
4503 }
a2f2d288 4504 status->float_exception_flags |= float_flag_inexact;
158142c2 4505 aSign = extractFloatx80Sign( a );
a2f2d288 4506 switch (status->float_rounding_mode) {
158142c2 4507 case float_round_nearest_even:
bb98fe42 4508 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
4509 ) {
4510 return
4511 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
4512 }
4513 break;
f9288a76
PM
4514 case float_round_ties_away:
4515 if (aExp == 0x3FFE) {
4516 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
4517 }
4518 break;
158142c2
FB
4519 case float_round_down:
4520 return
4521 aSign ?
4522 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
4523 : packFloatx80( 0, 0, 0 );
4524 case float_round_up:
4525 return
4526 aSign ? packFloatx80( 1, 0, 0 )
4527 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
4528 }
4529 return packFloatx80( aSign, 0, 0 );
4530 }
4531 lastBitMask = 1;
4532 lastBitMask <<= 0x403E - aExp;
4533 roundBitsMask = lastBitMask - 1;
4534 z = a;
a2f2d288 4535 switch (status->float_rounding_mode) {
dc355b76 4536 case float_round_nearest_even:
158142c2 4537 z.low += lastBitMask>>1;
dc355b76
PM
4538 if ((z.low & roundBitsMask) == 0) {
4539 z.low &= ~lastBitMask;
4540 }
4541 break;
f9288a76
PM
4542 case float_round_ties_away:
4543 z.low += lastBitMask >> 1;
4544 break;
dc355b76
PM
4545 case float_round_to_zero:
4546 break;
4547 case float_round_up:
4548 if (!extractFloatx80Sign(z)) {
4549 z.low += roundBitsMask;
4550 }
4551 break;
4552 case float_round_down:
4553 if (extractFloatx80Sign(z)) {
158142c2
FB
4554 z.low += roundBitsMask;
4555 }
dc355b76
PM
4556 break;
4557 default:
4558 abort();
158142c2
FB
4559 }
4560 z.low &= ~ roundBitsMask;
4561 if ( z.low == 0 ) {
4562 ++z.high;
4563 z.low = LIT64( 0x8000000000000000 );
4564 }
a2f2d288
PM
4565 if (z.low != a.low) {
4566 status->float_exception_flags |= float_flag_inexact;
4567 }
158142c2
FB
4568 return z;
4569
4570}
4571
4572/*----------------------------------------------------------------------------
4573| Returns the result of adding the absolute values of the extended double-
4574| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
4575| negated before being returned. `zSign' is ignored if the result is a NaN.
4576| The addition is performed according to the IEC/IEEE Standard for Binary
4577| Floating-Point Arithmetic.
4578*----------------------------------------------------------------------------*/
4579
e5a41ffa
PM
4580static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
4581 float_status *status)
158142c2 4582{
f4014512 4583 int32_t aExp, bExp, zExp;
bb98fe42 4584 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 4585 int32_t expDiff;
158142c2
FB
4586
4587 aSig = extractFloatx80Frac( a );
4588 aExp = extractFloatx80Exp( a );
4589 bSig = extractFloatx80Frac( b );
4590 bExp = extractFloatx80Exp( b );
4591 expDiff = aExp - bExp;
4592 if ( 0 < expDiff ) {
4593 if ( aExp == 0x7FFF ) {
ff32e16e
PM
4594 if ((uint64_t)(aSig << 1)) {
4595 return propagateFloatx80NaN(a, b, status);
4596 }
158142c2
FB
4597 return a;
4598 }
4599 if ( bExp == 0 ) --expDiff;
4600 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4601 zExp = aExp;
4602 }
4603 else if ( expDiff < 0 ) {
4604 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4605 if ((uint64_t)(bSig << 1)) {
4606 return propagateFloatx80NaN(a, b, status);
4607 }
0f605c88
LV
4608 return packFloatx80(zSign,
4609 floatx80_infinity_high,
4610 floatx80_infinity_low);
158142c2
FB
4611 }
4612 if ( aExp == 0 ) ++expDiff;
4613 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4614 zExp = bExp;
4615 }
4616 else {
4617 if ( aExp == 0x7FFF ) {
bb98fe42 4618 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 4619 return propagateFloatx80NaN(a, b, status);
158142c2
FB
4620 }
4621 return a;
4622 }
4623 zSig1 = 0;
4624 zSig0 = aSig + bSig;
4625 if ( aExp == 0 ) {
4626 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
4627 goto roundAndPack;
4628 }
4629 zExp = aExp;
4630 goto shiftRight1;
4631 }
4632 zSig0 = aSig + bSig;
bb98fe42 4633 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
4634 shiftRight1:
4635 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
4636 zSig0 |= LIT64( 0x8000000000000000 );
4637 ++zExp;
4638 roundAndPack:
a2f2d288 4639 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 4640 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
4641}
4642
4643/*----------------------------------------------------------------------------
4644| Returns the result of subtracting the absolute values of the extended
4645| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
4646| difference is negated before being returned. `zSign' is ignored if the
4647| result is a NaN. The subtraction is performed according to the IEC/IEEE
4648| Standard for Binary Floating-Point Arithmetic.
4649*----------------------------------------------------------------------------*/
4650
e5a41ffa
PM
4651static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
4652 float_status *status)
158142c2 4653{
f4014512 4654 int32_t aExp, bExp, zExp;
bb98fe42 4655 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 4656 int32_t expDiff;
158142c2
FB
4657
4658 aSig = extractFloatx80Frac( a );
4659 aExp = extractFloatx80Exp( a );
4660 bSig = extractFloatx80Frac( b );
4661 bExp = extractFloatx80Exp( b );
4662 expDiff = aExp - bExp;
4663 if ( 0 < expDiff ) goto aExpBigger;
4664 if ( expDiff < 0 ) goto bExpBigger;
4665 if ( aExp == 0x7FFF ) {
bb98fe42 4666 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 4667 return propagateFloatx80NaN(a, b, status);
158142c2 4668 }
ff32e16e 4669 float_raise(float_flag_invalid, status);
af39bc8c 4670 return floatx80_default_nan(status);
158142c2
FB
4671 }
4672 if ( aExp == 0 ) {
4673 aExp = 1;
4674 bExp = 1;
4675 }
4676 zSig1 = 0;
4677 if ( bSig < aSig ) goto aBigger;
4678 if ( aSig < bSig ) goto bBigger;
a2f2d288 4679 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
4680 bExpBigger:
4681 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4682 if ((uint64_t)(bSig << 1)) {
4683 return propagateFloatx80NaN(a, b, status);
4684 }
0f605c88
LV
4685 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
4686 floatx80_infinity_low);
158142c2
FB
4687 }
4688 if ( aExp == 0 ) ++expDiff;
4689 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4690 bBigger:
4691 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
4692 zExp = bExp;
4693 zSign ^= 1;
4694 goto normalizeRoundAndPack;
4695 aExpBigger:
4696 if ( aExp == 0x7FFF ) {
ff32e16e
PM
4697 if ((uint64_t)(aSig << 1)) {
4698 return propagateFloatx80NaN(a, b, status);
4699 }
158142c2
FB
4700 return a;
4701 }
4702 if ( bExp == 0 ) --expDiff;
4703 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4704 aBigger:
4705 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
4706 zExp = aExp;
4707 normalizeRoundAndPack:
a2f2d288 4708 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 4709 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
4710}
4711
4712/*----------------------------------------------------------------------------
4713| Returns the result of adding the extended double-precision floating-point
4714| values `a' and `b'. The operation is performed according to the IEC/IEEE
4715| Standard for Binary Floating-Point Arithmetic.
4716*----------------------------------------------------------------------------*/
4717
e5a41ffa 4718floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
4719{
4720 flag aSign, bSign;
4721
d1eb8f2a
AD
4722 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4723 float_raise(float_flag_invalid, status);
4724 return floatx80_default_nan(status);
4725 }
158142c2
FB
4726 aSign = extractFloatx80Sign( a );
4727 bSign = extractFloatx80Sign( b );
4728 if ( aSign == bSign ) {
ff32e16e 4729 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4730 }
4731 else {
ff32e16e 4732 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4733 }
4734
4735}
4736
4737/*----------------------------------------------------------------------------
4738| Returns the result of subtracting the extended double-precision floating-
4739| point values `a' and `b'. The operation is performed according to the
4740| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4741*----------------------------------------------------------------------------*/
4742
e5a41ffa 4743floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
4744{
4745 flag aSign, bSign;
4746
d1eb8f2a
AD
4747 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4748 float_raise(float_flag_invalid, status);
4749 return floatx80_default_nan(status);
4750 }
158142c2
FB
4751 aSign = extractFloatx80Sign( a );
4752 bSign = extractFloatx80Sign( b );
4753 if ( aSign == bSign ) {
ff32e16e 4754 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4755 }
4756 else {
ff32e16e 4757 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4758 }
4759
4760}
4761
4762/*----------------------------------------------------------------------------
4763| Returns the result of multiplying the extended double-precision floating-
4764| point values `a' and `b'. The operation is performed according to the
4765| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4766*----------------------------------------------------------------------------*/
4767
e5a41ffa 4768floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
4769{
4770 flag aSign, bSign, zSign;
f4014512 4771 int32_t aExp, bExp, zExp;
bb98fe42 4772 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 4773
d1eb8f2a
AD
4774 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4775 float_raise(float_flag_invalid, status);
4776 return floatx80_default_nan(status);
4777 }
158142c2
FB
4778 aSig = extractFloatx80Frac( a );
4779 aExp = extractFloatx80Exp( a );
4780 aSign = extractFloatx80Sign( a );
4781 bSig = extractFloatx80Frac( b );
4782 bExp = extractFloatx80Exp( b );
4783 bSign = extractFloatx80Sign( b );
4784 zSign = aSign ^ bSign;
4785 if ( aExp == 0x7FFF ) {
bb98fe42
AF
4786 if ( (uint64_t) ( aSig<<1 )
4787 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 4788 return propagateFloatx80NaN(a, b, status);
158142c2
FB
4789 }
4790 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
4791 return packFloatx80(zSign, floatx80_infinity_high,
4792 floatx80_infinity_low);
158142c2
FB
4793 }
4794 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4795 if ((uint64_t)(bSig << 1)) {
4796 return propagateFloatx80NaN(a, b, status);
4797 }
158142c2
FB
4798 if ( ( aExp | aSig ) == 0 ) {
4799 invalid:
ff32e16e 4800 float_raise(float_flag_invalid, status);
af39bc8c 4801 return floatx80_default_nan(status);
158142c2 4802 }
0f605c88
LV
4803 return packFloatx80(zSign, floatx80_infinity_high,
4804 floatx80_infinity_low);
158142c2
FB
4805 }
4806 if ( aExp == 0 ) {
4807 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4808 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4809 }
4810 if ( bExp == 0 ) {
4811 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
4812 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4813 }
4814 zExp = aExp + bExp - 0x3FFE;
4815 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 4816 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
4817 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
4818 --zExp;
4819 }
a2f2d288 4820 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 4821 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
4822}
4823
4824/*----------------------------------------------------------------------------
4825| Returns the result of dividing the extended double-precision floating-point
4826| value `a' by the corresponding value `b'. The operation is performed
4827| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4828*----------------------------------------------------------------------------*/
4829
e5a41ffa 4830floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
4831{
4832 flag aSign, bSign, zSign;
f4014512 4833 int32_t aExp, bExp, zExp;
bb98fe42
AF
4834 uint64_t aSig, bSig, zSig0, zSig1;
4835 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 4836
d1eb8f2a
AD
4837 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4838 float_raise(float_flag_invalid, status);
4839 return floatx80_default_nan(status);
4840 }
158142c2
FB
4841 aSig = extractFloatx80Frac( a );
4842 aExp = extractFloatx80Exp( a );
4843 aSign = extractFloatx80Sign( a );
4844 bSig = extractFloatx80Frac( b );
4845 bExp = extractFloatx80Exp( b );
4846 bSign = extractFloatx80Sign( b );
4847 zSign = aSign ^ bSign;
4848 if ( aExp == 0x7FFF ) {
ff32e16e
PM
4849 if ((uint64_t)(aSig << 1)) {
4850 return propagateFloatx80NaN(a, b, status);
4851 }
158142c2 4852 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4853 if ((uint64_t)(bSig << 1)) {
4854 return propagateFloatx80NaN(a, b, status);
4855 }
158142c2
FB
4856 goto invalid;
4857 }
0f605c88
LV
4858 return packFloatx80(zSign, floatx80_infinity_high,
4859 floatx80_infinity_low);
158142c2
FB
4860 }
4861 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4862 if ((uint64_t)(bSig << 1)) {
4863 return propagateFloatx80NaN(a, b, status);
4864 }
158142c2
FB
4865 return packFloatx80( zSign, 0, 0 );
4866 }
4867 if ( bExp == 0 ) {
4868 if ( bSig == 0 ) {
4869 if ( ( aExp | aSig ) == 0 ) {
4870 invalid:
ff32e16e 4871 float_raise(float_flag_invalid, status);
af39bc8c 4872 return floatx80_default_nan(status);
158142c2 4873 }
ff32e16e 4874 float_raise(float_flag_divbyzero, status);
0f605c88
LV
4875 return packFloatx80(zSign, floatx80_infinity_high,
4876 floatx80_infinity_low);
158142c2
FB
4877 }
4878 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4879 }
4880 if ( aExp == 0 ) {
4881 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4882 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4883 }
4884 zExp = aExp - bExp + 0x3FFE;
4885 rem1 = 0;
4886 if ( bSig <= aSig ) {
4887 shift128Right( aSig, 0, 1, &aSig, &rem1 );
4888 ++zExp;
4889 }
4890 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
4891 mul64To128( bSig, zSig0, &term0, &term1 );
4892 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 4893 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
4894 --zSig0;
4895 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4896 }
4897 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 4898 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
4899 mul64To128( bSig, zSig1, &term1, &term2 );
4900 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 4901 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
4902 --zSig1;
4903 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
4904 }
4905 zSig1 |= ( ( rem1 | rem2 ) != 0 );
4906 }
a2f2d288 4907 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 4908 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
4909}
4910
4911/*----------------------------------------------------------------------------
4912| Returns the remainder of the extended double-precision floating-point value
4913| `a' with respect to the corresponding value `b'. The operation is performed
4914| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4915*----------------------------------------------------------------------------*/
4916
e5a41ffa 4917floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
158142c2 4918{
ed086f3d 4919 flag aSign, zSign;
f4014512 4920 int32_t aExp, bExp, expDiff;
bb98fe42
AF
4921 uint64_t aSig0, aSig1, bSig;
4922 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 4923
d1eb8f2a
AD
4924 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4925 float_raise(float_flag_invalid, status);
4926 return floatx80_default_nan(status);
4927 }
158142c2
FB
4928 aSig0 = extractFloatx80Frac( a );
4929 aExp = extractFloatx80Exp( a );
4930 aSign = extractFloatx80Sign( a );
4931 bSig = extractFloatx80Frac( b );
4932 bExp = extractFloatx80Exp( b );
158142c2 4933 if ( aExp == 0x7FFF ) {
bb98fe42
AF
4934 if ( (uint64_t) ( aSig0<<1 )
4935 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 4936 return propagateFloatx80NaN(a, b, status);
158142c2
FB
4937 }
4938 goto invalid;
4939 }
4940 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4941 if ((uint64_t)(bSig << 1)) {
4942 return propagateFloatx80NaN(a, b, status);
4943 }
158142c2
FB
4944 return a;
4945 }
4946 if ( bExp == 0 ) {
4947 if ( bSig == 0 ) {
4948 invalid:
ff32e16e 4949 float_raise(float_flag_invalid, status);
af39bc8c 4950 return floatx80_default_nan(status);
158142c2
FB
4951 }
4952 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4953 }
4954 if ( aExp == 0 ) {
bb98fe42 4955 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
158142c2
FB
4956 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4957 }
4958 bSig |= LIT64( 0x8000000000000000 );
4959 zSign = aSign;
4960 expDiff = aExp - bExp;
4961 aSig1 = 0;
4962 if ( expDiff < 0 ) {
4963 if ( expDiff < -1 ) return a;
4964 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
4965 expDiff = 0;
4966 }
4967 q = ( bSig <= aSig0 );
4968 if ( q ) aSig0 -= bSig;
4969 expDiff -= 64;
4970 while ( 0 < expDiff ) {
4971 q = estimateDiv128To64( aSig0, aSig1, bSig );
4972 q = ( 2 < q ) ? q - 2 : 0;
4973 mul64To128( bSig, q, &term0, &term1 );
4974 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4975 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
4976 expDiff -= 62;
4977 }
4978 expDiff += 64;
4979 if ( 0 < expDiff ) {
4980 q = estimateDiv128To64( aSig0, aSig1, bSig );
4981 q = ( 2 < q ) ? q - 2 : 0;
4982 q >>= 64 - expDiff;
4983 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
4984 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4985 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
4986 while ( le128( term0, term1, aSig0, aSig1 ) ) {
4987 ++q;
4988 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4989 }
4990 }
4991 else {
4992 term1 = 0;
4993 term0 = bSig;
4994 }
4995 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
4996 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
4997 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
4998 && ( q & 1 ) )
4999 ) {
5000 aSig0 = alternateASig0;
5001 aSig1 = alternateASig1;
5002 zSign = ! zSign;
5003 }
5004 return
5005 normalizeRoundAndPackFloatx80(
ff32e16e 5006 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
5007
5008}
5009
5010/*----------------------------------------------------------------------------
5011| Returns the square root of the extended double-precision floating-point
5012| value `a'. The operation is performed according to the IEC/IEEE Standard
5013| for Binary Floating-Point Arithmetic.
5014*----------------------------------------------------------------------------*/
5015
e5a41ffa 5016floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2
FB
5017{
5018 flag aSign;
f4014512 5019 int32_t aExp, zExp;
bb98fe42
AF
5020 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5021 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 5022
d1eb8f2a
AD
5023 if (floatx80_invalid_encoding(a)) {
5024 float_raise(float_flag_invalid, status);
5025 return floatx80_default_nan(status);
5026 }
158142c2
FB
5027 aSig0 = extractFloatx80Frac( a );
5028 aExp = extractFloatx80Exp( a );
5029 aSign = extractFloatx80Sign( a );
5030 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5031 if ((uint64_t)(aSig0 << 1)) {
5032 return propagateFloatx80NaN(a, a, status);
5033 }
158142c2
FB
5034 if ( ! aSign ) return a;
5035 goto invalid;
5036 }
5037 if ( aSign ) {
5038 if ( ( aExp | aSig0 ) == 0 ) return a;
5039 invalid:
ff32e16e 5040 float_raise(float_flag_invalid, status);
af39bc8c 5041 return floatx80_default_nan(status);
158142c2
FB
5042 }
5043 if ( aExp == 0 ) {
5044 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5045 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5046 }
5047 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5048 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5049 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5050 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5051 doubleZSig0 = zSig0<<1;
5052 mul64To128( zSig0, zSig0, &term0, &term1 );
5053 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 5054 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5055 --zSig0;
5056 doubleZSig0 -= 2;
5057 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5058 }
5059 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5060 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5061 if ( zSig1 == 0 ) zSig1 = 1;
5062 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5063 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5064 mul64To128( zSig1, zSig1, &term2, &term3 );
5065 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 5066 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5067 --zSig1;
5068 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5069 term3 |= 1;
5070 term2 |= doubleZSig0;
5071 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5072 }
5073 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5074 }
5075 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5076 zSig0 |= doubleZSig0;
a2f2d288
PM
5077 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5078 0, zExp, zSig0, zSig1, status);
158142c2
FB
5079}
5080
5081/*----------------------------------------------------------------------------
b689362d
AJ
5082| Returns 1 if the extended double-precision floating-point value `a' is equal
5083| to the corresponding value `b', and 0 otherwise. The invalid exception is
5084| raised if either operand is a NaN. Otherwise, the comparison is performed
5085| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5086*----------------------------------------------------------------------------*/
5087
e5a41ffa 5088int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5089{
5090
d1eb8f2a
AD
5091 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5092 || (extractFloatx80Exp(a) == 0x7FFF
5093 && (uint64_t) (extractFloatx80Frac(a) << 1))
5094 || (extractFloatx80Exp(b) == 0x7FFF
5095 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5096 ) {
ff32e16e 5097 float_raise(float_flag_invalid, status);
158142c2
FB
5098 return 0;
5099 }
5100 return
5101 ( a.low == b.low )
5102 && ( ( a.high == b.high )
5103 || ( ( a.low == 0 )
bb98fe42 5104 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5105 );
5106
5107}
5108
5109/*----------------------------------------------------------------------------
5110| Returns 1 if the extended double-precision floating-point value `a' is
5111| less than or equal to the corresponding value `b', and 0 otherwise. The
f5a64251
AJ
5112| invalid exception is raised if either operand is a NaN. The comparison is
5113| performed according to the IEC/IEEE Standard for Binary Floating-Point
5114| Arithmetic.
158142c2
FB
5115*----------------------------------------------------------------------------*/
5116
e5a41ffa 5117int floatx80_le(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5118{
5119 flag aSign, bSign;
5120
d1eb8f2a
AD
5121 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5122 || (extractFloatx80Exp(a) == 0x7FFF
5123 && (uint64_t) (extractFloatx80Frac(a) << 1))
5124 || (extractFloatx80Exp(b) == 0x7FFF
5125 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5126 ) {
ff32e16e 5127 float_raise(float_flag_invalid, status);
158142c2
FB
5128 return 0;
5129 }
5130 aSign = extractFloatx80Sign( a );
5131 bSign = extractFloatx80Sign( b );
5132 if ( aSign != bSign ) {
5133 return
5134 aSign
bb98fe42 5135 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5136 == 0 );
5137 }
5138 return
5139 aSign ? le128( b.high, b.low, a.high, a.low )
5140 : le128( a.high, a.low, b.high, b.low );
5141
5142}
5143
5144/*----------------------------------------------------------------------------
5145| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5146| less than the corresponding value `b', and 0 otherwise. The invalid
5147| exception is raised if either operand is a NaN. The comparison is performed
5148| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5149*----------------------------------------------------------------------------*/
5150
e5a41ffa 5151int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5152{
5153 flag aSign, bSign;
5154
d1eb8f2a
AD
5155 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5156 || (extractFloatx80Exp(a) == 0x7FFF
5157 && (uint64_t) (extractFloatx80Frac(a) << 1))
5158 || (extractFloatx80Exp(b) == 0x7FFF
5159 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5160 ) {
ff32e16e 5161 float_raise(float_flag_invalid, status);
158142c2
FB
5162 return 0;
5163 }
5164 aSign = extractFloatx80Sign( a );
5165 bSign = extractFloatx80Sign( b );
5166 if ( aSign != bSign ) {
5167 return
5168 aSign
bb98fe42 5169 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5170 != 0 );
5171 }
5172 return
5173 aSign ? lt128( b.high, b.low, a.high, a.low )
5174 : lt128( a.high, a.low, b.high, b.low );
5175
5176}
5177
67b7861d
AJ
5178/*----------------------------------------------------------------------------
5179| Returns 1 if the extended double-precision floating-point values `a' and `b'
f5a64251
AJ
5180| cannot be compared, and 0 otherwise. The invalid exception is raised if
5181| either operand is a NaN. The comparison is performed according to the
5182| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
67b7861d 5183*----------------------------------------------------------------------------*/
e5a41ffa 5184int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
67b7861d 5185{
d1eb8f2a
AD
5186 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5187 || (extractFloatx80Exp(a) == 0x7FFF
5188 && (uint64_t) (extractFloatx80Frac(a) << 1))
5189 || (extractFloatx80Exp(b) == 0x7FFF
5190 && (uint64_t) (extractFloatx80Frac(b) << 1))
67b7861d 5191 ) {
ff32e16e 5192 float_raise(float_flag_invalid, status);
67b7861d
AJ
5193 return 1;
5194 }
5195 return 0;
5196}
5197
158142c2 5198/*----------------------------------------------------------------------------
b689362d 5199| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5200| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5201| cause an exception. The comparison is performed according to the IEC/IEEE
5202| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5203*----------------------------------------------------------------------------*/
5204
e5a41ffa 5205int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5206{
5207
d1eb8f2a
AD
5208 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5209 float_raise(float_flag_invalid, status);
5210 return 0;
5211 }
158142c2 5212 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5213 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5214 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5215 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5216 ) {
af39bc8c
AM
5217 if (floatx80_is_signaling_nan(a, status)
5218 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5219 float_raise(float_flag_invalid, status);
b689362d 5220 }
158142c2
FB
5221 return 0;
5222 }
5223 return
5224 ( a.low == b.low )
5225 && ( ( a.high == b.high )
5226 || ( ( a.low == 0 )
bb98fe42 5227 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5228 );
5229
5230}
5231
5232/*----------------------------------------------------------------------------
5233| Returns 1 if the extended double-precision floating-point value `a' is less
5234| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
5235| do not cause an exception. Otherwise, the comparison is performed according
5236| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5237*----------------------------------------------------------------------------*/
5238
e5a41ffa 5239int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5240{
5241 flag aSign, bSign;
5242
d1eb8f2a
AD
5243 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5244 float_raise(float_flag_invalid, status);
5245 return 0;
5246 }
158142c2 5247 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5248 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5249 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5250 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5251 ) {
af39bc8c
AM
5252 if (floatx80_is_signaling_nan(a, status)
5253 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5254 float_raise(float_flag_invalid, status);
158142c2
FB
5255 }
5256 return 0;
5257 }
5258 aSign = extractFloatx80Sign( a );
5259 bSign = extractFloatx80Sign( b );
5260 if ( aSign != bSign ) {
5261 return
5262 aSign
bb98fe42 5263 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5264 == 0 );
5265 }
5266 return
5267 aSign ? le128( b.high, b.low, a.high, a.low )
5268 : le128( a.high, a.low, b.high, b.low );
5269
5270}
5271
5272/*----------------------------------------------------------------------------
5273| Returns 1 if the extended double-precision floating-point value `a' is less
5274| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
5275| an exception. Otherwise, the comparison is performed according to the
5276| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5277*----------------------------------------------------------------------------*/
5278
e5a41ffa 5279int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5280{
5281 flag aSign, bSign;
5282
d1eb8f2a
AD
5283 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5284 float_raise(float_flag_invalid, status);
5285 return 0;
5286 }
158142c2 5287 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5288 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5289 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5290 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5291 ) {
af39bc8c
AM
5292 if (floatx80_is_signaling_nan(a, status)
5293 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5294 float_raise(float_flag_invalid, status);
158142c2
FB
5295 }
5296 return 0;
5297 }
5298 aSign = extractFloatx80Sign( a );
5299 bSign = extractFloatx80Sign( b );
5300 if ( aSign != bSign ) {
5301 return
5302 aSign
bb98fe42 5303 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5304 != 0 );
5305 }
5306 return
5307 aSign ? lt128( b.high, b.low, a.high, a.low )
5308 : lt128( a.high, a.low, b.high, b.low );
5309
5310}
5311
67b7861d
AJ
5312/*----------------------------------------------------------------------------
5313| Returns 1 if the extended double-precision floating-point values `a' and `b'
5314| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
5315| The comparison is performed according to the IEC/IEEE Standard for Binary
5316| Floating-Point Arithmetic.
5317*----------------------------------------------------------------------------*/
e5a41ffa 5318int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
67b7861d 5319{
d1eb8f2a
AD
5320 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5321 float_raise(float_flag_invalid, status);
5322 return 1;
5323 }
67b7861d
AJ
5324 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5325 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5326 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5327 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5328 ) {
af39bc8c
AM
5329 if (floatx80_is_signaling_nan(a, status)
5330 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5331 float_raise(float_flag_invalid, status);
67b7861d
AJ
5332 }
5333 return 1;
5334 }
5335 return 0;
5336}
5337
158142c2
FB
5338/*----------------------------------------------------------------------------
5339| Returns the result of converting the quadruple-precision floating-point
5340| value `a' to the 32-bit two's complement integer format. The conversion
5341| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5342| Arithmetic---which means in particular that the conversion is rounded
5343| according to the current rounding mode. If `a' is a NaN, the largest
5344| positive integer is returned. Otherwise, if the conversion overflows, the
5345| largest integer with the same sign as `a' is returned.
5346*----------------------------------------------------------------------------*/
5347
f4014512 5348int32_t float128_to_int32(float128 a, float_status *status)
158142c2
FB
5349{
5350 flag aSign;
f4014512 5351 int32_t aExp, shiftCount;
bb98fe42 5352 uint64_t aSig0, aSig1;
158142c2
FB
5353
5354 aSig1 = extractFloat128Frac1( a );
5355 aSig0 = extractFloat128Frac0( a );
5356 aExp = extractFloat128Exp( a );
5357 aSign = extractFloat128Sign( a );
5358 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5359 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5360 aSig0 |= ( aSig1 != 0 );
5361 shiftCount = 0x4028 - aExp;
5362 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 5363 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
5364
5365}
5366
5367/*----------------------------------------------------------------------------
5368| Returns the result of converting the quadruple-precision floating-point
5369| value `a' to the 32-bit two's complement integer format. The conversion
5370| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5371| Arithmetic, except that the conversion is always rounded toward zero. If
5372| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
5373| conversion overflows, the largest integer with the same sign as `a' is
5374| returned.
5375*----------------------------------------------------------------------------*/
5376
f4014512 5377int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2
FB
5378{
5379 flag aSign;
f4014512 5380 int32_t aExp, shiftCount;
bb98fe42 5381 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 5382 int32_t z;
158142c2
FB
5383
5384 aSig1 = extractFloat128Frac1( a );
5385 aSig0 = extractFloat128Frac0( a );
5386 aExp = extractFloat128Exp( a );
5387 aSign = extractFloat128Sign( a );
5388 aSig0 |= ( aSig1 != 0 );
5389 if ( 0x401E < aExp ) {
5390 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5391 goto invalid;
5392 }
5393 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5394 if (aExp || aSig0) {
5395 status->float_exception_flags |= float_flag_inexact;
5396 }
158142c2
FB
5397 return 0;
5398 }
5399 aSig0 |= LIT64( 0x0001000000000000 );
5400 shiftCount = 0x402F - aExp;
5401 savedASig = aSig0;
5402 aSig0 >>= shiftCount;
5403 z = aSig0;
5404 if ( aSign ) z = - z;
5405 if ( ( z < 0 ) ^ aSign ) {
5406 invalid:
ff32e16e 5407 float_raise(float_flag_invalid, status);
bb98fe42 5408 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5409 }
5410 if ( ( aSig0<<shiftCount ) != savedASig ) {
a2f2d288 5411 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5412 }
5413 return z;
5414
5415}
5416
5417/*----------------------------------------------------------------------------
5418| Returns the result of converting the quadruple-precision floating-point
5419| value `a' to the 64-bit two's complement integer format. The conversion
5420| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5421| Arithmetic---which means in particular that the conversion is rounded
5422| according to the current rounding mode. If `a' is a NaN, the largest
5423| positive integer is returned. Otherwise, if the conversion overflows, the
5424| largest integer with the same sign as `a' is returned.
5425*----------------------------------------------------------------------------*/
5426
f42c2224 5427int64_t float128_to_int64(float128 a, float_status *status)
158142c2
FB
5428{
5429 flag aSign;
f4014512 5430 int32_t aExp, shiftCount;
bb98fe42 5431 uint64_t aSig0, aSig1;
158142c2
FB
5432
5433 aSig1 = extractFloat128Frac1( a );
5434 aSig0 = extractFloat128Frac0( a );
5435 aExp = extractFloat128Exp( a );
5436 aSign = extractFloat128Sign( a );
5437 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5438 shiftCount = 0x402F - aExp;
5439 if ( shiftCount <= 0 ) {
5440 if ( 0x403E < aExp ) {
ff32e16e 5441 float_raise(float_flag_invalid, status);
158142c2
FB
5442 if ( ! aSign
5443 || ( ( aExp == 0x7FFF )
5444 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5445 )
5446 ) {
5447 return LIT64( 0x7FFFFFFFFFFFFFFF );
5448 }
bb98fe42 5449 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5450 }
5451 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5452 }
5453 else {
5454 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5455 }
ff32e16e 5456 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
5457
5458}
5459
5460/*----------------------------------------------------------------------------
5461| Returns the result of converting the quadruple-precision floating-point
5462| value `a' to the 64-bit two's complement integer format. The conversion
5463| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5464| Arithmetic, except that the conversion is always rounded toward zero.
5465| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
5466| the conversion overflows, the largest integer with the same sign as `a' is
5467| returned.
5468*----------------------------------------------------------------------------*/
5469
f42c2224 5470int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2
FB
5471{
5472 flag aSign;
f4014512 5473 int32_t aExp, shiftCount;
bb98fe42 5474 uint64_t aSig0, aSig1;
f42c2224 5475 int64_t z;
158142c2
FB
5476
5477 aSig1 = extractFloat128Frac1( a );
5478 aSig0 = extractFloat128Frac0( a );
5479 aExp = extractFloat128Exp( a );
5480 aSign = extractFloat128Sign( a );
5481 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5482 shiftCount = aExp - 0x402F;
5483 if ( 0 < shiftCount ) {
5484 if ( 0x403E <= aExp ) {
5485 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5486 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
5487 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
a2f2d288
PM
5488 if (aSig1) {
5489 status->float_exception_flags |= float_flag_inexact;
5490 }
158142c2
FB
5491 }
5492 else {
ff32e16e 5493 float_raise(float_flag_invalid, status);
158142c2
FB
5494 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
5495 return LIT64( 0x7FFFFFFFFFFFFFFF );
5496 }
5497 }
bb98fe42 5498 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5499 }
5500 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 5501 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
a2f2d288 5502 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5503 }
5504 }
5505 else {
5506 if ( aExp < 0x3FFF ) {
5507 if ( aExp | aSig0 | aSig1 ) {
a2f2d288 5508 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5509 }
5510 return 0;
5511 }
5512 z = aSig0>>( - shiftCount );
5513 if ( aSig1
bb98fe42 5514 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
a2f2d288 5515 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5516 }
5517 }
5518 if ( aSign ) z = - z;
5519 return z;
5520
5521}
5522
2e6d8568
BR
5523/*----------------------------------------------------------------------------
5524| Returns the result of converting the quadruple-precision floating-point value
5525| `a' to the 64-bit unsigned integer format. The conversion is
5526| performed according to the IEC/IEEE Standard for Binary Floating-Point
5527| Arithmetic---which means in particular that the conversion is rounded
5528| according to the current rounding mode. If `a' is a NaN, the largest
5529| positive integer is returned. If the conversion overflows, the
5530| largest unsigned integer is returned. If 'a' is negative, the value is
5531| rounded and zero is returned; negative values that do not round to zero
5532| will raise the inexact exception.
5533*----------------------------------------------------------------------------*/
5534
5535uint64_t float128_to_uint64(float128 a, float_status *status)
5536{
5537 flag aSign;
5538 int aExp;
5539 int shiftCount;
5540 uint64_t aSig0, aSig1;
5541
5542 aSig0 = extractFloat128Frac0(a);
5543 aSig1 = extractFloat128Frac1(a);
5544 aExp = extractFloat128Exp(a);
5545 aSign = extractFloat128Sign(a);
5546 if (aSign && (aExp > 0x3FFE)) {
5547 float_raise(float_flag_invalid, status);
5548 if (float128_is_any_nan(a)) {
5549 return LIT64(0xFFFFFFFFFFFFFFFF);
5550 } else {
5551 return 0;
5552 }
5553 }
5554 if (aExp) {
5555 aSig0 |= LIT64(0x0001000000000000);
5556 }
5557 shiftCount = 0x402F - aExp;
5558 if (shiftCount <= 0) {
5559 if (0x403E < aExp) {
5560 float_raise(float_flag_invalid, status);
5561 return LIT64(0xFFFFFFFFFFFFFFFF);
5562 }
5563 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
5564 } else {
5565 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
5566 }
5567 return roundAndPackUint64(aSign, aSig0, aSig1, status);
5568}
5569
5570uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
5571{
5572 uint64_t v;
5573 signed char current_rounding_mode = status->float_rounding_mode;
5574
5575 set_float_rounding_mode(float_round_to_zero, status);
5576 v = float128_to_uint64(a, status);
5577 set_float_rounding_mode(current_rounding_mode, status);
5578
5579 return v;
5580}
5581
158142c2
FB
5582/*----------------------------------------------------------------------------
5583| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
5584| value `a' to the 32-bit unsigned integer format. The conversion
5585| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5586| Arithmetic except that the conversion is always rounded toward zero.
5587| If `a' is a NaN, the largest positive integer is returned. Otherwise,
5588| if the conversion overflows, the largest unsigned integer is returned.
5589| If 'a' is negative, the value is rounded and zero is returned; negative
5590| values that do not round to zero will raise the inexact exception.
5591*----------------------------------------------------------------------------*/
5592
5593uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
5594{
5595 uint64_t v;
5596 uint32_t res;
5597 int old_exc_flags = get_float_exception_flags(status);
5598
5599 v = float128_to_uint64_round_to_zero(a, status);
5600 if (v > 0xffffffff) {
5601 res = 0xffffffff;
5602 } else {
5603 return v;
5604 }
5605 set_float_exception_flags(old_exc_flags, status);
5606 float_raise(float_flag_invalid, status);
5607 return res;
5608}
5609
5610/*----------------------------------------------------------------------------
5611| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
5612| value `a' to the single-precision floating-point format. The conversion
5613| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5614| Arithmetic.
5615*----------------------------------------------------------------------------*/
5616
e5a41ffa 5617float32 float128_to_float32(float128 a, float_status *status)
158142c2
FB
5618{
5619 flag aSign;
f4014512 5620 int32_t aExp;
bb98fe42
AF
5621 uint64_t aSig0, aSig1;
5622 uint32_t zSig;
158142c2
FB
5623
5624 aSig1 = extractFloat128Frac1( a );
5625 aSig0 = extractFloat128Frac0( a );
5626 aExp = extractFloat128Exp( a );
5627 aSign = extractFloat128Sign( a );
5628 if ( aExp == 0x7FFF ) {
5629 if ( aSig0 | aSig1 ) {
ff32e16e 5630 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
5631 }
5632 return packFloat32( aSign, 0xFF, 0 );
5633 }
5634 aSig0 |= ( aSig1 != 0 );
5635 shift64RightJamming( aSig0, 18, &aSig0 );
5636 zSig = aSig0;
5637 if ( aExp || zSig ) {
5638 zSig |= 0x40000000;
5639 aExp -= 0x3F81;
5640 }
ff32e16e 5641 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
5642
5643}
5644
5645/*----------------------------------------------------------------------------
5646| Returns the result of converting the quadruple-precision floating-point
5647| value `a' to the double-precision floating-point format. The conversion
5648| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5649| Arithmetic.
5650*----------------------------------------------------------------------------*/
5651
e5a41ffa 5652float64 float128_to_float64(float128 a, float_status *status)
158142c2
FB
5653{
5654 flag aSign;
f4014512 5655 int32_t aExp;
bb98fe42 5656 uint64_t aSig0, aSig1;
158142c2
FB
5657
5658 aSig1 = extractFloat128Frac1( a );
5659 aSig0 = extractFloat128Frac0( a );
5660 aExp = extractFloat128Exp( a );
5661 aSign = extractFloat128Sign( a );
5662 if ( aExp == 0x7FFF ) {
5663 if ( aSig0 | aSig1 ) {
ff32e16e 5664 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
5665 }
5666 return packFloat64( aSign, 0x7FF, 0 );
5667 }
5668 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5669 aSig0 |= ( aSig1 != 0 );
5670 if ( aExp || aSig0 ) {
5671 aSig0 |= LIT64( 0x4000000000000000 );
5672 aExp -= 0x3C01;
5673 }
ff32e16e 5674 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
5675
5676}
5677
158142c2
FB
5678/*----------------------------------------------------------------------------
5679| Returns the result of converting the quadruple-precision floating-point
5680| value `a' to the extended double-precision floating-point format. The
5681| conversion is performed according to the IEC/IEEE Standard for Binary
5682| Floating-Point Arithmetic.
5683*----------------------------------------------------------------------------*/
5684
e5a41ffa 5685floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2
FB
5686{
5687 flag aSign;
f4014512 5688 int32_t aExp;
bb98fe42 5689 uint64_t aSig0, aSig1;
158142c2
FB
5690
5691 aSig1 = extractFloat128Frac1( a );
5692 aSig0 = extractFloat128Frac0( a );
5693 aExp = extractFloat128Exp( a );
5694 aSign = extractFloat128Sign( a );
5695 if ( aExp == 0x7FFF ) {
5696 if ( aSig0 | aSig1 ) {
ff32e16e 5697 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
158142c2 5698 }
0f605c88
LV
5699 return packFloatx80(aSign, floatx80_infinity_high,
5700 floatx80_infinity_low);
158142c2
FB
5701 }
5702 if ( aExp == 0 ) {
5703 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
5704 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5705 }
5706 else {
5707 aSig0 |= LIT64( 0x0001000000000000 );
5708 }
5709 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 5710 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
5711
5712}
5713
158142c2
FB
5714/*----------------------------------------------------------------------------
5715| Rounds the quadruple-precision floating-point value `a' to an integer, and
5716| returns the result as a quadruple-precision floating-point value. The
5717| operation is performed according to the IEC/IEEE Standard for Binary
5718| Floating-Point Arithmetic.
5719*----------------------------------------------------------------------------*/
5720
e5a41ffa 5721float128 float128_round_to_int(float128 a, float_status *status)
158142c2
FB
5722{
5723 flag aSign;
f4014512 5724 int32_t aExp;
bb98fe42 5725 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5726 float128 z;
5727
5728 aExp = extractFloat128Exp( a );
5729 if ( 0x402F <= aExp ) {
5730 if ( 0x406F <= aExp ) {
5731 if ( ( aExp == 0x7FFF )
5732 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
5733 ) {
ff32e16e 5734 return propagateFloat128NaN(a, a, status);
158142c2
FB
5735 }
5736 return a;
5737 }
5738 lastBitMask = 1;
5739 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
5740 roundBitsMask = lastBitMask - 1;
5741 z = a;
a2f2d288 5742 switch (status->float_rounding_mode) {
dc355b76 5743 case float_round_nearest_even:
158142c2
FB
5744 if ( lastBitMask ) {
5745 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
5746 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
5747 }
5748 else {
bb98fe42 5749 if ( (int64_t) z.low < 0 ) {
158142c2 5750 ++z.high;
bb98fe42 5751 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
5752 }
5753 }
dc355b76 5754 break;
f9288a76
PM
5755 case float_round_ties_away:
5756 if (lastBitMask) {
5757 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
5758 } else {
5759 if ((int64_t) z.low < 0) {
5760 ++z.high;
5761 }
5762 }
5763 break;
dc355b76
PM
5764 case float_round_to_zero:
5765 break;
5766 case float_round_up:
5767 if (!extractFloat128Sign(z)) {
5768 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
5769 }
5770 break;
5771 case float_round_down:
5772 if (extractFloat128Sign(z)) {
5773 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 5774 }
dc355b76
PM
5775 break;
5776 default:
5777 abort();
158142c2
FB
5778 }
5779 z.low &= ~ roundBitsMask;
5780 }
5781 else {
5782 if ( aExp < 0x3FFF ) {
bb98fe42 5783 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
a2f2d288 5784 status->float_exception_flags |= float_flag_inexact;
158142c2 5785 aSign = extractFloat128Sign( a );
a2f2d288 5786 switch (status->float_rounding_mode) {
158142c2
FB
5787 case float_round_nearest_even:
5788 if ( ( aExp == 0x3FFE )
5789 && ( extractFloat128Frac0( a )
5790 | extractFloat128Frac1( a ) )
5791 ) {
5792 return packFloat128( aSign, 0x3FFF, 0, 0 );
5793 }
5794 break;
f9288a76
PM
5795 case float_round_ties_away:
5796 if (aExp == 0x3FFE) {
5797 return packFloat128(aSign, 0x3FFF, 0, 0);
5798 }
5799 break;
158142c2
FB
5800 case float_round_down:
5801 return
5802 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
5803 : packFloat128( 0, 0, 0, 0 );
5804 case float_round_up:
5805 return
5806 aSign ? packFloat128( 1, 0, 0, 0 )
5807 : packFloat128( 0, 0x3FFF, 0, 0 );
5808 }
5809 return packFloat128( aSign, 0, 0, 0 );
5810 }
5811 lastBitMask = 1;
5812 lastBitMask <<= 0x402F - aExp;
5813 roundBitsMask = lastBitMask - 1;
5814 z.low = 0;
5815 z.high = a.high;
a2f2d288 5816 switch (status->float_rounding_mode) {
dc355b76 5817 case float_round_nearest_even:
158142c2
FB
5818 z.high += lastBitMask>>1;
5819 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
5820 z.high &= ~ lastBitMask;
5821 }
dc355b76 5822 break;
f9288a76
PM
5823 case float_round_ties_away:
5824 z.high += lastBitMask>>1;
5825 break;
dc355b76
PM
5826 case float_round_to_zero:
5827 break;
5828 case float_round_up:
5829 if (!extractFloat128Sign(z)) {
158142c2
FB
5830 z.high |= ( a.low != 0 );
5831 z.high += roundBitsMask;
5832 }
dc355b76
PM
5833 break;
5834 case float_round_down:
5835 if (extractFloat128Sign(z)) {
5836 z.high |= (a.low != 0);
5837 z.high += roundBitsMask;
5838 }
5839 break;
5840 default:
5841 abort();
158142c2
FB
5842 }
5843 z.high &= ~ roundBitsMask;
5844 }
5845 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
a2f2d288 5846 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5847 }
5848 return z;
5849
5850}
5851
5852/*----------------------------------------------------------------------------
5853| Returns the result of adding the absolute values of the quadruple-precision
5854| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
5855| before being returned. `zSign' is ignored if the result is a NaN.
5856| The addition is performed according to the IEC/IEEE Standard for Binary
5857| Floating-Point Arithmetic.
5858*----------------------------------------------------------------------------*/
5859
e5a41ffa
PM
5860static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
5861 float_status *status)
158142c2 5862{
f4014512 5863 int32_t aExp, bExp, zExp;
bb98fe42 5864 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 5865 int32_t expDiff;
158142c2
FB
5866
5867 aSig1 = extractFloat128Frac1( a );
5868 aSig0 = extractFloat128Frac0( a );
5869 aExp = extractFloat128Exp( a );
5870 bSig1 = extractFloat128Frac1( b );
5871 bSig0 = extractFloat128Frac0( b );
5872 bExp = extractFloat128Exp( b );
5873 expDiff = aExp - bExp;
5874 if ( 0 < expDiff ) {
5875 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5876 if (aSig0 | aSig1) {
5877 return propagateFloat128NaN(a, b, status);
5878 }
158142c2
FB
5879 return a;
5880 }
5881 if ( bExp == 0 ) {
5882 --expDiff;
5883 }
5884 else {
5885 bSig0 |= LIT64( 0x0001000000000000 );
5886 }
5887 shift128ExtraRightJamming(
5888 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
5889 zExp = aExp;
5890 }
5891 else if ( expDiff < 0 ) {
5892 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5893 if (bSig0 | bSig1) {
5894 return propagateFloat128NaN(a, b, status);
5895 }
158142c2
FB
5896 return packFloat128( zSign, 0x7FFF, 0, 0 );
5897 }
5898 if ( aExp == 0 ) {
5899 ++expDiff;
5900 }
5901 else {
5902 aSig0 |= LIT64( 0x0001000000000000 );
5903 }
5904 shift128ExtraRightJamming(
5905 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
5906 zExp = bExp;
5907 }
5908 else {
5909 if ( aExp == 0x7FFF ) {
5910 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 5911 return propagateFloat128NaN(a, b, status);
158142c2
FB
5912 }
5913 return a;
5914 }
5915 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 5916 if ( aExp == 0 ) {
a2f2d288 5917 if (status->flush_to_zero) {
e6afc87f 5918 if (zSig0 | zSig1) {
ff32e16e 5919 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
5920 }
5921 return packFloat128(zSign, 0, 0, 0);
5922 }
fe76d976
PB
5923 return packFloat128( zSign, 0, zSig0, zSig1 );
5924 }
158142c2
FB
5925 zSig2 = 0;
5926 zSig0 |= LIT64( 0x0002000000000000 );
5927 zExp = aExp;
5928 goto shiftRight1;
5929 }
5930 aSig0 |= LIT64( 0x0001000000000000 );
5931 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5932 --zExp;
5933 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
5934 ++zExp;
5935 shiftRight1:
5936 shift128ExtraRightJamming(
5937 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5938 roundAndPack:
ff32e16e 5939 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
5940
5941}
5942
5943/*----------------------------------------------------------------------------
5944| Returns the result of subtracting the absolute values of the quadruple-
5945| precision floating-point values `a' and `b'. If `zSign' is 1, the
5946| difference is negated before being returned. `zSign' is ignored if the
5947| result is a NaN. The subtraction is performed according to the IEC/IEEE
5948| Standard for Binary Floating-Point Arithmetic.
5949*----------------------------------------------------------------------------*/
5950
e5a41ffa
PM
5951static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
5952 float_status *status)
158142c2 5953{
f4014512 5954 int32_t aExp, bExp, zExp;
bb98fe42 5955 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 5956 int32_t expDiff;
158142c2
FB
5957
5958 aSig1 = extractFloat128Frac1( a );
5959 aSig0 = extractFloat128Frac0( a );
5960 aExp = extractFloat128Exp( a );
5961 bSig1 = extractFloat128Frac1( b );
5962 bSig0 = extractFloat128Frac0( b );
5963 bExp = extractFloat128Exp( b );
5964 expDiff = aExp - bExp;
5965 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5966 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
5967 if ( 0 < expDiff ) goto aExpBigger;
5968 if ( expDiff < 0 ) goto bExpBigger;
5969 if ( aExp == 0x7FFF ) {
5970 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 5971 return propagateFloat128NaN(a, b, status);
158142c2 5972 }
ff32e16e 5973 float_raise(float_flag_invalid, status);
af39bc8c 5974 return float128_default_nan(status);
158142c2
FB
5975 }
5976 if ( aExp == 0 ) {
5977 aExp = 1;
5978 bExp = 1;
5979 }
5980 if ( bSig0 < aSig0 ) goto aBigger;
5981 if ( aSig0 < bSig0 ) goto bBigger;
5982 if ( bSig1 < aSig1 ) goto aBigger;
5983 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
5984 return packFloat128(status->float_rounding_mode == float_round_down,
5985 0, 0, 0);
158142c2
FB
5986 bExpBigger:
5987 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5988 if (bSig0 | bSig1) {
5989 return propagateFloat128NaN(a, b, status);
5990 }
158142c2
FB
5991 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
5992 }
5993 if ( aExp == 0 ) {
5994 ++expDiff;
5995 }
5996 else {
5997 aSig0 |= LIT64( 0x4000000000000000 );
5998 }
5999 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6000 bSig0 |= LIT64( 0x4000000000000000 );
6001 bBigger:
6002 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
6003 zExp = bExp;
6004 zSign ^= 1;
6005 goto normalizeRoundAndPack;
6006 aExpBigger:
6007 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6008 if (aSig0 | aSig1) {
6009 return propagateFloat128NaN(a, b, status);
6010 }
158142c2
FB
6011 return a;
6012 }
6013 if ( bExp == 0 ) {
6014 --expDiff;
6015 }
6016 else {
6017 bSig0 |= LIT64( 0x4000000000000000 );
6018 }
6019 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
6020 aSig0 |= LIT64( 0x4000000000000000 );
6021 aBigger:
6022 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6023 zExp = aExp;
6024 normalizeRoundAndPack:
6025 --zExp;
ff32e16e
PM
6026 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
6027 status);
158142c2
FB
6028
6029}
6030
6031/*----------------------------------------------------------------------------
6032| Returns the result of adding the quadruple-precision floating-point values
6033| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
6034| for Binary Floating-Point Arithmetic.
6035*----------------------------------------------------------------------------*/
6036
e5a41ffa 6037float128 float128_add(float128 a, float128 b, float_status *status)
158142c2
FB
6038{
6039 flag aSign, bSign;
6040
6041 aSign = extractFloat128Sign( a );
6042 bSign = extractFloat128Sign( b );
6043 if ( aSign == bSign ) {
ff32e16e 6044 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6045 }
6046 else {
ff32e16e 6047 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6048 }
6049
6050}
6051
6052/*----------------------------------------------------------------------------
6053| Returns the result of subtracting the quadruple-precision floating-point
6054| values `a' and `b'. The operation is performed according to the IEC/IEEE
6055| Standard for Binary Floating-Point Arithmetic.
6056*----------------------------------------------------------------------------*/
6057
e5a41ffa 6058float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2
FB
6059{
6060 flag aSign, bSign;
6061
6062 aSign = extractFloat128Sign( a );
6063 bSign = extractFloat128Sign( b );
6064 if ( aSign == bSign ) {
ff32e16e 6065 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6066 }
6067 else {
ff32e16e 6068 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6069 }
6070
6071}
6072
6073/*----------------------------------------------------------------------------
6074| Returns the result of multiplying the quadruple-precision floating-point
6075| values `a' and `b'. The operation is performed according to the IEC/IEEE
6076| Standard for Binary Floating-Point Arithmetic.
6077*----------------------------------------------------------------------------*/
6078
e5a41ffa 6079float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2
FB
6080{
6081 flag aSign, bSign, zSign;
f4014512 6082 int32_t aExp, bExp, zExp;
bb98fe42 6083 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
6084
6085 aSig1 = extractFloat128Frac1( a );
6086 aSig0 = extractFloat128Frac0( a );
6087 aExp = extractFloat128Exp( a );
6088 aSign = extractFloat128Sign( a );
6089 bSig1 = extractFloat128Frac1( b );
6090 bSig0 = extractFloat128Frac0( b );
6091 bExp = extractFloat128Exp( b );
6092 bSign = extractFloat128Sign( b );
6093 zSign = aSign ^ bSign;
6094 if ( aExp == 0x7FFF ) {
6095 if ( ( aSig0 | aSig1 )
6096 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6097 return propagateFloat128NaN(a, b, status);
158142c2
FB
6098 }
6099 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6100 return packFloat128( zSign, 0x7FFF, 0, 0 );
6101 }
6102 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6103 if (bSig0 | bSig1) {
6104 return propagateFloat128NaN(a, b, status);
6105 }
158142c2
FB
6106 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6107 invalid:
ff32e16e 6108 float_raise(float_flag_invalid, status);
af39bc8c 6109 return float128_default_nan(status);
158142c2
FB
6110 }
6111 return packFloat128( zSign, 0x7FFF, 0, 0 );
6112 }
6113 if ( aExp == 0 ) {
6114 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6115 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6116 }
6117 if ( bExp == 0 ) {
6118 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6119 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6120 }
6121 zExp = aExp + bExp - 0x4000;
6122 aSig0 |= LIT64( 0x0001000000000000 );
6123 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6124 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6125 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6126 zSig2 |= ( zSig3 != 0 );
6127 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6128 shift128ExtraRightJamming(
6129 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6130 ++zExp;
6131 }
ff32e16e 6132 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6133
6134}
6135
6136/*----------------------------------------------------------------------------
6137| Returns the result of dividing the quadruple-precision floating-point value
6138| `a' by the corresponding value `b'. The operation is performed according to
6139| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6140*----------------------------------------------------------------------------*/
6141
e5a41ffa 6142float128 float128_div(float128 a, float128 b, float_status *status)
158142c2
FB
6143{
6144 flag aSign, bSign, zSign;
f4014512 6145 int32_t aExp, bExp, zExp;
bb98fe42
AF
6146 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6147 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6148
6149 aSig1 = extractFloat128Frac1( a );
6150 aSig0 = extractFloat128Frac0( a );
6151 aExp = extractFloat128Exp( a );
6152 aSign = extractFloat128Sign( a );
6153 bSig1 = extractFloat128Frac1( b );
6154 bSig0 = extractFloat128Frac0( b );
6155 bExp = extractFloat128Exp( b );
6156 bSign = extractFloat128Sign( b );
6157 zSign = aSign ^ bSign;
6158 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6159 if (aSig0 | aSig1) {
6160 return propagateFloat128NaN(a, b, status);
6161 }
158142c2 6162 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6163 if (bSig0 | bSig1) {
6164 return propagateFloat128NaN(a, b, status);
6165 }
158142c2
FB
6166 goto invalid;
6167 }
6168 return packFloat128( zSign, 0x7FFF, 0, 0 );
6169 }
6170 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6171 if (bSig0 | bSig1) {
6172 return propagateFloat128NaN(a, b, status);
6173 }
158142c2
FB
6174 return packFloat128( zSign, 0, 0, 0 );
6175 }
6176 if ( bExp == 0 ) {
6177 if ( ( bSig0 | bSig1 ) == 0 ) {
6178 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6179 invalid:
ff32e16e 6180 float_raise(float_flag_invalid, status);
af39bc8c 6181 return float128_default_nan(status);
158142c2 6182 }
ff32e16e 6183 float_raise(float_flag_divbyzero, status);
158142c2
FB
6184 return packFloat128( zSign, 0x7FFF, 0, 0 );
6185 }
6186 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6187 }
6188 if ( aExp == 0 ) {
6189 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6190 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6191 }
6192 zExp = aExp - bExp + 0x3FFD;
6193 shortShift128Left(
6194 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6195 shortShift128Left(
6196 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6197 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6198 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6199 ++zExp;
6200 }
6201 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6202 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6203 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 6204 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6205 --zSig0;
6206 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6207 }
6208 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6209 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6210 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6211 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6212 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6213 --zSig1;
6214 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6215 }
6216 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6217 }
6218 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 6219 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6220
6221}
6222
6223/*----------------------------------------------------------------------------
6224| Returns the remainder of the quadruple-precision floating-point value `a'
6225| with respect to the corresponding value `b'. The operation is performed
6226| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6227*----------------------------------------------------------------------------*/
6228
e5a41ffa 6229float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 6230{
ed086f3d 6231 flag aSign, zSign;
f4014512 6232 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6233 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6234 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6235 int64_t sigMean0;
158142c2
FB
6236
6237 aSig1 = extractFloat128Frac1( a );
6238 aSig0 = extractFloat128Frac0( a );
6239 aExp = extractFloat128Exp( a );
6240 aSign = extractFloat128Sign( a );
6241 bSig1 = extractFloat128Frac1( b );
6242 bSig0 = extractFloat128Frac0( b );
6243 bExp = extractFloat128Exp( b );
158142c2
FB
6244 if ( aExp == 0x7FFF ) {
6245 if ( ( aSig0 | aSig1 )
6246 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6247 return propagateFloat128NaN(a, b, status);
158142c2
FB
6248 }
6249 goto invalid;
6250 }
6251 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6252 if (bSig0 | bSig1) {
6253 return propagateFloat128NaN(a, b, status);
6254 }
158142c2
FB
6255 return a;
6256 }
6257 if ( bExp == 0 ) {
6258 if ( ( bSig0 | bSig1 ) == 0 ) {
6259 invalid:
ff32e16e 6260 float_raise(float_flag_invalid, status);
af39bc8c 6261 return float128_default_nan(status);
158142c2
FB
6262 }
6263 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6264 }
6265 if ( aExp == 0 ) {
6266 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6267 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6268 }
6269 expDiff = aExp - bExp;
6270 if ( expDiff < -1 ) return a;
6271 shortShift128Left(
6272 aSig0 | LIT64( 0x0001000000000000 ),
6273 aSig1,
6274 15 - ( expDiff < 0 ),
6275 &aSig0,
6276 &aSig1
6277 );
6278 shortShift128Left(
6279 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6280 q = le128( bSig0, bSig1, aSig0, aSig1 );
6281 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6282 expDiff -= 64;
6283 while ( 0 < expDiff ) {
6284 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6285 q = ( 4 < q ) ? q - 4 : 0;
6286 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6287 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6288 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6289 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6290 expDiff -= 61;
6291 }
6292 if ( -64 < expDiff ) {
6293 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6294 q = ( 4 < q ) ? q - 4 : 0;
6295 q >>= - expDiff;
6296 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6297 expDiff += 52;
6298 if ( expDiff < 0 ) {
6299 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6300 }
6301 else {
6302 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6303 }
6304 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6305 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6306 }
6307 else {
6308 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6309 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6310 }
6311 do {
6312 alternateASig0 = aSig0;
6313 alternateASig1 = aSig1;
6314 ++q;
6315 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 6316 } while ( 0 <= (int64_t) aSig0 );
158142c2 6317 add128(
bb98fe42 6318 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
6319 if ( ( sigMean0 < 0 )
6320 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6321 aSig0 = alternateASig0;
6322 aSig1 = alternateASig1;
6323 }
bb98fe42 6324 zSign = ( (int64_t) aSig0 < 0 );
158142c2 6325 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
6326 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6327 status);
158142c2
FB
6328}
6329
6330/*----------------------------------------------------------------------------
6331| Returns the square root of the quadruple-precision floating-point value `a'.
6332| The operation is performed according to the IEC/IEEE Standard for Binary
6333| Floating-Point Arithmetic.
6334*----------------------------------------------------------------------------*/
6335
e5a41ffa 6336float128 float128_sqrt(float128 a, float_status *status)
158142c2
FB
6337{
6338 flag aSign;
f4014512 6339 int32_t aExp, zExp;
bb98fe42
AF
6340 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6341 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6342
6343 aSig1 = extractFloat128Frac1( a );
6344 aSig0 = extractFloat128Frac0( a );
6345 aExp = extractFloat128Exp( a );
6346 aSign = extractFloat128Sign( a );
6347 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6348 if (aSig0 | aSig1) {
6349 return propagateFloat128NaN(a, a, status);
6350 }
158142c2
FB
6351 if ( ! aSign ) return a;
6352 goto invalid;
6353 }
6354 if ( aSign ) {
6355 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6356 invalid:
ff32e16e 6357 float_raise(float_flag_invalid, status);
af39bc8c 6358 return float128_default_nan(status);
158142c2
FB
6359 }
6360 if ( aExp == 0 ) {
6361 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6362 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6363 }
6364 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6365 aSig0 |= LIT64( 0x0001000000000000 );
6366 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6367 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6368 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6369 doubleZSig0 = zSig0<<1;
6370 mul64To128( zSig0, zSig0, &term0, &term1 );
6371 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6372 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6373 --zSig0;
6374 doubleZSig0 -= 2;
6375 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6376 }
6377 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6378 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6379 if ( zSig1 == 0 ) zSig1 = 1;
6380 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6381 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6382 mul64To128( zSig1, zSig1, &term2, &term3 );
6383 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6384 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6385 --zSig1;
6386 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6387 term3 |= 1;
6388 term2 |= doubleZSig0;
6389 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6390 }
6391 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6392 }
6393 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 6394 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6395
6396}
6397
6398/*----------------------------------------------------------------------------
6399| Returns 1 if the quadruple-precision floating-point value `a' is equal to
b689362d
AJ
6400| the corresponding value `b', and 0 otherwise. The invalid exception is
6401| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
6402| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6403*----------------------------------------------------------------------------*/
6404
e5a41ffa 6405int float128_eq(float128 a, float128 b, float_status *status)
158142c2
FB
6406{
6407
6408 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6409 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6410 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6411 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6412 ) {
ff32e16e 6413 float_raise(float_flag_invalid, status);
158142c2
FB
6414 return 0;
6415 }
6416 return
6417 ( a.low == b.low )
6418 && ( ( a.high == b.high )
6419 || ( ( a.low == 0 )
bb98fe42 6420 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6421 );
6422
6423}
6424
6425/*----------------------------------------------------------------------------
6426| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6427| or equal to the corresponding value `b', and 0 otherwise. The invalid
6428| exception is raised if either operand is a NaN. The comparison is performed
6429| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6430*----------------------------------------------------------------------------*/
6431
e5a41ffa 6432int float128_le(float128 a, float128 b, float_status *status)
158142c2
FB
6433{
6434 flag aSign, bSign;
6435
6436 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6437 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6438 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6439 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6440 ) {
ff32e16e 6441 float_raise(float_flag_invalid, status);
158142c2
FB
6442 return 0;
6443 }
6444 aSign = extractFloat128Sign( a );
6445 bSign = extractFloat128Sign( b );
6446 if ( aSign != bSign ) {
6447 return
6448 aSign
bb98fe42 6449 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6450 == 0 );
6451 }
6452 return
6453 aSign ? le128( b.high, b.low, a.high, a.low )
6454 : le128( a.high, a.low, b.high, b.low );
6455
6456}
6457
6458/*----------------------------------------------------------------------------
6459| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6460| the corresponding value `b', and 0 otherwise. The invalid exception is
6461| raised if either operand is a NaN. The comparison is performed according
6462| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6463*----------------------------------------------------------------------------*/
6464
e5a41ffa 6465int float128_lt(float128 a, float128 b, float_status *status)
158142c2
FB
6466{
6467 flag aSign, bSign;
6468
6469 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6470 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6471 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6472 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6473 ) {
ff32e16e 6474 float_raise(float_flag_invalid, status);
158142c2
FB
6475 return 0;
6476 }
6477 aSign = extractFloat128Sign( a );
6478 bSign = extractFloat128Sign( b );
6479 if ( aSign != bSign ) {
6480 return
6481 aSign
bb98fe42 6482 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6483 != 0 );
6484 }
6485 return
6486 aSign ? lt128( b.high, b.low, a.high, a.low )
6487 : lt128( a.high, a.low, b.high, b.low );
6488
6489}
6490
67b7861d
AJ
6491/*----------------------------------------------------------------------------
6492| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
f5a64251
AJ
6493| be compared, and 0 otherwise. The invalid exception is raised if either
6494| operand is a NaN. The comparison is performed according to the IEC/IEEE
6495| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
6496*----------------------------------------------------------------------------*/
6497
e5a41ffa 6498int float128_unordered(float128 a, float128 b, float_status *status)
67b7861d
AJ
6499{
6500 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6501 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6502 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6503 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6504 ) {
ff32e16e 6505 float_raise(float_flag_invalid, status);
67b7861d
AJ
6506 return 1;
6507 }
6508 return 0;
6509}
6510
158142c2
FB
6511/*----------------------------------------------------------------------------
6512| Returns 1 if the quadruple-precision floating-point value `a' is equal to
f5a64251
AJ
6513| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6514| exception. The comparison is performed according to the IEC/IEEE Standard
6515| for Binary Floating-Point Arithmetic.
158142c2
FB
6516*----------------------------------------------------------------------------*/
6517
e5a41ffa 6518int float128_eq_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6519{
6520
6521 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6522 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6523 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6524 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6525 ) {
af39bc8c
AM
6526 if (float128_is_signaling_nan(a, status)
6527 || float128_is_signaling_nan(b, status)) {
ff32e16e 6528 float_raise(float_flag_invalid, status);
b689362d 6529 }
158142c2
FB
6530 return 0;
6531 }
6532 return
6533 ( a.low == b.low )
6534 && ( ( a.high == b.high )
6535 || ( ( a.low == 0 )
bb98fe42 6536 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6537 );
6538
6539}
6540
6541/*----------------------------------------------------------------------------
6542| Returns 1 if the quadruple-precision floating-point value `a' is less than
6543| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6544| cause an exception. Otherwise, the comparison is performed according to the
6545| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6546*----------------------------------------------------------------------------*/
6547
e5a41ffa 6548int float128_le_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6549{
6550 flag aSign, bSign;
6551
6552 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6553 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6554 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6555 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6556 ) {
af39bc8c
AM
6557 if (float128_is_signaling_nan(a, status)
6558 || float128_is_signaling_nan(b, status)) {
ff32e16e 6559 float_raise(float_flag_invalid, status);
158142c2
FB
6560 }
6561 return 0;
6562 }
6563 aSign = extractFloat128Sign( a );
6564 bSign = extractFloat128Sign( b );
6565 if ( aSign != bSign ) {
6566 return
6567 aSign
bb98fe42 6568 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6569 == 0 );
6570 }
6571 return
6572 aSign ? le128( b.high, b.low, a.high, a.low )
6573 : le128( a.high, a.low, b.high, b.low );
6574
6575}
6576
6577/*----------------------------------------------------------------------------
6578| Returns 1 if the quadruple-precision floating-point value `a' is less than
6579| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6580| exception. Otherwise, the comparison is performed according to the IEC/IEEE
6581| Standard for Binary Floating-Point Arithmetic.
6582*----------------------------------------------------------------------------*/
6583
e5a41ffa 6584int float128_lt_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6585{
6586 flag aSign, bSign;
6587
6588 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6589 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6590 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6591 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6592 ) {
af39bc8c
AM
6593 if (float128_is_signaling_nan(a, status)
6594 || float128_is_signaling_nan(b, status)) {
ff32e16e 6595 float_raise(float_flag_invalid, status);
158142c2
FB
6596 }
6597 return 0;
6598 }
6599 aSign = extractFloat128Sign( a );
6600 bSign = extractFloat128Sign( b );
6601 if ( aSign != bSign ) {
6602 return
6603 aSign
bb98fe42 6604 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6605 != 0 );
6606 }
6607 return
6608 aSign ? lt128( b.high, b.low, a.high, a.low )
6609 : lt128( a.high, a.low, b.high, b.low );
6610
6611}
6612
67b7861d
AJ
6613/*----------------------------------------------------------------------------
6614| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
6615| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
6616| comparison is performed according to the IEC/IEEE Standard for Binary
6617| Floating-Point Arithmetic.
6618*----------------------------------------------------------------------------*/
6619
e5a41ffa 6620int float128_unordered_quiet(float128 a, float128 b, float_status *status)
67b7861d
AJ
6621{
6622 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6623 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6624 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6625 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6626 ) {
af39bc8c
AM
6627 if (float128_is_signaling_nan(a, status)
6628 || float128_is_signaling_nan(b, status)) {
ff32e16e 6629 float_raise(float_flag_invalid, status);
67b7861d
AJ
6630 }
6631 return 1;
6632 }
6633 return 0;
6634}
6635
e5a41ffa
PM
6636static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
6637 int is_quiet, float_status *status)
f6714d36
AJ
6638{
6639 flag aSign, bSign;
6640
d1eb8f2a
AD
6641 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6642 float_raise(float_flag_invalid, status);
6643 return float_relation_unordered;
6644 }
f6714d36
AJ
6645 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6646 ( extractFloatx80Frac( a )<<1 ) ) ||
6647 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6648 ( extractFloatx80Frac( b )<<1 ) )) {
6649 if (!is_quiet ||
af39bc8c
AM
6650 floatx80_is_signaling_nan(a, status) ||
6651 floatx80_is_signaling_nan(b, status)) {
ff32e16e 6652 float_raise(float_flag_invalid, status);
f6714d36
AJ
6653 }
6654 return float_relation_unordered;
6655 }
6656 aSign = extractFloatx80Sign( a );
6657 bSign = extractFloatx80Sign( b );
6658 if ( aSign != bSign ) {
6659
6660 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6661 ( ( a.low | b.low ) == 0 ) ) {
6662 /* zero case */
6663 return float_relation_equal;
6664 } else {
6665 return 1 - (2 * aSign);
6666 }
6667 } else {
6668 if (a.low == b.low && a.high == b.high) {
6669 return float_relation_equal;
6670 } else {
6671 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6672 }
6673 }
6674}
6675
e5a41ffa 6676int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 6677{
ff32e16e 6678 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
6679}
6680
e5a41ffa 6681int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
f6714d36 6682{
ff32e16e 6683 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
6684}
6685
e5a41ffa
PM
6686static inline int float128_compare_internal(float128 a, float128 b,
6687 int is_quiet, float_status *status)
1f587329
BS
6688{
6689 flag aSign, bSign;
6690
6691 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6692 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
6693 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
6694 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
6695 if (!is_quiet ||
af39bc8c
AM
6696 float128_is_signaling_nan(a, status) ||
6697 float128_is_signaling_nan(b, status)) {
ff32e16e 6698 float_raise(float_flag_invalid, status);
1f587329
BS
6699 }
6700 return float_relation_unordered;
6701 }
6702 aSign = extractFloat128Sign( a );
6703 bSign = extractFloat128Sign( b );
6704 if ( aSign != bSign ) {
6705 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
6706 /* zero case */
6707 return float_relation_equal;
6708 } else {
6709 return 1 - (2 * aSign);
6710 }
6711 } else {
6712 if (a.low == b.low && a.high == b.high) {
6713 return float_relation_equal;
6714 } else {
6715 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6716 }
6717 }
6718}
6719
e5a41ffa 6720int float128_compare(float128 a, float128 b, float_status *status)
1f587329 6721{
ff32e16e 6722 return float128_compare_internal(a, b, 0, status);
1f587329
BS
6723}
6724
e5a41ffa 6725int float128_compare_quiet(float128 a, float128 b, float_status *status)
1f587329 6726{
ff32e16e 6727 return float128_compare_internal(a, b, 1, status);
1f587329
BS
6728}
6729
e5a41ffa 6730floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb
PB
6731{
6732 flag aSign;
326b9e98 6733 int32_t aExp;
bb98fe42 6734 uint64_t aSig;
9ee6e8bb 6735
d1eb8f2a
AD
6736 if (floatx80_invalid_encoding(a)) {
6737 float_raise(float_flag_invalid, status);
6738 return floatx80_default_nan(status);
6739 }
9ee6e8bb
PB
6740 aSig = extractFloatx80Frac( a );
6741 aExp = extractFloatx80Exp( a );
6742 aSign = extractFloatx80Sign( a );
6743
326b9e98
AJ
6744 if ( aExp == 0x7FFF ) {
6745 if ( aSig<<1 ) {
ff32e16e 6746 return propagateFloatx80NaN(a, a, status);
326b9e98 6747 }
9ee6e8bb
PB
6748 return a;
6749 }
326b9e98 6750
3c85c37f
PM
6751 if (aExp == 0) {
6752 if (aSig == 0) {
6753 return a;
6754 }
6755 aExp++;
6756 }
69397542 6757
326b9e98
AJ
6758 if (n > 0x10000) {
6759 n = 0x10000;
6760 } else if (n < -0x10000) {
6761 n = -0x10000;
6762 }
6763
9ee6e8bb 6764 aExp += n;
a2f2d288
PM
6765 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6766 aSign, aExp, aSig, 0, status);
9ee6e8bb 6767}
9ee6e8bb 6768
e5a41ffa 6769float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb
PB
6770{
6771 flag aSign;
326b9e98 6772 int32_t aExp;
bb98fe42 6773 uint64_t aSig0, aSig1;
9ee6e8bb
PB
6774
6775 aSig1 = extractFloat128Frac1( a );
6776 aSig0 = extractFloat128Frac0( a );
6777 aExp = extractFloat128Exp( a );
6778 aSign = extractFloat128Sign( a );
6779 if ( aExp == 0x7FFF ) {
326b9e98 6780 if ( aSig0 | aSig1 ) {
ff32e16e 6781 return propagateFloat128NaN(a, a, status);
326b9e98 6782 }
9ee6e8bb
PB
6783 return a;
6784 }
3c85c37f 6785 if (aExp != 0) {
69397542 6786 aSig0 |= LIT64( 0x0001000000000000 );
3c85c37f 6787 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 6788 return a;
3c85c37f
PM
6789 } else {
6790 aExp++;
6791 }
69397542 6792
326b9e98
AJ
6793 if (n > 0x10000) {
6794 n = 0x10000;
6795 } else if (n < -0x10000) {
6796 n = -0x10000;
6797 }
6798
69397542
PB
6799 aExp += n - 1;
6800 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 6801 , status);
9ee6e8bb
PB
6802
6803}