]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
tests/fp: add fp-bench
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
6fff2167 86#include "qemu/bitops.h"
6b4c305c 87#include "fpu/softfloat.h"
158142c2 88
dc355b76 89/* We only need stdlib for abort() */
dc355b76 90
158142c2
FB
91/*----------------------------------------------------------------------------
92| Primitive arithmetic functions, including multi-word arithmetic, and
93| division and square root approximations. (Can be specialized to target if
94| desired.)
95*----------------------------------------------------------------------------*/
88857aca 96#include "fpu/softfloat-macros.h"
158142c2 97
bb4d4bb3
PM
98/*----------------------------------------------------------------------------
99| Returns the fraction bits of the half-precision floating-point value `a'.
100*----------------------------------------------------------------------------*/
101
a49db98d 102static inline uint32_t extractFloat16Frac(float16 a)
bb4d4bb3
PM
103{
104 return float16_val(a) & 0x3ff;
105}
106
107/*----------------------------------------------------------------------------
108| Returns the exponent bits of the half-precision floating-point value `a'.
109*----------------------------------------------------------------------------*/
110
0c48262d 111static inline int extractFloat16Exp(float16 a)
bb4d4bb3
PM
112{
113 return (float16_val(a) >> 10) & 0x1f;
114}
115
d97544c9
AB
116/*----------------------------------------------------------------------------
117| Returns the fraction bits of the single-precision floating-point value `a'.
118*----------------------------------------------------------------------------*/
119
120static inline uint32_t extractFloat32Frac(float32 a)
121{
122 return float32_val(a) & 0x007FFFFF;
123}
124
125/*----------------------------------------------------------------------------
126| Returns the exponent bits of the single-precision floating-point value `a'.
127*----------------------------------------------------------------------------*/
128
129static inline int extractFloat32Exp(float32 a)
130{
131 return (float32_val(a) >> 23) & 0xFF;
132}
133
134/*----------------------------------------------------------------------------
135| Returns the sign bit of the single-precision floating-point value `a'.
136*----------------------------------------------------------------------------*/
137
138static inline flag extractFloat32Sign(float32 a)
139{
140 return float32_val(a) >> 31;
141}
142
143/*----------------------------------------------------------------------------
144| Returns the fraction bits of the double-precision floating-point value `a'.
145*----------------------------------------------------------------------------*/
146
147static inline uint64_t extractFloat64Frac(float64 a)
148{
149 return float64_val(a) & LIT64(0x000FFFFFFFFFFFFF);
150}
151
152/*----------------------------------------------------------------------------
153| Returns the exponent bits of the double-precision floating-point value `a'.
154*----------------------------------------------------------------------------*/
155
156static inline int extractFloat64Exp(float64 a)
157{
158 return (float64_val(a) >> 52) & 0x7FF;
159}
160
161/*----------------------------------------------------------------------------
162| Returns the sign bit of the double-precision floating-point value `a'.
163*----------------------------------------------------------------------------*/
164
165static inline flag extractFloat64Sign(float64 a)
166{
167 return float64_val(a) >> 63;
168}
169
a90119b5
AB
/*
 * Classification of a decomposed floating-point number.
 *
 * The enumerator order matters: both NaN kinds come last, so the test
 * "cls >= float_class_qnan" is true for any NaN and false otherwise.
 * The enum is packed so that it occupies as little storage as possible
 * inside FloatParts.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,   /* raw parts, not yet canonicalized */
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,           /* every value from here on is a NaN */
    float_class_snan,
} FloatClass;
183
247d1f21
RH
184/* Simple helpers for checking if, or what kind of, NaN we have */
185static inline __attribute__((unused)) bool is_nan(FloatClass c)
186{
187 return unlikely(c >= float_class_qnan);
188}
189
190static inline __attribute__((unused)) bool is_snan(FloatClass c)
191{
192 return c == float_class_snan;
193}
194
195static inline __attribute__((unused)) bool is_qnan(FloatClass c)
196{
197 return c == float_class_qnan;
198}
199
a90119b5
AB
200/*
201 * Structure holding all of the decomposed parts of a float. The
202 * exponent is unbiased and the fraction is normalized. All
203 * calculations are done with a 64 bit fraction and then rounded as
204 * appropriate for the final format.
205 *
206 * Thanks to the packed FloatClass a decent compiler should be able to
207 * fit the whole structure into registers and avoid using the stack
208 * for parameter passing.
209 */
210
211typedef struct {
212 uint64_t frac;
213 int32_t exp;
214 FloatClass cls;
215 bool sign;
216} FloatParts;
217
/*
 * Layout of the 64-bit decomposed fraction: the binary point sits at
 * bit 62, the implicit leading one of a normalized value is that same
 * bit, and bit 63 is left free to catch a carry out of an addition.
 */
#define DECOMPOSED_BINARY_POINT (64 - 2)
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1)

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the all-ones value of the exponent field, (1 << exp_size) - 1
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/*
 * Expand the designated initializers of a FloatFmt from the exponent
 * width E and the fraction width F.
 *
 * Both arguments are parenthesized on every use so that an expression
 * argument (for example "24 - 1") yields the same fields as the
 * equivalent literal.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = (E),                                           \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                         \
    .exp_max        = (1 << (E)) - 1,                                \
    .frac_size      = (F),                                           \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                 \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),       \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1), \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1, \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1

/* Half precision: 5 exponent bits, 10 fraction bits. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* Same field widths as float16_params, with the ARM alternative flag set. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* Single precision: 8 exponent bits, 23 fraction bits. */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* Double precision: 11 exponent bits, 52 fraction bits. */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};
276
6fff2167
AB
277/* Unpack a float to parts, but do not canonicalize. */
278static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
279{
280 const int sign_pos = fmt.frac_size + fmt.exp_size;
281
282 return (FloatParts) {
283 .cls = float_class_unclassified,
284 .sign = extract64(raw, sign_pos, 1),
285 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
286 .frac = extract64(raw, 0, fmt.frac_size),
287 };
288}
289
290static inline FloatParts float16_unpack_raw(float16 f)
291{
292 return unpack_raw(float16_params, f);
293}
294
295static inline FloatParts float32_unpack_raw(float32 f)
296{
297 return unpack_raw(float32_params, f);
298}
299
300static inline FloatParts float64_unpack_raw(float64 f)
301{
302 return unpack_raw(float64_params, f);
303}
304
305/* Pack a float from parts, but do not canonicalize. */
306static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
307{
308 const int sign_pos = fmt.frac_size + fmt.exp_size;
309 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
310 return deposit64(ret, sign_pos, 1, p.sign);
311}
312
313static inline float16 float16_pack_raw(FloatParts p)
314{
315 return make_float16(pack_raw(float16_params, p));
316}
317
318static inline float32 float32_pack_raw(FloatParts p)
319{
320 return make_float32(pack_raw(float32_params, p));
321}
322
323static inline float64 float64_pack_raw(FloatParts p)
324{
325 return make_float64(pack_raw(float64_params, p));
326}
327
0664335a
RH
328/*----------------------------------------------------------------------------
329| Functions and definitions to determine: (1) whether tininess for underflow
330| is detected before or after rounding by default, (2) what (if anything)
331| happens when exceptions are raised, (3) how signaling NaNs are distinguished
332| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
333| are propagated from function inputs to output. These details are target-
334| specific.
335*----------------------------------------------------------------------------*/
336#include "softfloat-specialize.h"
337
6fff2167 338/* Canonicalize EXP and FRAC, setting CLS. */
f9943c7f
EC
339static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
340 float_status *status)
6fff2167 341{
ca3a3d5a 342 if (part.exp == parm->exp_max && !parm->arm_althp) {
6fff2167
AB
343 if (part.frac == 0) {
344 part.cls = float_class_inf;
345 } else {
94933df0 346 part.frac <<= parm->frac_shift;
298b468e
RH
347 part.cls = (parts_is_snan_frac(part.frac, status)
348 ? float_class_snan : float_class_qnan);
6fff2167
AB
349 }
350 } else if (part.exp == 0) {
351 if (likely(part.frac == 0)) {
352 part.cls = float_class_zero;
353 } else if (status->flush_inputs_to_zero) {
354 float_raise(float_flag_input_denormal, status);
355 part.cls = float_class_zero;
356 part.frac = 0;
357 } else {
358 int shift = clz64(part.frac) - 1;
359 part.cls = float_class_normal;
360 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
361 part.frac <<= shift;
362 }
363 } else {
364 part.cls = float_class_normal;
365 part.exp -= parm->exp_bias;
366 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
367 }
368 return part;
369}
370
371/* Round and uncanonicalize a floating-point number by parts. There
372 * are FRAC_SHIFT bits that may require rounding at the bottom of the
373 * fraction; these bits will be removed. The exponent will be biased
374 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
375 */
376
377static FloatParts round_canonical(FloatParts p, float_status *s,
378 const FloatFmt *parm)
379{
380 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
381 const uint64_t round_mask = parm->round_mask;
382 const uint64_t roundeven_mask = parm->roundeven_mask;
383 const int exp_max = parm->exp_max;
384 const int frac_shift = parm->frac_shift;
385 uint64_t frac, inc;
386 int exp, flags = 0;
387 bool overflow_norm;
388
389 frac = p.frac;
390 exp = p.exp;
391
392 switch (p.cls) {
393 case float_class_normal:
394 switch (s->float_rounding_mode) {
395 case float_round_nearest_even:
396 overflow_norm = false;
397 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
398 break;
399 case float_round_ties_away:
400 overflow_norm = false;
401 inc = frac_lsbm1;
402 break;
403 case float_round_to_zero:
404 overflow_norm = true;
405 inc = 0;
406 break;
407 case float_round_up:
408 inc = p.sign ? 0 : round_mask;
409 overflow_norm = p.sign;
410 break;
411 case float_round_down:
412 inc = p.sign ? round_mask : 0;
413 overflow_norm = !p.sign;
414 break;
415 default:
416 g_assert_not_reached();
417 }
418
419 exp += parm->exp_bias;
420 if (likely(exp > 0)) {
421 if (frac & round_mask) {
422 flags |= float_flag_inexact;
423 frac += inc;
424 if (frac & DECOMPOSED_OVERFLOW_BIT) {
425 frac >>= 1;
426 exp++;
427 }
428 }
429 frac >>= frac_shift;
430
ca3a3d5a
AB
431 if (parm->arm_althp) {
432 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
433 if (unlikely(exp > exp_max)) {
434 /* Overflow. Return the maximum normal. */
435 flags = float_flag_invalid;
436 exp = exp_max;
437 frac = -1;
438 }
439 } else if (unlikely(exp >= exp_max)) {
6fff2167
AB
440 flags |= float_flag_overflow | float_flag_inexact;
441 if (overflow_norm) {
442 exp = exp_max - 1;
443 frac = -1;
444 } else {
445 p.cls = float_class_inf;
446 goto do_inf;
447 }
448 }
449 } else if (s->flush_to_zero) {
450 flags |= float_flag_output_denormal;
451 p.cls = float_class_zero;
452 goto do_zero;
453 } else {
454 bool is_tiny = (s->float_detect_tininess
455 == float_tininess_before_rounding)
456 || (exp < 0)
457 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
458
459 shift64RightJamming(frac, 1 - exp, &frac);
460 if (frac & round_mask) {
461 /* Need to recompute round-to-even. */
462 if (s->float_rounding_mode == float_round_nearest_even) {
463 inc = ((frac & roundeven_mask) != frac_lsbm1
464 ? frac_lsbm1 : 0);
465 }
466 flags |= float_flag_inexact;
467 frac += inc;
468 }
469
470 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
471 frac >>= frac_shift;
472
473 if (is_tiny && (flags & float_flag_inexact)) {
474 flags |= float_flag_underflow;
475 }
476 if (exp == 0 && frac == 0) {
477 p.cls = float_class_zero;
478 }
479 }
480 break;
481
482 case float_class_zero:
483 do_zero:
484 exp = 0;
485 frac = 0;
486 break;
487
488 case float_class_inf:
489 do_inf:
ca3a3d5a 490 assert(!parm->arm_althp);
6fff2167
AB
491 exp = exp_max;
492 frac = 0;
493 break;
494
495 case float_class_qnan:
496 case float_class_snan:
ca3a3d5a 497 assert(!parm->arm_althp);
6fff2167 498 exp = exp_max;
94933df0 499 frac >>= parm->frac_shift;
6fff2167
AB
500 break;
501
502 default:
503 g_assert_not_reached();
504 }
505
506 float_raise(flags, s);
507 p.exp = exp;
508 p.frac = frac;
509 return p;
510}
511
6fed16b2
AB
512/* Explicit FloatFmt version */
513static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
514 const FloatFmt *params)
515{
f9943c7f 516 return sf_canonicalize(float16_unpack_raw(f), params, s);
6fed16b2
AB
517}
518
6fff2167
AB
519static FloatParts float16_unpack_canonical(float16 f, float_status *s)
520{
6fed16b2
AB
521 return float16a_unpack_canonical(f, s, &float16_params);
522}
523
524static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
525 const FloatFmt *params)
526{
527 return float16_pack_raw(round_canonical(p, s, params));
6fff2167
AB
528}
529
530static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
531{
6fed16b2 532 return float16a_round_pack_canonical(p, s, &float16_params);
6fff2167
AB
533}
534
535static FloatParts float32_unpack_canonical(float32 f, float_status *s)
536{
f9943c7f 537 return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
6fff2167
AB
538}
539
540static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
541{
0bcfbcbe 542 return float32_pack_raw(round_canonical(p, s, &float32_params));
6fff2167
AB
543}
544
545static FloatParts float64_unpack_canonical(float64 f, float_status *s)
546{
f9943c7f 547 return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
6fff2167
AB
548}
549
550static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
551{
0bcfbcbe 552 return float64_pack_raw(round_canonical(p, s, &float64_params));
6fff2167
AB
553}
554
dbe4d53a
AB
555static FloatParts return_nan(FloatParts a, float_status *s)
556{
557 switch (a.cls) {
558 case float_class_snan:
559 s->float_exception_flags |= float_flag_invalid;
0bcfbcbe 560 a = parts_silence_nan(a, s);
dbe4d53a
AB
561 /* fall through */
562 case float_class_qnan:
563 if (s->default_nan_mode) {
f7e598e2 564 return parts_default_nan(s);
dbe4d53a
AB
565 }
566 break;
567
568 default:
569 g_assert_not_reached();
570 }
571 return a;
572}
573
6fff2167
AB
574static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
575{
576 if (is_snan(a.cls) || is_snan(b.cls)) {
577 s->float_exception_flags |= float_flag_invalid;
578 }
579
580 if (s->default_nan_mode) {
f7e598e2 581 return parts_default_nan(s);
6fff2167 582 } else {
4f251cfd 583 if (pickNaN(a.cls, b.cls,
6fff2167
AB
584 a.frac > b.frac ||
585 (a.frac == b.frac && a.sign < b.sign))) {
586 a = b;
587 }
0bcfbcbe
RH
588 if (is_snan(a.cls)) {
589 return parts_silence_nan(a, s);
590 }
6fff2167
AB
591 }
592 return a;
593}
594
d446830a
AB
595static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
596 bool inf_zero, float_status *s)
597{
1839189b
PM
598 int which;
599
d446830a
AB
600 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
601 s->float_exception_flags |= float_flag_invalid;
602 }
603
3bd2dec1 604 which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
1839189b 605
d446830a 606 if (s->default_nan_mode) {
1839189b
PM
607 /* Note that this check is after pickNaNMulAdd so that function
608 * has an opportunity to set the Invalid flag.
609 */
f7e598e2 610 which = 3;
1839189b 611 }
d446830a 612
1839189b
PM
613 switch (which) {
614 case 0:
615 break;
616 case 1:
617 a = b;
618 break;
619 case 2:
620 a = c;
621 break;
622 case 3:
f7e598e2 623 return parts_default_nan(s);
1839189b
PM
624 default:
625 g_assert_not_reached();
d446830a 626 }
1839189b 627
0bcfbcbe
RH
628 if (is_snan(a.cls)) {
629 return parts_silence_nan(a, s);
630 }
d446830a
AB
631 return a;
632}
633
6fff2167
AB
634/*
635 * Returns the result of adding or subtracting the values of the
636 * floating-point values `a' and `b'. The operation is performed
637 * according to the IEC/IEEE Standard for Binary Floating-Point
638 * Arithmetic.
639 */
640
641static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
642 float_status *s)
643{
644 bool a_sign = a.sign;
645 bool b_sign = b.sign ^ subtract;
646
647 if (a_sign != b_sign) {
648 /* Subtraction */
649
650 if (a.cls == float_class_normal && b.cls == float_class_normal) {
651 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
652 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
653 a.frac = a.frac - b.frac;
654 } else {
655 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
656 a.frac = b.frac - a.frac;
657 a.exp = b.exp;
658 a_sign ^= 1;
659 }
660
661 if (a.frac == 0) {
662 a.cls = float_class_zero;
663 a.sign = s->float_rounding_mode == float_round_down;
664 } else {
665 int shift = clz64(a.frac) - 1;
666 a.frac = a.frac << shift;
667 a.exp = a.exp - shift;
668 a.sign = a_sign;
669 }
670 return a;
671 }
672 if (is_nan(a.cls) || is_nan(b.cls)) {
673 return pick_nan(a, b, s);
674 }
675 if (a.cls == float_class_inf) {
676 if (b.cls == float_class_inf) {
677 float_raise(float_flag_invalid, s);
f7e598e2 678 return parts_default_nan(s);
6fff2167
AB
679 }
680 return a;
681 }
682 if (a.cls == float_class_zero && b.cls == float_class_zero) {
683 a.sign = s->float_rounding_mode == float_round_down;
684 return a;
685 }
686 if (a.cls == float_class_zero || b.cls == float_class_inf) {
687 b.sign = a_sign ^ 1;
688 return b;
689 }
690 if (b.cls == float_class_zero) {
691 return a;
692 }
693 } else {
694 /* Addition */
695 if (a.cls == float_class_normal && b.cls == float_class_normal) {
696 if (a.exp > b.exp) {
697 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
698 } else if (a.exp < b.exp) {
699 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
700 a.exp = b.exp;
701 }
702 a.frac += b.frac;
703 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
64d450a0 704 shift64RightJamming(a.frac, 1, &a.frac);
6fff2167
AB
705 a.exp += 1;
706 }
707 return a;
708 }
709 if (is_nan(a.cls) || is_nan(b.cls)) {
710 return pick_nan(a, b, s);
711 }
712 if (a.cls == float_class_inf || b.cls == float_class_zero) {
713 return a;
714 }
715 if (b.cls == float_class_inf || a.cls == float_class_zero) {
716 b.sign = b_sign;
717 return b;
718 }
719 }
720 g_assert_not_reached();
721}
722
723/*
724 * Returns the result of adding or subtracting the floating-point
725 * values `a' and `b'. The operation is performed according to the
726 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
727 */
728
97ff87c0 729float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
6fff2167
AB
730{
731 FloatParts pa = float16_unpack_canonical(a, status);
732 FloatParts pb = float16_unpack_canonical(b, status);
733 FloatParts pr = addsub_floats(pa, pb, false, status);
734
735 return float16_round_pack_canonical(pr, status);
736}
737
97ff87c0 738float32 QEMU_FLATTEN float32_add(float32 a, float32 b, float_status *status)
6fff2167
AB
739{
740 FloatParts pa = float32_unpack_canonical(a, status);
741 FloatParts pb = float32_unpack_canonical(b, status);
742 FloatParts pr = addsub_floats(pa, pb, false, status);
743
744 return float32_round_pack_canonical(pr, status);
745}
746
97ff87c0 747float64 QEMU_FLATTEN float64_add(float64 a, float64 b, float_status *status)
6fff2167
AB
748{
749 FloatParts pa = float64_unpack_canonical(a, status);
750 FloatParts pb = float64_unpack_canonical(b, status);
751 FloatParts pr = addsub_floats(pa, pb, false, status);
752
753 return float64_round_pack_canonical(pr, status);
754}
755
97ff87c0 756float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
6fff2167
AB
757{
758 FloatParts pa = float16_unpack_canonical(a, status);
759 FloatParts pb = float16_unpack_canonical(b, status);
760 FloatParts pr = addsub_floats(pa, pb, true, status);
761
762 return float16_round_pack_canonical(pr, status);
763}
764
97ff87c0 765float32 QEMU_FLATTEN float32_sub(float32 a, float32 b, float_status *status)
6fff2167
AB
766{
767 FloatParts pa = float32_unpack_canonical(a, status);
768 FloatParts pb = float32_unpack_canonical(b, status);
769 FloatParts pr = addsub_floats(pa, pb, true, status);
770
771 return float32_round_pack_canonical(pr, status);
772}
773
97ff87c0 774float64 QEMU_FLATTEN float64_sub(float64 a, float64 b, float_status *status)
6fff2167
AB
775{
776 FloatParts pa = float64_unpack_canonical(a, status);
777 FloatParts pb = float64_unpack_canonical(b, status);
778 FloatParts pr = addsub_floats(pa, pb, true, status);
779
780 return float64_round_pack_canonical(pr, status);
781}
782
74d707e2
AB
783/*
784 * Returns the result of multiplying the floating-point values `a' and
785 * `b'. The operation is performed according to the IEC/IEEE Standard
786 * for Binary Floating-Point Arithmetic.
787 */
788
789static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
790{
791 bool sign = a.sign ^ b.sign;
792
793 if (a.cls == float_class_normal && b.cls == float_class_normal) {
794 uint64_t hi, lo;
795 int exp = a.exp + b.exp;
796
797 mul64To128(a.frac, b.frac, &hi, &lo);
798 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
799 if (lo & DECOMPOSED_OVERFLOW_BIT) {
800 shift64RightJamming(lo, 1, &lo);
801 exp += 1;
802 }
803
804 /* Re-use a */
805 a.exp = exp;
806 a.sign = sign;
807 a.frac = lo;
808 return a;
809 }
810 /* handle all the NaN cases */
811 if (is_nan(a.cls) || is_nan(b.cls)) {
812 return pick_nan(a, b, s);
813 }
814 /* Inf * Zero == NaN */
815 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
816 (a.cls == float_class_zero && b.cls == float_class_inf)) {
817 s->float_exception_flags |= float_flag_invalid;
f7e598e2 818 return parts_default_nan(s);
74d707e2
AB
819 }
820 /* Multiply by 0 or Inf */
821 if (a.cls == float_class_inf || a.cls == float_class_zero) {
822 a.sign = sign;
823 return a;
824 }
825 if (b.cls == float_class_inf || b.cls == float_class_zero) {
826 b.sign = sign;
827 return b;
828 }
829 g_assert_not_reached();
830}
831
97ff87c0 832float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2
AB
833{
834 FloatParts pa = float16_unpack_canonical(a, status);
835 FloatParts pb = float16_unpack_canonical(b, status);
836 FloatParts pr = mul_floats(pa, pb, status);
837
838 return float16_round_pack_canonical(pr, status);
839}
840
97ff87c0 841float32 QEMU_FLATTEN float32_mul(float32 a, float32 b, float_status *status)
74d707e2
AB
842{
843 FloatParts pa = float32_unpack_canonical(a, status);
844 FloatParts pb = float32_unpack_canonical(b, status);
845 FloatParts pr = mul_floats(pa, pb, status);
846
847 return float32_round_pack_canonical(pr, status);
848}
849
97ff87c0 850float64 QEMU_FLATTEN float64_mul(float64 a, float64 b, float_status *status)
74d707e2
AB
851{
852 FloatParts pa = float64_unpack_canonical(a, status);
853 FloatParts pb = float64_unpack_canonical(b, status);
854 FloatParts pr = mul_floats(pa, pb, status);
855
856 return float64_round_pack_canonical(pr, status);
857}
858
d446830a
AB
859/*
860 * Returns the result of multiplying the floating-point values `a' and
861 * `b' then adding 'c', with no intermediate rounding step after the
862 * multiplication. The operation is performed according to the
863 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
864 * The flags argument allows the caller to select negation of the
865 * addend, the intermediate product, or the final result. (The
866 * difference between this and having the caller do a separate
867 * negation is that negating externally will flip the sign bit on
868 * NaNs.)
869 */
870
871static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
872 int flags, float_status *s)
873{
874 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
875 ((1 << float_class_inf) | (1 << float_class_zero));
876 bool p_sign;
877 bool sign_flip = flags & float_muladd_negate_result;
878 FloatClass p_class;
879 uint64_t hi, lo;
880 int p_exp;
881
882 /* It is implementation-defined whether the cases of (0,inf,qnan)
883 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
884 * they return if they do), so we have to hand this information
885 * off to the target-specific pick-a-NaN routine.
886 */
887 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
888 return pick_nan_muladd(a, b, c, inf_zero, s);
889 }
890
891 if (inf_zero) {
892 s->float_exception_flags |= float_flag_invalid;
f7e598e2 893 return parts_default_nan(s);
d446830a
AB
894 }
895
896 if (flags & float_muladd_negate_c) {
897 c.sign ^= 1;
898 }
899
900 p_sign = a.sign ^ b.sign;
901
902 if (flags & float_muladd_negate_product) {
903 p_sign ^= 1;
904 }
905
906 if (a.cls == float_class_inf || b.cls == float_class_inf) {
907 p_class = float_class_inf;
908 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
909 p_class = float_class_zero;
910 } else {
911 p_class = float_class_normal;
912 }
913
914 if (c.cls == float_class_inf) {
915 if (p_class == float_class_inf && p_sign != c.sign) {
916 s->float_exception_flags |= float_flag_invalid;
f7e598e2 917 return parts_default_nan(s);
d446830a
AB
918 } else {
919 a.cls = float_class_inf;
920 a.sign = c.sign ^ sign_flip;
f7e598e2 921 return a;
d446830a 922 }
d446830a
AB
923 }
924
925 if (p_class == float_class_inf) {
926 a.cls = float_class_inf;
927 a.sign = p_sign ^ sign_flip;
928 return a;
929 }
930
931 if (p_class == float_class_zero) {
932 if (c.cls == float_class_zero) {
933 if (p_sign != c.sign) {
934 p_sign = s->float_rounding_mode == float_round_down;
935 }
936 c.sign = p_sign;
937 } else if (flags & float_muladd_halve_result) {
938 c.exp -= 1;
939 }
940 c.sign ^= sign_flip;
941 return c;
942 }
943
944 /* a & b should be normals now... */
945 assert(a.cls == float_class_normal &&
946 b.cls == float_class_normal);
947
948 p_exp = a.exp + b.exp;
949
950 /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
951 * result.
952 */
953 mul64To128(a.frac, b.frac, &hi, &lo);
954 /* binary point now at bit 124 */
955
956 /* check for overflow */
957 if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
958 shift128RightJamming(hi, lo, 1, &hi, &lo);
959 p_exp += 1;
960 }
961
962 /* + add/sub */
963 if (c.cls == float_class_zero) {
964 /* move binary point back to 62 */
965 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
966 } else {
967 int exp_diff = p_exp - c.exp;
968 if (p_sign == c.sign) {
969 /* Addition */
970 if (exp_diff <= 0) {
971 shift128RightJamming(hi, lo,
972 DECOMPOSED_BINARY_POINT - exp_diff,
973 &hi, &lo);
974 lo += c.frac;
975 p_exp = c.exp;
976 } else {
977 uint64_t c_hi, c_lo;
978 /* shift c to the same binary point as the product (124) */
979 c_hi = c.frac >> 2;
980 c_lo = 0;
981 shift128RightJamming(c_hi, c_lo,
982 exp_diff,
983 &c_hi, &c_lo);
984 add128(hi, lo, c_hi, c_lo, &hi, &lo);
985 /* move binary point back to 62 */
986 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
987 }
988
989 if (lo & DECOMPOSED_OVERFLOW_BIT) {
990 shift64RightJamming(lo, 1, &lo);
991 p_exp += 1;
992 }
993
994 } else {
995 /* Subtraction */
996 uint64_t c_hi, c_lo;
997 /* make C binary point match product at bit 124 */
998 c_hi = c.frac >> 2;
999 c_lo = 0;
1000
1001 if (exp_diff <= 0) {
1002 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1003 if (exp_diff == 0
1004 &&
1005 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1006 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1007 } else {
1008 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1009 p_sign ^= 1;
1010 p_exp = c.exp;
1011 }
1012 } else {
1013 shift128RightJamming(c_hi, c_lo,
1014 exp_diff,
1015 &c_hi, &c_lo);
1016 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1017 }
1018
1019 if (hi == 0 && lo == 0) {
1020 a.cls = float_class_zero;
1021 a.sign = s->float_rounding_mode == float_round_down;
1022 a.sign ^= sign_flip;
1023 return a;
1024 } else {
1025 int shift;
1026 if (hi != 0) {
1027 shift = clz64(hi);
1028 } else {
1029 shift = clz64(lo) + 64;
1030 }
1031 /* Normalizing to a binary point of 124 is the
1032 correct adjust for the exponent. However since we're
1033 shifting, we might as well put the binary point back
1034 at 62 where we really want it. Therefore shift as
1035 if we're leaving 1 bit at the top of the word, but
1036 adjust the exponent as if we're leaving 3 bits. */
1037 shift -= 1;
1038 if (shift >= 64) {
1039 lo = lo << (shift - 64);
1040 } else {
1041 hi = (hi << shift) | (lo >> (64 - shift));
1042 lo = hi | ((lo << shift) != 0);
1043 }
1044 p_exp -= shift - 2;
1045 }
1046 }
1047 }
1048
1049 if (flags & float_muladd_halve_result) {
1050 p_exp -= 1;
1051 }
1052
1053 /* finally prepare our result */
1054 a.cls = float_class_normal;
1055 a.sign = p_sign ^ sign_flip;
1056 a.exp = p_exp;
1057 a.frac = lo;
1058
1059 return a;
1060}
1061
97ff87c0 1062float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
d446830a
AB
1063 int flags, float_status *status)
1064{
1065 FloatParts pa = float16_unpack_canonical(a, status);
1066 FloatParts pb = float16_unpack_canonical(b, status);
1067 FloatParts pc = float16_unpack_canonical(c, status);
1068 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1069
1070 return float16_round_pack_canonical(pr, status);
1071}
1072
97ff87c0 1073float32 QEMU_FLATTEN float32_muladd(float32 a, float32 b, float32 c,
d446830a
AB
1074 int flags, float_status *status)
1075{
1076 FloatParts pa = float32_unpack_canonical(a, status);
1077 FloatParts pb = float32_unpack_canonical(b, status);
1078 FloatParts pc = float32_unpack_canonical(c, status);
1079 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1080
1081 return float32_round_pack_canonical(pr, status);
1082}
1083
97ff87c0 1084float64 QEMU_FLATTEN float64_muladd(float64 a, float64 b, float64 c,
d446830a
AB
1085 int flags, float_status *status)
1086{
1087 FloatParts pa = float64_unpack_canonical(a, status);
1088 FloatParts pb = float64_unpack_canonical(b, status);
1089 FloatParts pc = float64_unpack_canonical(c, status);
1090 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1091
1092 return float64_round_pack_canonical(pr, status);
1093}
1094
cf07323d
AB
1095/*
1096 * Returns the result of dividing the floating-point value `a' by the
1097 * corresponding value `b'. The operation is performed according to
1098 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1099 */
1100
1101static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1102{
1103 bool sign = a.sign ^ b.sign;
1104
1105 if (a.cls == float_class_normal && b.cls == float_class_normal) {
5dfbc9e4 1106 uint64_t n0, n1, q, r;
cf07323d 1107 int exp = a.exp - b.exp;
5dfbc9e4
RH
1108
1109 /*
1110 * We want a 2*N / N-bit division to produce exactly an N-bit
1111 * result, so that we do not lose any precision and so that we
1112 * do not have to renormalize afterward. If A.frac < B.frac,
1113 * then division would produce an (N-1)-bit result; shift A left
1114 * by one to produce the an N-bit result, and decrement the
1115 * exponent to match.
1116 *
1117 * The udiv_qrnnd algorithm that we're using requires normalization,
1118 * i.e. the msb of the denominator must be set. Since we know that
1119 * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
1120 * by one (more), and the remainder must be shifted right by one.
1121 */
cf07323d
AB
1122 if (a.frac < b.frac) {
1123 exp -= 1;
5dfbc9e4 1124 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
cf07323d 1125 } else {
5dfbc9e4 1126 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
cf07323d 1127 }
5dfbc9e4
RH
1128 q = udiv_qrnnd(&r, n1, n0, b.frac << 1);
1129
1130 /*
1131 * Set lsb if there is a remainder, to set inexact.
1132 * As mentioned above, to find the actual value of the remainder we
1133 * would need to shift right, but (1) we are only concerned about
1134 * non-zero-ness, and (2) the remainder will always be even because
1135 * both inputs to the division primitive are even.
1136 */
1137 a.frac = q | (r != 0);
cf07323d
AB
1138 a.sign = sign;
1139 a.exp = exp;
1140 return a;
1141 }
1142 /* handle all the NaN cases */
1143 if (is_nan(a.cls) || is_nan(b.cls)) {
1144 return pick_nan(a, b, s);
1145 }
1146 /* 0/0 or Inf/Inf */
1147 if (a.cls == b.cls
1148 &&
1149 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1150 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1151 return parts_default_nan(s);
cf07323d 1152 }
9cb4e398
AB
1153 /* Inf / x or 0 / x */
1154 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1155 a.sign = sign;
1156 return a;
1157 }
cf07323d
AB
1158 /* Div 0 => Inf */
1159 if (b.cls == float_class_zero) {
1160 s->float_exception_flags |= float_flag_divbyzero;
1161 a.cls = float_class_inf;
1162 a.sign = sign;
1163 return a;
1164 }
cf07323d
AB
1165 /* Div by Inf */
1166 if (b.cls == float_class_inf) {
1167 a.cls = float_class_zero;
1168 a.sign = sign;
1169 return a;
1170 }
1171 g_assert_not_reached();
1172}
1173
1174float16 float16_div(float16 a, float16 b, float_status *status)
1175{
1176 FloatParts pa = float16_unpack_canonical(a, status);
1177 FloatParts pb = float16_unpack_canonical(b, status);
1178 FloatParts pr = div_floats(pa, pb, status);
1179
1180 return float16_round_pack_canonical(pr, status);
1181}
1182
1183float32 float32_div(float32 a, float32 b, float_status *status)
1184{
1185 FloatParts pa = float32_unpack_canonical(a, status);
1186 FloatParts pb = float32_unpack_canonical(b, status);
1187 FloatParts pr = div_floats(pa, pb, status);
1188
1189 return float32_round_pack_canonical(pr, status);
1190}
1191
1192float64 float64_div(float64 a, float64 b, float_status *status)
1193{
1194 FloatParts pa = float64_unpack_canonical(a, status);
1195 FloatParts pb = float64_unpack_canonical(b, status);
1196 FloatParts pr = div_floats(pa, pb, status);
1197
1198 return float64_round_pack_canonical(pr, status);
1199}
1200
6fed16b2
AB
1201/*
1202 * Float to Float conversions
1203 *
1204 * Returns the result of converting one float format to another. The
1205 * conversion is performed according to the IEC/IEEE Standard for
1206 * Binary Floating-Point Arithmetic.
1207 *
1208 * The float_to_float helper only needs to take care of raising
1209 * invalid exceptions and handling the conversion on NaNs.
1210 */
1211
1212static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1213 float_status *s)
1214{
1215 if (dstf->arm_althp) {
1216 switch (a.cls) {
1217 case float_class_qnan:
1218 case float_class_snan:
1219 /* There is no NaN in the destination format. Raise Invalid
1220 * and return a zero with the sign of the input NaN.
1221 */
1222 s->float_exception_flags |= float_flag_invalid;
1223 a.cls = float_class_zero;
1224 a.frac = 0;
1225 a.exp = 0;
1226 break;
1227
1228 case float_class_inf:
1229 /* There is no Inf in the destination format. Raise Invalid
1230 * and return the maximum normal with the correct sign.
1231 */
1232 s->float_exception_flags |= float_flag_invalid;
1233 a.cls = float_class_normal;
1234 a.exp = dstf->exp_max;
1235 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1236 break;
1237
1238 default:
1239 break;
1240 }
1241 } else if (is_nan(a.cls)) {
1242 if (is_snan(a.cls)) {
1243 s->float_exception_flags |= float_flag_invalid;
1244 a = parts_silence_nan(a, s);
1245 }
1246 if (s->default_nan_mode) {
1247 return parts_default_nan(s);
1248 }
1249 }
1250 return a;
1251}
1252
1253float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1254{
1255 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1256 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1257 FloatParts pr = float_to_float(p, &float32_params, s);
1258 return float32_round_pack_canonical(pr, s);
1259}
1260
1261float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1262{
1263 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1264 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1265 FloatParts pr = float_to_float(p, &float64_params, s);
1266 return float64_round_pack_canonical(pr, s);
1267}
1268
1269float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1270{
1271 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1272 FloatParts p = float32_unpack_canonical(a, s);
1273 FloatParts pr = float_to_float(p, fmt16, s);
1274 return float16a_round_pack_canonical(pr, s, fmt16);
1275}
1276
1277float64 float32_to_float64(float32 a, float_status *s)
1278{
1279 FloatParts p = float32_unpack_canonical(a, s);
1280 FloatParts pr = float_to_float(p, &float64_params, s);
1281 return float64_round_pack_canonical(pr, s);
1282}
1283
1284float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1285{
1286 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1287 FloatParts p = float64_unpack_canonical(a, s);
1288 FloatParts pr = float_to_float(p, fmt16, s);
1289 return float16a_round_pack_canonical(pr, s, fmt16);
1290}
1291
1292float32 float64_to_float32(float64 a, float_status *s)
1293{
1294 FloatParts p = float64_unpack_canonical(a, s);
1295 FloatParts pr = float_to_float(p, &float32_params, s);
1296 return float32_round_pack_canonical(pr, s);
1297}
1298
dbe4d53a
AB
1299/*
1300 * Rounds the floating-point value `a' to an integer, and returns the
1301 * result as a floating-point value. The operation is performed
1302 * according to the IEC/IEEE Standard for Binary Floating-Point
1303 * Arithmetic.
1304 */
1305
2f6c74be
RH
1306static FloatParts round_to_int(FloatParts a, int rmode,
1307 int scale, float_status *s)
dbe4d53a 1308{
2f6c74be
RH
1309 switch (a.cls) {
1310 case float_class_qnan:
1311 case float_class_snan:
dbe4d53a 1312 return return_nan(a, s);
dbe4d53a 1313
dbe4d53a
AB
1314 case float_class_zero:
1315 case float_class_inf:
dbe4d53a
AB
1316 /* already "integral" */
1317 break;
2f6c74be 1318
dbe4d53a 1319 case float_class_normal:
2f6c74be
RH
1320 scale = MIN(MAX(scale, -0x10000), 0x10000);
1321 a.exp += scale;
1322
dbe4d53a
AB
1323 if (a.exp >= DECOMPOSED_BINARY_POINT) {
1324 /* already integral */
1325 break;
1326 }
1327 if (a.exp < 0) {
1328 bool one;
1329 /* all fractional */
1330 s->float_exception_flags |= float_flag_inexact;
2f6c74be 1331 switch (rmode) {
dbe4d53a
AB
1332 case float_round_nearest_even:
1333 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
1334 break;
1335 case float_round_ties_away:
1336 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
1337 break;
1338 case float_round_to_zero:
1339 one = false;
1340 break;
1341 case float_round_up:
1342 one = !a.sign;
1343 break;
1344 case float_round_down:
1345 one = a.sign;
1346 break;
1347 default:
1348 g_assert_not_reached();
1349 }
1350
1351 if (one) {
1352 a.frac = DECOMPOSED_IMPLICIT_BIT;
1353 a.exp = 0;
1354 } else {
1355 a.cls = float_class_zero;
1356 }
1357 } else {
1358 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
1359 uint64_t frac_lsbm1 = frac_lsb >> 1;
1360 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
1361 uint64_t rnd_mask = rnd_even_mask >> 1;
1362 uint64_t inc;
1363
2f6c74be 1364 switch (rmode) {
dbe4d53a
AB
1365 case float_round_nearest_even:
1366 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
1367 break;
1368 case float_round_ties_away:
1369 inc = frac_lsbm1;
1370 break;
1371 case float_round_to_zero:
1372 inc = 0;
1373 break;
1374 case float_round_up:
1375 inc = a.sign ? 0 : rnd_mask;
1376 break;
1377 case float_round_down:
1378 inc = a.sign ? rnd_mask : 0;
1379 break;
1380 default:
1381 g_assert_not_reached();
1382 }
1383
1384 if (a.frac & rnd_mask) {
1385 s->float_exception_flags |= float_flag_inexact;
1386 a.frac += inc;
1387 a.frac &= ~rnd_mask;
1388 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
1389 a.frac >>= 1;
1390 a.exp++;
1391 }
1392 }
1393 }
1394 break;
1395 default:
1396 g_assert_not_reached();
1397 }
1398 return a;
1399}
1400
1401float16 float16_round_to_int(float16 a, float_status *s)
1402{
1403 FloatParts pa = float16_unpack_canonical(a, s);
2f6c74be 1404 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
1405 return float16_round_pack_canonical(pr, s);
1406}
1407
1408float32 float32_round_to_int(float32 a, float_status *s)
1409{
1410 FloatParts pa = float32_unpack_canonical(a, s);
2f6c74be 1411 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
1412 return float32_round_pack_canonical(pr, s);
1413}
1414
1415float64 float64_round_to_int(float64 a, float_status *s)
1416{
1417 FloatParts pa = float64_unpack_canonical(a, s);
2f6c74be 1418 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
1419 return float64_round_pack_canonical(pr, s);
1420}
1421
ab52f973
AB
1422/*
1423 * Returns the result of converting the floating-point value `a' to
1424 * the two's complement integer format. The conversion is performed
1425 * according to the IEC/IEEE Standard for Binary Floating-Point
1426 * Arithmetic---which means in particular that the conversion is
1427 * rounded according to the current rounding mode. If `a' is a NaN,
1428 * the largest positive integer is returned. Otherwise, if the
1429 * conversion overflows, the largest integer with the same sign as `a'
1430 * is returned.
1431*/
1432
2f6c74be 1433static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale,
ab52f973
AB
1434 int64_t min, int64_t max,
1435 float_status *s)
1436{
1437 uint64_t r;
1438 int orig_flags = get_float_exception_flags(s);
2f6c74be 1439 FloatParts p = round_to_int(in, rmode, scale, s);
ab52f973
AB
1440
1441 switch (p.cls) {
1442 case float_class_snan:
1443 case float_class_qnan:
801bc563 1444 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1445 return max;
1446 case float_class_inf:
801bc563 1447 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1448 return p.sign ? min : max;
1449 case float_class_zero:
1450 return 0;
1451 case float_class_normal:
1452 if (p.exp < DECOMPOSED_BINARY_POINT) {
1453 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1454 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1455 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1456 } else {
1457 r = UINT64_MAX;
1458 }
1459 if (p.sign) {
33358375 1460 if (r <= -(uint64_t) min) {
ab52f973
AB
1461 return -r;
1462 } else {
1463 s->float_exception_flags = orig_flags | float_flag_invalid;
1464 return min;
1465 }
1466 } else {
33358375 1467 if (r <= max) {
ab52f973
AB
1468 return r;
1469 } else {
1470 s->float_exception_flags = orig_flags | float_flag_invalid;
1471 return max;
1472 }
1473 }
1474 default:
1475 g_assert_not_reached();
1476 }
1477}
1478
2f6c74be
RH
1479int16_t float16_to_int16_scalbn(float16 a, int rmode, int scale,
1480 float_status *s)
1481{
1482 return round_to_int_and_pack(float16_unpack_canonical(a, s),
1483 rmode, scale, INT16_MIN, INT16_MAX, s);
1484}
1485
1486int32_t float16_to_int32_scalbn(float16 a, int rmode, int scale,
1487 float_status *s)
1488{
1489 return round_to_int_and_pack(float16_unpack_canonical(a, s),
1490 rmode, scale, INT32_MIN, INT32_MAX, s);
1491}
1492
1493int64_t float16_to_int64_scalbn(float16 a, int rmode, int scale,
1494 float_status *s)
1495{
1496 return round_to_int_and_pack(float16_unpack_canonical(a, s),
1497 rmode, scale, INT64_MIN, INT64_MAX, s);
1498}
1499
1500int16_t float32_to_int16_scalbn(float32 a, int rmode, int scale,
1501 float_status *s)
1502{
1503 return round_to_int_and_pack(float32_unpack_canonical(a, s),
1504 rmode, scale, INT16_MIN, INT16_MAX, s);
1505}
1506
1507int32_t float32_to_int32_scalbn(float32 a, int rmode, int scale,
1508 float_status *s)
1509{
1510 return round_to_int_and_pack(float32_unpack_canonical(a, s),
1511 rmode, scale, INT32_MIN, INT32_MAX, s);
1512}
1513
1514int64_t float32_to_int64_scalbn(float32 a, int rmode, int scale,
1515 float_status *s)
1516{
1517 return round_to_int_and_pack(float32_unpack_canonical(a, s),
1518 rmode, scale, INT64_MIN, INT64_MAX, s);
1519}
1520
1521int16_t float64_to_int16_scalbn(float64 a, int rmode, int scale,
1522 float_status *s)
1523{
1524 return round_to_int_and_pack(float64_unpack_canonical(a, s),
1525 rmode, scale, INT16_MIN, INT16_MAX, s);
1526}
1527
1528int32_t float64_to_int32_scalbn(float64 a, int rmode, int scale,
1529 float_status *s)
1530{
1531 return round_to_int_and_pack(float64_unpack_canonical(a, s),
1532 rmode, scale, INT32_MIN, INT32_MAX, s);
1533}
1534
1535int64_t float64_to_int64_scalbn(float64 a, int rmode, int scale,
1536 float_status *s)
1537{
1538 return round_to_int_and_pack(float64_unpack_canonical(a, s),
1539 rmode, scale, INT64_MIN, INT64_MAX, s);
1540}
1541
1542int16_t float16_to_int16(float16 a, float_status *s)
1543{
1544 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
1545}
1546
1547int32_t float16_to_int32(float16 a, float_status *s)
1548{
1549 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
1550}
1551
1552int64_t float16_to_int64(float16 a, float_status *s)
1553{
1554 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
1555}
1556
1557int16_t float32_to_int16(float32 a, float_status *s)
1558{
1559 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
1560}
1561
1562int32_t float32_to_int32(float32 a, float_status *s)
1563{
1564 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
1565}
1566
1567int64_t float32_to_int64(float32 a, float_status *s)
1568{
1569 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
1570}
1571
1572int16_t float64_to_int16(float64 a, float_status *s)
1573{
1574 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
1575}
1576
1577int32_t float64_to_int32(float64 a, float_status *s)
1578{
1579 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
1580}
1581
1582int64_t float64_to_int64(float64 a, float_status *s)
1583{
1584 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
1585}
1586
1587int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
1588{
1589 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
1590}
1591
1592int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
1593{
1594 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
1595}
1596
1597int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
1598{
1599 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
1600}
1601
2f6c74be
RH
1602int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
1603{
1604 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
1605}
ab52f973 1606
2f6c74be
RH
1607int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
1608{
1609 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
1610}
1611
1612int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
1613{
1614 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
1615}
1616
1617int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
1618{
1619 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
1620}
ab52f973 1621
2f6c74be
RH
1622int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
1623{
1624 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
1625}
ab52f973 1626
2f6c74be
RH
1627int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
1628{
1629 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
1630}
ab52f973
AB
1631
1632/*
1633 * Returns the result of converting the floating-point value `a' to
1634 * the unsigned integer format. The conversion is performed according
1635 * to the IEC/IEEE Standard for Binary Floating-Point
1636 * Arithmetic---which means in particular that the conversion is
1637 * rounded according to the current rounding mode. If `a' is a NaN,
1638 * the largest unsigned integer is returned. Otherwise, if the
1639 * conversion overflows, the largest unsigned integer is returned. If
1640 * the 'a' is negative, the result is rounded and zero is returned;
1641 * values that do not round to zero will raise the inexact exception
1642 * flag.
1643 */
1644
2f6c74be
RH
1645static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale,
1646 uint64_t max, float_status *s)
ab52f973
AB
1647{
1648 int orig_flags = get_float_exception_flags(s);
2f6c74be
RH
1649 FloatParts p = round_to_int(in, rmode, scale, s);
1650 uint64_t r;
ab52f973
AB
1651
1652 switch (p.cls) {
1653 case float_class_snan:
1654 case float_class_qnan:
1655 s->float_exception_flags = orig_flags | float_flag_invalid;
1656 return max;
1657 case float_class_inf:
801bc563 1658 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1659 return p.sign ? 0 : max;
1660 case float_class_zero:
1661 return 0;
1662 case float_class_normal:
ab52f973
AB
1663 if (p.sign) {
1664 s->float_exception_flags = orig_flags | float_flag_invalid;
1665 return 0;
1666 }
1667
1668 if (p.exp < DECOMPOSED_BINARY_POINT) {
1669 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1670 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1671 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1672 } else {
1673 s->float_exception_flags = orig_flags | float_flag_invalid;
1674 return max;
1675 }
1676
1677 /* For uint64 this will never trip, but if p.exp is too large
1678 * to shift a decomposed fraction we shall have exited via the
1679 * 3rd leg above.
1680 */
1681 if (r > max) {
1682 s->float_exception_flags = orig_flags | float_flag_invalid;
1683 return max;
ab52f973 1684 }
2f6c74be 1685 return r;
ab52f973
AB
1686 default:
1687 g_assert_not_reached();
1688 }
1689}
1690
2f6c74be
RH
1691uint16_t float16_to_uint16_scalbn(float16 a, int rmode, int scale,
1692 float_status *s)
1693{
1694 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
1695 rmode, scale, UINT16_MAX, s);
1696}
1697
1698uint32_t float16_to_uint32_scalbn(float16 a, int rmode, int scale,
1699 float_status *s)
1700{
1701 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
1702 rmode, scale, UINT32_MAX, s);
1703}
1704
1705uint64_t float16_to_uint64_scalbn(float16 a, int rmode, int scale,
1706 float_status *s)
1707{
1708 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
1709 rmode, scale, UINT64_MAX, s);
1710}
1711
1712uint16_t float32_to_uint16_scalbn(float32 a, int rmode, int scale,
1713 float_status *s)
1714{
1715 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
1716 rmode, scale, UINT16_MAX, s);
1717}
1718
1719uint32_t float32_to_uint32_scalbn(float32 a, int rmode, int scale,
1720 float_status *s)
1721{
1722 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
1723 rmode, scale, UINT32_MAX, s);
1724}
1725
1726uint64_t float32_to_uint64_scalbn(float32 a, int rmode, int scale,
1727 float_status *s)
1728{
1729 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
1730 rmode, scale, UINT64_MAX, s);
1731}
1732
1733uint16_t float64_to_uint16_scalbn(float64 a, int rmode, int scale,
1734 float_status *s)
1735{
1736 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
1737 rmode, scale, UINT16_MAX, s);
1738}
1739
1740uint32_t float64_to_uint32_scalbn(float64 a, int rmode, int scale,
1741 float_status *s)
1742{
1743 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
1744 rmode, scale, UINT32_MAX, s);
1745}
1746
1747uint64_t float64_to_uint64_scalbn(float64 a, int rmode, int scale,
1748 float_status *s)
1749{
1750 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
1751 rmode, scale, UINT64_MAX, s);
1752}
1753
1754uint16_t float16_to_uint16(float16 a, float_status *s)
1755{
1756 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
1757}
1758
1759uint32_t float16_to_uint32(float16 a, float_status *s)
1760{
1761 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
1762}
1763
1764uint64_t float16_to_uint64(float16 a, float_status *s)
1765{
1766 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
1767}
1768
1769uint16_t float32_to_uint16(float32 a, float_status *s)
1770{
1771 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
1772}
1773
1774uint32_t float32_to_uint32(float32 a, float_status *s)
1775{
1776 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
1777}
1778
1779uint64_t float32_to_uint64(float32 a, float_status *s)
1780{
1781 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
1782}
1783
1784uint16_t float64_to_uint16(float64 a, float_status *s)
1785{
1786 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
1787}
1788
1789uint32_t float64_to_uint32(float64 a, float_status *s)
1790{
1791 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
1792}
1793
1794uint64_t float64_to_uint64(float64 a, float_status *s)
1795{
1796 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
1797}
1798
1799uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
1800{
1801 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
1802}
1803
1804uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
1805{
1806 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
1807}
1808
1809uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
1810{
1811 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
1812}
1813
1814uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
1815{
1816 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
1817}
1818
1819uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
1820{
1821 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
1822}
1823
1824uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
1825{
1826 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
1827}
1828
1829uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
1830{
1831 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
1832}
1833
1834uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
1835{
1836 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
1837}
1838
1839uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
1840{
1841 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
1842}
ab52f973 1843
c02e1fb8
AB
1844/*
1845 * Integer to float conversions
1846 *
1847 * Returns the result of converting the two's complement integer `a'
1848 * to the floating-point format. The conversion is performed according
1849 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1850 */
1851
2abdfe24 1852static FloatParts int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 1853{
2abdfe24
RH
1854 FloatParts r = { .sign = false };
1855
c02e1fb8
AB
1856 if (a == 0) {
1857 r.cls = float_class_zero;
c02e1fb8 1858 } else {
2abdfe24
RH
1859 uint64_t f = a;
1860 int shift;
1861
1862 r.cls = float_class_normal;
c02e1fb8 1863 if (a < 0) {
2abdfe24 1864 f = -f;
c02e1fb8 1865 r.sign = true;
c02e1fb8 1866 }
2abdfe24
RH
1867 shift = clz64(f) - 1;
1868 scale = MIN(MAX(scale, -0x10000), 0x10000);
1869
1870 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
1871 r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
c02e1fb8
AB
1872 }
1873
1874 return r;
1875}
1876
2abdfe24 1877float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 1878{
2abdfe24 1879 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
1880 return float16_round_pack_canonical(pa, status);
1881}
1882
2abdfe24
RH
1883float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
1884{
1885 return int64_to_float16_scalbn(a, scale, status);
1886}
1887
1888float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
1889{
1890 return int64_to_float16_scalbn(a, scale, status);
1891}
1892
1893float16 int64_to_float16(int64_t a, float_status *status)
1894{
1895 return int64_to_float16_scalbn(a, 0, status);
1896}
1897
c02e1fb8
AB
1898float16 int32_to_float16(int32_t a, float_status *status)
1899{
2abdfe24 1900 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
1901}
1902
1903float16 int16_to_float16(int16_t a, float_status *status)
1904{
2abdfe24 1905 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
1906}
1907
2abdfe24 1908float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 1909{
2abdfe24 1910 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
1911 return float32_round_pack_canonical(pa, status);
1912}
1913
2abdfe24
RH
1914float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
1915{
1916 return int64_to_float32_scalbn(a, scale, status);
1917}
1918
1919float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
1920{
1921 return int64_to_float32_scalbn(a, scale, status);
1922}
1923
1924float32 int64_to_float32(int64_t a, float_status *status)
1925{
1926 return int64_to_float32_scalbn(a, 0, status);
1927}
1928
c02e1fb8
AB
1929float32 int32_to_float32(int32_t a, float_status *status)
1930{
2abdfe24 1931 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
1932}
1933
1934float32 int16_to_float32(int16_t a, float_status *status)
1935{
2abdfe24 1936 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
1937}
1938
2abdfe24 1939float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 1940{
2abdfe24 1941 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
1942 return float64_round_pack_canonical(pa, status);
1943}
1944
2abdfe24
RH
1945float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
1946{
1947 return int64_to_float64_scalbn(a, scale, status);
1948}
1949
1950float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
1951{
1952 return int64_to_float64_scalbn(a, scale, status);
1953}
1954
1955float64 int64_to_float64(int64_t a, float_status *status)
1956{
1957 return int64_to_float64_scalbn(a, 0, status);
1958}
1959
c02e1fb8
AB
1960float64 int32_to_float64(int32_t a, float_status *status)
1961{
2abdfe24 1962 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
1963}
1964
1965float64 int16_to_float64(int16_t a, float_status *status)
1966{
2abdfe24 1967 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
1968}
1969
1970
1971/*
1972 * Unsigned Integer to float conversions
1973 *
1974 * Returns the result of converting the unsigned integer `a' to the
1975 * floating-point format. The conversion is performed according to the
1976 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1977 */
1978
2abdfe24 1979static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 1980{
2abdfe24 1981 FloatParts r = { .sign = false };
c02e1fb8
AB
1982
1983 if (a == 0) {
1984 r.cls = float_class_zero;
1985 } else {
2abdfe24 1986 scale = MIN(MAX(scale, -0x10000), 0x10000);
c02e1fb8 1987 r.cls = float_class_normal;
2abdfe24
RH
1988 if ((int64_t)a < 0) {
1989 r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
1990 shift64RightJamming(a, 1, &a);
c02e1fb8
AB
1991 r.frac = a;
1992 } else {
2abdfe24
RH
1993 int shift = clz64(a) - 1;
1994 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
1995 r.frac = a << shift;
c02e1fb8
AB
1996 }
1997 }
1998
1999 return r;
2000}
2001
2abdfe24 2002float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2003{
2abdfe24 2004 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2005 return float16_round_pack_canonical(pa, status);
2006}
2007
2abdfe24
RH
2008float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
2009{
2010 return uint64_to_float16_scalbn(a, scale, status);
2011}
2012
2013float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
2014{
2015 return uint64_to_float16_scalbn(a, scale, status);
2016}
2017
2018float16 uint64_to_float16(uint64_t a, float_status *status)
2019{
2020 return uint64_to_float16_scalbn(a, 0, status);
2021}
2022
c02e1fb8
AB
2023float16 uint32_to_float16(uint32_t a, float_status *status)
2024{
2abdfe24 2025 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2026}
2027
2028float16 uint16_to_float16(uint16_t a, float_status *status)
2029{
2abdfe24 2030 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2031}
2032
2abdfe24 2033float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2034{
2abdfe24 2035 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2036 return float32_round_pack_canonical(pa, status);
2037}
2038
2abdfe24
RH
2039float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
2040{
2041 return uint64_to_float32_scalbn(a, scale, status);
2042}
2043
2044float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
2045{
2046 return uint64_to_float32_scalbn(a, scale, status);
2047}
2048
2049float32 uint64_to_float32(uint64_t a, float_status *status)
2050{
2051 return uint64_to_float32_scalbn(a, 0, status);
2052}
2053
c02e1fb8
AB
2054float32 uint32_to_float32(uint32_t a, float_status *status)
2055{
2abdfe24 2056 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2057}
2058
2059float32 uint16_to_float32(uint16_t a, float_status *status)
2060{
2abdfe24 2061 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2062}
2063
2abdfe24 2064float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2065{
2abdfe24 2066 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2067 return float64_round_pack_canonical(pa, status);
2068}
2069
2abdfe24
RH
2070float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
2071{
2072 return uint64_to_float64_scalbn(a, scale, status);
2073}
2074
2075float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
2076{
2077 return uint64_to_float64_scalbn(a, scale, status);
2078}
2079
2080float64 uint64_to_float64(uint64_t a, float_status *status)
2081{
2082 return uint64_to_float64_scalbn(a, 0, status);
2083}
2084
c02e1fb8
AB
2085float64 uint32_to_float64(uint32_t a, float_status *status)
2086{
2abdfe24 2087 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2088}
2089
2090float64 uint16_to_float64(uint16_t a, float_status *status)
2091{
2abdfe24 2092 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2093}
2094
89360067
AB
2095/* Float Min/Max */
2096/* min() and max() functions. These can't be implemented as
2097 * 'compare and pick one input' because that would mishandle
2098 * NaNs and +0 vs -0.
2099 *
2100 * minnum() and maxnum() functions. These are similar to the min()
2101 * and max() functions but if one of the arguments is a QNaN and
2102 * the other is numerical then the numerical argument is returned.
2103 * SNaNs will get quietened before being returned.
2104 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
2105 * and maxNum() operations. min() and max() are the typical min/max
2106 * semantics provided by many CPUs which predate that specification.
2107 *
2108 * minnummag() and maxnummag() functions correspond to minNumMag()
2109 * and maxNumMag() from the IEEE-754 2008.
2110 */
2111static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
2112 bool ieee, bool ismag, float_status *s)
2113{
2114 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
2115 if (ieee) {
2116 /* Takes two floating-point values `a' and `b', one of
2117 * which is a NaN, and returns the appropriate NaN
2118 * result. If either `a' or `b' is a signaling NaN,
2119 * the invalid exception is raised.
2120 */
2121 if (is_snan(a.cls) || is_snan(b.cls)) {
2122 return pick_nan(a, b, s);
2123 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
2124 return b;
2125 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
2126 return a;
2127 }
2128 }
2129 return pick_nan(a, b, s);
2130 } else {
2131 int a_exp, b_exp;
89360067
AB
2132
2133 switch (a.cls) {
2134 case float_class_normal:
2135 a_exp = a.exp;
2136 break;
2137 case float_class_inf:
2138 a_exp = INT_MAX;
2139 break;
2140 case float_class_zero:
2141 a_exp = INT_MIN;
2142 break;
2143 default:
2144 g_assert_not_reached();
2145 break;
2146 }
2147 switch (b.cls) {
2148 case float_class_normal:
2149 b_exp = b.exp;
2150 break;
2151 case float_class_inf:
2152 b_exp = INT_MAX;
2153 break;
2154 case float_class_zero:
2155 b_exp = INT_MIN;
2156 break;
2157 default:
2158 g_assert_not_reached();
2159 break;
2160 }
2161
6245327a
EC
2162 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
2163 bool a_less = a_exp < b_exp;
2164 if (a_exp == b_exp) {
2165 a_less = a.frac < b.frac;
2166 }
2167 return a_less ^ ismin ? b : a;
89360067
AB
2168 }
2169
6245327a 2170 if (a.sign == b.sign) {
89360067
AB
2171 bool a_less = a_exp < b_exp;
2172 if (a_exp == b_exp) {
2173 a_less = a.frac < b.frac;
2174 }
6245327a 2175 return a.sign ^ a_less ^ ismin ? b : a;
89360067 2176 } else {
6245327a 2177 return a.sign ^ ismin ? b : a;
89360067
AB
2178 }
2179 }
2180}
2181
2182#define MINMAX(sz, name, ismin, isiee, ismag) \
2183float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
2184 float_status *s) \
2185{ \
2186 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2187 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2188 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
2189 \
2190 return float ## sz ## _round_pack_canonical(pr, s); \
2191}
2192
2193MINMAX(16, min, true, false, false)
2194MINMAX(16, minnum, true, true, false)
2195MINMAX(16, minnummag, true, true, true)
2196MINMAX(16, max, false, false, false)
2197MINMAX(16, maxnum, false, true, false)
2198MINMAX(16, maxnummag, false, true, true)
2199
2200MINMAX(32, min, true, false, false)
2201MINMAX(32, minnum, true, true, false)
2202MINMAX(32, minnummag, true, true, true)
2203MINMAX(32, max, false, false, false)
2204MINMAX(32, maxnum, false, true, false)
2205MINMAX(32, maxnummag, false, true, true)
2206
2207MINMAX(64, min, true, false, false)
2208MINMAX(64, minnum, true, true, false)
2209MINMAX(64, minnummag, true, true, true)
2210MINMAX(64, max, false, false, false)
2211MINMAX(64, maxnum, false, true, false)
2212MINMAX(64, maxnummag, false, true, true)
2213
2214#undef MINMAX
2215
0c4c9092
AB
2216/* Floating point compare */
2217static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
2218 float_status *s)
2219{
2220 if (is_nan(a.cls) || is_nan(b.cls)) {
2221 if (!is_quiet ||
2222 a.cls == float_class_snan ||
2223 b.cls == float_class_snan) {
2224 s->float_exception_flags |= float_flag_invalid;
2225 }
2226 return float_relation_unordered;
2227 }
2228
2229 if (a.cls == float_class_zero) {
2230 if (b.cls == float_class_zero) {
2231 return float_relation_equal;
2232 }
2233 return b.sign ? float_relation_greater : float_relation_less;
2234 } else if (b.cls == float_class_zero) {
2235 return a.sign ? float_relation_less : float_relation_greater;
2236 }
2237
2238 /* The only really important thing about infinity is its sign. If
2239 * both are infinities the sign marks the smallest of the two.
2240 */
2241 if (a.cls == float_class_inf) {
2242 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
2243 return float_relation_equal;
2244 }
2245 return a.sign ? float_relation_less : float_relation_greater;
2246 } else if (b.cls == float_class_inf) {
2247 return b.sign ? float_relation_greater : float_relation_less;
2248 }
2249
2250 if (a.sign != b.sign) {
2251 return a.sign ? float_relation_less : float_relation_greater;
2252 }
2253
2254 if (a.exp == b.exp) {
2255 if (a.frac == b.frac) {
2256 return float_relation_equal;
2257 }
2258 if (a.sign) {
2259 return a.frac > b.frac ?
2260 float_relation_less : float_relation_greater;
2261 } else {
2262 return a.frac > b.frac ?
2263 float_relation_greater : float_relation_less;
2264 }
2265 } else {
2266 if (a.sign) {
2267 return a.exp > b.exp ? float_relation_less : float_relation_greater;
2268 } else {
2269 return a.exp > b.exp ? float_relation_greater : float_relation_less;
2270 }
2271 }
2272}
2273
2274#define COMPARE(sz) \
2275int float ## sz ## _compare(float ## sz a, float ## sz b, \
2276 float_status *s) \
2277{ \
2278 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2279 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2280 return compare_floats(pa, pb, false, s); \
2281} \
2282int float ## sz ## _compare_quiet(float ## sz a, float ## sz b, \
2283 float_status *s) \
2284{ \
2285 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2286 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2287 return compare_floats(pa, pb, true, s); \
2288}
2289
2290COMPARE(16)
2291COMPARE(32)
2292COMPARE(64)
2293
2294#undef COMPARE
2295
0bfc9f19
AB
2296/* Multiply A by 2 raised to the power N. */
2297static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
2298{
2299 if (unlikely(is_nan(a.cls))) {
2300 return return_nan(a, s);
2301 }
2302 if (a.cls == float_class_normal) {
ce8d4082
RH
2303 /* The largest float type (even though not supported by FloatParts)
2304 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
2305 * still allows rounding to infinity, without allowing overflow
2306 * within the int32_t that backs FloatParts.exp.
2307 */
2308 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
2309 a.exp += n;
2310 }
2311 return a;
2312}
2313
2314float16 float16_scalbn(float16 a, int n, float_status *status)
2315{
2316 FloatParts pa = float16_unpack_canonical(a, status);
2317 FloatParts pr = scalbn_decomposed(pa, n, status);
2318 return float16_round_pack_canonical(pr, status);
2319}
2320
2321float32 float32_scalbn(float32 a, int n, float_status *status)
2322{
2323 FloatParts pa = float32_unpack_canonical(a, status);
2324 FloatParts pr = scalbn_decomposed(pa, n, status);
2325 return float32_round_pack_canonical(pr, status);
2326}
2327
2328float64 float64_scalbn(float64 a, int n, float_status *status)
2329{
2330 FloatParts pa = float64_unpack_canonical(a, status);
2331 FloatParts pr = scalbn_decomposed(pa, n, status);
2332 return float64_round_pack_canonical(pr, status);
2333}
2334
c13bb2da
AB
2335/*
2336 * Square Root
2337 *
2338 * The old softfloat code did an approximation step before zeroing in
2339 * on the final result. However, for simplicity we just compute the
2340 * square root by iterating down from the implicit bit to enough extra
2341 * bits to ensure we get a correctly rounded result.
2342 *
2343 * This does mean however the calculation is slower than before,
2344 * especially for 64 bit floats.
2345 */
2346
2347static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
2348{
2349 uint64_t a_frac, r_frac, s_frac;
2350 int bit, last_bit;
2351
2352 if (is_nan(a.cls)) {
2353 return return_nan(a, s);
2354 }
2355 if (a.cls == float_class_zero) {
2356 return a; /* sqrt(+-0) = +-0 */
2357 }
2358 if (a.sign) {
2359 s->float_exception_flags |= float_flag_invalid;
f7e598e2 2360 return parts_default_nan(s);
c13bb2da
AB
2361 }
2362 if (a.cls == float_class_inf) {
2363 return a; /* sqrt(+inf) = +inf */
2364 }
2365
2366 assert(a.cls == float_class_normal);
2367
2368 /* We need two overflow bits at the top. Adding room for that is a
2369 * right shift. If the exponent is odd, we can discard the low bit
2370 * by multiplying the fraction by 2; that's a left shift. Combine
2371 * those and we shift right if the exponent is even.
2372 */
2373 a_frac = a.frac;
2374 if (!(a.exp & 1)) {
2375 a_frac >>= 1;
2376 }
2377 a.exp >>= 1;
2378
2379 /* Bit-by-bit computation of sqrt. */
2380 r_frac = 0;
2381 s_frac = 0;
2382
2383 /* Iterate from implicit bit down to the 3 extra bits to compute a
2384 * properly rounded result. Remember we've inserted one more bit
2385 * at the top, so these positions are one less.
2386 */
2387 bit = DECOMPOSED_BINARY_POINT - 1;
2388 last_bit = MAX(p->frac_shift - 4, 0);
2389 do {
2390 uint64_t q = 1ULL << bit;
2391 uint64_t t_frac = s_frac + q;
2392 if (t_frac <= a_frac) {
2393 s_frac = t_frac + q;
2394 a_frac -= t_frac;
2395 r_frac += q;
2396 }
2397 a_frac <<= 1;
2398 } while (--bit >= last_bit);
2399
2400 /* Undo the right shift done above. If there is any remaining
2401 * fraction, the result is inexact. Set the sticky bit.
2402 */
2403 a.frac = (r_frac << 1) + (a_frac != 0);
2404
2405 return a;
2406}
2407
97ff87c0 2408float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da
AB
2409{
2410 FloatParts pa = float16_unpack_canonical(a, status);
2411 FloatParts pr = sqrt_float(pa, status, &float16_params);
2412 return float16_round_pack_canonical(pr, status);
2413}
2414
97ff87c0 2415float32 QEMU_FLATTEN float32_sqrt(float32 a, float_status *status)
c13bb2da
AB
2416{
2417 FloatParts pa = float32_unpack_canonical(a, status);
2418 FloatParts pr = sqrt_float(pa, status, &float32_params);
2419 return float32_round_pack_canonical(pr, status);
2420}
2421
97ff87c0 2422float64 QEMU_FLATTEN float64_sqrt(float64 a, float_status *status)
c13bb2da
AB
2423{
2424 FloatParts pa = float64_unpack_canonical(a, status);
2425 FloatParts pr = sqrt_float(pa, status, &float64_params);
2426 return float64_round_pack_canonical(pr, status);
2427}
2428
0218a16e
RH
2429/*----------------------------------------------------------------------------
2430| The pattern for a default generated NaN.
2431*----------------------------------------------------------------------------*/
2432
2433float16 float16_default_nan(float_status *status)
2434{
2435 FloatParts p = parts_default_nan(status);
2436 p.frac >>= float16_params.frac_shift;
2437 return float16_pack_raw(p);
2438}
2439
2440float32 float32_default_nan(float_status *status)
2441{
2442 FloatParts p = parts_default_nan(status);
2443 p.frac >>= float32_params.frac_shift;
2444 return float32_pack_raw(p);
2445}
2446
2447float64 float64_default_nan(float_status *status)
2448{
2449 FloatParts p = parts_default_nan(status);
2450 p.frac >>= float64_params.frac_shift;
2451 return float64_pack_raw(p);
2452}
2453
2454float128 float128_default_nan(float_status *status)
2455{
2456 FloatParts p = parts_default_nan(status);
2457 float128 r;
2458
2459 /* Extrapolate from the choices made by parts_default_nan to fill
2460 * in the quad-floating format. If the low bit is set, assume we
2461 * want to set all non-snan bits.
2462 */
2463 r.low = -(p.frac & 1);
2464 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
2465 r.high |= LIT64(0x7FFF000000000000);
2466 r.high |= (uint64_t)p.sign << 63;
2467
2468 return r;
2469}
c13bb2da 2470
158142c2 2471/*----------------------------------------------------------------------------
377ed926
RH
2472| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
2473*----------------------------------------------------------------------------*/
2474
2475float16 float16_silence_nan(float16 a, float_status *status)
2476{
2477 FloatParts p = float16_unpack_raw(a);
2478 p.frac <<= float16_params.frac_shift;
2479 p = parts_silence_nan(p, status);
2480 p.frac >>= float16_params.frac_shift;
2481 return float16_pack_raw(p);
2482}
2483
2484float32 float32_silence_nan(float32 a, float_status *status)
2485{
2486 FloatParts p = float32_unpack_raw(a);
2487 p.frac <<= float32_params.frac_shift;
2488 p = parts_silence_nan(p, status);
2489 p.frac >>= float32_params.frac_shift;
2490 return float32_pack_raw(p);
2491}
2492
2493float64 float64_silence_nan(float64 a, float_status *status)
2494{
2495 FloatParts p = float64_unpack_raw(a);
2496 p.frac <<= float64_params.frac_shift;
2497 p = parts_silence_nan(p, status);
2498 p.frac >>= float64_params.frac_shift;
2499 return float64_pack_raw(p);
2500}
2501
2502/*----------------------------------------------------------------------------
158142c2
FB
2503| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
2504| and 7, and returns the properly rounded 32-bit integer corresponding to the
2505| input. If `zSign' is 1, the input is negated before being converted to an
2506| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
2507| is simply rounded to an integer, with the inexact exception raised if the
2508| input cannot be represented exactly as an integer. However, if the fixed-
2509| point input is too large, the invalid exception is raised and the largest
2510| positive or negative integer is returned.
2511*----------------------------------------------------------------------------*/
2512
f4014512 2513static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
158142c2 2514{
8f506c70 2515 int8_t roundingMode;
158142c2 2516 flag roundNearestEven;
8f506c70 2517 int8_t roundIncrement, roundBits;
760e1416 2518 int32_t z;
158142c2 2519
a2f2d288 2520 roundingMode = status->float_rounding_mode;
158142c2 2521 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2522 switch (roundingMode) {
2523 case float_round_nearest_even:
f9288a76 2524 case float_round_ties_away:
dc355b76
PM
2525 roundIncrement = 0x40;
2526 break;
2527 case float_round_to_zero:
2528 roundIncrement = 0;
2529 break;
2530 case float_round_up:
2531 roundIncrement = zSign ? 0 : 0x7f;
2532 break;
2533 case float_round_down:
2534 roundIncrement = zSign ? 0x7f : 0;
2535 break;
2536 default:
2537 abort();
158142c2
FB
2538 }
2539 roundBits = absZ & 0x7F;
2540 absZ = ( absZ + roundIncrement )>>7;
2541 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2542 z = absZ;
2543 if ( zSign ) z = - z;
2544 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 2545 float_raise(float_flag_invalid, status);
bb98fe42 2546 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2 2547 }
a2f2d288
PM
2548 if (roundBits) {
2549 status->float_exception_flags |= float_flag_inexact;
2550 }
158142c2
FB
2551 return z;
2552
2553}
2554
2555/*----------------------------------------------------------------------------
2556| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2557| `absZ1', with binary point between bits 63 and 64 (between the input words),
2558| and returns the properly rounded 64-bit integer corresponding to the input.
2559| If `zSign' is 1, the input is negated before being converted to an integer.
2560| Ordinarily, the fixed-point input is simply rounded to an integer, with
2561| the inexact exception raised if the input cannot be represented exactly as
2562| an integer. However, if the fixed-point input is too large, the invalid
2563| exception is raised and the largest positive or negative integer is
2564| returned.
2565*----------------------------------------------------------------------------*/
2566
f42c2224 2567static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 2568 float_status *status)
158142c2 2569{
8f506c70 2570 int8_t roundingMode;
158142c2 2571 flag roundNearestEven, increment;
760e1416 2572 int64_t z;
158142c2 2573
a2f2d288 2574 roundingMode = status->float_rounding_mode;
158142c2 2575 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2576 switch (roundingMode) {
2577 case float_round_nearest_even:
f9288a76 2578 case float_round_ties_away:
dc355b76
PM
2579 increment = ((int64_t) absZ1 < 0);
2580 break;
2581 case float_round_to_zero:
2582 increment = 0;
2583 break;
2584 case float_round_up:
2585 increment = !zSign && absZ1;
2586 break;
2587 case float_round_down:
2588 increment = zSign && absZ1;
2589 break;
2590 default:
2591 abort();
158142c2
FB
2592 }
2593 if ( increment ) {
2594 ++absZ0;
2595 if ( absZ0 == 0 ) goto overflow;
bb98fe42 2596 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
2597 }
2598 z = absZ0;
2599 if ( zSign ) z = - z;
2600 if ( z && ( ( z < 0 ) ^ zSign ) ) {
2601 overflow:
ff32e16e 2602 float_raise(float_flag_invalid, status);
158142c2 2603 return
bb98fe42 2604 zSign ? (int64_t) LIT64( 0x8000000000000000 )
158142c2
FB
2605 : LIT64( 0x7FFFFFFFFFFFFFFF );
2606 }
a2f2d288
PM
2607 if (absZ1) {
2608 status->float_exception_flags |= float_flag_inexact;
2609 }
158142c2
FB
2610 return z;
2611
2612}
2613
fb3ea83a
TM
2614/*----------------------------------------------------------------------------
2615| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2616| `absZ1', with binary point between bits 63 and 64 (between the input words),
2617| and returns the properly rounded 64-bit unsigned integer corresponding to the
2618| input. Ordinarily, the fixed-point input is simply rounded to an integer,
2619| with the inexact exception raised if the input cannot be represented exactly
2620| as an integer. However, if the fixed-point input is too large, the invalid
2621| exception is raised and the largest unsigned integer is returned.
2622*----------------------------------------------------------------------------*/
2623
f42c2224 2624static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
e5a41ffa 2625 uint64_t absZ1, float_status *status)
fb3ea83a 2626{
8f506c70 2627 int8_t roundingMode;
fb3ea83a
TM
2628 flag roundNearestEven, increment;
2629
a2f2d288 2630 roundingMode = status->float_rounding_mode;
fb3ea83a 2631 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
2632 switch (roundingMode) {
2633 case float_round_nearest_even:
f9288a76 2634 case float_round_ties_away:
dc355b76
PM
2635 increment = ((int64_t)absZ1 < 0);
2636 break;
2637 case float_round_to_zero:
2638 increment = 0;
2639 break;
2640 case float_round_up:
2641 increment = !zSign && absZ1;
2642 break;
2643 case float_round_down:
2644 increment = zSign && absZ1;
2645 break;
2646 default:
2647 abort();
fb3ea83a
TM
2648 }
2649 if (increment) {
2650 ++absZ0;
2651 if (absZ0 == 0) {
ff32e16e 2652 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2653 return LIT64(0xFFFFFFFFFFFFFFFF);
2654 }
2655 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
2656 }
2657
2658 if (zSign && absZ0) {
ff32e16e 2659 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2660 return 0;
2661 }
2662
2663 if (absZ1) {
a2f2d288 2664 status->float_exception_flags |= float_flag_inexact;
fb3ea83a
TM
2665 }
2666 return absZ0;
2667}
2668
37d18660
PM
2669/*----------------------------------------------------------------------------
2670| If `a' is denormal and we are in flush-to-zero mode then set the
2671| input-denormal exception and return zero. Otherwise just return the value.
2672*----------------------------------------------------------------------------*/
e5a41ffa 2673float32 float32_squash_input_denormal(float32 a, float_status *status)
37d18660 2674{
a2f2d288 2675 if (status->flush_inputs_to_zero) {
37d18660 2676 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
ff32e16e 2677 float_raise(float_flag_input_denormal, status);
37d18660
PM
2678 return make_float32(float32_val(a) & 0x80000000);
2679 }
2680 }
2681 return a;
2682}
2683
158142c2
FB
2684/*----------------------------------------------------------------------------
2685| Normalizes the subnormal single-precision floating-point value represented
2686| by the denormalized significand `aSig'. The normalized exponent and
2687| significand are stored at the locations pointed to by `zExpPtr' and
2688| `zSigPtr', respectively.
2689*----------------------------------------------------------------------------*/
2690
static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* Left shift that moves the leading one of aSig up to bit 23. */
    const int8_t normShift = clz32(aSig) - 8;

    *zSigPtr = aSig << normShift;
    *zExpPtr = 1 - normShift;
}
2701
158142c2
FB
2702/*----------------------------------------------------------------------------
2703| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2704| and significand `zSig', and returns the proper single-precision floating-
2705| point value corresponding to the abstract input. Ordinarily, the abstract
2706| value is simply rounded and packed into the single-precision format, with
2707| the inexact exception raised if the abstract input cannot be represented
2708| exactly. However, if the abstract value is too large, the overflow and
2709| inexact exceptions are raised and an infinity or maximal finite value is
2710| returned. If the abstract value is too small, the input value is rounded to
2711| a subnormal number, and the underflow and inexact exceptions are raised if
2712| the abstract input cannot be represented exactly as a subnormal single-
2713| precision floating-point number.
2714| The input significand `zSig' has its binary point between bits 30
2715| and 29, which is 7 bits to the left of the usual location. This shifted
2716| significand must be normalized or smaller. If `zSig' is not normalized,
2717| `zExp' must be 0; in that case, the result returned is a subnormal number,
2718| and it must not require rounding. In the usual case that `zSig' is
2719| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2720| The handling of underflow and overflow follows the IEC/IEEE Standard for
2721| Binary Floating-Point Arithmetic.
2722*----------------------------------------------------------------------------*/
2723
0c48262d 2724static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2725 float_status *status)
158142c2 2726{
8f506c70 2727 int8_t roundingMode;
158142c2 2728 flag roundNearestEven;
8f506c70 2729 int8_t roundIncrement, roundBits;
158142c2
FB
2730 flag isTiny;
2731
a2f2d288 2732 roundingMode = status->float_rounding_mode;
158142c2 2733 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2734 switch (roundingMode) {
2735 case float_round_nearest_even:
f9288a76 2736 case float_round_ties_away:
dc355b76
PM
2737 roundIncrement = 0x40;
2738 break;
2739 case float_round_to_zero:
2740 roundIncrement = 0;
2741 break;
2742 case float_round_up:
2743 roundIncrement = zSign ? 0 : 0x7f;
2744 break;
2745 case float_round_down:
2746 roundIncrement = zSign ? 0x7f : 0;
2747 break;
2748 default:
2749 abort();
2750 break;
158142c2
FB
2751 }
2752 roundBits = zSig & 0x7F;
bb98fe42 2753 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
2754 if ( ( 0xFD < zExp )
2755 || ( ( zExp == 0xFD )
bb98fe42 2756 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2757 ) {
ff32e16e 2758 float_raise(float_flag_overflow | float_flag_inexact, status);
f090c9d4 2759 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
158142c2
FB
2760 }
2761 if ( zExp < 0 ) {
a2f2d288 2762 if (status->flush_to_zero) {
ff32e16e 2763 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2764 return packFloat32(zSign, 0, 0);
2765 }
158142c2 2766 isTiny =
a2f2d288
PM
2767 (status->float_detect_tininess
2768 == float_tininess_before_rounding)
158142c2
FB
2769 || ( zExp < -1 )
2770 || ( zSig + roundIncrement < 0x80000000 );
2771 shift32RightJamming( zSig, - zExp, &zSig );
2772 zExp = 0;
2773 roundBits = zSig & 0x7F;
ff32e16e
PM
2774 if (isTiny && roundBits) {
2775 float_raise(float_flag_underflow, status);
2776 }
158142c2
FB
2777 }
2778 }
a2f2d288
PM
2779 if (roundBits) {
2780 status->float_exception_flags |= float_flag_inexact;
2781 }
158142c2
FB
2782 zSig = ( zSig + roundIncrement )>>7;
2783 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2784 if ( zSig == 0 ) zExp = 0;
2785 return packFloat32( zSign, zExp, zSig );
2786
2787}
2788
2789/*----------------------------------------------------------------------------
2790| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2791| and significand `zSig', and returns the proper single-precision floating-
2792| point value corresponding to the abstract input. This routine is just like
2793| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
2794| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2795| floating-point exponent.
2796*----------------------------------------------------------------------------*/
2797
2798static float32
0c48262d 2799 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2800 float_status *status)
158142c2 2801{
8f506c70 2802 int8_t shiftCount;
158142c2 2803
0019d5c3 2804 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
2805 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
2806 status);
158142c2
FB
2807
2808}
2809
37d18660
PM
2810/*----------------------------------------------------------------------------
2811| If `a' is denormal and we are in flush-to-zero mode then set the
2812| input-denormal exception and return zero. Otherwise just return the value.
2813*----------------------------------------------------------------------------*/
e5a41ffa 2814float64 float64_squash_input_denormal(float64 a, float_status *status)
37d18660 2815{
a2f2d288 2816 if (status->flush_inputs_to_zero) {
37d18660 2817 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
ff32e16e 2818 float_raise(float_flag_input_denormal, status);
37d18660
PM
2819 return make_float64(float64_val(a) & (1ULL << 63));
2820 }
2821 }
2822 return a;
2823}
2824
158142c2
FB
2825/*----------------------------------------------------------------------------
2826| Normalizes the subnormal double-precision floating-point value represented
2827| by the denormalized significand `aSig'. The normalized exponent and
2828| significand are stored at the locations pointed to by `zExpPtr' and
2829| `zSigPtr', respectively.
2830*----------------------------------------------------------------------------*/
2831
static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* Left shift that moves the leading one of aSig up to bit 52. */
    const int8_t normShift = clz64(aSig) - 11;

    *zSigPtr = aSig << normShift;
    *zExpPtr = 1 - normShift;
}
2842
2843/*----------------------------------------------------------------------------
2844| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2845| double-precision floating-point value, returning the result. After being
2846| shifted into the proper positions, the three fields are simply added
2847| together to form the result. This means that any integer portion of `zSig'
2848| will be added into the exponent. Since a properly normalized significand
2849| will have an integer portion equal to 1, the `zExp' input should be 1 less
2850| than the desired result exponent whenever `zSig' is a complete, normalized
2851| significand.
2852*----------------------------------------------------------------------------*/
2853
0c48262d 2854static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
158142c2
FB
2855{
2856
f090c9d4 2857 return make_float64(
bb98fe42 2858 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
2859
2860}
2861
2862/*----------------------------------------------------------------------------
2863| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2864| and significand `zSig', and returns the proper double-precision floating-
2865| point value corresponding to the abstract input. Ordinarily, the abstract
2866| value is simply rounded and packed into the double-precision format, with
2867| the inexact exception raised if the abstract input cannot be represented
2868| exactly. However, if the abstract value is too large, the overflow and
2869| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
2870| returned. If the abstract value is too small, the input value is rounded to
2871| a subnormal number, and the underflow and inexact exceptions are raised if
2872| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
2873| precision floating-point number.
2874| The input significand `zSig' has its binary point between bits 62
2875| and 61, which is 10 bits to the left of the usual location. This shifted
2876| significand must be normalized or smaller. If `zSig' is not normalized,
2877| `zExp' must be 0; in that case, the result returned is a subnormal number,
2878| and it must not require rounding. In the usual case that `zSig' is
2879| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2880| The handling of underflow and overflow follows the IEC/IEEE Standard for
2881| Binary Floating-Point Arithmetic.
2882*----------------------------------------------------------------------------*/
2883
0c48262d 2884static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2885 float_status *status)
158142c2 2886{
8f506c70 2887 int8_t roundingMode;
158142c2 2888 flag roundNearestEven;
0c48262d 2889 int roundIncrement, roundBits;
158142c2
FB
2890 flag isTiny;
2891
a2f2d288 2892 roundingMode = status->float_rounding_mode;
158142c2 2893 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2894 switch (roundingMode) {
2895 case float_round_nearest_even:
f9288a76 2896 case float_round_ties_away:
dc355b76
PM
2897 roundIncrement = 0x200;
2898 break;
2899 case float_round_to_zero:
2900 roundIncrement = 0;
2901 break;
2902 case float_round_up:
2903 roundIncrement = zSign ? 0 : 0x3ff;
2904 break;
2905 case float_round_down:
2906 roundIncrement = zSign ? 0x3ff : 0;
2907 break;
9ee6f678
BR
2908 case float_round_to_odd:
2909 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2910 break;
dc355b76
PM
2911 default:
2912 abort();
158142c2
FB
2913 }
2914 roundBits = zSig & 0x3FF;
bb98fe42 2915 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
2916 if ( ( 0x7FD < zExp )
2917 || ( ( zExp == 0x7FD )
bb98fe42 2918 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2919 ) {
9ee6f678
BR
2920 bool overflow_to_inf = roundingMode != float_round_to_odd &&
2921 roundIncrement != 0;
ff32e16e 2922 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 2923 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
2924 }
2925 if ( zExp < 0 ) {
a2f2d288 2926 if (status->flush_to_zero) {
ff32e16e 2927 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2928 return packFloat64(zSign, 0, 0);
2929 }
158142c2 2930 isTiny =
a2f2d288
PM
2931 (status->float_detect_tininess
2932 == float_tininess_before_rounding)
158142c2
FB
2933 || ( zExp < -1 )
2934 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
2935 shift64RightJamming( zSig, - zExp, &zSig );
2936 zExp = 0;
2937 roundBits = zSig & 0x3FF;
ff32e16e
PM
2938 if (isTiny && roundBits) {
2939 float_raise(float_flag_underflow, status);
2940 }
9ee6f678
BR
2941 if (roundingMode == float_round_to_odd) {
2942 /*
2943 * For round-to-odd case, the roundIncrement depends on
2944 * zSig which just changed.
2945 */
2946 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2947 }
158142c2
FB
2948 }
2949 }
a2f2d288
PM
2950 if (roundBits) {
2951 status->float_exception_flags |= float_flag_inexact;
2952 }
158142c2
FB
2953 zSig = ( zSig + roundIncrement )>>10;
2954 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
2955 if ( zSig == 0 ) zExp = 0;
2956 return packFloat64( zSign, zExp, zSig );
2957
2958}
2959
2960/*----------------------------------------------------------------------------
2961| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2962| and significand `zSig', and returns the proper double-precision floating-
2963| point value corresponding to the abstract input. This routine is just like
2964| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
2965| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2966| floating-point exponent.
2967*----------------------------------------------------------------------------*/
2968
2969static float64
0c48262d 2970 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2971 float_status *status)
158142c2 2972{
8f506c70 2973 int8_t shiftCount;
158142c2 2974
0019d5c3 2975 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
2976 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
2977 status);
158142c2
FB
2978
2979}
2980
158142c2
FB
2981/*----------------------------------------------------------------------------
2982| Normalizes the subnormal extended double-precision floating-point value
2983| represented by the denormalized significand `aSig'. The normalized exponent
2984| and significand are stored at the locations pointed to by `zExpPtr' and
2985| `zSigPtr', respectively.
2986*----------------------------------------------------------------------------*/
2987
88857aca
LV
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* Shifting by the leading-zero count puts the top set bit at bit 63. */
    int8_t lshift = clz64(aSig);

    *zSigPtr = aSig << lshift;
    *zExpPtr = 1 - lshift;
}
2997
2998/*----------------------------------------------------------------------------
2999| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3000| and extended significand formed by the concatenation of `zSig0' and `zSig1',
3001| and returns the proper extended double-precision floating-point value
3002| corresponding to the abstract input. Ordinarily, the abstract value is
3003| rounded and packed into the extended double-precision format, with the
3004| inexact exception raised if the abstract input cannot be represented
3005| exactly. However, if the abstract value is too large, the overflow and
3006| inexact exceptions are raised and an infinity or maximal finite value is
3007| returned. If the abstract value is too small, the input value is rounded to
3008| a subnormal number, and the underflow and inexact exceptions are raised if
3009| the abstract input cannot be represented exactly as a subnormal extended
3010| double-precision floating-point number.
3011| If `roundingPrecision' is 32 or 64, the result is rounded to the same
3012| number of bits as single or double precision, respectively. Otherwise, the
3013| result is rounded to the full precision of the extended double-precision
3014| format.
3015| The input significand must be normalized or smaller. If the input
3016| significand is not normalized, `zExp' must be 0; in that case, the result
3017| returned is a subnormal number, and it must not require rounding. The
3018| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
3019| Floating-Point Arithmetic.
3020*----------------------------------------------------------------------------*/
3021
88857aca
LV
3022floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
3023 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
3024 float_status *status)
158142c2 3025{
8f506c70 3026 int8_t roundingMode;
158142c2 3027 flag roundNearestEven, increment, isTiny;
f42c2224 3028 int64_t roundIncrement, roundMask, roundBits;
158142c2 3029
a2f2d288 3030 roundingMode = status->float_rounding_mode;
158142c2
FB
3031 roundNearestEven = ( roundingMode == float_round_nearest_even );
3032 if ( roundingPrecision == 80 ) goto precision80;
3033 if ( roundingPrecision == 64 ) {
3034 roundIncrement = LIT64( 0x0000000000000400 );
3035 roundMask = LIT64( 0x00000000000007FF );
3036 }
3037 else if ( roundingPrecision == 32 ) {
3038 roundIncrement = LIT64( 0x0000008000000000 );
3039 roundMask = LIT64( 0x000000FFFFFFFFFF );
3040 }
3041 else {
3042 goto precision80;
3043 }
3044 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
3045 switch (roundingMode) {
3046 case float_round_nearest_even:
f9288a76 3047 case float_round_ties_away:
dc355b76
PM
3048 break;
3049 case float_round_to_zero:
3050 roundIncrement = 0;
3051 break;
3052 case float_round_up:
3053 roundIncrement = zSign ? 0 : roundMask;
3054 break;
3055 case float_round_down:
3056 roundIncrement = zSign ? roundMask : 0;
3057 break;
3058 default:
3059 abort();
158142c2
FB
3060 }
3061 roundBits = zSig0 & roundMask;
bb98fe42 3062 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
3063 if ( ( 0x7FFE < zExp )
3064 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
3065 ) {
3066 goto overflow;
3067 }
3068 if ( zExp <= 0 ) {
a2f2d288 3069 if (status->flush_to_zero) {
ff32e16e 3070 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3071 return packFloatx80(zSign, 0, 0);
3072 }
158142c2 3073 isTiny =
a2f2d288
PM
3074 (status->float_detect_tininess
3075 == float_tininess_before_rounding)
158142c2
FB
3076 || ( zExp < 0 )
3077 || ( zSig0 <= zSig0 + roundIncrement );
3078 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
3079 zExp = 0;
3080 roundBits = zSig0 & roundMask;
ff32e16e
PM
3081 if (isTiny && roundBits) {
3082 float_raise(float_flag_underflow, status);
3083 }
a2f2d288
PM
3084 if (roundBits) {
3085 status->float_exception_flags |= float_flag_inexact;
3086 }
158142c2 3087 zSig0 += roundIncrement;
bb98fe42 3088 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
3089 roundIncrement = roundMask + 1;
3090 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3091 roundMask |= roundIncrement;
3092 }
3093 zSig0 &= ~ roundMask;
3094 return packFloatx80( zSign, zExp, zSig0 );
3095 }
3096 }
a2f2d288
PM
3097 if (roundBits) {
3098 status->float_exception_flags |= float_flag_inexact;
3099 }
158142c2
FB
3100 zSig0 += roundIncrement;
3101 if ( zSig0 < roundIncrement ) {
3102 ++zExp;
3103 zSig0 = LIT64( 0x8000000000000000 );
3104 }
3105 roundIncrement = roundMask + 1;
3106 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3107 roundMask |= roundIncrement;
3108 }
3109 zSig0 &= ~ roundMask;
3110 if ( zSig0 == 0 ) zExp = 0;
3111 return packFloatx80( zSign, zExp, zSig0 );
3112 precision80:
dc355b76
PM
3113 switch (roundingMode) {
3114 case float_round_nearest_even:
f9288a76 3115 case float_round_ties_away:
dc355b76
PM
3116 increment = ((int64_t)zSig1 < 0);
3117 break;
3118 case float_round_to_zero:
3119 increment = 0;
3120 break;
3121 case float_round_up:
3122 increment = !zSign && zSig1;
3123 break;
3124 case float_round_down:
3125 increment = zSign && zSig1;
3126 break;
3127 default:
3128 abort();
158142c2 3129 }
bb98fe42 3130 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
3131 if ( ( 0x7FFE < zExp )
3132 || ( ( zExp == 0x7FFE )
3133 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
3134 && increment
3135 )
3136 ) {
3137 roundMask = 0;
3138 overflow:
ff32e16e 3139 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
3140 if ( ( roundingMode == float_round_to_zero )
3141 || ( zSign && ( roundingMode == float_round_up ) )
3142 || ( ! zSign && ( roundingMode == float_round_down ) )
3143 ) {
3144 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
3145 }
0f605c88
LV
3146 return packFloatx80(zSign,
3147 floatx80_infinity_high,
3148 floatx80_infinity_low);
158142c2
FB
3149 }
3150 if ( zExp <= 0 ) {
3151 isTiny =
a2f2d288
PM
3152 (status->float_detect_tininess
3153 == float_tininess_before_rounding)
158142c2
FB
3154 || ( zExp < 0 )
3155 || ! increment
3156 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
3157 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
3158 zExp = 0;
ff32e16e
PM
3159 if (isTiny && zSig1) {
3160 float_raise(float_flag_underflow, status);
3161 }
a2f2d288
PM
3162 if (zSig1) {
3163 status->float_exception_flags |= float_flag_inexact;
3164 }
dc355b76
PM
3165 switch (roundingMode) {
3166 case float_round_nearest_even:
f9288a76 3167 case float_round_ties_away:
dc355b76
PM
3168 increment = ((int64_t)zSig1 < 0);
3169 break;
3170 case float_round_to_zero:
3171 increment = 0;
3172 break;
3173 case float_round_up:
3174 increment = !zSign && zSig1;
3175 break;
3176 case float_round_down:
3177 increment = zSign && zSig1;
3178 break;
3179 default:
3180 abort();
158142c2
FB
3181 }
3182 if ( increment ) {
3183 ++zSig0;
3184 zSig0 &=
bb98fe42
AF
3185 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
3186 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
3187 }
3188 return packFloatx80( zSign, zExp, zSig0 );
3189 }
3190 }
a2f2d288
PM
3191 if (zSig1) {
3192 status->float_exception_flags |= float_flag_inexact;
3193 }
158142c2
FB
3194 if ( increment ) {
3195 ++zSig0;
3196 if ( zSig0 == 0 ) {
3197 ++zExp;
3198 zSig0 = LIT64( 0x8000000000000000 );
3199 }
3200 else {
bb98fe42 3201 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
3202 }
3203 }
3204 else {
3205 if ( zSig0 == 0 ) zExp = 0;
3206 }
3207 return packFloatx80( zSign, zExp, zSig0 );
3208
3209}
3210
3211/*----------------------------------------------------------------------------
3212| Takes an abstract floating-point value having sign `zSign', exponent
3213| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
3214| and returns the proper extended double-precision floating-point value
3215| corresponding to the abstract input. This routine is just like
3216| `roundAndPackFloatx80' except that the input significand does not have to be
3217| normalized.
3218*----------------------------------------------------------------------------*/
3219
88857aca
LV
3220floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
3221 flag zSign, int32_t zExp,
3222 uint64_t zSig0, uint64_t zSig1,
3223 float_status *status)
158142c2 3224{
8f506c70 3225 int8_t shiftCount;
158142c2
FB
3226
3227 if ( zSig0 == 0 ) {
3228 zSig0 = zSig1;
3229 zSig1 = 0;
3230 zExp -= 64;
3231 }
0019d5c3 3232 shiftCount = clz64(zSig0);
158142c2
FB
3233 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3234 zExp -= shiftCount;
ff32e16e
PM
3235 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
3236 zSig0, zSig1, status);
158142c2
FB
3237
3238}
3239
158142c2
FB
3240/*----------------------------------------------------------------------------
3241| Returns the least-significant 64 fraction bits of the quadruple-precision
3242| floating-point value `a'.
3243*----------------------------------------------------------------------------*/
3244
a49db98d 3245static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
3246{
3247
3248 return a.low;
3249
3250}
3251
3252/*----------------------------------------------------------------------------
3253| Returns the most-significant 48 fraction bits of the quadruple-precision
3254| floating-point value `a'.
3255*----------------------------------------------------------------------------*/
3256
a49db98d 3257static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
3258{
3259
3260 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
3261
3262}
3263
3264/*----------------------------------------------------------------------------
3265| Returns the exponent bits of the quadruple-precision floating-point value
3266| `a'.
3267*----------------------------------------------------------------------------*/
3268
f4014512 3269static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
3270{
3271
3272 return ( a.high>>48 ) & 0x7FFF;
3273
3274}
3275
3276/*----------------------------------------------------------------------------
3277| Returns the sign bit of the quadruple-precision floating-point value `a'.
3278*----------------------------------------------------------------------------*/
3279
a49db98d 3280static inline flag extractFloat128Sign( float128 a )
158142c2
FB
3281{
3282
3283 return a.high>>63;
3284
3285}
3286
3287/*----------------------------------------------------------------------------
3288| Normalizes the subnormal quadruple-precision floating-point value
3289| represented by the denormalized significand formed by the concatenation of
3290| `aSig0' and `aSig1'. The normalized exponent is stored at the location
3291| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
3292| significand are stored at the location pointed to by `zSig0Ptr', and the
3293| least significant 64 bits of the normalized significand are stored at the
3294| location pointed to by `zSig1Ptr'.
3295*----------------------------------------------------------------------------*/
3296
static void normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                                       int32_t *zExpPtr,
                                       uint64_t *zSig0Ptr,
                                       uint64_t *zSig1Ptr)
{
    int8_t lshift;

    if (aSig0 != 0) {
        /* Leading bit is in the high word: one 128-bit left shift. */
        lshift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, lshift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - lshift;
        return;
    }

    /* High word is empty: the result is built from the low word alone. */
    lshift = clz64(aSig1) - 15;
    if (lshift >= 0) {
        *zSig0Ptr = aSig1 << lshift;
        *zSig1Ptr = 0;
    } else {
        /* Fewer than 15 leading zeros: the low word straddles both words. */
        *zSig0Ptr = aSig1 >> (-lshift);
        *zSig1Ptr = aSig1 << (lshift & 63);
    }
    *zExpPtr = -lshift - 63;
}
3327
3328/*----------------------------------------------------------------------------
3329| Packs the sign `zSign', the exponent `zExp', and the significand formed
3330| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
3331| floating-point value, returning the result. After being shifted into the
3332| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
3333| added together to form the most significant 64 bits of the result. This
3334| means that any integer portion of `zSig0' will be added into the exponent.
3335| Since a properly normalized significand will have an integer portion equal
3336| to 1, the `zExp' input should be 1 less than the desired result exponent
3337| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
3338| significand.
3339*----------------------------------------------------------------------------*/
3340
a49db98d 3341static inline float128
f4014512 3342 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
158142c2
FB
3343{
3344 float128 z;
3345
3346 z.low = zSig1;
bb98fe42 3347 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
158142c2
FB
3348 return z;
3349
3350}
3351
3352/*----------------------------------------------------------------------------
3353| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3354| and extended significand formed by the concatenation of `zSig0', `zSig1',
3355| and `zSig2', and returns the proper quadruple-precision floating-point value
3356| corresponding to the abstract input. Ordinarily, the abstract value is
3357| simply rounded and packed into the quadruple-precision format, with the
3358| inexact exception raised if the abstract input cannot be represented
3359| exactly. However, if the abstract value is too large, the overflow and
3360| inexact exceptions are raised and an infinity or maximal finite value is
3361| returned. If the abstract value is too small, the input value is rounded to
3362| a subnormal number, and the underflow and inexact exceptions are raised if
3363| the abstract input cannot be represented exactly as a subnormal quadruple-
3364| precision floating-point number.
3365| The input significand must be normalized or smaller. If the input
3366| significand is not normalized, `zExp' must be 0; in that case, the result
3367| returned is a subnormal number, and it must not require rounding. In the
3368| usual case that the input significand is normalized, `zExp' must be 1 less
3369| than the ``true'' floating-point exponent. The handling of underflow and
3370| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3371*----------------------------------------------------------------------------*/
3372
f4014512 3373static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
3374 uint64_t zSig0, uint64_t zSig1,
3375 uint64_t zSig2, float_status *status)
158142c2 3376{
8f506c70 3377 int8_t roundingMode;
158142c2
FB
3378 flag roundNearestEven, increment, isTiny;
3379
a2f2d288 3380 roundingMode = status->float_rounding_mode;
158142c2 3381 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3382 switch (roundingMode) {
3383 case float_round_nearest_even:
f9288a76 3384 case float_round_ties_away:
dc355b76
PM
3385 increment = ((int64_t)zSig2 < 0);
3386 break;
3387 case float_round_to_zero:
3388 increment = 0;
3389 break;
3390 case float_round_up:
3391 increment = !zSign && zSig2;
3392 break;
3393 case float_round_down:
3394 increment = zSign && zSig2;
3395 break;
9ee6f678
BR
3396 case float_round_to_odd:
3397 increment = !(zSig1 & 0x1) && zSig2;
3398 break;
dc355b76
PM
3399 default:
3400 abort();
158142c2 3401 }
bb98fe42 3402 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
3403 if ( ( 0x7FFD < zExp )
3404 || ( ( zExp == 0x7FFD )
3405 && eq128(
3406 LIT64( 0x0001FFFFFFFFFFFF ),
3407 LIT64( 0xFFFFFFFFFFFFFFFF ),
3408 zSig0,
3409 zSig1
3410 )
3411 && increment
3412 )
3413 ) {
ff32e16e 3414 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
3415 if ( ( roundingMode == float_round_to_zero )
3416 || ( zSign && ( roundingMode == float_round_up ) )
3417 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 3418 || (roundingMode == float_round_to_odd)
158142c2
FB
3419 ) {
3420 return
3421 packFloat128(
3422 zSign,
3423 0x7FFE,
3424 LIT64( 0x0000FFFFFFFFFFFF ),
3425 LIT64( 0xFFFFFFFFFFFFFFFF )
3426 );
3427 }
3428 return packFloat128( zSign, 0x7FFF, 0, 0 );
3429 }
3430 if ( zExp < 0 ) {
a2f2d288 3431 if (status->flush_to_zero) {
ff32e16e 3432 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3433 return packFloat128(zSign, 0, 0, 0);
3434 }
158142c2 3435 isTiny =
a2f2d288
PM
3436 (status->float_detect_tininess
3437 == float_tininess_before_rounding)
158142c2
FB
3438 || ( zExp < -1 )
3439 || ! increment
3440 || lt128(
3441 zSig0,
3442 zSig1,
3443 LIT64( 0x0001FFFFFFFFFFFF ),
3444 LIT64( 0xFFFFFFFFFFFFFFFF )
3445 );
3446 shift128ExtraRightJamming(
3447 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
3448 zExp = 0;
ff32e16e
PM
3449 if (isTiny && zSig2) {
3450 float_raise(float_flag_underflow, status);
3451 }
dc355b76
PM
3452 switch (roundingMode) {
3453 case float_round_nearest_even:
f9288a76 3454 case float_round_ties_away:
dc355b76
PM
3455 increment = ((int64_t)zSig2 < 0);
3456 break;
3457 case float_round_to_zero:
3458 increment = 0;
3459 break;
3460 case float_round_up:
3461 increment = !zSign && zSig2;
3462 break;
3463 case float_round_down:
3464 increment = zSign && zSig2;
3465 break;
9ee6f678
BR
3466 case float_round_to_odd:
3467 increment = !(zSig1 & 0x1) && zSig2;
3468 break;
dc355b76
PM
3469 default:
3470 abort();
158142c2
FB
3471 }
3472 }
3473 }
a2f2d288
PM
3474 if (zSig2) {
3475 status->float_exception_flags |= float_flag_inexact;
3476 }
158142c2
FB
3477 if ( increment ) {
3478 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
3479 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
3480 }
3481 else {
3482 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
3483 }
3484 return packFloat128( zSign, zExp, zSig0, zSig1 );
3485
3486}
3487
3488/*----------------------------------------------------------------------------
3489| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3490| and significand formed by the concatenation of `zSig0' and `zSig1', and
3491| returns the proper quadruple-precision floating-point value corresponding
3492| to the abstract input. This routine is just like `roundAndPackFloat128'
3493| except that the input significand has fewer bits and does not have to be
3494| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
3495| point exponent.
3496*----------------------------------------------------------------------------*/
3497
f4014512 3498static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
3499 uint64_t zSig0, uint64_t zSig1,
3500 float_status *status)
158142c2 3501{
8f506c70 3502 int8_t shiftCount;
bb98fe42 3503 uint64_t zSig2;
158142c2
FB
3504
3505 if ( zSig0 == 0 ) {
3506 zSig0 = zSig1;
3507 zSig1 = 0;
3508 zExp -= 64;
3509 }
0019d5c3 3510 shiftCount = clz64(zSig0) - 15;
158142c2
FB
3511 if ( 0 <= shiftCount ) {
3512 zSig2 = 0;
3513 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3514 }
3515 else {
3516 shift128ExtraRightJamming(
3517 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
3518 }
3519 zExp -= shiftCount;
ff32e16e 3520 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
3521
3522}
3523
158142c2 3524
158142c2
FB
3525/*----------------------------------------------------------------------------
3526| Returns the result of converting the 32-bit two's complement integer `a'
3527| to the extended double-precision floating-point format. The conversion
3528| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3529| Arithmetic.
3530*----------------------------------------------------------------------------*/
3531
e5a41ffa 3532floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2
FB
3533{
3534 flag zSign;
3a87d009 3535 uint32_t absA;
8f506c70 3536 int8_t shiftCount;
bb98fe42 3537 uint64_t zSig;
158142c2
FB
3538
3539 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3540 zSign = ( a < 0 );
3541 absA = zSign ? - a : a;
0019d5c3 3542 shiftCount = clz32(absA) + 32;
158142c2
FB
3543 zSig = absA;
3544 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
3545
3546}
3547
158142c2
FB
3548/*----------------------------------------------------------------------------
3549| Returns the result of converting the 32-bit two's complement integer `a' to
3550| the quadruple-precision floating-point format. The conversion is performed
3551| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3552*----------------------------------------------------------------------------*/
3553
e5a41ffa 3554float128 int32_to_float128(int32_t a, float_status *status)
158142c2
FB
3555{
3556 flag zSign;
3a87d009 3557 uint32_t absA;
8f506c70 3558 int8_t shiftCount;
bb98fe42 3559 uint64_t zSig0;
158142c2
FB
3560
3561 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3562 zSign = ( a < 0 );
3563 absA = zSign ? - a : a;
0019d5c3 3564 shiftCount = clz32(absA) + 17;
158142c2
FB
3565 zSig0 = absA;
3566 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
3567
3568}
3569
158142c2
FB
3570/*----------------------------------------------------------------------------
3571| Returns the result of converting the 64-bit two's complement integer `a'
3572| to the extended double-precision floating-point format. The conversion
3573| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3574| Arithmetic.
3575*----------------------------------------------------------------------------*/
3576
e5a41ffa 3577floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2
FB
3578{
3579 flag zSign;
182f42fd 3580 uint64_t absA;
8f506c70 3581 int8_t shiftCount;
158142c2
FB
3582
3583 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3584 zSign = ( a < 0 );
3585 absA = zSign ? - a : a;
0019d5c3 3586 shiftCount = clz64(absA);
158142c2
FB
3587 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
3588
3589}
3590
158142c2
FB
3591/*----------------------------------------------------------------------------
3592| Returns the result of converting the 64-bit two's complement integer `a' to
3593| the quadruple-precision floating-point format. The conversion is performed
3594| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3595*----------------------------------------------------------------------------*/
3596
e5a41ffa 3597float128 int64_to_float128(int64_t a, float_status *status)
158142c2
FB
3598{
3599 flag zSign;
182f42fd 3600 uint64_t absA;
8f506c70 3601 int8_t shiftCount;
f4014512 3602 int32_t zExp;
bb98fe42 3603 uint64_t zSig0, zSig1;
158142c2
FB
3604
3605 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3606 zSign = ( a < 0 );
3607 absA = zSign ? - a : a;
0019d5c3 3608 shiftCount = clz64(absA) + 49;
158142c2
FB
3609 zExp = 0x406E - shiftCount;
3610 if ( 64 <= shiftCount ) {
3611 zSig1 = 0;
3612 zSig0 = absA;
3613 shiftCount -= 64;
3614 }
3615 else {
3616 zSig1 = absA;
3617 zSig0 = 0;
3618 }
3619 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3620 return packFloat128( zSign, zExp, zSig0, zSig1 );
3621
3622}
3623
6bb8e0f1
PM
3624/*----------------------------------------------------------------------------
3625| Returns the result of converting the 64-bit unsigned integer `a'
3626| to the quadruple-precision floating-point format. The conversion is performed
3627| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3628*----------------------------------------------------------------------------*/
3629
e5a41ffa 3630float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
3631{
3632 if (a == 0) {
3633 return float128_zero;
3634 }
6603d506 3635 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
3636}
3637
158142c2
FB
3638/*----------------------------------------------------------------------------
3639| Returns the result of converting the single-precision floating-point value
3640| `a' to the extended double-precision floating-point format. The conversion
3641| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3642| Arithmetic.
3643*----------------------------------------------------------------------------*/
3644
e5a41ffa 3645floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2
FB
3646{
3647 flag aSign;
0c48262d 3648 int aExp;
bb98fe42 3649 uint32_t aSig;
158142c2 3650
ff32e16e 3651 a = float32_squash_input_denormal(a, status);
158142c2
FB
3652 aSig = extractFloat32Frac( a );
3653 aExp = extractFloat32Exp( a );
3654 aSign = extractFloat32Sign( a );
3655 if ( aExp == 0xFF ) {
ff32e16e
PM
3656 if (aSig) {
3657 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
3658 }
0f605c88
LV
3659 return packFloatx80(aSign,
3660 floatx80_infinity_high,
3661 floatx80_infinity_low);
158142c2
FB
3662 }
3663 if ( aExp == 0 ) {
3664 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3665 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3666 }
3667 aSig |= 0x00800000;
bb98fe42 3668 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
3669
3670}
3671
158142c2
FB
3672/*----------------------------------------------------------------------------
3673| Returns the result of converting the single-precision floating-point value
3674| `a' to the quadruple-precision floating-point format. The conversion is
3675| performed according to the IEC/IEEE Standard for Binary Floating-Point
3676| Arithmetic.
3677*----------------------------------------------------------------------------*/
3678
e5a41ffa 3679float128 float32_to_float128(float32 a, float_status *status)
158142c2
FB
3680{
3681 flag aSign;
0c48262d 3682 int aExp;
bb98fe42 3683 uint32_t aSig;
158142c2 3684
ff32e16e 3685 a = float32_squash_input_denormal(a, status);
158142c2
FB
3686 aSig = extractFloat32Frac( a );
3687 aExp = extractFloat32Exp( a );
3688 aSign = extractFloat32Sign( a );
3689 if ( aExp == 0xFF ) {
ff32e16e
PM
3690 if (aSig) {
3691 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
3692 }
158142c2
FB
3693 return packFloat128( aSign, 0x7FFF, 0, 0 );
3694 }
3695 if ( aExp == 0 ) {
3696 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3697 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3698 --aExp;
3699 }
bb98fe42 3700 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
3701
3702}
3703
158142c2
FB
3704/*----------------------------------------------------------------------------
3705| Returns the remainder of the single-precision floating-point value `a'
3706| with respect to the corresponding value `b'. The operation is performed
3707| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3708*----------------------------------------------------------------------------*/
3709
e5a41ffa 3710float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 3711{
ed086f3d 3712 flag aSign, zSign;
0c48262d 3713 int aExp, bExp, expDiff;
bb98fe42
AF
3714 uint32_t aSig, bSig;
3715 uint32_t q;
3716 uint64_t aSig64, bSig64, q64;
3717 uint32_t alternateASig;
3718 int32_t sigMean;
ff32e16e
PM
3719 a = float32_squash_input_denormal(a, status);
3720 b = float32_squash_input_denormal(b, status);
158142c2
FB
3721
3722 aSig = extractFloat32Frac( a );
3723 aExp = extractFloat32Exp( a );
3724 aSign = extractFloat32Sign( a );
3725 bSig = extractFloat32Frac( b );
3726 bExp = extractFloat32Exp( b );
158142c2
FB
3727 if ( aExp == 0xFF ) {
3728 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 3729 return propagateFloat32NaN(a, b, status);
158142c2 3730 }
ff32e16e 3731 float_raise(float_flag_invalid, status);
af39bc8c 3732 return float32_default_nan(status);
158142c2
FB
3733 }
3734 if ( bExp == 0xFF ) {
ff32e16e
PM
3735 if (bSig) {
3736 return propagateFloat32NaN(a, b, status);
3737 }
158142c2
FB
3738 return a;
3739 }
3740 if ( bExp == 0 ) {
3741 if ( bSig == 0 ) {
ff32e16e 3742 float_raise(float_flag_invalid, status);
af39bc8c 3743 return float32_default_nan(status);
158142c2
FB
3744 }
3745 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
3746 }
3747 if ( aExp == 0 ) {
3748 if ( aSig == 0 ) return a;
3749 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3750 }
3751 expDiff = aExp - bExp;
3752 aSig |= 0x00800000;
3753 bSig |= 0x00800000;
3754 if ( expDiff < 32 ) {
3755 aSig <<= 8;
3756 bSig <<= 8;
3757 if ( expDiff < 0 ) {
3758 if ( expDiff < -1 ) return a;
3759 aSig >>= 1;
3760 }
3761 q = ( bSig <= aSig );
3762 if ( q ) aSig -= bSig;
3763 if ( 0 < expDiff ) {
bb98fe42 3764 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
3765 q >>= 32 - expDiff;
3766 bSig >>= 2;
3767 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3768 }
3769 else {
3770 aSig >>= 2;
3771 bSig >>= 2;
3772 }
3773 }
3774 else {
3775 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
3776 aSig64 = ( (uint64_t) aSig )<<40;
3777 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
3778 expDiff -= 64;
3779 while ( 0 < expDiff ) {
3780 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3781 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3782 aSig64 = - ( ( bSig * q64 )<<38 );
3783 expDiff -= 62;
3784 }
3785 expDiff += 64;
3786 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3787 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3788 q = q64>>( 64 - expDiff );
3789 bSig <<= 6;
3790 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
3791 }
3792 do {
3793 alternateASig = aSig;
3794 ++q;
3795 aSig -= bSig;
bb98fe42 3796 } while ( 0 <= (int32_t) aSig );
158142c2
FB
3797 sigMean = aSig + alternateASig;
3798 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3799 aSig = alternateASig;
3800 }
bb98fe42 3801 zSign = ( (int32_t) aSig < 0 );
158142c2 3802 if ( zSign ) aSig = - aSig;
ff32e16e 3803 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
3804}
3805
369be8f6 3806
158142c2 3807
8229c991
AJ
3808/*----------------------------------------------------------------------------
3809| Returns the binary exponential of the single-precision floating-point value
3810| `a'. The operation is performed according to the IEC/IEEE Standard for
3811| Binary Floating-Point Arithmetic.
3812|
3813| Uses the following identities:
3814|
3815| 1. -------------------------------------------------------------------------
3816|      x    x*ln(2)
3817|     2  = e
3818|
3819| 2. -------------------------------------------------------------------------
3820|                      2     3     4     5           n
3821|      x        x     x     x     x     x           x
3822|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
3823|               1!    2!    3!    4!    5!          n!
3824*----------------------------------------------------------------------------*/
3825
3826static const float64 float32_exp2_coefficients[15] =
3827{
d5138cf4
PM
3828 const_float64( 0x3ff0000000000000ll ), /* 1 */
3829 const_float64( 0x3fe0000000000000ll ), /* 2 */
3830 const_float64( 0x3fc5555555555555ll ), /* 3 */
3831 const_float64( 0x3fa5555555555555ll ), /* 4 */
3832 const_float64( 0x3f81111111111111ll ), /* 5 */
3833 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
3834 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
3835 const_float64( 0x3efa01a01a01a01all ), /* 8 */
3836 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
3837 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
3838 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
3839 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
3840 const_float64( 0x3de6124613a86d09ll ), /* 13 */
3841 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
3842 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
3843};
3844
e5a41ffa 3845float32 float32_exp2(float32 a, float_status *status)
8229c991
AJ
3846{
3847 flag aSign;
0c48262d 3848 int aExp;
bb98fe42 3849 uint32_t aSig;
8229c991
AJ
3850 float64 r, x, xn;
3851 int i;
ff32e16e 3852 a = float32_squash_input_denormal(a, status);
8229c991
AJ
3853
3854 aSig = extractFloat32Frac( a );
3855 aExp = extractFloat32Exp( a );
3856 aSign = extractFloat32Sign( a );
3857
3858 if ( aExp == 0xFF) {
ff32e16e
PM
3859 if (aSig) {
3860 return propagateFloat32NaN(a, float32_zero, status);
3861 }
8229c991
AJ
3862 return (aSign) ? float32_zero : a;
3863 }
3864 if (aExp == 0) {
3865 if (aSig == 0) return float32_one;
3866 }
3867
ff32e16e 3868 float_raise(float_flag_inexact, status);
8229c991
AJ
3869
3870 /* ******************************* */
3871 /* using float64 for approximation */
3872 /* ******************************* */
ff32e16e
PM
3873 x = float32_to_float64(a, status);
3874 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
3875
3876 xn = x;
3877 r = float64_one;
3878 for (i = 0 ; i < 15 ; i++) {
3879 float64 f;
3880
ff32e16e
PM
3881 f = float64_mul(xn, float32_exp2_coefficients[i], status);
3882 r = float64_add(r, f, status);
8229c991 3883
ff32e16e 3884 xn = float64_mul(xn, x, status);
8229c991
AJ
3885 }
3886
3887 return float64_to_float32(r, status);
3888}
3889
374dfc33
AJ
3890/*----------------------------------------------------------------------------
3891| Returns the binary log of the single-precision floating-point value `a'.
3892| The operation is performed according to the IEC/IEEE Standard for Binary
3893| Floating-Point Arithmetic.
3894*----------------------------------------------------------------------------*/
e5a41ffa 3895float32 float32_log2(float32 a, float_status *status)
374dfc33
AJ
3896{
3897 flag aSign, zSign;
0c48262d 3898 int aExp;
bb98fe42 3899 uint32_t aSig, zSig, i;
374dfc33 3900
ff32e16e 3901 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
3902 aSig = extractFloat32Frac( a );
3903 aExp = extractFloat32Exp( a );
3904 aSign = extractFloat32Sign( a );
3905
3906 if ( aExp == 0 ) {
3907 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
3908 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3909 }
3910 if ( aSign ) {
ff32e16e 3911 float_raise(float_flag_invalid, status);
af39bc8c 3912 return float32_default_nan(status);
374dfc33
AJ
3913 }
3914 if ( aExp == 0xFF ) {
ff32e16e
PM
3915 if (aSig) {
3916 return propagateFloat32NaN(a, float32_zero, status);
3917 }
374dfc33
AJ
3918 return a;
3919 }
3920
3921 aExp -= 0x7F;
3922 aSig |= 0x00800000;
3923 zSign = aExp < 0;
3924 zSig = aExp << 23;
3925
3926 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 3927 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
3928 if ( aSig & 0x01000000 ) {
3929 aSig >>= 1;
3930 zSig |= i;
3931 }
3932 }
3933
3934 if ( zSign )
3935 zSig = -zSig;
3936
ff32e16e 3937 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
3938}
3939
158142c2
FB
3940/*----------------------------------------------------------------------------
3941| Returns 1 if the single-precision floating-point value `a' is equal to
b689362d
AJ
3942| the corresponding value `b', and 0 otherwise. The invalid exception is
3943| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
3944| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3945*----------------------------------------------------------------------------*/
3946
e5a41ffa 3947int float32_eq(float32 a, float32 b, float_status *status)
158142c2 3948{
b689362d 3949 uint32_t av, bv;
ff32e16e
PM
3950 a = float32_squash_input_denormal(a, status);
3951 b = float32_squash_input_denormal(b, status);
158142c2
FB
3952
3953 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3954 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3955 ) {
ff32e16e 3956 float_raise(float_flag_invalid, status);
158142c2
FB
3957 return 0;
3958 }
b689362d
AJ
3959 av = float32_val(a);
3960 bv = float32_val(b);
3961 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
3962}
3963
3964/*----------------------------------------------------------------------------
3965| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
3966| or equal to the corresponding value `b', and 0 otherwise. The invalid
3967| exception is raised if either operand is a NaN. The comparison is performed
3968| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3969*----------------------------------------------------------------------------*/
3970
e5a41ffa 3971int float32_le(float32 a, float32 b, float_status *status)
158142c2
FB
3972{
3973 flag aSign, bSign;
bb98fe42 3974 uint32_t av, bv;
ff32e16e
PM
3975 a = float32_squash_input_denormal(a, status);
3976 b = float32_squash_input_denormal(b, status);
158142c2
FB
3977
3978 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3979 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3980 ) {
ff32e16e 3981 float_raise(float_flag_invalid, status);
158142c2
FB
3982 return 0;
3983 }
3984 aSign = extractFloat32Sign( a );
3985 bSign = extractFloat32Sign( b );
f090c9d4
PB
3986 av = float32_val(a);
3987 bv = float32_val(b);
bb98fe42 3988 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 3989 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
3990
3991}
3992
3993/*----------------------------------------------------------------------------
3994| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
3995| the corresponding value `b', and 0 otherwise. The invalid exception is
3996| raised if either operand is a NaN. The comparison is performed according
3997| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3998*----------------------------------------------------------------------------*/
3999
e5a41ffa 4000int float32_lt(float32 a, float32 b, float_status *status)
158142c2
FB
4001{
4002 flag aSign, bSign;
bb98fe42 4003 uint32_t av, bv;
ff32e16e
PM
4004 a = float32_squash_input_denormal(a, status);
4005 b = float32_squash_input_denormal(b, status);
158142c2
FB
4006
4007 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4008 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4009 ) {
ff32e16e 4010 float_raise(float_flag_invalid, status);
158142c2
FB
4011 return 0;
4012 }
4013 aSign = extractFloat32Sign( a );
4014 bSign = extractFloat32Sign( b );
f090c9d4
PB
4015 av = float32_val(a);
4016 bv = float32_val(b);
bb98fe42 4017 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4018 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4019
4020}
4021
67b7861d
AJ
4022/*----------------------------------------------------------------------------
4023| Returns 1 if the single-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4024| be compared, and 0 otherwise. The invalid exception is raised if either
4025| operand is a NaN. The comparison is performed according to the IEC/IEEE
4026| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4027*----------------------------------------------------------------------------*/
4028
e5a41ffa 4029int float32_unordered(float32 a, float32 b, float_status *status)
67b7861d 4030{
ff32e16e
PM
4031 a = float32_squash_input_denormal(a, status);
4032 b = float32_squash_input_denormal(b, status);
67b7861d
AJ
4033
4034 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4035 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4036 ) {
ff32e16e 4037 float_raise(float_flag_invalid, status);
67b7861d
AJ
4038 return 1;
4039 }
4040 return 0;
4041}
b689362d 4042
158142c2
FB
4043/*----------------------------------------------------------------------------
4044| Returns 1 if the single-precision floating-point value `a' is equal to
f5a64251
AJ
4045| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4046| exception. The comparison is performed according to the IEC/IEEE Standard
4047| for Binary Floating-Point Arithmetic.
158142c2
FB
4048*----------------------------------------------------------------------------*/
4049
e5a41ffa 4050int float32_eq_quiet(float32 a, float32 b, float_status *status)
158142c2 4051{
ff32e16e
PM
4052 a = float32_squash_input_denormal(a, status);
4053 b = float32_squash_input_denormal(b, status);
158142c2
FB
4054
4055 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4056 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4057 ) {
af39bc8c
AM
4058 if (float32_is_signaling_nan(a, status)
4059 || float32_is_signaling_nan(b, status)) {
ff32e16e 4060 float_raise(float_flag_invalid, status);
b689362d 4061 }
158142c2
FB
4062 return 0;
4063 }
b689362d
AJ
4064 return ( float32_val(a) == float32_val(b) ) ||
4065 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
158142c2
FB
4066}
4067
4068/*----------------------------------------------------------------------------
4069| Returns 1 if the single-precision floating-point value `a' is less than or
4070| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4071| cause an exception. Otherwise, the comparison is performed according to the
4072| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4073*----------------------------------------------------------------------------*/
4074
e5a41ffa 4075int float32_le_quiet(float32 a, float32 b, float_status *status)
158142c2
FB
4076{
4077 flag aSign, bSign;
bb98fe42 4078 uint32_t av, bv;
ff32e16e
PM
4079 a = float32_squash_input_denormal(a, status);
4080 b = float32_squash_input_denormal(b, status);
158142c2
FB
4081
4082 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4083 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4084 ) {
af39bc8c
AM
4085 if (float32_is_signaling_nan(a, status)
4086 || float32_is_signaling_nan(b, status)) {
ff32e16e 4087 float_raise(float_flag_invalid, status);
158142c2
FB
4088 }
4089 return 0;
4090 }
4091 aSign = extractFloat32Sign( a );
4092 bSign = extractFloat32Sign( b );
f090c9d4
PB
4093 av = float32_val(a);
4094 bv = float32_val(b);
bb98fe42 4095 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4096 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4097
4098}
4099
4100/*----------------------------------------------------------------------------
4101| Returns 1 if the single-precision floating-point value `a' is less than
4102| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4103| exception. Otherwise, the comparison is performed according to the IEC/IEEE
ab52f973 4104| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4105*----------------------------------------------------------------------------*/
4106
ab52f973 4107int float32_lt_quiet(float32 a, float32 b, float_status *status)
158142c2 4108{
ab52f973
AB
4109 flag aSign, bSign;
4110 uint32_t av, bv;
4111 a = float32_squash_input_denormal(a, status);
4112 b = float32_squash_input_denormal(b, status);
158142c2 4113
ab52f973
AB
4114 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4115 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4116 ) {
4117 if (float32_is_signaling_nan(a, status)
4118 || float32_is_signaling_nan(b, status)) {
ff32e16e 4119 float_raise(float_flag_invalid, status);
158142c2 4120 }
ab52f973 4121 return 0;
158142c2 4122 }
ab52f973
AB
4123 aSign = extractFloat32Sign( a );
4124 bSign = extractFloat32Sign( b );
4125 av = float32_val(a);
4126 bv = float32_val(b);
4127 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
4128 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4129
4130}
4131
4132/*----------------------------------------------------------------------------
ab52f973
AB
4133| Returns 1 if the single-precision floating-point values `a' and `b' cannot
4134| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4135| comparison is performed according to the IEC/IEEE Standard for Binary
4136| Floating-Point Arithmetic.
158142c2
FB
4137*----------------------------------------------------------------------------*/
4138
ab52f973 4139int float32_unordered_quiet(float32 a, float32 b, float_status *status)
158142c2 4140{
ab52f973
AB
4141 a = float32_squash_input_denormal(a, status);
4142 b = float32_squash_input_denormal(b, status);
158142c2 4143
ab52f973
AB
4144 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4145 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4146 ) {
4147 if (float32_is_signaling_nan(a, status)
4148 || float32_is_signaling_nan(b, status)) {
4149 float_raise(float_flag_invalid, status);
158142c2 4150 }
ab52f973 4151 return 1;
158142c2 4152 }
ab52f973 4153 return 0;
158142c2
FB
4154}
4155
210cbd49
AB
4156/*----------------------------------------------------------------------------
4157| If `a' is denormal and we are in flush-to-zero mode then set the
4158| input-denormal exception and return zero. Otherwise just return the value.
4159*----------------------------------------------------------------------------*/
4160float16 float16_squash_input_denormal(float16 a, float_status *status)
4161{
4162 if (status->flush_inputs_to_zero) {
4163 if (extractFloat16Exp(a) == 0 && extractFloat16Frac(a) != 0) {
4164 float_raise(float_flag_input_denormal, status);
4165 return make_float16(float16_val(a) & 0x8000);
4166 }
4167 }
4168 return a;
4169}
4170
158142c2
FB
4171/*----------------------------------------------------------------------------
4172| Returns the result of converting the double-precision floating-point value
4173| `a' to the extended double-precision floating-point format. The conversion
4174| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4175| Arithmetic.
4176*----------------------------------------------------------------------------*/
4177
e5a41ffa 4178floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2
FB
4179{
4180 flag aSign;
0c48262d 4181 int aExp;
bb98fe42 4182 uint64_t aSig;
158142c2 4183
ff32e16e 4184 a = float64_squash_input_denormal(a, status);
158142c2
FB
4185 aSig = extractFloat64Frac( a );
4186 aExp = extractFloat64Exp( a );
4187 aSign = extractFloat64Sign( a );
4188 if ( aExp == 0x7FF ) {
ff32e16e
PM
4189 if (aSig) {
4190 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
4191 }
0f605c88
LV
4192 return packFloatx80(aSign,
4193 floatx80_infinity_high,
4194 floatx80_infinity_low);
158142c2
FB
4195 }
4196 if ( aExp == 0 ) {
4197 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4198 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4199 }
4200 return
4201 packFloatx80(
4202 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
4203
4204}
4205
158142c2
FB
4206/*----------------------------------------------------------------------------
4207| Returns the result of converting the double-precision floating-point value
4208| `a' to the quadruple-precision floating-point format. The conversion is
4209| performed according to the IEC/IEEE Standard for Binary Floating-Point
4210| Arithmetic.
4211*----------------------------------------------------------------------------*/
4212
e5a41ffa 4213float128 float64_to_float128(float64 a, float_status *status)
158142c2
FB
4214{
4215 flag aSign;
0c48262d 4216 int aExp;
bb98fe42 4217 uint64_t aSig, zSig0, zSig1;
158142c2 4218
ff32e16e 4219 a = float64_squash_input_denormal(a, status);
158142c2
FB
4220 aSig = extractFloat64Frac( a );
4221 aExp = extractFloat64Exp( a );
4222 aSign = extractFloat64Sign( a );
4223 if ( aExp == 0x7FF ) {
ff32e16e
PM
4224 if (aSig) {
4225 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
4226 }
158142c2
FB
4227 return packFloat128( aSign, 0x7FFF, 0, 0 );
4228 }
4229 if ( aExp == 0 ) {
4230 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4231 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4232 --aExp;
4233 }
4234 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
4235 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
4236
4237}
4238
158142c2
FB
4239
4240/*----------------------------------------------------------------------------
4241| Returns the remainder of the double-precision floating-point value `a'
4242| with respect to the corresponding value `b'. The operation is performed
4243| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4244*----------------------------------------------------------------------------*/
4245
e5a41ffa 4246float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 4247{
ed086f3d 4248 flag aSign, zSign;
0c48262d 4249 int aExp, bExp, expDiff;
bb98fe42
AF
4250 uint64_t aSig, bSig;
4251 uint64_t q, alternateASig;
4252 int64_t sigMean;
158142c2 4253
ff32e16e
PM
4254 a = float64_squash_input_denormal(a, status);
4255 b = float64_squash_input_denormal(b, status);
158142c2
FB
4256 aSig = extractFloat64Frac( a );
4257 aExp = extractFloat64Exp( a );
4258 aSign = extractFloat64Sign( a );
4259 bSig = extractFloat64Frac( b );
4260 bExp = extractFloat64Exp( b );
158142c2
FB
4261 if ( aExp == 0x7FF ) {
4262 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 4263 return propagateFloat64NaN(a, b, status);
158142c2 4264 }
ff32e16e 4265 float_raise(float_flag_invalid, status);
af39bc8c 4266 return float64_default_nan(status);
158142c2
FB
4267 }
4268 if ( bExp == 0x7FF ) {
ff32e16e
PM
4269 if (bSig) {
4270 return propagateFloat64NaN(a, b, status);
4271 }
158142c2
FB
4272 return a;
4273 }
4274 if ( bExp == 0 ) {
4275 if ( bSig == 0 ) {
ff32e16e 4276 float_raise(float_flag_invalid, status);
af39bc8c 4277 return float64_default_nan(status);
158142c2
FB
4278 }
4279 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4280 }
4281 if ( aExp == 0 ) {
4282 if ( aSig == 0 ) return a;
4283 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4284 }
4285 expDiff = aExp - bExp;
4286 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
4287 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4288 if ( expDiff < 0 ) {
4289 if ( expDiff < -1 ) return a;
4290 aSig >>= 1;
4291 }
4292 q = ( bSig <= aSig );
4293 if ( q ) aSig -= bSig;
4294 expDiff -= 64;
4295 while ( 0 < expDiff ) {
4296 q = estimateDiv128To64( aSig, 0, bSig );
4297 q = ( 2 < q ) ? q - 2 : 0;
4298 aSig = - ( ( bSig>>2 ) * q );
4299 expDiff -= 62;
4300 }
4301 expDiff += 64;
4302 if ( 0 < expDiff ) {
4303 q = estimateDiv128To64( aSig, 0, bSig );
4304 q = ( 2 < q ) ? q - 2 : 0;
4305 q >>= 64 - expDiff;
4306 bSig >>= 2;
4307 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4308 }
4309 else {
4310 aSig >>= 2;
4311 bSig >>= 2;
4312 }
4313 do {
4314 alternateASig = aSig;
4315 ++q;
4316 aSig -= bSig;
bb98fe42 4317 } while ( 0 <= (int64_t) aSig );
158142c2
FB
4318 sigMean = aSig + alternateASig;
4319 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4320 aSig = alternateASig;
4321 }
bb98fe42 4322 zSign = ( (int64_t) aSig < 0 );
158142c2 4323 if ( zSign ) aSig = - aSig;
ff32e16e 4324 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
4325
4326}
4327
374dfc33
AJ
4328/*----------------------------------------------------------------------------
4329| Returns the binary log of the double-precision floating-point value `a'.
4330| The operation is performed according to the IEC/IEEE Standard for Binary
4331| Floating-Point Arithmetic.
4332*----------------------------------------------------------------------------*/
e5a41ffa 4333float64 float64_log2(float64 a, float_status *status)
374dfc33
AJ
4334{
4335 flag aSign, zSign;
0c48262d 4336 int aExp;
bb98fe42 4337 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 4338 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
4339
4340 aSig = extractFloat64Frac( a );
4341 aExp = extractFloat64Exp( a );
4342 aSign = extractFloat64Sign( a );
4343
4344 if ( aExp == 0 ) {
4345 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
4346 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4347 }
4348 if ( aSign ) {
ff32e16e 4349 float_raise(float_flag_invalid, status);
af39bc8c 4350 return float64_default_nan(status);
374dfc33
AJ
4351 }
4352 if ( aExp == 0x7FF ) {
ff32e16e
PM
4353 if (aSig) {
4354 return propagateFloat64NaN(a, float64_zero, status);
4355 }
374dfc33
AJ
4356 return a;
4357 }
4358
4359 aExp -= 0x3FF;
4360 aSig |= LIT64( 0x0010000000000000 );
4361 zSign = aExp < 0;
bb98fe42 4362 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
4363 for (i = 1LL << 51; i > 0; i >>= 1) {
4364 mul64To128( aSig, aSig, &aSig0, &aSig1 );
4365 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
4366 if ( aSig & LIT64( 0x0020000000000000 ) ) {
4367 aSig >>= 1;
4368 zSig |= i;
4369 }
4370 }
4371
4372 if ( zSign )
4373 zSig = -zSig;
ff32e16e 4374 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
4375}
4376
158142c2
FB
4377/*----------------------------------------------------------------------------
4378| Returns 1 if the double-precision floating-point value `a' is equal to the
b689362d
AJ
4379| corresponding value `b', and 0 otherwise. The invalid exception is raised
4380| if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
4381| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4382*----------------------------------------------------------------------------*/
4383
e5a41ffa 4384int float64_eq(float64 a, float64 b, float_status *status)
158142c2 4385{
bb98fe42 4386 uint64_t av, bv;
ff32e16e
PM
4387 a = float64_squash_input_denormal(a, status);
4388 b = float64_squash_input_denormal(b, status);
158142c2
FB
4389
4390 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4391 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4392 ) {
ff32e16e 4393 float_raise(float_flag_invalid, status);
158142c2
FB
4394 return 0;
4395 }
f090c9d4 4396 av = float64_val(a);
a1b91bb4 4397 bv = float64_val(b);
bb98fe42 4398 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4399
4400}
4401
4402/*----------------------------------------------------------------------------
4403| Returns 1 if the double-precision floating-point value `a' is less than or
f5a64251
AJ
4404| equal to the corresponding value `b', and 0 otherwise. The invalid
4405| exception is raised if either operand is a NaN. The comparison is performed
4406| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4407*----------------------------------------------------------------------------*/
4408
e5a41ffa 4409int float64_le(float64 a, float64 b, float_status *status)
158142c2
FB
4410{
4411 flag aSign, bSign;
bb98fe42 4412 uint64_t av, bv;
ff32e16e
PM
4413 a = float64_squash_input_denormal(a, status);
4414 b = float64_squash_input_denormal(b, status);
158142c2
FB
4415
4416 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4417 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4418 ) {
ff32e16e 4419 float_raise(float_flag_invalid, status);
158142c2
FB
4420 return 0;
4421 }
4422 aSign = extractFloat64Sign( a );
4423 bSign = extractFloat64Sign( b );
f090c9d4 4424 av = float64_val(a);
a1b91bb4 4425 bv = float64_val(b);
bb98fe42 4426 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4427 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4428
4429}
4430
4431/*----------------------------------------------------------------------------
4432| Returns 1 if the double-precision floating-point value `a' is less than
f5a64251
AJ
4433| the corresponding value `b', and 0 otherwise. The invalid exception is
4434| raised if either operand is a NaN. The comparison is performed according
4435| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4436*----------------------------------------------------------------------------*/
4437
e5a41ffa 4438int float64_lt(float64 a, float64 b, float_status *status)
158142c2
FB
4439{
4440 flag aSign, bSign;
bb98fe42 4441 uint64_t av, bv;
158142c2 4442
ff32e16e
PM
4443 a = float64_squash_input_denormal(a, status);
4444 b = float64_squash_input_denormal(b, status);
158142c2
FB
4445 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4446 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4447 ) {
ff32e16e 4448 float_raise(float_flag_invalid, status);
158142c2
FB
4449 return 0;
4450 }
4451 aSign = extractFloat64Sign( a );
4452 bSign = extractFloat64Sign( b );
f090c9d4 4453 av = float64_val(a);
a1b91bb4 4454 bv = float64_val(b);
bb98fe42 4455 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4456 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4457
4458}
4459
67b7861d
AJ
4460/*----------------------------------------------------------------------------
4461| Returns 1 if the double-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4462| be compared, and 0 otherwise. The invalid exception is raised if either
4463| operand is a NaN. The comparison is performed according to the IEC/IEEE
4464| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4465*----------------------------------------------------------------------------*/
4466
e5a41ffa 4467int float64_unordered(float64 a, float64 b, float_status *status)
67b7861d 4468{
ff32e16e
PM
4469 a = float64_squash_input_denormal(a, status);
4470 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4471
4472 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4473 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4474 ) {
ff32e16e 4475 float_raise(float_flag_invalid, status);
67b7861d
AJ
4476 return 1;
4477 }
4478 return 0;
4479}
4480
158142c2
FB
4481/*----------------------------------------------------------------------------
4482| Returns 1 if the double-precision floating-point value `a' is equal to the
f5a64251
AJ
4483| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4484| exception.The comparison is performed according to the IEC/IEEE Standard
4485| for Binary Floating-Point Arithmetic.
158142c2
FB
4486*----------------------------------------------------------------------------*/
4487
e5a41ffa 4488int float64_eq_quiet(float64 a, float64 b, float_status *status)
158142c2 4489{
bb98fe42 4490 uint64_t av, bv;
ff32e16e
PM
4491 a = float64_squash_input_denormal(a, status);
4492 b = float64_squash_input_denormal(b, status);
158142c2
FB
4493
4494 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4495 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4496 ) {
af39bc8c
AM
4497 if (float64_is_signaling_nan(a, status)
4498 || float64_is_signaling_nan(b, status)) {
ff32e16e 4499 float_raise(float_flag_invalid, status);
b689362d 4500 }
158142c2
FB
4501 return 0;
4502 }
f090c9d4 4503 av = float64_val(a);
a1b91bb4 4504 bv = float64_val(b);
bb98fe42 4505 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4506
4507}
4508
4509/*----------------------------------------------------------------------------
4510| Returns 1 if the double-precision floating-point value `a' is less than or
4511| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4512| cause an exception. Otherwise, the comparison is performed according to the
4513| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4514*----------------------------------------------------------------------------*/
4515
e5a41ffa 4516int float64_le_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4517{
4518 flag aSign, bSign;
bb98fe42 4519 uint64_t av, bv;
ff32e16e
PM
4520 a = float64_squash_input_denormal(a, status);
4521 b = float64_squash_input_denormal(b, status);
158142c2
FB
4522
4523 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4524 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4525 ) {
af39bc8c
AM
4526 if (float64_is_signaling_nan(a, status)
4527 || float64_is_signaling_nan(b, status)) {
ff32e16e 4528 float_raise(float_flag_invalid, status);
158142c2
FB
4529 }
4530 return 0;
4531 }
4532 aSign = extractFloat64Sign( a );
4533 bSign = extractFloat64Sign( b );
f090c9d4 4534 av = float64_val(a);
a1b91bb4 4535 bv = float64_val(b);
bb98fe42 4536 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4537 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4538
4539}
4540
4541/*----------------------------------------------------------------------------
4542| Returns 1 if the double-precision floating-point value `a' is less than
4543| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4544| exception. Otherwise, the comparison is performed according to the IEC/IEEE
4545| Standard for Binary Floating-Point Arithmetic.
4546*----------------------------------------------------------------------------*/
4547
e5a41ffa 4548int float64_lt_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4549{
4550 flag aSign, bSign;
bb98fe42 4551 uint64_t av, bv;
ff32e16e
PM
4552 a = float64_squash_input_denormal(a, status);
4553 b = float64_squash_input_denormal(b, status);
158142c2
FB
4554
4555 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4556 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4557 ) {
af39bc8c
AM
4558 if (float64_is_signaling_nan(a, status)
4559 || float64_is_signaling_nan(b, status)) {
ff32e16e 4560 float_raise(float_flag_invalid, status);
158142c2
FB
4561 }
4562 return 0;
4563 }
4564 aSign = extractFloat64Sign( a );
4565 bSign = extractFloat64Sign( b );
f090c9d4 4566 av = float64_val(a);
a1b91bb4 4567 bv = float64_val(b);
bb98fe42 4568 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4569 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4570
4571}
4572
67b7861d
AJ
4573/*----------------------------------------------------------------------------
4574| Returns 1 if the double-precision floating-point values `a' and `b' cannot
4575| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4576| comparison is performed according to the IEC/IEEE Standard for Binary
4577| Floating-Point Arithmetic.
4578*----------------------------------------------------------------------------*/
4579
e5a41ffa 4580int float64_unordered_quiet(float64 a, float64 b, float_status *status)
67b7861d 4581{
ff32e16e
PM
4582 a = float64_squash_input_denormal(a, status);
4583 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4584
4585 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4586 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4587 ) {
af39bc8c
AM
4588 if (float64_is_signaling_nan(a, status)
4589 || float64_is_signaling_nan(b, status)) {
ff32e16e 4590 float_raise(float_flag_invalid, status);
67b7861d
AJ
4591 }
4592 return 1;
4593 }
4594 return 0;
4595}
4596
158142c2
FB
4597/*----------------------------------------------------------------------------
4598| Returns the result of converting the extended double-precision floating-
4599| point value `a' to the 32-bit two's complement integer format. The
4600| conversion is performed according to the IEC/IEEE Standard for Binary
4601| Floating-Point Arithmetic---which means in particular that the conversion
4602| is rounded according to the current rounding mode. If `a' is a NaN, the
4603| largest positive integer is returned. Otherwise, if the conversion
4604| overflows, the largest integer with the same sign as `a' is returned.
4605*----------------------------------------------------------------------------*/
4606
f4014512 4607int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2
FB
4608{
4609 flag aSign;
f4014512 4610 int32_t aExp, shiftCount;
bb98fe42 4611 uint64_t aSig;
158142c2 4612
d1eb8f2a
AD
4613 if (floatx80_invalid_encoding(a)) {
4614 float_raise(float_flag_invalid, status);
4615 return 1 << 31;
4616 }
158142c2
FB
4617 aSig = extractFloatx80Frac( a );
4618 aExp = extractFloatx80Exp( a );
4619 aSign = extractFloatx80Sign( a );
bb98fe42 4620 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4621 shiftCount = 0x4037 - aExp;
4622 if ( shiftCount <= 0 ) shiftCount = 1;
4623 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 4624 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
4625
4626}
4627
4628/*----------------------------------------------------------------------------
4629| Returns the result of converting the extended double-precision floating-
4630| point value `a' to the 32-bit two's complement integer format. The
4631| conversion is performed according to the IEC/IEEE Standard for Binary
4632| Floating-Point Arithmetic, except that the conversion is always rounded
4633| toward zero. If `a' is a NaN, the largest positive integer is returned.
4634| Otherwise, if the conversion overflows, the largest integer with the same
4635| sign as `a' is returned.
4636*----------------------------------------------------------------------------*/
4637
f4014512 4638int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4639{
4640 flag aSign;
f4014512 4641 int32_t aExp, shiftCount;
bb98fe42 4642 uint64_t aSig, savedASig;
b3a6a2e0 4643 int32_t z;
158142c2 4644
d1eb8f2a
AD
4645 if (floatx80_invalid_encoding(a)) {
4646 float_raise(float_flag_invalid, status);
4647 return 1 << 31;
4648 }
158142c2
FB
4649 aSig = extractFloatx80Frac( a );
4650 aExp = extractFloatx80Exp( a );
4651 aSign = extractFloatx80Sign( a );
4652 if ( 0x401E < aExp ) {
bb98fe42 4653 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4654 goto invalid;
4655 }
4656 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4657 if (aExp || aSig) {
4658 status->float_exception_flags |= float_flag_inexact;
4659 }
158142c2
FB
4660 return 0;
4661 }
4662 shiftCount = 0x403E - aExp;
4663 savedASig = aSig;
4664 aSig >>= shiftCount;
4665 z = aSig;
4666 if ( aSign ) z = - z;
4667 if ( ( z < 0 ) ^ aSign ) {
4668 invalid:
ff32e16e 4669 float_raise(float_flag_invalid, status);
bb98fe42 4670 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
4671 }
4672 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 4673 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4674 }
4675 return z;
4676
4677}
4678
4679/*----------------------------------------------------------------------------
4680| Returns the result of converting the extended double-precision floating-
4681| point value `a' to the 64-bit two's complement integer format. The
4682| conversion is performed according to the IEC/IEEE Standard for Binary
4683| Floating-Point Arithmetic---which means in particular that the conversion
4684| is rounded according to the current rounding mode. If `a' is a NaN,
4685| the largest positive integer is returned. Otherwise, if the conversion
4686| overflows, the largest integer with the same sign as `a' is returned.
4687*----------------------------------------------------------------------------*/
4688
f42c2224 4689int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2
FB
4690{
4691 flag aSign;
f4014512 4692 int32_t aExp, shiftCount;
bb98fe42 4693 uint64_t aSig, aSigExtra;
158142c2 4694
d1eb8f2a
AD
4695 if (floatx80_invalid_encoding(a)) {
4696 float_raise(float_flag_invalid, status);
4697 return 1ULL << 63;
4698 }
158142c2
FB
4699 aSig = extractFloatx80Frac( a );
4700 aExp = extractFloatx80Exp( a );
4701 aSign = extractFloatx80Sign( a );
4702 shiftCount = 0x403E - aExp;
4703 if ( shiftCount <= 0 ) {
4704 if ( shiftCount ) {
ff32e16e 4705 float_raise(float_flag_invalid, status);
0f605c88 4706 if (!aSign || floatx80_is_any_nan(a)) {
158142c2
FB
4707 return LIT64( 0x7FFFFFFFFFFFFFFF );
4708 }
bb98fe42 4709 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4710 }
4711 aSigExtra = 0;
4712 }
4713 else {
4714 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4715 }
ff32e16e 4716 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
4717
4718}
4719
4720/*----------------------------------------------------------------------------
4721| Returns the result of converting the extended double-precision floating-
4722| point value `a' to the 64-bit two's complement integer format. The
4723| conversion is performed according to the IEC/IEEE Standard for Binary
4724| Floating-Point Arithmetic, except that the conversion is always rounded
4725| toward zero. If `a' is a NaN, the largest positive integer is returned.
4726| Otherwise, if the conversion overflows, the largest integer with the same
4727| sign as `a' is returned.
4728*----------------------------------------------------------------------------*/
4729
f42c2224 4730int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4731{
4732 flag aSign;
f4014512 4733 int32_t aExp, shiftCount;
bb98fe42 4734 uint64_t aSig;
f42c2224 4735 int64_t z;
158142c2 4736
d1eb8f2a
AD
4737 if (floatx80_invalid_encoding(a)) {
4738 float_raise(float_flag_invalid, status);
4739 return 1ULL << 63;
4740 }
158142c2
FB
4741 aSig = extractFloatx80Frac( a );
4742 aExp = extractFloatx80Exp( a );
4743 aSign = extractFloatx80Sign( a );
4744 shiftCount = aExp - 0x403E;
4745 if ( 0 <= shiftCount ) {
4746 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4747 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 4748 float_raise(float_flag_invalid, status);
158142c2
FB
4749 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4750 return LIT64( 0x7FFFFFFFFFFFFFFF );
4751 }
4752 }
bb98fe42 4753 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4754 }
4755 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4756 if (aExp | aSig) {
4757 status->float_exception_flags |= float_flag_inexact;
4758 }
158142c2
FB
4759 return 0;
4760 }
4761 z = aSig>>( - shiftCount );
bb98fe42 4762 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 4763 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4764 }
4765 if ( aSign ) z = - z;
4766 return z;
4767
4768}
4769
4770/*----------------------------------------------------------------------------
4771| Returns the result of converting the extended double-precision floating-
4772| point value `a' to the single-precision floating-point format. The
4773| conversion is performed according to the IEC/IEEE Standard for Binary
4774| Floating-Point Arithmetic.
4775*----------------------------------------------------------------------------*/
4776
e5a41ffa 4777float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2
FB
4778{
4779 flag aSign;
f4014512 4780 int32_t aExp;
bb98fe42 4781 uint64_t aSig;
158142c2 4782
d1eb8f2a
AD
4783 if (floatx80_invalid_encoding(a)) {
4784 float_raise(float_flag_invalid, status);
4785 return float32_default_nan(status);
4786 }
158142c2
FB
4787 aSig = extractFloatx80Frac( a );
4788 aExp = extractFloatx80Exp( a );
4789 aSign = extractFloatx80Sign( a );
4790 if ( aExp == 0x7FFF ) {
bb98fe42 4791 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4792 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4793 }
4794 return packFloat32( aSign, 0xFF, 0 );
4795 }
4796 shift64RightJamming( aSig, 33, &aSig );
4797 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 4798 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
4799
4800}
4801
4802/*----------------------------------------------------------------------------
4803| Returns the result of converting the extended double-precision floating-
4804| point value `a' to the double-precision floating-point format. The
4805| conversion is performed according to the IEC/IEEE Standard for Binary
4806| Floating-Point Arithmetic.
4807*----------------------------------------------------------------------------*/
4808
e5a41ffa 4809float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2
FB
4810{
4811 flag aSign;
f4014512 4812 int32_t aExp;
bb98fe42 4813 uint64_t aSig, zSig;
158142c2 4814
d1eb8f2a
AD
4815 if (floatx80_invalid_encoding(a)) {
4816 float_raise(float_flag_invalid, status);
4817 return float64_default_nan(status);
4818 }
158142c2
FB
4819 aSig = extractFloatx80Frac( a );
4820 aExp = extractFloatx80Exp( a );
4821 aSign = extractFloatx80Sign( a );
4822 if ( aExp == 0x7FFF ) {
bb98fe42 4823 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4824 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4825 }
4826 return packFloat64( aSign, 0x7FF, 0 );
4827 }
4828 shift64RightJamming( aSig, 1, &zSig );
4829 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 4830 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
4831
4832}
4833
158142c2
FB
4834/*----------------------------------------------------------------------------
4835| Returns the result of converting the extended double-precision floating-
4836| point value `a' to the quadruple-precision floating-point format. The
4837| conversion is performed according to the IEC/IEEE Standard for Binary
4838| Floating-Point Arithmetic.
4839*----------------------------------------------------------------------------*/
4840
e5a41ffa 4841float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2
FB
4842{
4843 flag aSign;
0c48262d 4844 int aExp;
bb98fe42 4845 uint64_t aSig, zSig0, zSig1;
158142c2 4846
d1eb8f2a
AD
4847 if (floatx80_invalid_encoding(a)) {
4848 float_raise(float_flag_invalid, status);
4849 return float128_default_nan(status);
4850 }
158142c2
FB
4851 aSig = extractFloatx80Frac( a );
4852 aExp = extractFloatx80Exp( a );
4853 aSign = extractFloatx80Sign( a );
bb98fe42 4854 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4855 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4856 }
4857 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
4858 return packFloat128( aSign, aExp, zSig0, zSig1 );
4859
4860}
4861
0f721292
LV
4862/*----------------------------------------------------------------------------
4863| Rounds the extended double-precision floating-point value `a'
4864| to the precision provided by floatx80_rounding_precision and returns the
4865| result as an extended double-precision floating-point value.
4866| The operation is performed according to the IEC/IEEE Standard for Binary
4867| Floating-Point Arithmetic.
4868*----------------------------------------------------------------------------*/
4869
4870floatx80 floatx80_round(floatx80 a, float_status *status)
4871{
4872 return roundAndPackFloatx80(status->floatx80_rounding_precision,
4873 extractFloatx80Sign(a),
4874 extractFloatx80Exp(a),
4875 extractFloatx80Frac(a), 0, status);
4876}
4877
158142c2
FB
4878/*----------------------------------------------------------------------------
4879| Rounds the extended double-precision floating-point value `a' to an integer,
4880| and returns the result as an extended quadruple-precision floating-point
4881| value. The operation is performed according to the IEC/IEEE Standard for
4882| Binary Floating-Point Arithmetic.
4883*----------------------------------------------------------------------------*/
4884
e5a41ffa 4885floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2
FB
4886{
4887 flag aSign;
f4014512 4888 int32_t aExp;
bb98fe42 4889 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
4890 floatx80 z;
4891
d1eb8f2a
AD
4892 if (floatx80_invalid_encoding(a)) {
4893 float_raise(float_flag_invalid, status);
4894 return floatx80_default_nan(status);
4895 }
158142c2
FB
4896 aExp = extractFloatx80Exp( a );
4897 if ( 0x403E <= aExp ) {
bb98fe42 4898 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 4899 return propagateFloatx80NaN(a, a, status);
158142c2
FB
4900 }
4901 return a;
4902 }
4903 if ( aExp < 0x3FFF ) {
4904 if ( ( aExp == 0 )
bb98fe42 4905 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
158142c2
FB
4906 return a;
4907 }
a2f2d288 4908 status->float_exception_flags |= float_flag_inexact;
158142c2 4909 aSign = extractFloatx80Sign( a );
a2f2d288 4910 switch (status->float_rounding_mode) {
158142c2 4911 case float_round_nearest_even:
bb98fe42 4912 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
4913 ) {
4914 return
4915 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
4916 }
4917 break;
f9288a76
PM
4918 case float_round_ties_away:
4919 if (aExp == 0x3FFE) {
4920 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
4921 }
4922 break;
158142c2
FB
4923 case float_round_down:
4924 return
4925 aSign ?
4926 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
4927 : packFloatx80( 0, 0, 0 );
4928 case float_round_up:
4929 return
4930 aSign ? packFloatx80( 1, 0, 0 )
4931 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
4932 }
4933 return packFloatx80( aSign, 0, 0 );
4934 }
4935 lastBitMask = 1;
4936 lastBitMask <<= 0x403E - aExp;
4937 roundBitsMask = lastBitMask - 1;
4938 z = a;
a2f2d288 4939 switch (status->float_rounding_mode) {
dc355b76 4940 case float_round_nearest_even:
158142c2 4941 z.low += lastBitMask>>1;
dc355b76
PM
4942 if ((z.low & roundBitsMask) == 0) {
4943 z.low &= ~lastBitMask;
4944 }
4945 break;
f9288a76
PM
4946 case float_round_ties_away:
4947 z.low += lastBitMask >> 1;
4948 break;
dc355b76
PM
4949 case float_round_to_zero:
4950 break;
4951 case float_round_up:
4952 if (!extractFloatx80Sign(z)) {
4953 z.low += roundBitsMask;
4954 }
4955 break;
4956 case float_round_down:
4957 if (extractFloatx80Sign(z)) {
158142c2
FB
4958 z.low += roundBitsMask;
4959 }
dc355b76
PM
4960 break;
4961 default:
4962 abort();
158142c2
FB
4963 }
4964 z.low &= ~ roundBitsMask;
4965 if ( z.low == 0 ) {
4966 ++z.high;
4967 z.low = LIT64( 0x8000000000000000 );
4968 }
a2f2d288
PM
4969 if (z.low != a.low) {
4970 status->float_exception_flags |= float_flag_inexact;
4971 }
158142c2
FB
4972 return z;
4973
4974}
4975
4976/*----------------------------------------------------------------------------
4977| Returns the result of adding the absolute values of the extended double-
4978| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
4979| negated before being returned. `zSign' is ignored if the result is a NaN.
4980| The addition is performed according to the IEC/IEEE Standard for Binary
4981| Floating-Point Arithmetic.
4982*----------------------------------------------------------------------------*/
4983
e5a41ffa
PM
4984static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
4985 float_status *status)
158142c2 4986{
f4014512 4987 int32_t aExp, bExp, zExp;
bb98fe42 4988 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 4989 int32_t expDiff;
158142c2
FB
4990
4991 aSig = extractFloatx80Frac( a );
4992 aExp = extractFloatx80Exp( a );
4993 bSig = extractFloatx80Frac( b );
4994 bExp = extractFloatx80Exp( b );
4995 expDiff = aExp - bExp;
4996 if ( 0 < expDiff ) {
4997 if ( aExp == 0x7FFF ) {
ff32e16e
PM
4998 if ((uint64_t)(aSig << 1)) {
4999 return propagateFloatx80NaN(a, b, status);
5000 }
158142c2
FB
5001 return a;
5002 }
5003 if ( bExp == 0 ) --expDiff;
5004 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5005 zExp = aExp;
5006 }
5007 else if ( expDiff < 0 ) {
5008 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5009 if ((uint64_t)(bSig << 1)) {
5010 return propagateFloatx80NaN(a, b, status);
5011 }
0f605c88
LV
5012 return packFloatx80(zSign,
5013 floatx80_infinity_high,
5014 floatx80_infinity_low);
158142c2
FB
5015 }
5016 if ( aExp == 0 ) ++expDiff;
5017 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5018 zExp = bExp;
5019 }
5020 else {
5021 if ( aExp == 0x7FFF ) {
bb98fe42 5022 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5023 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5024 }
5025 return a;
5026 }
5027 zSig1 = 0;
5028 zSig0 = aSig + bSig;
5029 if ( aExp == 0 ) {
5030 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5031 goto roundAndPack;
5032 }
5033 zExp = aExp;
5034 goto shiftRight1;
5035 }
5036 zSig0 = aSig + bSig;
bb98fe42 5037 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5038 shiftRight1:
5039 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
5040 zSig0 |= LIT64( 0x8000000000000000 );
5041 ++zExp;
5042 roundAndPack:
a2f2d288 5043 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5044 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5045}
5046
5047/*----------------------------------------------------------------------------
5048| Returns the result of subtracting the absolute values of the extended
5049| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5050| difference is negated before being returned. `zSign' is ignored if the
5051| result is a NaN. The subtraction is performed according to the IEC/IEEE
5052| Standard for Binary Floating-Point Arithmetic.
5053*----------------------------------------------------------------------------*/
5054
e5a41ffa
PM
5055static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5056 float_status *status)
158142c2 5057{
f4014512 5058 int32_t aExp, bExp, zExp;
bb98fe42 5059 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5060 int32_t expDiff;
158142c2
FB
5061
5062 aSig = extractFloatx80Frac( a );
5063 aExp = extractFloatx80Exp( a );
5064 bSig = extractFloatx80Frac( b );
5065 bExp = extractFloatx80Exp( b );
5066 expDiff = aExp - bExp;
5067 if ( 0 < expDiff ) goto aExpBigger;
5068 if ( expDiff < 0 ) goto bExpBigger;
5069 if ( aExp == 0x7FFF ) {
bb98fe42 5070 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5071 return propagateFloatx80NaN(a, b, status);
158142c2 5072 }
ff32e16e 5073 float_raise(float_flag_invalid, status);
af39bc8c 5074 return floatx80_default_nan(status);
158142c2
FB
5075 }
5076 if ( aExp == 0 ) {
5077 aExp = 1;
5078 bExp = 1;
5079 }
5080 zSig1 = 0;
5081 if ( bSig < aSig ) goto aBigger;
5082 if ( aSig < bSig ) goto bBigger;
a2f2d288 5083 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5084 bExpBigger:
5085 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5086 if ((uint64_t)(bSig << 1)) {
5087 return propagateFloatx80NaN(a, b, status);
5088 }
0f605c88
LV
5089 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5090 floatx80_infinity_low);
158142c2
FB
5091 }
5092 if ( aExp == 0 ) ++expDiff;
5093 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5094 bBigger:
5095 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5096 zExp = bExp;
5097 zSign ^= 1;
5098 goto normalizeRoundAndPack;
5099 aExpBigger:
5100 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5101 if ((uint64_t)(aSig << 1)) {
5102 return propagateFloatx80NaN(a, b, status);
5103 }
158142c2
FB
5104 return a;
5105 }
5106 if ( bExp == 0 ) --expDiff;
5107 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5108 aBigger:
5109 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5110 zExp = aExp;
5111 normalizeRoundAndPack:
a2f2d288 5112 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5113 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5114}
5115
5116/*----------------------------------------------------------------------------
5117| Returns the result of adding the extended double-precision floating-point
5118| values `a' and `b'. The operation is performed according to the IEC/IEEE
5119| Standard for Binary Floating-Point Arithmetic.
5120*----------------------------------------------------------------------------*/
5121
e5a41ffa 5122floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5123{
5124 flag aSign, bSign;
5125
d1eb8f2a
AD
5126 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5127 float_raise(float_flag_invalid, status);
5128 return floatx80_default_nan(status);
5129 }
158142c2
FB
5130 aSign = extractFloatx80Sign( a );
5131 bSign = extractFloatx80Sign( b );
5132 if ( aSign == bSign ) {
ff32e16e 5133 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5134 }
5135 else {
ff32e16e 5136 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5137 }
5138
5139}
5140
5141/*----------------------------------------------------------------------------
5142| Returns the result of subtracting the extended double-precision floating-
5143| point values `a' and `b'. The operation is performed according to the
5144| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5145*----------------------------------------------------------------------------*/
5146
e5a41ffa 5147floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5148{
5149 flag aSign, bSign;
5150
d1eb8f2a
AD
5151 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5152 float_raise(float_flag_invalid, status);
5153 return floatx80_default_nan(status);
5154 }
158142c2
FB
5155 aSign = extractFloatx80Sign( a );
5156 bSign = extractFloatx80Sign( b );
5157 if ( aSign == bSign ) {
ff32e16e 5158 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5159 }
5160 else {
ff32e16e 5161 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5162 }
5163
5164}
5165
5166/*----------------------------------------------------------------------------
5167| Returns the result of multiplying the extended double-precision floating-
5168| point values `a' and `b'. The operation is performed according to the
5169| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5170*----------------------------------------------------------------------------*/
5171
e5a41ffa 5172floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5173{
5174 flag aSign, bSign, zSign;
f4014512 5175 int32_t aExp, bExp, zExp;
bb98fe42 5176 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 5177
d1eb8f2a
AD
5178 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5179 float_raise(float_flag_invalid, status);
5180 return floatx80_default_nan(status);
5181 }
158142c2
FB
5182 aSig = extractFloatx80Frac( a );
5183 aExp = extractFloatx80Exp( a );
5184 aSign = extractFloatx80Sign( a );
5185 bSig = extractFloatx80Frac( b );
5186 bExp = extractFloatx80Exp( b );
5187 bSign = extractFloatx80Sign( b );
5188 zSign = aSign ^ bSign;
5189 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5190 if ( (uint64_t) ( aSig<<1 )
5191 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5192 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5193 }
5194 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
5195 return packFloatx80(zSign, floatx80_infinity_high,
5196 floatx80_infinity_low);
158142c2
FB
5197 }
5198 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5199 if ((uint64_t)(bSig << 1)) {
5200 return propagateFloatx80NaN(a, b, status);
5201 }
158142c2
FB
5202 if ( ( aExp | aSig ) == 0 ) {
5203 invalid:
ff32e16e 5204 float_raise(float_flag_invalid, status);
af39bc8c 5205 return floatx80_default_nan(status);
158142c2 5206 }
0f605c88
LV
5207 return packFloatx80(zSign, floatx80_infinity_high,
5208 floatx80_infinity_low);
158142c2
FB
5209 }
5210 if ( aExp == 0 ) {
5211 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5212 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5213 }
5214 if ( bExp == 0 ) {
5215 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5216 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5217 }
5218 zExp = aExp + bExp - 0x3FFE;
5219 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 5220 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
5221 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5222 --zExp;
5223 }
a2f2d288 5224 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5225 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5226}
5227
5228/*----------------------------------------------------------------------------
5229| Returns the result of dividing the extended double-precision floating-point
5230| value `a' by the corresponding value `b'. The operation is performed
5231| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5232*----------------------------------------------------------------------------*/
5233
e5a41ffa 5234floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5235{
5236 flag aSign, bSign, zSign;
f4014512 5237 int32_t aExp, bExp, zExp;
bb98fe42
AF
5238 uint64_t aSig, bSig, zSig0, zSig1;
5239 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 5240
d1eb8f2a
AD
5241 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5242 float_raise(float_flag_invalid, status);
5243 return floatx80_default_nan(status);
5244 }
158142c2
FB
5245 aSig = extractFloatx80Frac( a );
5246 aExp = extractFloatx80Exp( a );
5247 aSign = extractFloatx80Sign( a );
5248 bSig = extractFloatx80Frac( b );
5249 bExp = extractFloatx80Exp( b );
5250 bSign = extractFloatx80Sign( b );
5251 zSign = aSign ^ bSign;
5252 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5253 if ((uint64_t)(aSig << 1)) {
5254 return propagateFloatx80NaN(a, b, status);
5255 }
158142c2 5256 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5257 if ((uint64_t)(bSig << 1)) {
5258 return propagateFloatx80NaN(a, b, status);
5259 }
158142c2
FB
5260 goto invalid;
5261 }
0f605c88
LV
5262 return packFloatx80(zSign, floatx80_infinity_high,
5263 floatx80_infinity_low);
158142c2
FB
5264 }
5265 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5266 if ((uint64_t)(bSig << 1)) {
5267 return propagateFloatx80NaN(a, b, status);
5268 }
158142c2
FB
5269 return packFloatx80( zSign, 0, 0 );
5270 }
5271 if ( bExp == 0 ) {
5272 if ( bSig == 0 ) {
5273 if ( ( aExp | aSig ) == 0 ) {
5274 invalid:
ff32e16e 5275 float_raise(float_flag_invalid, status);
af39bc8c 5276 return floatx80_default_nan(status);
158142c2 5277 }
ff32e16e 5278 float_raise(float_flag_divbyzero, status);
0f605c88
LV
5279 return packFloatx80(zSign, floatx80_infinity_high,
5280 floatx80_infinity_low);
158142c2
FB
5281 }
5282 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5283 }
5284 if ( aExp == 0 ) {
5285 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5286 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5287 }
5288 zExp = aExp - bExp + 0x3FFE;
5289 rem1 = 0;
5290 if ( bSig <= aSig ) {
5291 shift128Right( aSig, 0, 1, &aSig, &rem1 );
5292 ++zExp;
5293 }
5294 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
5295 mul64To128( bSig, zSig0, &term0, &term1 );
5296 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 5297 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5298 --zSig0;
5299 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
5300 }
5301 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 5302 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
5303 mul64To128( bSig, zSig1, &term1, &term2 );
5304 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 5305 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5306 --zSig1;
5307 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
5308 }
5309 zSig1 |= ( ( rem1 | rem2 ) != 0 );
5310 }
a2f2d288 5311 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5312 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5313}
5314
5315/*----------------------------------------------------------------------------
5316| Returns the remainder of the extended double-precision floating-point value
5317| `a' with respect to the corresponding value `b'. The operation is performed
5318| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5319*----------------------------------------------------------------------------*/
5320
e5a41ffa 5321floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
158142c2 5322{
ed086f3d 5323 flag aSign, zSign;
f4014512 5324 int32_t aExp, bExp, expDiff;
bb98fe42
AF
5325 uint64_t aSig0, aSig1, bSig;
5326 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 5327
d1eb8f2a
AD
5328 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5329 float_raise(float_flag_invalid, status);
5330 return floatx80_default_nan(status);
5331 }
158142c2
FB
5332 aSig0 = extractFloatx80Frac( a );
5333 aExp = extractFloatx80Exp( a );
5334 aSign = extractFloatx80Sign( a );
5335 bSig = extractFloatx80Frac( b );
5336 bExp = extractFloatx80Exp( b );
158142c2 5337 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5338 if ( (uint64_t) ( aSig0<<1 )
5339 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5340 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5341 }
5342 goto invalid;
5343 }
5344 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5345 if ((uint64_t)(bSig << 1)) {
5346 return propagateFloatx80NaN(a, b, status);
5347 }
158142c2
FB
5348 return a;
5349 }
5350 if ( bExp == 0 ) {
5351 if ( bSig == 0 ) {
5352 invalid:
ff32e16e 5353 float_raise(float_flag_invalid, status);
af39bc8c 5354 return floatx80_default_nan(status);
158142c2
FB
5355 }
5356 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5357 }
5358 if ( aExp == 0 ) {
bb98fe42 5359 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
158142c2
FB
5360 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5361 }
5362 bSig |= LIT64( 0x8000000000000000 );
5363 zSign = aSign;
5364 expDiff = aExp - bExp;
5365 aSig1 = 0;
5366 if ( expDiff < 0 ) {
5367 if ( expDiff < -1 ) return a;
5368 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5369 expDiff = 0;
5370 }
5371 q = ( bSig <= aSig0 );
5372 if ( q ) aSig0 -= bSig;
5373 expDiff -= 64;
5374 while ( 0 < expDiff ) {
5375 q = estimateDiv128To64( aSig0, aSig1, bSig );
5376 q = ( 2 < q ) ? q - 2 : 0;
5377 mul64To128( bSig, q, &term0, &term1 );
5378 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5379 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5380 expDiff -= 62;
5381 }
5382 expDiff += 64;
5383 if ( 0 < expDiff ) {
5384 q = estimateDiv128To64( aSig0, aSig1, bSig );
5385 q = ( 2 < q ) ? q - 2 : 0;
5386 q >>= 64 - expDiff;
5387 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5388 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5389 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5390 while ( le128( term0, term1, aSig0, aSig1 ) ) {
5391 ++q;
5392 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5393 }
5394 }
5395 else {
5396 term1 = 0;
5397 term0 = bSig;
5398 }
5399 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
5400 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
5401 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
5402 && ( q & 1 ) )
5403 ) {
5404 aSig0 = alternateASig0;
5405 aSig1 = alternateASig1;
5406 zSign = ! zSign;
5407 }
5408 return
5409 normalizeRoundAndPackFloatx80(
ff32e16e 5410 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
5411
5412}
5413
5414/*----------------------------------------------------------------------------
5415| Returns the square root of the extended double-precision floating-point
5416| value `a'. The operation is performed according to the IEC/IEEE Standard
5417| for Binary Floating-Point Arithmetic.
5418*----------------------------------------------------------------------------*/
5419
e5a41ffa 5420floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2
FB
5421{
5422 flag aSign;
f4014512 5423 int32_t aExp, zExp;
bb98fe42
AF
5424 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5425 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 5426
d1eb8f2a
AD
5427 if (floatx80_invalid_encoding(a)) {
5428 float_raise(float_flag_invalid, status);
5429 return floatx80_default_nan(status);
5430 }
158142c2
FB
5431 aSig0 = extractFloatx80Frac( a );
5432 aExp = extractFloatx80Exp( a );
5433 aSign = extractFloatx80Sign( a );
5434 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5435 if ((uint64_t)(aSig0 << 1)) {
5436 return propagateFloatx80NaN(a, a, status);
5437 }
158142c2
FB
5438 if ( ! aSign ) return a;
5439 goto invalid;
5440 }
5441 if ( aSign ) {
5442 if ( ( aExp | aSig0 ) == 0 ) return a;
5443 invalid:
ff32e16e 5444 float_raise(float_flag_invalid, status);
af39bc8c 5445 return floatx80_default_nan(status);
158142c2
FB
5446 }
5447 if ( aExp == 0 ) {
5448 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5449 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5450 }
5451 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5452 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5453 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5454 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5455 doubleZSig0 = zSig0<<1;
5456 mul64To128( zSig0, zSig0, &term0, &term1 );
5457 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 5458 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5459 --zSig0;
5460 doubleZSig0 -= 2;
5461 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5462 }
5463 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5464 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5465 if ( zSig1 == 0 ) zSig1 = 1;
5466 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5467 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5468 mul64To128( zSig1, zSig1, &term2, &term3 );
5469 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 5470 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5471 --zSig1;
5472 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5473 term3 |= 1;
5474 term2 |= doubleZSig0;
5475 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5476 }
5477 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5478 }
5479 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5480 zSig0 |= doubleZSig0;
a2f2d288
PM
5481 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5482 0, zExp, zSig0, zSig1, status);
158142c2
FB
5483}
5484
5485/*----------------------------------------------------------------------------
b689362d
AJ
5486| Returns 1 if the extended double-precision floating-point value `a' is equal
5487| to the corresponding value `b', and 0 otherwise. The invalid exception is
5488| raised if either operand is a NaN. Otherwise, the comparison is performed
5489| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5490*----------------------------------------------------------------------------*/
5491
e5a41ffa 5492int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5493{
5494
d1eb8f2a
AD
5495 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5496 || (extractFloatx80Exp(a) == 0x7FFF
5497 && (uint64_t) (extractFloatx80Frac(a) << 1))
5498 || (extractFloatx80Exp(b) == 0x7FFF
5499 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5500 ) {
ff32e16e 5501 float_raise(float_flag_invalid, status);
158142c2
FB
5502 return 0;
5503 }
5504 return
5505 ( a.low == b.low )
5506 && ( ( a.high == b.high )
5507 || ( ( a.low == 0 )
bb98fe42 5508 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5509 );
5510
5511}
5512
5513/*----------------------------------------------------------------------------
5514| Returns 1 if the extended double-precision floating-point value `a' is
5515| less than or equal to the corresponding value `b', and 0 otherwise. The
f5a64251
AJ
5516| invalid exception is raised if either operand is a NaN. The comparison is
5517| performed according to the IEC/IEEE Standard for Binary Floating-Point
5518| Arithmetic.
158142c2
FB
5519*----------------------------------------------------------------------------*/
5520
e5a41ffa 5521int floatx80_le(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5522{
5523 flag aSign, bSign;
5524
d1eb8f2a
AD
5525 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5526 || (extractFloatx80Exp(a) == 0x7FFF
5527 && (uint64_t) (extractFloatx80Frac(a) << 1))
5528 || (extractFloatx80Exp(b) == 0x7FFF
5529 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5530 ) {
ff32e16e 5531 float_raise(float_flag_invalid, status);
158142c2
FB
5532 return 0;
5533 }
5534 aSign = extractFloatx80Sign( a );
5535 bSign = extractFloatx80Sign( b );
5536 if ( aSign != bSign ) {
5537 return
5538 aSign
bb98fe42 5539 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5540 == 0 );
5541 }
5542 return
5543 aSign ? le128( b.high, b.low, a.high, a.low )
5544 : le128( a.high, a.low, b.high, b.low );
5545
5546}
5547
5548/*----------------------------------------------------------------------------
5549| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5550| less than the corresponding value `b', and 0 otherwise. The invalid
5551| exception is raised if either operand is a NaN. The comparison is performed
5552| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5553*----------------------------------------------------------------------------*/
5554
e5a41ffa 5555int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5556{
5557 flag aSign, bSign;
5558
d1eb8f2a
AD
5559 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5560 || (extractFloatx80Exp(a) == 0x7FFF
5561 && (uint64_t) (extractFloatx80Frac(a) << 1))
5562 || (extractFloatx80Exp(b) == 0x7FFF
5563 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5564 ) {
ff32e16e 5565 float_raise(float_flag_invalid, status);
158142c2
FB
5566 return 0;
5567 }
5568 aSign = extractFloatx80Sign( a );
5569 bSign = extractFloatx80Sign( b );
5570 if ( aSign != bSign ) {
5571 return
5572 aSign
bb98fe42 5573 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5574 != 0 );
5575 }
5576 return
5577 aSign ? lt128( b.high, b.low, a.high, a.low )
5578 : lt128( a.high, a.low, b.high, b.low );
5579
5580}
5581
67b7861d
AJ
5582/*----------------------------------------------------------------------------
5583| Returns 1 if the extended double-precision floating-point values `a' and `b'
f5a64251
AJ
5584| cannot be compared, and 0 otherwise. The invalid exception is raised if
5585| either operand is a NaN. The comparison is performed according to the
5586| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
67b7861d 5587*----------------------------------------------------------------------------*/
e5a41ffa 5588int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
67b7861d 5589{
d1eb8f2a
AD
5590 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5591 || (extractFloatx80Exp(a) == 0x7FFF
5592 && (uint64_t) (extractFloatx80Frac(a) << 1))
5593 || (extractFloatx80Exp(b) == 0x7FFF
5594 && (uint64_t) (extractFloatx80Frac(b) << 1))
67b7861d 5595 ) {
ff32e16e 5596 float_raise(float_flag_invalid, status);
67b7861d
AJ
5597 return 1;
5598 }
5599 return 0;
5600}
5601
158142c2 5602/*----------------------------------------------------------------------------
b689362d 5603| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5604| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5605| cause an exception. The comparison is performed according to the IEC/IEEE
5606| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5607*----------------------------------------------------------------------------*/
5608
e5a41ffa 5609int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5610{
5611
d1eb8f2a
AD
5612 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5613 float_raise(float_flag_invalid, status);
5614 return 0;
5615 }
158142c2 5616 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5617 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5618 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5619 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5620 ) {
af39bc8c
AM
5621 if (floatx80_is_signaling_nan(a, status)
5622 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5623 float_raise(float_flag_invalid, status);
b689362d 5624 }
158142c2
FB
5625 return 0;
5626 }
5627 return
5628 ( a.low == b.low )
5629 && ( ( a.high == b.high )
5630 || ( ( a.low == 0 )
bb98fe42 5631 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5632 );
5633
5634}
5635
5636/*----------------------------------------------------------------------------
5637| Returns 1 if the extended double-precision floating-point value `a' is less
5638| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
5639| do not cause an exception. Otherwise, the comparison is performed according
5640| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5641*----------------------------------------------------------------------------*/
5642
e5a41ffa 5643int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5644{
5645 flag aSign, bSign;
5646
d1eb8f2a
AD
5647 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5648 float_raise(float_flag_invalid, status);
5649 return 0;
5650 }
158142c2 5651 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5652 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5653 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5654 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5655 ) {
af39bc8c
AM
5656 if (floatx80_is_signaling_nan(a, status)
5657 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5658 float_raise(float_flag_invalid, status);
158142c2
FB
5659 }
5660 return 0;
5661 }
5662 aSign = extractFloatx80Sign( a );
5663 bSign = extractFloatx80Sign( b );
5664 if ( aSign != bSign ) {
5665 return
5666 aSign
bb98fe42 5667 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5668 == 0 );
5669 }
5670 return
5671 aSign ? le128( b.high, b.low, a.high, a.low )
5672 : le128( a.high, a.low, b.high, b.low );
5673
5674}
5675
5676/*----------------------------------------------------------------------------
5677| Returns 1 if the extended double-precision floating-point value `a' is less
5678| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
5679| an exception. Otherwise, the comparison is performed according to the
5680| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5681*----------------------------------------------------------------------------*/
5682
e5a41ffa 5683int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5684{
5685 flag aSign, bSign;
5686
d1eb8f2a
AD
5687 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5688 float_raise(float_flag_invalid, status);
5689 return 0;
5690 }
158142c2 5691 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5692 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5693 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5694 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5695 ) {
af39bc8c
AM
5696 if (floatx80_is_signaling_nan(a, status)
5697 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5698 float_raise(float_flag_invalid, status);
158142c2
FB
5699 }
5700 return 0;
5701 }
5702 aSign = extractFloatx80Sign( a );
5703 bSign = extractFloatx80Sign( b );
5704 if ( aSign != bSign ) {
5705 return
5706 aSign
bb98fe42 5707 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5708 != 0 );
5709 }
5710 return
5711 aSign ? lt128( b.high, b.low, a.high, a.low )
5712 : lt128( a.high, a.low, b.high, b.low );
5713
5714}
5715
67b7861d
AJ
5716/*----------------------------------------------------------------------------
5717| Returns 1 if the extended double-precision floating-point values `a' and `b'
5718| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
5719| The comparison is performed according to the IEC/IEEE Standard for Binary
5720| Floating-Point Arithmetic.
5721*----------------------------------------------------------------------------*/
e5a41ffa 5722int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
67b7861d 5723{
d1eb8f2a
AD
5724 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5725 float_raise(float_flag_invalid, status);
5726 return 1;
5727 }
67b7861d
AJ
5728 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5729 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5730 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5731 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5732 ) {
af39bc8c
AM
5733 if (floatx80_is_signaling_nan(a, status)
5734 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5735 float_raise(float_flag_invalid, status);
67b7861d
AJ
5736 }
5737 return 1;
5738 }
5739 return 0;
5740}
5741
158142c2
FB
5742/*----------------------------------------------------------------------------
5743| Returns the result of converting the quadruple-precision floating-point
5744| value `a' to the 32-bit two's complement integer format. The conversion
5745| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5746| Arithmetic---which means in particular that the conversion is rounded
5747| according to the current rounding mode. If `a' is a NaN, the largest
5748| positive integer is returned. Otherwise, if the conversion overflows, the
5749| largest integer with the same sign as `a' is returned.
5750*----------------------------------------------------------------------------*/
5751
f4014512 5752int32_t float128_to_int32(float128 a, float_status *status)
158142c2
FB
5753{
5754 flag aSign;
f4014512 5755 int32_t aExp, shiftCount;
bb98fe42 5756 uint64_t aSig0, aSig1;
158142c2
FB
5757
5758 aSig1 = extractFloat128Frac1( a );
5759 aSig0 = extractFloat128Frac0( a );
5760 aExp = extractFloat128Exp( a );
5761 aSign = extractFloat128Sign( a );
5762 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5763 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5764 aSig0 |= ( aSig1 != 0 );
5765 shiftCount = 0x4028 - aExp;
5766 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 5767 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
5768
5769}
5770
5771/*----------------------------------------------------------------------------
5772| Returns the result of converting the quadruple-precision floating-point
5773| value `a' to the 32-bit two's complement integer format. The conversion
5774| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5775| Arithmetic, except that the conversion is always rounded toward zero. If
5776| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
5777| conversion overflows, the largest integer with the same sign as `a' is
5778| returned.
5779*----------------------------------------------------------------------------*/
5780
f4014512 5781int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2
FB
5782{
5783 flag aSign;
f4014512 5784 int32_t aExp, shiftCount;
bb98fe42 5785 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 5786 int32_t z;
158142c2
FB
5787
5788 aSig1 = extractFloat128Frac1( a );
5789 aSig0 = extractFloat128Frac0( a );
5790 aExp = extractFloat128Exp( a );
5791 aSign = extractFloat128Sign( a );
5792 aSig0 |= ( aSig1 != 0 );
5793 if ( 0x401E < aExp ) {
5794 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5795 goto invalid;
5796 }
5797 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5798 if (aExp || aSig0) {
5799 status->float_exception_flags |= float_flag_inexact;
5800 }
158142c2
FB
5801 return 0;
5802 }
5803 aSig0 |= LIT64( 0x0001000000000000 );
5804 shiftCount = 0x402F - aExp;
5805 savedASig = aSig0;
5806 aSig0 >>= shiftCount;
5807 z = aSig0;
5808 if ( aSign ) z = - z;
5809 if ( ( z < 0 ) ^ aSign ) {
5810 invalid:
ff32e16e 5811 float_raise(float_flag_invalid, status);
bb98fe42 5812 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5813 }
5814 if ( ( aSig0<<shiftCount ) != savedASig ) {
a2f2d288 5815 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5816 }
5817 return z;
5818
5819}
5820
5821/*----------------------------------------------------------------------------
5822| Returns the result of converting the quadruple-precision floating-point
5823| value `a' to the 64-bit two's complement integer format. The conversion
5824| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5825| Arithmetic---which means in particular that the conversion is rounded
5826| according to the current rounding mode. If `a' is a NaN, the largest
5827| positive integer is returned. Otherwise, if the conversion overflows, the
5828| largest integer with the same sign as `a' is returned.
5829*----------------------------------------------------------------------------*/
5830
f42c2224 5831int64_t float128_to_int64(float128 a, float_status *status)
158142c2
FB
5832{
5833 flag aSign;
f4014512 5834 int32_t aExp, shiftCount;
bb98fe42 5835 uint64_t aSig0, aSig1;
158142c2
FB
5836
5837 aSig1 = extractFloat128Frac1( a );
5838 aSig0 = extractFloat128Frac0( a );
5839 aExp = extractFloat128Exp( a );
5840 aSign = extractFloat128Sign( a );
5841 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5842 shiftCount = 0x402F - aExp;
5843 if ( shiftCount <= 0 ) {
5844 if ( 0x403E < aExp ) {
ff32e16e 5845 float_raise(float_flag_invalid, status);
158142c2
FB
5846 if ( ! aSign
5847 || ( ( aExp == 0x7FFF )
5848 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5849 )
5850 ) {
5851 return LIT64( 0x7FFFFFFFFFFFFFFF );
5852 }
bb98fe42 5853 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5854 }
5855 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5856 }
5857 else {
5858 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5859 }
ff32e16e 5860 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
5861
5862}
5863
5864/*----------------------------------------------------------------------------
5865| Returns the result of converting the quadruple-precision floating-point
5866| value `a' to the 64-bit two's complement integer format. The conversion
5867| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5868| Arithmetic, except that the conversion is always rounded toward zero.
5869| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
5870| the conversion overflows, the largest integer with the same sign as `a' is
5871| returned.
5872*----------------------------------------------------------------------------*/
5873
f42c2224 5874int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2
FB
5875{
5876 flag aSign;
f4014512 5877 int32_t aExp, shiftCount;
bb98fe42 5878 uint64_t aSig0, aSig1;
f42c2224 5879 int64_t z;
158142c2
FB
5880
5881 aSig1 = extractFloat128Frac1( a );
5882 aSig0 = extractFloat128Frac0( a );
5883 aExp = extractFloat128Exp( a );
5884 aSign = extractFloat128Sign( a );
5885 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5886 shiftCount = aExp - 0x402F;
5887 if ( 0 < shiftCount ) {
5888 if ( 0x403E <= aExp ) {
5889 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5890 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
5891 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
a2f2d288
PM
5892 if (aSig1) {
5893 status->float_exception_flags |= float_flag_inexact;
5894 }
158142c2
FB
5895 }
5896 else {
ff32e16e 5897 float_raise(float_flag_invalid, status);
158142c2
FB
5898 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
5899 return LIT64( 0x7FFFFFFFFFFFFFFF );
5900 }
5901 }
bb98fe42 5902 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5903 }
5904 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 5905 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
a2f2d288 5906 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5907 }
5908 }
5909 else {
5910 if ( aExp < 0x3FFF ) {
5911 if ( aExp | aSig0 | aSig1 ) {
a2f2d288 5912 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5913 }
5914 return 0;
5915 }
5916 z = aSig0>>( - shiftCount );
5917 if ( aSig1
bb98fe42 5918 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
a2f2d288 5919 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5920 }
5921 }
5922 if ( aSign ) z = - z;
5923 return z;
5924
5925}
5926
2e6d8568
BR
5927/*----------------------------------------------------------------------------
5928| Returns the result of converting the quadruple-precision floating-point value
5929| `a' to the 64-bit unsigned integer format. The conversion is
5930| performed according to the IEC/IEEE Standard for Binary Floating-Point
5931| Arithmetic---which means in particular that the conversion is rounded
5932| according to the current rounding mode. If `a' is a NaN, the largest
5933| positive integer is returned. If the conversion overflows, the
5934| largest unsigned integer is returned. If 'a' is negative, the value is
5935| rounded and zero is returned; negative values that do not round to zero
5936| will raise the inexact exception.
5937*----------------------------------------------------------------------------*/
5938
5939uint64_t float128_to_uint64(float128 a, float_status *status)
5940{
5941 flag aSign;
5942 int aExp;
5943 int shiftCount;
5944 uint64_t aSig0, aSig1;
5945
5946 aSig0 = extractFloat128Frac0(a);
5947 aSig1 = extractFloat128Frac1(a);
5948 aExp = extractFloat128Exp(a);
5949 aSign = extractFloat128Sign(a);
5950 if (aSign && (aExp > 0x3FFE)) {
5951 float_raise(float_flag_invalid, status);
5952 if (float128_is_any_nan(a)) {
5953 return LIT64(0xFFFFFFFFFFFFFFFF);
5954 } else {
5955 return 0;
5956 }
5957 }
5958 if (aExp) {
5959 aSig0 |= LIT64(0x0001000000000000);
5960 }
5961 shiftCount = 0x402F - aExp;
5962 if (shiftCount <= 0) {
5963 if (0x403E < aExp) {
5964 float_raise(float_flag_invalid, status);
5965 return LIT64(0xFFFFFFFFFFFFFFFF);
5966 }
5967 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
5968 } else {
5969 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
5970 }
5971 return roundAndPackUint64(aSign, aSig0, aSig1, status);
5972}
5973
5974uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
5975{
5976 uint64_t v;
5977 signed char current_rounding_mode = status->float_rounding_mode;
5978
5979 set_float_rounding_mode(float_round_to_zero, status);
5980 v = float128_to_uint64(a, status);
5981 set_float_rounding_mode(current_rounding_mode, status);
5982
5983 return v;
5984}
5985
158142c2
FB
5986/*----------------------------------------------------------------------------
5987| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
5988| value `a' to the 32-bit unsigned integer format. The conversion
5989| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5990| Arithmetic except that the conversion is always rounded toward zero.
5991| If `a' is a NaN, the largest positive integer is returned. Otherwise,
5992| if the conversion overflows, the largest unsigned integer is returned.
5993| If 'a' is negative, the value is rounded and zero is returned; negative
5994| values that do not round to zero will raise the inexact exception.
5995*----------------------------------------------------------------------------*/
5996
5997uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
5998{
5999 uint64_t v;
6000 uint32_t res;
6001 int old_exc_flags = get_float_exception_flags(status);
6002
6003 v = float128_to_uint64_round_to_zero(a, status);
6004 if (v > 0xffffffff) {
6005 res = 0xffffffff;
6006 } else {
6007 return v;
6008 }
6009 set_float_exception_flags(old_exc_flags, status);
6010 float_raise(float_flag_invalid, status);
6011 return res;
6012}
6013
6014/*----------------------------------------------------------------------------
6015| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
6016| value `a' to the single-precision floating-point format. The conversion
6017| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6018| Arithmetic.
6019*----------------------------------------------------------------------------*/
6020
e5a41ffa 6021float32 float128_to_float32(float128 a, float_status *status)
158142c2
FB
6022{
6023 flag aSign;
f4014512 6024 int32_t aExp;
bb98fe42
AF
6025 uint64_t aSig0, aSig1;
6026 uint32_t zSig;
158142c2
FB
6027
6028 aSig1 = extractFloat128Frac1( a );
6029 aSig0 = extractFloat128Frac0( a );
6030 aExp = extractFloat128Exp( a );
6031 aSign = extractFloat128Sign( a );
6032 if ( aExp == 0x7FFF ) {
6033 if ( aSig0 | aSig1 ) {
ff32e16e 6034 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
6035 }
6036 return packFloat32( aSign, 0xFF, 0 );
6037 }
6038 aSig0 |= ( aSig1 != 0 );
6039 shift64RightJamming( aSig0, 18, &aSig0 );
6040 zSig = aSig0;
6041 if ( aExp || zSig ) {
6042 zSig |= 0x40000000;
6043 aExp -= 0x3F81;
6044 }
ff32e16e 6045 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
6046
6047}
6048
6049/*----------------------------------------------------------------------------
6050| Returns the result of converting the quadruple-precision floating-point
6051| value `a' to the double-precision floating-point format. The conversion
6052| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6053| Arithmetic.
6054*----------------------------------------------------------------------------*/
6055
e5a41ffa 6056float64 float128_to_float64(float128 a, float_status *status)
158142c2
FB
6057{
6058 flag aSign;
f4014512 6059 int32_t aExp;
bb98fe42 6060 uint64_t aSig0, aSig1;
158142c2
FB
6061
6062 aSig1 = extractFloat128Frac1( a );
6063 aSig0 = extractFloat128Frac0( a );
6064 aExp = extractFloat128Exp( a );
6065 aSign = extractFloat128Sign( a );
6066 if ( aExp == 0x7FFF ) {
6067 if ( aSig0 | aSig1 ) {
ff32e16e 6068 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
6069 }
6070 return packFloat64( aSign, 0x7FF, 0 );
6071 }
6072 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6073 aSig0 |= ( aSig1 != 0 );
6074 if ( aExp || aSig0 ) {
6075 aSig0 |= LIT64( 0x4000000000000000 );
6076 aExp -= 0x3C01;
6077 }
ff32e16e 6078 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
6079
6080}
6081
158142c2
FB
6082/*----------------------------------------------------------------------------
6083| Returns the result of converting the quadruple-precision floating-point
6084| value `a' to the extended double-precision floating-point format. The
6085| conversion is performed according to the IEC/IEEE Standard for Binary
6086| Floating-Point Arithmetic.
6087*----------------------------------------------------------------------------*/
6088
e5a41ffa 6089floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2
FB
6090{
6091 flag aSign;
f4014512 6092 int32_t aExp;
bb98fe42 6093 uint64_t aSig0, aSig1;
158142c2
FB
6094
6095 aSig1 = extractFloat128Frac1( a );
6096 aSig0 = extractFloat128Frac0( a );
6097 aExp = extractFloat128Exp( a );
6098 aSign = extractFloat128Sign( a );
6099 if ( aExp == 0x7FFF ) {
6100 if ( aSig0 | aSig1 ) {
ff32e16e 6101 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
158142c2 6102 }
0f605c88
LV
6103 return packFloatx80(aSign, floatx80_infinity_high,
6104 floatx80_infinity_low);
158142c2
FB
6105 }
6106 if ( aExp == 0 ) {
6107 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6108 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6109 }
6110 else {
6111 aSig0 |= LIT64( 0x0001000000000000 );
6112 }
6113 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6114 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6115
6116}
6117
158142c2
FB
6118/*----------------------------------------------------------------------------
6119| Rounds the quadruple-precision floating-point value `a' to an integer, and
6120| returns the result as a quadruple-precision floating-point value. The
6121| operation is performed according to the IEC/IEEE Standard for Binary
6122| Floating-Point Arithmetic.
6123*----------------------------------------------------------------------------*/
6124
e5a41ffa 6125float128 float128_round_to_int(float128 a, float_status *status)
158142c2
FB
6126{
6127 flag aSign;
f4014512 6128 int32_t aExp;
bb98fe42 6129 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
6130 float128 z;
6131
6132 aExp = extractFloat128Exp( a );
6133 if ( 0x402F <= aExp ) {
6134 if ( 0x406F <= aExp ) {
6135 if ( ( aExp == 0x7FFF )
6136 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6137 ) {
ff32e16e 6138 return propagateFloat128NaN(a, a, status);
158142c2
FB
6139 }
6140 return a;
6141 }
6142 lastBitMask = 1;
6143 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6144 roundBitsMask = lastBitMask - 1;
6145 z = a;
a2f2d288 6146 switch (status->float_rounding_mode) {
dc355b76 6147 case float_round_nearest_even:
158142c2
FB
6148 if ( lastBitMask ) {
6149 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6150 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6151 }
6152 else {
bb98fe42 6153 if ( (int64_t) z.low < 0 ) {
158142c2 6154 ++z.high;
bb98fe42 6155 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
6156 }
6157 }
dc355b76 6158 break;
f9288a76
PM
6159 case float_round_ties_away:
6160 if (lastBitMask) {
6161 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6162 } else {
6163 if ((int64_t) z.low < 0) {
6164 ++z.high;
6165 }
6166 }
6167 break;
dc355b76
PM
6168 case float_round_to_zero:
6169 break;
6170 case float_round_up:
6171 if (!extractFloat128Sign(z)) {
6172 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6173 }
6174 break;
6175 case float_round_down:
6176 if (extractFloat128Sign(z)) {
6177 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 6178 }
dc355b76
PM
6179 break;
6180 default:
6181 abort();
158142c2
FB
6182 }
6183 z.low &= ~ roundBitsMask;
6184 }
6185 else {
6186 if ( aExp < 0x3FFF ) {
bb98fe42 6187 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
a2f2d288 6188 status->float_exception_flags |= float_flag_inexact;
158142c2 6189 aSign = extractFloat128Sign( a );
a2f2d288 6190 switch (status->float_rounding_mode) {
158142c2
FB
6191 case float_round_nearest_even:
6192 if ( ( aExp == 0x3FFE )
6193 && ( extractFloat128Frac0( a )
6194 | extractFloat128Frac1( a ) )
6195 ) {
6196 return packFloat128( aSign, 0x3FFF, 0, 0 );
6197 }
6198 break;
f9288a76
PM
6199 case float_round_ties_away:
6200 if (aExp == 0x3FFE) {
6201 return packFloat128(aSign, 0x3FFF, 0, 0);
6202 }
6203 break;
158142c2
FB
6204 case float_round_down:
6205 return
6206 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6207 : packFloat128( 0, 0, 0, 0 );
6208 case float_round_up:
6209 return
6210 aSign ? packFloat128( 1, 0, 0, 0 )
6211 : packFloat128( 0, 0x3FFF, 0, 0 );
6212 }
6213 return packFloat128( aSign, 0, 0, 0 );
6214 }
6215 lastBitMask = 1;
6216 lastBitMask <<= 0x402F - aExp;
6217 roundBitsMask = lastBitMask - 1;
6218 z.low = 0;
6219 z.high = a.high;
a2f2d288 6220 switch (status->float_rounding_mode) {
dc355b76 6221 case float_round_nearest_even:
158142c2
FB
6222 z.high += lastBitMask>>1;
6223 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6224 z.high &= ~ lastBitMask;
6225 }
dc355b76 6226 break;
f9288a76
PM
6227 case float_round_ties_away:
6228 z.high += lastBitMask>>1;
6229 break;
dc355b76
PM
6230 case float_round_to_zero:
6231 break;
6232 case float_round_up:
6233 if (!extractFloat128Sign(z)) {
158142c2
FB
6234 z.high |= ( a.low != 0 );
6235 z.high += roundBitsMask;
6236 }
dc355b76
PM
6237 break;
6238 case float_round_down:
6239 if (extractFloat128Sign(z)) {
6240 z.high |= (a.low != 0);
6241 z.high += roundBitsMask;
6242 }
6243 break;
6244 default:
6245 abort();
158142c2
FB
6246 }
6247 z.high &= ~ roundBitsMask;
6248 }
6249 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
a2f2d288 6250 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6251 }
6252 return z;
6253
6254}
6255
6256/*----------------------------------------------------------------------------
6257| Returns the result of adding the absolute values of the quadruple-precision
6258| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
6259| before being returned. `zSign' is ignored if the result is a NaN.
6260| The addition is performed according to the IEC/IEEE Standard for Binary
6261| Floating-Point Arithmetic.
6262*----------------------------------------------------------------------------*/
6263
e5a41ffa
PM
6264static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
6265 float_status *status)
158142c2 6266{
f4014512 6267 int32_t aExp, bExp, zExp;
bb98fe42 6268 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 6269 int32_t expDiff;
158142c2
FB
6270
6271 aSig1 = extractFloat128Frac1( a );
6272 aSig0 = extractFloat128Frac0( a );
6273 aExp = extractFloat128Exp( a );
6274 bSig1 = extractFloat128Frac1( b );
6275 bSig0 = extractFloat128Frac0( b );
6276 bExp = extractFloat128Exp( b );
6277 expDiff = aExp - bExp;
6278 if ( 0 < expDiff ) {
6279 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6280 if (aSig0 | aSig1) {
6281 return propagateFloat128NaN(a, b, status);
6282 }
158142c2
FB
6283 return a;
6284 }
6285 if ( bExp == 0 ) {
6286 --expDiff;
6287 }
6288 else {
6289 bSig0 |= LIT64( 0x0001000000000000 );
6290 }
6291 shift128ExtraRightJamming(
6292 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6293 zExp = aExp;
6294 }
6295 else if ( expDiff < 0 ) {
6296 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6297 if (bSig0 | bSig1) {
6298 return propagateFloat128NaN(a, b, status);
6299 }
158142c2
FB
6300 return packFloat128( zSign, 0x7FFF, 0, 0 );
6301 }
6302 if ( aExp == 0 ) {
6303 ++expDiff;
6304 }
6305 else {
6306 aSig0 |= LIT64( 0x0001000000000000 );
6307 }
6308 shift128ExtraRightJamming(
6309 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6310 zExp = bExp;
6311 }
6312 else {
6313 if ( aExp == 0x7FFF ) {
6314 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6315 return propagateFloat128NaN(a, b, status);
158142c2
FB
6316 }
6317 return a;
6318 }
6319 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 6320 if ( aExp == 0 ) {
a2f2d288 6321 if (status->flush_to_zero) {
e6afc87f 6322 if (zSig0 | zSig1) {
ff32e16e 6323 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
6324 }
6325 return packFloat128(zSign, 0, 0, 0);
6326 }
fe76d976
PB
6327 return packFloat128( zSign, 0, zSig0, zSig1 );
6328 }
158142c2
FB
6329 zSig2 = 0;
6330 zSig0 |= LIT64( 0x0002000000000000 );
6331 zExp = aExp;
6332 goto shiftRight1;
6333 }
6334 aSig0 |= LIT64( 0x0001000000000000 );
6335 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6336 --zExp;
6337 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
6338 ++zExp;
6339 shiftRight1:
6340 shift128ExtraRightJamming(
6341 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6342 roundAndPack:
ff32e16e 6343 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6344
6345}
6346
6347/*----------------------------------------------------------------------------
6348| Returns the result of subtracting the absolute values of the quadruple-
6349| precision floating-point values `a' and `b'. If `zSign' is 1, the
6350| difference is negated before being returned. `zSign' is ignored if the
6351| result is a NaN. The subtraction is performed according to the IEC/IEEE
6352| Standard for Binary Floating-Point Arithmetic.
6353*----------------------------------------------------------------------------*/
6354
e5a41ffa
PM
6355static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
6356 float_status *status)
158142c2 6357{
f4014512 6358 int32_t aExp, bExp, zExp;
bb98fe42 6359 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 6360 int32_t expDiff;
158142c2
FB
6361
6362 aSig1 = extractFloat128Frac1( a );
6363 aSig0 = extractFloat128Frac0( a );
6364 aExp = extractFloat128Exp( a );
6365 bSig1 = extractFloat128Frac1( b );
6366 bSig0 = extractFloat128Frac0( b );
6367 bExp = extractFloat128Exp( b );
6368 expDiff = aExp - bExp;
6369 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6370 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
6371 if ( 0 < expDiff ) goto aExpBigger;
6372 if ( expDiff < 0 ) goto bExpBigger;
6373 if ( aExp == 0x7FFF ) {
6374 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6375 return propagateFloat128NaN(a, b, status);
158142c2 6376 }
ff32e16e 6377 float_raise(float_flag_invalid, status);
af39bc8c 6378 return float128_default_nan(status);
158142c2
FB
6379 }
6380 if ( aExp == 0 ) {
6381 aExp = 1;
6382 bExp = 1;
6383 }
6384 if ( bSig0 < aSig0 ) goto aBigger;
6385 if ( aSig0 < bSig0 ) goto bBigger;
6386 if ( bSig1 < aSig1 ) goto aBigger;
6387 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
6388 return packFloat128(status->float_rounding_mode == float_round_down,
6389 0, 0, 0);
158142c2
FB
6390 bExpBigger:
6391 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6392 if (bSig0 | bSig1) {
6393 return propagateFloat128NaN(a, b, status);
6394 }
158142c2
FB
6395 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
6396 }
6397 if ( aExp == 0 ) {
6398 ++expDiff;
6399 }
6400 else {
6401 aSig0 |= LIT64( 0x4000000000000000 );
6402 }
6403 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6404 bSig0 |= LIT64( 0x4000000000000000 );
6405 bBigger:
6406 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
6407 zExp = bExp;
6408 zSign ^= 1;
6409 goto normalizeRoundAndPack;
6410 aExpBigger:
6411 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6412 if (aSig0 | aSig1) {
6413 return propagateFloat128NaN(a, b, status);
6414 }
158142c2
FB
6415 return a;
6416 }
6417 if ( bExp == 0 ) {
6418 --expDiff;
6419 }
6420 else {
6421 bSig0 |= LIT64( 0x4000000000000000 );
6422 }
6423 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
6424 aSig0 |= LIT64( 0x4000000000000000 );
6425 aBigger:
6426 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6427 zExp = aExp;
6428 normalizeRoundAndPack:
6429 --zExp;
ff32e16e
PM
6430 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
6431 status);
158142c2
FB
6432
6433}
6434
6435/*----------------------------------------------------------------------------
6436| Returns the result of adding the quadruple-precision floating-point values
6437| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
6438| for Binary Floating-Point Arithmetic.
6439*----------------------------------------------------------------------------*/
6440
e5a41ffa 6441float128 float128_add(float128 a, float128 b, float_status *status)
158142c2
FB
6442{
6443 flag aSign, bSign;
6444
6445 aSign = extractFloat128Sign( a );
6446 bSign = extractFloat128Sign( b );
6447 if ( aSign == bSign ) {
ff32e16e 6448 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6449 }
6450 else {
ff32e16e 6451 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6452 }
6453
6454}
6455
6456/*----------------------------------------------------------------------------
6457| Returns the result of subtracting the quadruple-precision floating-point
6458| values `a' and `b'. The operation is performed according to the IEC/IEEE
6459| Standard for Binary Floating-Point Arithmetic.
6460*----------------------------------------------------------------------------*/
6461
e5a41ffa 6462float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2
FB
6463{
6464 flag aSign, bSign;
6465
6466 aSign = extractFloat128Sign( a );
6467 bSign = extractFloat128Sign( b );
6468 if ( aSign == bSign ) {
ff32e16e 6469 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6470 }
6471 else {
ff32e16e 6472 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6473 }
6474
6475}
6476
6477/*----------------------------------------------------------------------------
6478| Returns the result of multiplying the quadruple-precision floating-point
6479| values `a' and `b'. The operation is performed according to the IEC/IEEE
6480| Standard for Binary Floating-Point Arithmetic.
6481*----------------------------------------------------------------------------*/
6482
e5a41ffa 6483float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2
FB
6484{
6485 flag aSign, bSign, zSign;
f4014512 6486 int32_t aExp, bExp, zExp;
bb98fe42 6487 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
6488
6489 aSig1 = extractFloat128Frac1( a );
6490 aSig0 = extractFloat128Frac0( a );
6491 aExp = extractFloat128Exp( a );
6492 aSign = extractFloat128Sign( a );
6493 bSig1 = extractFloat128Frac1( b );
6494 bSig0 = extractFloat128Frac0( b );
6495 bExp = extractFloat128Exp( b );
6496 bSign = extractFloat128Sign( b );
6497 zSign = aSign ^ bSign;
6498 if ( aExp == 0x7FFF ) {
6499 if ( ( aSig0 | aSig1 )
6500 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6501 return propagateFloat128NaN(a, b, status);
158142c2
FB
6502 }
6503 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6504 return packFloat128( zSign, 0x7FFF, 0, 0 );
6505 }
6506 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6507 if (bSig0 | bSig1) {
6508 return propagateFloat128NaN(a, b, status);
6509 }
158142c2
FB
6510 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6511 invalid:
ff32e16e 6512 float_raise(float_flag_invalid, status);
af39bc8c 6513 return float128_default_nan(status);
158142c2
FB
6514 }
6515 return packFloat128( zSign, 0x7FFF, 0, 0 );
6516 }
6517 if ( aExp == 0 ) {
6518 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6519 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6520 }
6521 if ( bExp == 0 ) {
6522 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6523 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6524 }
6525 zExp = aExp + bExp - 0x4000;
6526 aSig0 |= LIT64( 0x0001000000000000 );
6527 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6528 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6529 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6530 zSig2 |= ( zSig3 != 0 );
6531 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6532 shift128ExtraRightJamming(
6533 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6534 ++zExp;
6535 }
ff32e16e 6536 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6537
6538}
6539
6540/*----------------------------------------------------------------------------
6541| Returns the result of dividing the quadruple-precision floating-point value
6542| `a' by the corresponding value `b'. The operation is performed according to
6543| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6544*----------------------------------------------------------------------------*/
6545
e5a41ffa 6546float128 float128_div(float128 a, float128 b, float_status *status)
158142c2
FB
6547{
6548 flag aSign, bSign, zSign;
f4014512 6549 int32_t aExp, bExp, zExp;
bb98fe42
AF
6550 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6551 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6552
6553 aSig1 = extractFloat128Frac1( a );
6554 aSig0 = extractFloat128Frac0( a );
6555 aExp = extractFloat128Exp( a );
6556 aSign = extractFloat128Sign( a );
6557 bSig1 = extractFloat128Frac1( b );
6558 bSig0 = extractFloat128Frac0( b );
6559 bExp = extractFloat128Exp( b );
6560 bSign = extractFloat128Sign( b );
6561 zSign = aSign ^ bSign;
6562 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6563 if (aSig0 | aSig1) {
6564 return propagateFloat128NaN(a, b, status);
6565 }
158142c2 6566 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6567 if (bSig0 | bSig1) {
6568 return propagateFloat128NaN(a, b, status);
6569 }
158142c2
FB
6570 goto invalid;
6571 }
6572 return packFloat128( zSign, 0x7FFF, 0, 0 );
6573 }
6574 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6575 if (bSig0 | bSig1) {
6576 return propagateFloat128NaN(a, b, status);
6577 }
158142c2
FB
6578 return packFloat128( zSign, 0, 0, 0 );
6579 }
6580 if ( bExp == 0 ) {
6581 if ( ( bSig0 | bSig1 ) == 0 ) {
6582 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6583 invalid:
ff32e16e 6584 float_raise(float_flag_invalid, status);
af39bc8c 6585 return float128_default_nan(status);
158142c2 6586 }
ff32e16e 6587 float_raise(float_flag_divbyzero, status);
158142c2
FB
6588 return packFloat128( zSign, 0x7FFF, 0, 0 );
6589 }
6590 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6591 }
6592 if ( aExp == 0 ) {
6593 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6594 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6595 }
6596 zExp = aExp - bExp + 0x3FFD;
6597 shortShift128Left(
6598 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6599 shortShift128Left(
6600 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6601 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6602 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6603 ++zExp;
6604 }
6605 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6606 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6607 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 6608 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6609 --zSig0;
6610 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6611 }
6612 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6613 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6614 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6615 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6616 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6617 --zSig1;
6618 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6619 }
6620 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6621 }
6622 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 6623 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6624
6625}
6626
6627/*----------------------------------------------------------------------------
6628| Returns the remainder of the quadruple-precision floating-point value `a'
6629| with respect to the corresponding value `b'. The operation is performed
6630| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6631*----------------------------------------------------------------------------*/
6632
e5a41ffa 6633float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 6634{
ed086f3d 6635 flag aSign, zSign;
f4014512 6636 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6637 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6638 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6639 int64_t sigMean0;
158142c2
FB
6640
6641 aSig1 = extractFloat128Frac1( a );
6642 aSig0 = extractFloat128Frac0( a );
6643 aExp = extractFloat128Exp( a );
6644 aSign = extractFloat128Sign( a );
6645 bSig1 = extractFloat128Frac1( b );
6646 bSig0 = extractFloat128Frac0( b );
6647 bExp = extractFloat128Exp( b );
158142c2
FB
6648 if ( aExp == 0x7FFF ) {
6649 if ( ( aSig0 | aSig1 )
6650 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6651 return propagateFloat128NaN(a, b, status);
158142c2
FB
6652 }
6653 goto invalid;
6654 }
6655 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6656 if (bSig0 | bSig1) {
6657 return propagateFloat128NaN(a, b, status);
6658 }
158142c2
FB
6659 return a;
6660 }
6661 if ( bExp == 0 ) {
6662 if ( ( bSig0 | bSig1 ) == 0 ) {
6663 invalid:
ff32e16e 6664 float_raise(float_flag_invalid, status);
af39bc8c 6665 return float128_default_nan(status);
158142c2
FB
6666 }
6667 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6668 }
6669 if ( aExp == 0 ) {
6670 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6671 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6672 }
6673 expDiff = aExp - bExp;
6674 if ( expDiff < -1 ) return a;
6675 shortShift128Left(
6676 aSig0 | LIT64( 0x0001000000000000 ),
6677 aSig1,
6678 15 - ( expDiff < 0 ),
6679 &aSig0,
6680 &aSig1
6681 );
6682 shortShift128Left(
6683 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6684 q = le128( bSig0, bSig1, aSig0, aSig1 );
6685 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6686 expDiff -= 64;
6687 while ( 0 < expDiff ) {
6688 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6689 q = ( 4 < q ) ? q - 4 : 0;
6690 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6691 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6692 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6693 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6694 expDiff -= 61;
6695 }
6696 if ( -64 < expDiff ) {
6697 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6698 q = ( 4 < q ) ? q - 4 : 0;
6699 q >>= - expDiff;
6700 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6701 expDiff += 52;
6702 if ( expDiff < 0 ) {
6703 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6704 }
6705 else {
6706 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6707 }
6708 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6709 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6710 }
6711 else {
6712 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6713 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6714 }
6715 do {
6716 alternateASig0 = aSig0;
6717 alternateASig1 = aSig1;
6718 ++q;
6719 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 6720 } while ( 0 <= (int64_t) aSig0 );
158142c2 6721 add128(
bb98fe42 6722 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
6723 if ( ( sigMean0 < 0 )
6724 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6725 aSig0 = alternateASig0;
6726 aSig1 = alternateASig1;
6727 }
bb98fe42 6728 zSign = ( (int64_t) aSig0 < 0 );
158142c2 6729 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
6730 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6731 status);
158142c2
FB
6732}
6733
6734/*----------------------------------------------------------------------------
6735| Returns the square root of the quadruple-precision floating-point value `a'.
6736| The operation is performed according to the IEC/IEEE Standard for Binary
6737| Floating-Point Arithmetic.
6738*----------------------------------------------------------------------------*/
6739
e5a41ffa 6740float128 float128_sqrt(float128 a, float_status *status)
158142c2
FB
6741{
6742 flag aSign;
f4014512 6743 int32_t aExp, zExp;
bb98fe42
AF
6744 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6745 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6746
6747 aSig1 = extractFloat128Frac1( a );
6748 aSig0 = extractFloat128Frac0( a );
6749 aExp = extractFloat128Exp( a );
6750 aSign = extractFloat128Sign( a );
6751 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6752 if (aSig0 | aSig1) {
6753 return propagateFloat128NaN(a, a, status);
6754 }
158142c2
FB
6755 if ( ! aSign ) return a;
6756 goto invalid;
6757 }
6758 if ( aSign ) {
6759 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6760 invalid:
ff32e16e 6761 float_raise(float_flag_invalid, status);
af39bc8c 6762 return float128_default_nan(status);
158142c2
FB
6763 }
6764 if ( aExp == 0 ) {
6765 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6766 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6767 }
6768 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6769 aSig0 |= LIT64( 0x0001000000000000 );
6770 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6771 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6772 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6773 doubleZSig0 = zSig0<<1;
6774 mul64To128( zSig0, zSig0, &term0, &term1 );
6775 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6776 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6777 --zSig0;
6778 doubleZSig0 -= 2;
6779 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6780 }
6781 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6782 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6783 if ( zSig1 == 0 ) zSig1 = 1;
6784 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6785 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6786 mul64To128( zSig1, zSig1, &term2, &term3 );
6787 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6788 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6789 --zSig1;
6790 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6791 term3 |= 1;
6792 term2 |= doubleZSig0;
6793 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6794 }
6795 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6796 }
6797 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 6798 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6799
6800}
6801
6802/*----------------------------------------------------------------------------
6803| Returns 1 if the quadruple-precision floating-point value `a' is equal to
b689362d
AJ
6804| the corresponding value `b', and 0 otherwise. The invalid exception is
6805| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
6806| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6807*----------------------------------------------------------------------------*/
6808
e5a41ffa 6809int float128_eq(float128 a, float128 b, float_status *status)
158142c2
FB
6810{
6811
6812 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6813 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6814 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6815 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6816 ) {
ff32e16e 6817 float_raise(float_flag_invalid, status);
158142c2
FB
6818 return 0;
6819 }
6820 return
6821 ( a.low == b.low )
6822 && ( ( a.high == b.high )
6823 || ( ( a.low == 0 )
bb98fe42 6824 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6825 );
6826
6827}
6828
6829/*----------------------------------------------------------------------------
6830| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6831| or equal to the corresponding value `b', and 0 otherwise. The invalid
6832| exception is raised if either operand is a NaN. The comparison is performed
6833| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6834*----------------------------------------------------------------------------*/
6835
e5a41ffa 6836int float128_le(float128 a, float128 b, float_status *status)
158142c2
FB
6837{
6838 flag aSign, bSign;
6839
6840 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6841 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6842 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6843 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6844 ) {
ff32e16e 6845 float_raise(float_flag_invalid, status);
158142c2
FB
6846 return 0;
6847 }
6848 aSign = extractFloat128Sign( a );
6849 bSign = extractFloat128Sign( b );
6850 if ( aSign != bSign ) {
6851 return
6852 aSign
bb98fe42 6853 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6854 == 0 );
6855 }
6856 return
6857 aSign ? le128( b.high, b.low, a.high, a.low )
6858 : le128( a.high, a.low, b.high, b.low );
6859
6860}
6861
6862/*----------------------------------------------------------------------------
6863| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6864| the corresponding value `b', and 0 otherwise. The invalid exception is
6865| raised if either operand is a NaN. The comparison is performed according
6866| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6867*----------------------------------------------------------------------------*/
6868
e5a41ffa 6869int float128_lt(float128 a, float128 b, float_status *status)
158142c2
FB
6870{
6871 flag aSign, bSign;
6872
6873 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6874 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6875 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6876 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6877 ) {
ff32e16e 6878 float_raise(float_flag_invalid, status);
158142c2
FB
6879 return 0;
6880 }
6881 aSign = extractFloat128Sign( a );
6882 bSign = extractFloat128Sign( b );
6883 if ( aSign != bSign ) {
6884 return
6885 aSign
bb98fe42 6886 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6887 != 0 );
6888 }
6889 return
6890 aSign ? lt128( b.high, b.low, a.high, a.low )
6891 : lt128( a.high, a.low, b.high, b.low );
6892
6893}
6894
67b7861d
AJ
6895/*----------------------------------------------------------------------------
6896| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
f5a64251
AJ
6897| be compared, and 0 otherwise. The invalid exception is raised if either
6898| operand is a NaN. The comparison is performed according to the IEC/IEEE
6899| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
6900*----------------------------------------------------------------------------*/
6901
e5a41ffa 6902int float128_unordered(float128 a, float128 b, float_status *status)
67b7861d
AJ
6903{
6904 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6905 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6906 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6907 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6908 ) {
ff32e16e 6909 float_raise(float_flag_invalid, status);
67b7861d
AJ
6910 return 1;
6911 }
6912 return 0;
6913}
6914
158142c2
FB
6915/*----------------------------------------------------------------------------
6916| Returns 1 if the quadruple-precision floating-point value `a' is equal to
f5a64251
AJ
6917| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6918| exception. The comparison is performed according to the IEC/IEEE Standard
6919| for Binary Floating-Point Arithmetic.
158142c2
FB
6920*----------------------------------------------------------------------------*/
6921
e5a41ffa 6922int float128_eq_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6923{
6924
6925 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6926 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6927 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6928 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6929 ) {
af39bc8c
AM
6930 if (float128_is_signaling_nan(a, status)
6931 || float128_is_signaling_nan(b, status)) {
ff32e16e 6932 float_raise(float_flag_invalid, status);
b689362d 6933 }
158142c2
FB
6934 return 0;
6935 }
6936 return
6937 ( a.low == b.low )
6938 && ( ( a.high == b.high )
6939 || ( ( a.low == 0 )
bb98fe42 6940 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6941 );
6942
6943}
6944
6945/*----------------------------------------------------------------------------
6946| Returns 1 if the quadruple-precision floating-point value `a' is less than
6947| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6948| cause an exception. Otherwise, the comparison is performed according to the
6949| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6950*----------------------------------------------------------------------------*/
6951
e5a41ffa 6952int float128_le_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6953{
6954 flag aSign, bSign;
6955
6956 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6957 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6958 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6959 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6960 ) {
af39bc8c
AM
6961 if (float128_is_signaling_nan(a, status)
6962 || float128_is_signaling_nan(b, status)) {
ff32e16e 6963 float_raise(float_flag_invalid, status);
158142c2
FB
6964 }
6965 return 0;
6966 }
6967 aSign = extractFloat128Sign( a );
6968 bSign = extractFloat128Sign( b );
6969 if ( aSign != bSign ) {
6970 return
6971 aSign
bb98fe42 6972 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6973 == 0 );
6974 }
6975 return
6976 aSign ? le128( b.high, b.low, a.high, a.low )
6977 : le128( a.high, a.low, b.high, b.low );
6978
6979}
6980
6981/*----------------------------------------------------------------------------
6982| Returns 1 if the quadruple-precision floating-point value `a' is less than
6983| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6984| exception. Otherwise, the comparison is performed according to the IEC/IEEE
6985| Standard for Binary Floating-Point Arithmetic.
6986*----------------------------------------------------------------------------*/
6987
e5a41ffa 6988int float128_lt_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6989{
6990 flag aSign, bSign;
6991
6992 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6993 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6994 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6995 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6996 ) {
af39bc8c
AM
6997 if (float128_is_signaling_nan(a, status)
6998 || float128_is_signaling_nan(b, status)) {
ff32e16e 6999 float_raise(float_flag_invalid, status);
158142c2
FB
7000 }
7001 return 0;
7002 }
7003 aSign = extractFloat128Sign( a );
7004 bSign = extractFloat128Sign( b );
7005 if ( aSign != bSign ) {
7006 return
7007 aSign
bb98fe42 7008 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7009 != 0 );
7010 }
7011 return
7012 aSign ? lt128( b.high, b.low, a.high, a.low )
7013 : lt128( a.high, a.low, b.high, b.low );
7014
7015}
7016
67b7861d
AJ
7017/*----------------------------------------------------------------------------
7018| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
7019| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
7020| comparison is performed according to the IEC/IEEE Standard for Binary
7021| Floating-Point Arithmetic.
7022*----------------------------------------------------------------------------*/
7023
e5a41ffa 7024int float128_unordered_quiet(float128 a, float128 b, float_status *status)
67b7861d
AJ
7025{
7026 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7027 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7028 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7029 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7030 ) {
af39bc8c
AM
7031 if (float128_is_signaling_nan(a, status)
7032 || float128_is_signaling_nan(b, status)) {
ff32e16e 7033 float_raise(float_flag_invalid, status);
67b7861d
AJ
7034 }
7035 return 1;
7036 }
7037 return 0;
7038}
7039
e5a41ffa
PM
7040static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
7041 int is_quiet, float_status *status)
f6714d36
AJ
7042{
7043 flag aSign, bSign;
7044
d1eb8f2a
AD
7045 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7046 float_raise(float_flag_invalid, status);
7047 return float_relation_unordered;
7048 }
f6714d36
AJ
7049 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7050 ( extractFloatx80Frac( a )<<1 ) ) ||
7051 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7052 ( extractFloatx80Frac( b )<<1 ) )) {
7053 if (!is_quiet ||
af39bc8c
AM
7054 floatx80_is_signaling_nan(a, status) ||
7055 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7056 float_raise(float_flag_invalid, status);
f6714d36
AJ
7057 }
7058 return float_relation_unordered;
7059 }
7060 aSign = extractFloatx80Sign( a );
7061 bSign = extractFloatx80Sign( b );
7062 if ( aSign != bSign ) {
7063
7064 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7065 ( ( a.low | b.low ) == 0 ) ) {
7066 /* zero case */
7067 return float_relation_equal;
7068 } else {
7069 return 1 - (2 * aSign);
7070 }
7071 } else {
7072 if (a.low == b.low && a.high == b.high) {
7073 return float_relation_equal;
7074 } else {
7075 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7076 }
7077 }
7078}
7079
e5a41ffa 7080int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7081{
ff32e16e 7082 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7083}
7084
e5a41ffa 7085int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
f6714d36 7086{
ff32e16e 7087 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7088}
7089
e5a41ffa
PM
7090static inline int float128_compare_internal(float128 a, float128 b,
7091 int is_quiet, float_status *status)
1f587329
BS
7092{
7093 flag aSign, bSign;
7094
7095 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7096 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7097 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7098 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7099 if (!is_quiet ||
af39bc8c
AM
7100 float128_is_signaling_nan(a, status) ||
7101 float128_is_signaling_nan(b, status)) {
ff32e16e 7102 float_raise(float_flag_invalid, status);
1f587329
BS
7103 }
7104 return float_relation_unordered;
7105 }
7106 aSign = extractFloat128Sign( a );
7107 bSign = extractFloat128Sign( b );
7108 if ( aSign != bSign ) {
7109 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7110 /* zero case */
7111 return float_relation_equal;
7112 } else {
7113 return 1 - (2 * aSign);
7114 }
7115 } else {
7116 if (a.low == b.low && a.high == b.high) {
7117 return float_relation_equal;
7118 } else {
7119 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7120 }
7121 }
7122}
7123
e5a41ffa 7124int float128_compare(float128 a, float128 b, float_status *status)
1f587329 7125{
ff32e16e 7126 return float128_compare_internal(a, b, 0, status);
1f587329
BS
7127}
7128
e5a41ffa 7129int float128_compare_quiet(float128 a, float128 b, float_status *status)
1f587329 7130{
ff32e16e 7131 return float128_compare_internal(a, b, 1, status);
1f587329
BS
7132}
7133
e5a41ffa 7134floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb
PB
7135{
7136 flag aSign;
326b9e98 7137 int32_t aExp;
bb98fe42 7138 uint64_t aSig;
9ee6e8bb 7139
d1eb8f2a
AD
7140 if (floatx80_invalid_encoding(a)) {
7141 float_raise(float_flag_invalid, status);
7142 return floatx80_default_nan(status);
7143 }
9ee6e8bb
PB
7144 aSig = extractFloatx80Frac( a );
7145 aExp = extractFloatx80Exp( a );
7146 aSign = extractFloatx80Sign( a );
7147
326b9e98
AJ
7148 if ( aExp == 0x7FFF ) {
7149 if ( aSig<<1 ) {
ff32e16e 7150 return propagateFloatx80NaN(a, a, status);
326b9e98 7151 }
9ee6e8bb
PB
7152 return a;
7153 }
326b9e98 7154
3c85c37f
PM
7155 if (aExp == 0) {
7156 if (aSig == 0) {
7157 return a;
7158 }
7159 aExp++;
7160 }
69397542 7161
326b9e98
AJ
7162 if (n > 0x10000) {
7163 n = 0x10000;
7164 } else if (n < -0x10000) {
7165 n = -0x10000;
7166 }
7167
9ee6e8bb 7168 aExp += n;
a2f2d288
PM
7169 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7170 aSign, aExp, aSig, 0, status);
9ee6e8bb 7171}
9ee6e8bb 7172
e5a41ffa 7173float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb
PB
7174{
7175 flag aSign;
326b9e98 7176 int32_t aExp;
bb98fe42 7177 uint64_t aSig0, aSig1;
9ee6e8bb
PB
7178
7179 aSig1 = extractFloat128Frac1( a );
7180 aSig0 = extractFloat128Frac0( a );
7181 aExp = extractFloat128Exp( a );
7182 aSign = extractFloat128Sign( a );
7183 if ( aExp == 0x7FFF ) {
326b9e98 7184 if ( aSig0 | aSig1 ) {
ff32e16e 7185 return propagateFloat128NaN(a, a, status);
326b9e98 7186 }
9ee6e8bb
PB
7187 return a;
7188 }
3c85c37f 7189 if (aExp != 0) {
69397542 7190 aSig0 |= LIT64( 0x0001000000000000 );
3c85c37f 7191 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 7192 return a;
3c85c37f
PM
7193 } else {
7194 aExp++;
7195 }
69397542 7196
326b9e98
AJ
7197 if (n > 0x10000) {
7198 n = 0x10000;
7199 } else if (n < -0x10000) {
7200 n = -0x10000;
7201 }
7202
69397542
PB
7203 aExp += n - 1;
7204 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 7205 , status);
9ee6e8bb
PB
7206
7207}