]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
hardfloat: implement float32/64 square root
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is neither fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 s->float_exception_flags |= float_flag_input_denormal; \
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
/*
 * Select, per (number of inputs, float width) combination, whether the
 * generated hardfloat helpers classify operands with fpclassify() (1) or
 * with the float32/64_* softfloat primitives (0).
 *
 * The single-precision variants use the softfloat primitives on every
 * host; only the double-precision variants differ, and only on x86_64.
 */
#define QEMU_HARDFLOAT_1F32_USE_FP 0
#define QEMU_HARDFLOAT_2F32_USE_FP 0
#define QEMU_HARDFLOAT_3F32_USE_FP 0

#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205
/*
 * QEMU_HARDFLOAT_USE_ISINF selects isinf() (1) over
 * float{32,64}_is_infinity (0) when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if !defined(__x86_64__) && !defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   0
#else
# define QEMU_HARDFLOAT_USE_ISINF   1
#endif
217
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 *
 * Hardfloat is likewise disabled when building with -ffast-math; in that
 * case a compile-time warning is emitted as well.
 */
#if defined(__FAST_MATH__)
# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
  IEEE implementation
#endif

#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
342/* Note: @fast_test and @post can be NULL */
343static inline float32
344float32_gen2(float32 xa, float32 xb, float_status *s,
345 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
346 f32_check_fn pre, f32_check_fn post,
347 f32_check_fn fast_test, soft_f32_op2_fn fast_op)
348{
349 union_float32 ua, ub, ur;
350
351 ua.s = xa;
352 ub.s = xb;
353
354 if (unlikely(!can_use_fpu(s))) {
355 goto soft;
356 }
357
358 float32_input_flush2(&ua.s, &ub.s, s);
359 if (unlikely(!pre(ua, ub))) {
360 goto soft;
361 }
362 if (fast_test && fast_test(ua, ub)) {
363 return fast_op(ua.s, ub.s, s);
364 }
365
366 ur.h = hard(ua.h, ub.h);
367 if (unlikely(f32_is_inf(ur))) {
368 s->float_exception_flags |= float_flag_overflow;
369 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
370 if (post == NULL || post(ua, ub)) {
371 goto soft;
372 }
373 }
374 return ur.s;
375
376 soft:
377 return soft(ua.s, ub.s, s);
378}
379
380static inline float64
381float64_gen2(float64 xa, float64 xb, float_status *s,
382 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
383 f64_check_fn pre, f64_check_fn post,
384 f64_check_fn fast_test, soft_f64_op2_fn fast_op)
385{
386 union_float64 ua, ub, ur;
387
388 ua.s = xa;
389 ub.s = xb;
390
391 if (unlikely(!can_use_fpu(s))) {
392 goto soft;
393 }
394
395 float64_input_flush2(&ua.s, &ub.s, s);
396 if (unlikely(!pre(ua, ub))) {
397 goto soft;
398 }
399 if (fast_test && fast_test(ua, ub)) {
400 return fast_op(ua.s, ub.s, s);
401 }
402
403 ur.h = hard(ua.h, ub.h);
404 if (unlikely(f64_is_inf(ur))) {
405 s->float_exception_flags |= float_flag_overflow;
406 } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
407 if (post == NULL || post(ua, ub)) {
408 goto soft;
409 }
410 }
411 return ur.s;
412
413 soft:
414 return soft(ua.s, ub.s, s);
415}
416
bb4d4bb3
PM
417/*----------------------------------------------------------------------------
418| Returns the fraction bits of the half-precision floating-point value `a'.
419*----------------------------------------------------------------------------*/
420
a49db98d 421static inline uint32_t extractFloat16Frac(float16 a)
bb4d4bb3
PM
422{
423 return float16_val(a) & 0x3ff;
424}
425
426/*----------------------------------------------------------------------------
427| Returns the exponent bits of the half-precision floating-point value `a'.
428*----------------------------------------------------------------------------*/
429
0c48262d 430static inline int extractFloat16Exp(float16 a)
bb4d4bb3
PM
431{
432 return (float16_val(a) >> 10) & 0x1f;
433}
434
d97544c9
AB
435/*----------------------------------------------------------------------------
436| Returns the fraction bits of the single-precision floating-point value `a'.
437*----------------------------------------------------------------------------*/
438
439static inline uint32_t extractFloat32Frac(float32 a)
440{
441 return float32_val(a) & 0x007FFFFF;
442}
443
444/*----------------------------------------------------------------------------
445| Returns the exponent bits of the single-precision floating-point value `a'.
446*----------------------------------------------------------------------------*/
447
448static inline int extractFloat32Exp(float32 a)
449{
450 return (float32_val(a) >> 23) & 0xFF;
451}
452
453/*----------------------------------------------------------------------------
454| Returns the sign bit of the single-precision floating-point value `a'.
455*----------------------------------------------------------------------------*/
456
457static inline flag extractFloat32Sign(float32 a)
458{
459 return float32_val(a) >> 31;
460}
461
462/*----------------------------------------------------------------------------
463| Returns the fraction bits of the double-precision floating-point value `a'.
464*----------------------------------------------------------------------------*/
465
466static inline uint64_t extractFloat64Frac(float64 a)
467{
468 return float64_val(a) & LIT64(0x000FFFFFFFFFFFFF);
469}
470
471/*----------------------------------------------------------------------------
472| Returns the exponent bits of the double-precision floating-point value `a'.
473*----------------------------------------------------------------------------*/
474
475static inline int extractFloat64Exp(float64 a)
476{
477 return (float64_val(a) >> 52) & 0x7FF;
478}
479
480/*----------------------------------------------------------------------------
481| Returns the sign bit of the double-precision floating-point value `a'.
482*----------------------------------------------------------------------------*/
483
484static inline flag extractFloat64Sign(float64 a)
485{
486 return float64_val(a) >> 63;
487}
488
a90119b5
AB
/*
 * Classification of a floating point number. The NaN classes are ordered
 * last, so that cls >= float_class_qnan is true for any NaN and for
 * nothing else.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified = 0,
    float_class_zero         = 1,
    float_class_normal       = 2,
    float_class_inf          = 3,
    float_class_qnan         = 4,  /* all NaNs from here */
    float_class_snan         = 5,
} FloatClass;
502
247d1f21
RH
503/* Simple helpers for checking if, or what kind of, NaN we have */
504static inline __attribute__((unused)) bool is_nan(FloatClass c)
505{
506 return unlikely(c >= float_class_qnan);
507}
508
509static inline __attribute__((unused)) bool is_snan(FloatClass c)
510{
511 return c == float_class_snan;
512}
513
514static inline __attribute__((unused)) bool is_qnan(FloatClass c)
515{
516 return c == float_class_qnan;
517}
518
a90119b5
AB
519/*
520 * Structure holding all of the decomposed parts of a float. The
521 * exponent is unbiased and the fraction is normalized. All
522 * calculations are done with a 64 bit fraction and then rounded as
523 * appropriate for the final format.
524 *
525 * Thanks to the packed FloatClass a decent compiler should be able to
526 * fit the whole structure into registers and avoid using the stack
527 * for parameter passing.
528 */
529
530typedef struct {
531 uint64_t frac;
532 int32_t exp;
533 FloatClass cls;
534 bool sign;
535} FloatParts;
536
537#define DECOMPOSED_BINARY_POINT (64 - 2)
538#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
539#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1)
540
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    /* Field geometry */
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;

    /* Rounding helpers derived from frac_size */
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;

    /* Optional modifiers */
    bool arm_althp;
} FloatFmt;
566
/*
 * Expand the FloatFmt fields that follow from the exponent size E and
 * the fraction size F (both in bits). The expansion is a list of
 * designated initializers, to be used inside a FloatFmt initializer.
 *
 * Every use of E and F is parenthesised so that arbitrary constant
 * expressions (e.g. "20 + 3") expand with the intended precedence.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = (E),                                           \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                         \
    .exp_max        = (1 << (E)) - 1,                                \
    .frac_size      = (F),                                           \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                 \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),       \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1), \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1, \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1
578
579static const FloatFmt float16_params = {
580 FLOAT_PARAMS(5, 10)
581};
582
6fed16b2
AB
583static const FloatFmt float16_params_ahp = {
584 FLOAT_PARAMS(5, 10),
585 .arm_althp = true
586};
587
a90119b5
AB
588static const FloatFmt float32_params = {
589 FLOAT_PARAMS(8, 23)
590};
591
592static const FloatFmt float64_params = {
593 FLOAT_PARAMS(11, 52)
594};
595
6fff2167
AB
596/* Unpack a float to parts, but do not canonicalize. */
597static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
598{
599 const int sign_pos = fmt.frac_size + fmt.exp_size;
600
601 return (FloatParts) {
602 .cls = float_class_unclassified,
603 .sign = extract64(raw, sign_pos, 1),
604 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
605 .frac = extract64(raw, 0, fmt.frac_size),
606 };
607}
608
609static inline FloatParts float16_unpack_raw(float16 f)
610{
611 return unpack_raw(float16_params, f);
612}
613
614static inline FloatParts float32_unpack_raw(float32 f)
615{
616 return unpack_raw(float32_params, f);
617}
618
619static inline FloatParts float64_unpack_raw(float64 f)
620{
621 return unpack_raw(float64_params, f);
622}
623
624/* Pack a float from parts, but do not canonicalize. */
625static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
626{
627 const int sign_pos = fmt.frac_size + fmt.exp_size;
628 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
629 return deposit64(ret, sign_pos, 1, p.sign);
630}
631
632static inline float16 float16_pack_raw(FloatParts p)
633{
634 return make_float16(pack_raw(float16_params, p));
635}
636
637static inline float32 float32_pack_raw(FloatParts p)
638{
639 return make_float32(pack_raw(float32_params, p));
640}
641
642static inline float64 float64_pack_raw(FloatParts p)
643{
644 return make_float64(pack_raw(float64_params, p));
645}
646
0664335a
RH
647/*----------------------------------------------------------------------------
648| Functions and definitions to determine: (1) whether tininess for underflow
649| is detected before or after rounding by default, (2) what (if anything)
650| happens when exceptions are raised, (3) how signaling NaNs are distinguished
651| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
652| are propagated from function inputs to output. These details are target-
653| specific.
654*----------------------------------------------------------------------------*/
655#include "softfloat-specialize.h"
656
6fff2167 657/* Canonicalize EXP and FRAC, setting CLS. */
f9943c7f
EC
658static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
659 float_status *status)
6fff2167 660{
ca3a3d5a 661 if (part.exp == parm->exp_max && !parm->arm_althp) {
6fff2167
AB
662 if (part.frac == 0) {
663 part.cls = float_class_inf;
664 } else {
94933df0 665 part.frac <<= parm->frac_shift;
298b468e
RH
666 part.cls = (parts_is_snan_frac(part.frac, status)
667 ? float_class_snan : float_class_qnan);
6fff2167
AB
668 }
669 } else if (part.exp == 0) {
670 if (likely(part.frac == 0)) {
671 part.cls = float_class_zero;
672 } else if (status->flush_inputs_to_zero) {
673 float_raise(float_flag_input_denormal, status);
674 part.cls = float_class_zero;
675 part.frac = 0;
676 } else {
677 int shift = clz64(part.frac) - 1;
678 part.cls = float_class_normal;
679 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
680 part.frac <<= shift;
681 }
682 } else {
683 part.cls = float_class_normal;
684 part.exp -= parm->exp_bias;
685 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
686 }
687 return part;
688}
689
690/* Round and uncanonicalize a floating-point number by parts. There
691 * are FRAC_SHIFT bits that may require rounding at the bottom of the
692 * fraction; these bits will be removed. The exponent will be biased
693 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
694 */
695
696static FloatParts round_canonical(FloatParts p, float_status *s,
697 const FloatFmt *parm)
698{
699 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
700 const uint64_t round_mask = parm->round_mask;
701 const uint64_t roundeven_mask = parm->roundeven_mask;
702 const int exp_max = parm->exp_max;
703 const int frac_shift = parm->frac_shift;
704 uint64_t frac, inc;
705 int exp, flags = 0;
706 bool overflow_norm;
707
708 frac = p.frac;
709 exp = p.exp;
710
711 switch (p.cls) {
712 case float_class_normal:
713 switch (s->float_rounding_mode) {
714 case float_round_nearest_even:
715 overflow_norm = false;
716 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
717 break;
718 case float_round_ties_away:
719 overflow_norm = false;
720 inc = frac_lsbm1;
721 break;
722 case float_round_to_zero:
723 overflow_norm = true;
724 inc = 0;
725 break;
726 case float_round_up:
727 inc = p.sign ? 0 : round_mask;
728 overflow_norm = p.sign;
729 break;
730 case float_round_down:
731 inc = p.sign ? round_mask : 0;
732 overflow_norm = !p.sign;
733 break;
734 default:
735 g_assert_not_reached();
736 }
737
738 exp += parm->exp_bias;
739 if (likely(exp > 0)) {
740 if (frac & round_mask) {
741 flags |= float_flag_inexact;
742 frac += inc;
743 if (frac & DECOMPOSED_OVERFLOW_BIT) {
744 frac >>= 1;
745 exp++;
746 }
747 }
748 frac >>= frac_shift;
749
ca3a3d5a
AB
750 if (parm->arm_althp) {
751 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
752 if (unlikely(exp > exp_max)) {
753 /* Overflow. Return the maximum normal. */
754 flags = float_flag_invalid;
755 exp = exp_max;
756 frac = -1;
757 }
758 } else if (unlikely(exp >= exp_max)) {
6fff2167
AB
759 flags |= float_flag_overflow | float_flag_inexact;
760 if (overflow_norm) {
761 exp = exp_max - 1;
762 frac = -1;
763 } else {
764 p.cls = float_class_inf;
765 goto do_inf;
766 }
767 }
768 } else if (s->flush_to_zero) {
769 flags |= float_flag_output_denormal;
770 p.cls = float_class_zero;
771 goto do_zero;
772 } else {
773 bool is_tiny = (s->float_detect_tininess
774 == float_tininess_before_rounding)
775 || (exp < 0)
776 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
777
778 shift64RightJamming(frac, 1 - exp, &frac);
779 if (frac & round_mask) {
780 /* Need to recompute round-to-even. */
781 if (s->float_rounding_mode == float_round_nearest_even) {
782 inc = ((frac & roundeven_mask) != frac_lsbm1
783 ? frac_lsbm1 : 0);
784 }
785 flags |= float_flag_inexact;
786 frac += inc;
787 }
788
789 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
790 frac >>= frac_shift;
791
792 if (is_tiny && (flags & float_flag_inexact)) {
793 flags |= float_flag_underflow;
794 }
795 if (exp == 0 && frac == 0) {
796 p.cls = float_class_zero;
797 }
798 }
799 break;
800
801 case float_class_zero:
802 do_zero:
803 exp = 0;
804 frac = 0;
805 break;
806
807 case float_class_inf:
808 do_inf:
ca3a3d5a 809 assert(!parm->arm_althp);
6fff2167
AB
810 exp = exp_max;
811 frac = 0;
812 break;
813
814 case float_class_qnan:
815 case float_class_snan:
ca3a3d5a 816 assert(!parm->arm_althp);
6fff2167 817 exp = exp_max;
94933df0 818 frac >>= parm->frac_shift;
6fff2167
AB
819 break;
820
821 default:
822 g_assert_not_reached();
823 }
824
825 float_raise(flags, s);
826 p.exp = exp;
827 p.frac = frac;
828 return p;
829}
830
6fed16b2
AB
831/* Explicit FloatFmt version */
832static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
833 const FloatFmt *params)
834{
f9943c7f 835 return sf_canonicalize(float16_unpack_raw(f), params, s);
6fed16b2
AB
836}
837
6fff2167
AB
838static FloatParts float16_unpack_canonical(float16 f, float_status *s)
839{
6fed16b2
AB
840 return float16a_unpack_canonical(f, s, &float16_params);
841}
842
843static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
844 const FloatFmt *params)
845{
846 return float16_pack_raw(round_canonical(p, s, params));
6fff2167
AB
847}
848
849static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
850{
6fed16b2 851 return float16a_round_pack_canonical(p, s, &float16_params);
6fff2167
AB
852}
853
854static FloatParts float32_unpack_canonical(float32 f, float_status *s)
855{
f9943c7f 856 return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
6fff2167
AB
857}
858
859static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
860{
0bcfbcbe 861 return float32_pack_raw(round_canonical(p, s, &float32_params));
6fff2167
AB
862}
863
864static FloatParts float64_unpack_canonical(float64 f, float_status *s)
865{
f9943c7f 866 return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
6fff2167
AB
867}
868
869static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
870{
0bcfbcbe 871 return float64_pack_raw(round_canonical(p, s, &float64_params));
6fff2167
AB
872}
873
dbe4d53a
AB
874static FloatParts return_nan(FloatParts a, float_status *s)
875{
876 switch (a.cls) {
877 case float_class_snan:
878 s->float_exception_flags |= float_flag_invalid;
0bcfbcbe 879 a = parts_silence_nan(a, s);
dbe4d53a
AB
880 /* fall through */
881 case float_class_qnan:
882 if (s->default_nan_mode) {
f7e598e2 883 return parts_default_nan(s);
dbe4d53a
AB
884 }
885 break;
886
887 default:
888 g_assert_not_reached();
889 }
890 return a;
891}
892
6fff2167
AB
893static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
894{
895 if (is_snan(a.cls) || is_snan(b.cls)) {
896 s->float_exception_flags |= float_flag_invalid;
897 }
898
899 if (s->default_nan_mode) {
f7e598e2 900 return parts_default_nan(s);
6fff2167 901 } else {
4f251cfd 902 if (pickNaN(a.cls, b.cls,
6fff2167
AB
903 a.frac > b.frac ||
904 (a.frac == b.frac && a.sign < b.sign))) {
905 a = b;
906 }
0bcfbcbe
RH
907 if (is_snan(a.cls)) {
908 return parts_silence_nan(a, s);
909 }
6fff2167
AB
910 }
911 return a;
912}
913
d446830a
AB
914static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
915 bool inf_zero, float_status *s)
916{
1839189b
PM
917 int which;
918
d446830a
AB
919 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
920 s->float_exception_flags |= float_flag_invalid;
921 }
922
3bd2dec1 923 which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
1839189b 924
d446830a 925 if (s->default_nan_mode) {
1839189b
PM
926 /* Note that this check is after pickNaNMulAdd so that function
927 * has an opportunity to set the Invalid flag.
928 */
f7e598e2 929 which = 3;
1839189b 930 }
d446830a 931
1839189b
PM
932 switch (which) {
933 case 0:
934 break;
935 case 1:
936 a = b;
937 break;
938 case 2:
939 a = c;
940 break;
941 case 3:
f7e598e2 942 return parts_default_nan(s);
1839189b
PM
943 default:
944 g_assert_not_reached();
d446830a 945 }
1839189b 946
0bcfbcbe
RH
947 if (is_snan(a.cls)) {
948 return parts_silence_nan(a, s);
949 }
d446830a
AB
950 return a;
951}
952
6fff2167
AB
953/*
954 * Returns the result of adding or subtracting the values of the
955 * floating-point values `a' and `b'. The operation is performed
956 * according to the IEC/IEEE Standard for Binary Floating-Point
957 * Arithmetic.
958 */
959
960static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
961 float_status *s)
962{
963 bool a_sign = a.sign;
964 bool b_sign = b.sign ^ subtract;
965
966 if (a_sign != b_sign) {
967 /* Subtraction */
968
969 if (a.cls == float_class_normal && b.cls == float_class_normal) {
970 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
971 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
972 a.frac = a.frac - b.frac;
973 } else {
974 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
975 a.frac = b.frac - a.frac;
976 a.exp = b.exp;
977 a_sign ^= 1;
978 }
979
980 if (a.frac == 0) {
981 a.cls = float_class_zero;
982 a.sign = s->float_rounding_mode == float_round_down;
983 } else {
984 int shift = clz64(a.frac) - 1;
985 a.frac = a.frac << shift;
986 a.exp = a.exp - shift;
987 a.sign = a_sign;
988 }
989 return a;
990 }
991 if (is_nan(a.cls) || is_nan(b.cls)) {
992 return pick_nan(a, b, s);
993 }
994 if (a.cls == float_class_inf) {
995 if (b.cls == float_class_inf) {
996 float_raise(float_flag_invalid, s);
f7e598e2 997 return parts_default_nan(s);
6fff2167
AB
998 }
999 return a;
1000 }
1001 if (a.cls == float_class_zero && b.cls == float_class_zero) {
1002 a.sign = s->float_rounding_mode == float_round_down;
1003 return a;
1004 }
1005 if (a.cls == float_class_zero || b.cls == float_class_inf) {
1006 b.sign = a_sign ^ 1;
1007 return b;
1008 }
1009 if (b.cls == float_class_zero) {
1010 return a;
1011 }
1012 } else {
1013 /* Addition */
1014 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1015 if (a.exp > b.exp) {
1016 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
1017 } else if (a.exp < b.exp) {
1018 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
1019 a.exp = b.exp;
1020 }
1021 a.frac += b.frac;
1022 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
64d450a0 1023 shift64RightJamming(a.frac, 1, &a.frac);
6fff2167
AB
1024 a.exp += 1;
1025 }
1026 return a;
1027 }
1028 if (is_nan(a.cls) || is_nan(b.cls)) {
1029 return pick_nan(a, b, s);
1030 }
1031 if (a.cls == float_class_inf || b.cls == float_class_zero) {
1032 return a;
1033 }
1034 if (b.cls == float_class_inf || a.cls == float_class_zero) {
1035 b.sign = b_sign;
1036 return b;
1037 }
1038 }
1039 g_assert_not_reached();
1040}
1041
1042/*
1043 * Returns the result of adding or subtracting the floating-point
1044 * values `a' and `b'. The operation is performed according to the
1045 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1046 */
1047
97ff87c0 1048float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
6fff2167
AB
1049{
1050 FloatParts pa = float16_unpack_canonical(a, status);
1051 FloatParts pb = float16_unpack_canonical(b, status);
1052 FloatParts pr = addsub_floats(pa, pb, false, status);
1053
1054 return float16_round_pack_canonical(pr, status);
1055}
1056
1b615d48
EC
1057float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
1058{
1059 FloatParts pa = float16_unpack_canonical(a, status);
1060 FloatParts pb = float16_unpack_canonical(b, status);
1061 FloatParts pr = addsub_floats(pa, pb, true, status);
1062
1063 return float16_round_pack_canonical(pr, status);
1064}
1065
1066static float32 QEMU_SOFTFLOAT_ATTR
1067soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
6fff2167
AB
1068{
1069 FloatParts pa = float32_unpack_canonical(a, status);
1070 FloatParts pb = float32_unpack_canonical(b, status);
1b615d48 1071 FloatParts pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1072
1073 return float32_round_pack_canonical(pr, status);
1074}
1075
1b615d48
EC
1076static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
1077{
1078 return soft_f32_addsub(a, b, false, status);
1079}
1080
1081static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1082{
1083 return soft_f32_addsub(a, b, true, status);
1084}
1085
1086static float64 QEMU_SOFTFLOAT_ATTR
1087soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
6fff2167
AB
1088{
1089 FloatParts pa = float64_unpack_canonical(a, status);
1090 FloatParts pb = float64_unpack_canonical(b, status);
1b615d48 1091 FloatParts pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1092
1093 return float64_round_pack_canonical(pr, status);
1094}
1095
1b615d48 1096static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1097{
1b615d48
EC
1098 return soft_f64_addsub(a, b, false, status);
1099}
6fff2167 1100
1b615d48
EC
1101static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1102{
1103 return soft_f64_addsub(a, b, true, status);
6fff2167
AB
1104}
1105
/* Host single-precision addition; float32_add() passes it as the hard op. */
static float hard_f32_add(float a, float b)
{
    const float sum = a + b;

    return sum;
}
6fff2167 1110
1b615d48
EC
/* Host single-precision subtraction; float32_sub() passes it as the hard op. */
static float hard_f32_sub(float a, float b)
{
    const float diff = a - b;

    return diff;
}
1115
/* Host double-precision addition; float64_add() passes it as the hard op. */
static double hard_f64_add(double a, double b)
{
    const double sum = a + b;

    return sum;
}
6fff2167 1120
1b615d48
EC
/* Host double-precision subtraction; float64_sub() passes it as the hard op. */
static double hard_f64_sub(double a, double b)
{
    const double diff = a - b;

    return diff;
}
1125
1126static bool f32_addsub_post(union_float32 a, union_float32 b)
1127{
1128 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1129 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1130 }
1131 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1132}
1133
1134static bool f64_addsub_post(union_float64 a, union_float64 b)
1135{
1136 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1137 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1138 } else {
1139 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1140 }
1141}
1142
1143static float32 float32_addsub(float32 a, float32 b, float_status *s,
1144 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1145{
1146 return float32_gen2(a, b, s, hard, soft,
1147 f32_is_zon2, f32_addsub_post, NULL, NULL);
1148}
1149
1150static float64 float64_addsub(float64 a, float64 b, float_status *s,
1151 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1152{
1153 return float64_gen2(a, b, s, hard, soft,
1154 f64_is_zon2, f64_addsub_post, NULL, NULL);
1155}
1156
1157float32 QEMU_FLATTEN
1158float32_add(float32 a, float32 b, float_status *s)
1159{
1160 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1161}
1162
1163float32 QEMU_FLATTEN
1164float32_sub(float32 a, float32 b, float_status *s)
1165{
1166 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1167}
1168
1169float64 QEMU_FLATTEN
1170float64_add(float64 a, float64 b, float_status *s)
1171{
1172 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1173}
1174
1175float64 QEMU_FLATTEN
1176float64_sub(float64 a, float64 b, float_status *s)
1177{
1178 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1179}
1180
74d707e2
AB
1181/*
1182 * Returns the result of multiplying the floating-point values `a' and
1183 * `b'. The operation is performed according to the IEC/IEEE Standard
1184 * for Binary Floating-Point Arithmetic.
1185 */
1186
1187static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
1188{
1189 bool sign = a.sign ^ b.sign;
1190
1191 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1192 uint64_t hi, lo;
1193 int exp = a.exp + b.exp;
1194
1195 mul64To128(a.frac, b.frac, &hi, &lo);
1196 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1197 if (lo & DECOMPOSED_OVERFLOW_BIT) {
1198 shift64RightJamming(lo, 1, &lo);
1199 exp += 1;
1200 }
1201
1202 /* Re-use a */
1203 a.exp = exp;
1204 a.sign = sign;
1205 a.frac = lo;
1206 return a;
1207 }
1208 /* handle all the NaN cases */
1209 if (is_nan(a.cls) || is_nan(b.cls)) {
1210 return pick_nan(a, b, s);
1211 }
1212 /* Inf * Zero == NaN */
1213 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
1214 (a.cls == float_class_zero && b.cls == float_class_inf)) {
1215 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1216 return parts_default_nan(s);
74d707e2
AB
1217 }
1218 /* Multiply by 0 or Inf */
1219 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1220 a.sign = sign;
1221 return a;
1222 }
1223 if (b.cls == float_class_inf || b.cls == float_class_zero) {
1224 b.sign = sign;
1225 return b;
1226 }
1227 g_assert_not_reached();
1228}
1229
97ff87c0 1230float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2
AB
1231{
1232 FloatParts pa = float16_unpack_canonical(a, status);
1233 FloatParts pb = float16_unpack_canonical(b, status);
1234 FloatParts pr = mul_floats(pa, pb, status);
1235
1236 return float16_round_pack_canonical(pr, status);
1237}
1238
2dfabc86
EC
1239static float32 QEMU_SOFTFLOAT_ATTR
1240soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2
AB
1241{
1242 FloatParts pa = float32_unpack_canonical(a, status);
1243 FloatParts pb = float32_unpack_canonical(b, status);
1244 FloatParts pr = mul_floats(pa, pb, status);
1245
1246 return float32_round_pack_canonical(pr, status);
1247}
1248
2dfabc86
EC
1249static float64 QEMU_SOFTFLOAT_ATTR
1250soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2
AB
1251{
1252 FloatParts pa = float64_unpack_canonical(a, status);
1253 FloatParts pb = float64_unpack_canonical(b, status);
1254 FloatParts pr = mul_floats(pa, pb, status);
1255
1256 return float64_round_pack_canonical(pr, status);
1257}
1258
2dfabc86
EC
/* Host single-precision multiply; float32_mul() passes it as the hard op. */
static float hard_f32_mul(float a, float b)
{
    const float product = a * b;

    return product;
}
1263
/* Host double-precision multiply; float64_mul() passes it as the hard op. */
static double hard_f64_mul(double a, double b)
{
    const double product = a * b;

    return product;
}
1268
1269static bool f32_mul_fast_test(union_float32 a, union_float32 b)
1270{
1271 return float32_is_zero(a.s) || float32_is_zero(b.s);
1272}
1273
1274static bool f64_mul_fast_test(union_float64 a, union_float64 b)
1275{
1276 return float64_is_zero(a.s) || float64_is_zero(b.s);
1277}
1278
1279static float32 f32_mul_fast_op(float32 a, float32 b, float_status *s)
1280{
1281 bool signbit = float32_is_neg(a) ^ float32_is_neg(b);
1282
1283 return float32_set_sign(float32_zero, signbit);
1284}
1285
1286static float64 f64_mul_fast_op(float64 a, float64 b, float_status *s)
1287{
1288 bool signbit = float64_is_neg(a) ^ float64_is_neg(b);
1289
1290 return float64_set_sign(float64_zero, signbit);
1291}
1292
1293float32 QEMU_FLATTEN
1294float32_mul(float32 a, float32 b, float_status *s)
1295{
1296 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
1297 f32_is_zon2, NULL, f32_mul_fast_test, f32_mul_fast_op);
1298}
1299
1300float64 QEMU_FLATTEN
1301float64_mul(float64 a, float64 b, float_status *s)
1302{
1303 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
1304 f64_is_zon2, NULL, f64_mul_fast_test, f64_mul_fast_op);
1305}
1306
d446830a
AB
1307/*
1308 * Returns the result of multiplying the floating-point values `a' and
1309 * `b' then adding 'c', with no intermediate rounding step after the
1310 * multiplication. The operation is performed according to the
1311 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
1312 * The flags argument allows the caller to select negation of the
1313 * addend, the intermediate product, or the final result. (The
1314 * difference between this and having the caller do a separate
1315 * negation is that negating externally will flip the sign bit on
1316 * NaNs.)
1317 */
1318
/*
 * Fused multiply-add on decomposed operands: (a * b) + c, with the full
 * 128-bit product kept until after the addition.  The result is returned
 * unrounded in a FloatParts.
 *
 * flags is tested against float_muladd_negate_c, float_muladd_negate_product,
 * float_muladd_negate_result and float_muladd_halve_result.
 * NaN operands are delegated to pick_nan_muladd(); Inf * 0 without a NaN
 * raises invalid and returns the default NaN.
 */
1319static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
1320 int flags, float_status *s)
1321{
/* inf_zero: one of a, b is an infinity and the other is a zero. */
1322 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
1323 ((1 << float_class_inf) | (1 << float_class_zero));
1324 bool p_sign;
1325 bool sign_flip = flags & float_muladd_negate_result;
1326 FloatClass p_class;
1327 uint64_t hi, lo;
1328 int p_exp;
1329
1330 /* It is implementation-defined whether the cases of (0,inf,qnan)
1331 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
1332 * they return if they do), so we have to hand this information
1333 * off to the target-specific pick-a-NaN routine.
1334 */
1335 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
1336 return pick_nan_muladd(a, b, c, inf_zero, s);
1337 }
1338
1339 if (inf_zero) {
1340 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1341 return parts_default_nan(s);
d446830a
AB
1342 }
1343
1344 if (flags & float_muladd_negate_c) {
1345 c.sign ^= 1;
1346 }
1347
1348 p_sign = a.sign ^ b.sign;
1349
1350 if (flags & float_muladd_negate_product) {
1351 p_sign ^= 1;
1352 }
1353
/* Classify the product: Inf if either factor is Inf, else zero if either is zero. */
1354 if (a.cls == float_class_inf || b.cls == float_class_inf) {
1355 p_class = float_class_inf;
1356 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
1357 p_class = float_class_zero;
1358 } else {
1359 p_class = float_class_normal;
1360 }
1361
/* Infinite addend: Inf product of opposite sign is invalid, otherwise the result is c's infinity. */
1362 if (c.cls == float_class_inf) {
1363 if (p_class == float_class_inf && p_sign != c.sign) {
1364 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1365 return parts_default_nan(s);
d446830a
AB
1366 } else {
1367 a.cls = float_class_inf;
1368 a.sign = c.sign ^ sign_flip;
f7e598e2 1369 return a;
d446830a 1370 }
d446830a
AB
1371 }
1372
1373 if (p_class == float_class_inf) {
1374 a.cls = float_class_inf;
1375 a.sign = p_sign ^ sign_flip;
1376 return a;
1377 }
1378
/* Zero product: the result is c; for 0 + 0 with differing signs the sign follows the rounding mode. */
1379 if (p_class == float_class_zero) {
1380 if (c.cls == float_class_zero) {
1381 if (p_sign != c.sign) {
1382 p_sign = s->float_rounding_mode == float_round_down;
1383 }
1384 c.sign = p_sign;
1385 } else if (flags & float_muladd_halve_result) {
1386 c.exp -= 1;
1387 }
1388 c.sign ^= sign_flip;
1389 return c;
1390 }
1391
1392 /* a & b should be normals now... */
1393 assert(a.cls == float_class_normal &&
1394 b.cls == float_class_normal);
1395
1396 p_exp = a.exp + b.exp;
1397
1398 /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
1399 * result.
1400 */
1401 mul64To128(a.frac, b.frac, &hi, &lo);
1402 /* binary point now at bit 124 */
1403
1404 /* check for overflow */
1405 if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
1406 shift128RightJamming(hi, lo, 1, &hi, &lo);
1407 p_exp += 1;
1408 }
1409
1410 /* + add/sub */
1411 if (c.cls == float_class_zero) {
1412 /* move binary point back to 62 */
1413 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1414 } else {
1415 int exp_diff = p_exp - c.exp;
1416 if (p_sign == c.sign) {
1417 /* Addition */
/* exp_diff <= 0: shift the product down to c's exponent and add in the low word. */
1418 if (exp_diff <= 0) {
1419 shift128RightJamming(hi, lo,
1420 DECOMPOSED_BINARY_POINT - exp_diff,
1421 &hi, &lo);
1422 lo += c.frac;
1423 p_exp = c.exp;
1424 } else {
1425 uint64_t c_hi, c_lo;
1426 /* shift c to the same binary point as the product (124) */
1427 c_hi = c.frac >> 2;
1428 c_lo = 0;
1429 shift128RightJamming(c_hi, c_lo,
1430 exp_diff,
1431 &c_hi, &c_lo);
1432 add128(hi, lo, c_hi, c_lo, &hi, &lo);
1433 /* move binary point back to 62 */
1434 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1435 }
1436
1437 if (lo & DECOMPOSED_OVERFLOW_BIT) {
1438 shift64RightJamming(lo, 1, &lo);
1439 p_exp += 1;
1440 }
1441
1442 } else {
1443 /* Subtraction */
1444 uint64_t c_hi, c_lo;
1445 /* make C binary point match product at bit 124 */
1446 c_hi = c.frac >> 2;
1447 c_lo = 0;
1448
1449 if (exp_diff <= 0) {
1450 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
/* Subtract the smaller magnitude from the larger; when c wins, the sign flips and c's exponent is used. */
1451 if (exp_diff == 0
1452 &&
1453 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1454 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1455 } else {
1456 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1457 p_sign ^= 1;
1458 p_exp = c.exp;
1459 }
1460 } else {
1461 shift128RightJamming(c_hi, c_lo,
1462 exp_diff,
1463 &c_hi, &c_lo);
1464 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1465 }
1466
/* Exact cancellation: zero result, negative only when rounding down (before sign_flip). */
1467 if (hi == 0 && lo == 0) {
1468 a.cls = float_class_zero;
1469 a.sign = s->float_rounding_mode == float_round_down;
1470 a.sign ^= sign_flip;
1471 return a;
1472 } else {
1473 int shift;
1474 if (hi != 0) {
1475 shift = clz64(hi);
1476 } else {
1477 shift = clz64(lo) + 64;
1478 }
1479 /* Normalizing to a binary point of 124 is the
1480 correct adjust for the exponent. However since we're
1481 shifting, we might as well put the binary point back
1482 at 62 where we really want it. Therefore shift as
1483 if we're leaving 1 bit at the top of the word, but
1484 adjust the exponent as if we're leaving 3 bits. */
1485 shift -= 1;
1486 if (shift >= 64) {
1487 lo = lo << (shift - 64);
1488 } else {
1489 hi = (hi << shift) | (lo >> (64 - shift));
/* Fold any bits still left in the low word into a sticky bit. */
1490 lo = hi | ((lo << shift) != 0);
1491 }
1492 p_exp -= shift - 2;
1493 }
1494 }
1495 }
1496
1497 if (flags & float_muladd_halve_result) {
1498 p_exp -= 1;
1499 }
1500
1501 /* finally prepare our result */
1502 a.cls = float_class_normal;
1503 a.sign = p_sign ^ sign_flip;
1504 a.exp = p_exp;
1505 a.frac = lo;
1506
1507 return a;
1508}
1509
97ff87c0 1510float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
d446830a
AB
1511 int flags, float_status *status)
1512{
1513 FloatParts pa = float16_unpack_canonical(a, status);
1514 FloatParts pb = float16_unpack_canonical(b, status);
1515 FloatParts pc = float16_unpack_canonical(c, status);
1516 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1517
1518 return float16_round_pack_canonical(pr, status);
1519}
1520
ccf770ba
EC
1521static float32 QEMU_SOFTFLOAT_ATTR
1522soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1523 float_status *status)
d446830a
AB
1524{
1525 FloatParts pa = float32_unpack_canonical(a, status);
1526 FloatParts pb = float32_unpack_canonical(b, status);
1527 FloatParts pc = float32_unpack_canonical(c, status);
1528 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1529
1530 return float32_round_pack_canonical(pr, status);
1531}
1532
ccf770ba
EC
1533static float64 QEMU_SOFTFLOAT_ATTR
1534soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1535 float_status *status)
d446830a
AB
1536{
1537 FloatParts pa = float64_unpack_canonical(a, status);
1538 FloatParts pb = float64_unpack_canonical(b, status);
1539 FloatParts pc = float64_unpack_canonical(c, status);
1540 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1541
1542 return float64_round_pack_canonical(pr, status);
1543}
1544
ccf770ba
EC
1545float32 QEMU_FLATTEN
1546float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1547{
1548 union_float32 ua, ub, uc, ur;
1549
1550 ua.s = xa;
1551 ub.s = xb;
1552 uc.s = xc;
1553
1554 if (unlikely(!can_use_fpu(s))) {
1555 goto soft;
1556 }
1557 if (unlikely(flags & float_muladd_halve_result)) {
1558 goto soft;
1559 }
1560
1561 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1562 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1563 goto soft;
1564 }
1565 /*
1566 * When (a || b) == 0, there's no need to check for under/over flow,
1567 * since we know the addend is (normal || 0) and the product is 0.
1568 */
1569 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1570 union_float32 up;
1571 bool prod_sign;
1572
1573 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1574 prod_sign ^= !!(flags & float_muladd_negate_product);
1575 up.s = float32_set_sign(float32_zero, prod_sign);
1576
1577 if (flags & float_muladd_negate_c) {
1578 uc.h = -uc.h;
1579 }
1580 ur.h = up.h + uc.h;
1581 } else {
1582 if (flags & float_muladd_negate_product) {
1583 ua.h = -ua.h;
1584 }
1585 if (flags & float_muladd_negate_c) {
1586 uc.h = -uc.h;
1587 }
1588
1589 ur.h = fmaf(ua.h, ub.h, uc.h);
1590
1591 if (unlikely(f32_is_inf(ur))) {
1592 s->float_exception_flags |= float_flag_overflow;
1593 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
1594 goto soft;
1595 }
1596 }
1597 if (flags & float_muladd_negate_result) {
1598 return float32_chs(ur.s);
1599 }
1600 return ur.s;
1601
1602 soft:
1603 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1604}
1605
1606float64 QEMU_FLATTEN
1607float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1608{
1609 union_float64 ua, ub, uc, ur;
1610
1611 ua.s = xa;
1612 ub.s = xb;
1613 uc.s = xc;
1614
1615 if (unlikely(!can_use_fpu(s))) {
1616 goto soft;
1617 }
1618 if (unlikely(flags & float_muladd_halve_result)) {
1619 goto soft;
1620 }
1621
1622 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1623 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1624 goto soft;
1625 }
1626 /*
1627 * When (a || b) == 0, there's no need to check for under/over flow,
1628 * since we know the addend is (normal || 0) and the product is 0.
1629 */
1630 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1631 union_float64 up;
1632 bool prod_sign;
1633
1634 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1635 prod_sign ^= !!(flags & float_muladd_negate_product);
1636 up.s = float64_set_sign(float64_zero, prod_sign);
1637
1638 if (flags & float_muladd_negate_c) {
1639 uc.h = -uc.h;
1640 }
1641 ur.h = up.h + uc.h;
1642 } else {
1643 if (flags & float_muladd_negate_product) {
1644 ua.h = -ua.h;
1645 }
1646 if (flags & float_muladd_negate_c) {
1647 uc.h = -uc.h;
1648 }
1649
1650 ur.h = fma(ua.h, ub.h, uc.h);
1651
1652 if (unlikely(f64_is_inf(ur))) {
1653 s->float_exception_flags |= float_flag_overflow;
1654 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
1655 goto soft;
1656 }
1657 }
1658 if (flags & float_muladd_negate_result) {
1659 return float64_chs(ur.s);
1660 }
1661 return ur.s;
1662
1663 soft:
1664 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1665}
1666
cf07323d
AB
1667/*
1668 * Returns the result of dividing the floating-point value `a' by the
1669 * corresponding value `b'. The operation is performed according to
1670 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1671 */
1672
1673static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1674{
1675 bool sign = a.sign ^ b.sign;
1676
1677 if (a.cls == float_class_normal && b.cls == float_class_normal) {
5dfbc9e4 1678 uint64_t n0, n1, q, r;
cf07323d 1679 int exp = a.exp - b.exp;
5dfbc9e4
RH
1680
1681 /*
1682 * We want a 2*N / N-bit division to produce exactly an N-bit
1683 * result, so that we do not lose any precision and so that we
1684 * do not have to renormalize afterward. If A.frac < B.frac,
1685 * then division would produce an (N-1)-bit result; shift A left
1686 * by one to produce the an N-bit result, and decrement the
1687 * exponent to match.
1688 *
1689 * The udiv_qrnnd algorithm that we're using requires normalization,
1690 * i.e. the msb of the denominator must be set. Since we know that
1691 * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
1692 * by one (more), and the remainder must be shifted right by one.
1693 */
cf07323d
AB
1694 if (a.frac < b.frac) {
1695 exp -= 1;
5dfbc9e4 1696 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
cf07323d 1697 } else {
5dfbc9e4 1698 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
cf07323d 1699 }
5dfbc9e4
RH
1700 q = udiv_qrnnd(&r, n1, n0, b.frac << 1);
1701
1702 /*
1703 * Set lsb if there is a remainder, to set inexact.
1704 * As mentioned above, to find the actual value of the remainder we
1705 * would need to shift right, but (1) we are only concerned about
1706 * non-zero-ness, and (2) the remainder will always be even because
1707 * both inputs to the division primitive are even.
1708 */
1709 a.frac = q | (r != 0);
cf07323d
AB
1710 a.sign = sign;
1711 a.exp = exp;
1712 return a;
1713 }
1714 /* handle all the NaN cases */
1715 if (is_nan(a.cls) || is_nan(b.cls)) {
1716 return pick_nan(a, b, s);
1717 }
1718 /* 0/0 or Inf/Inf */
1719 if (a.cls == b.cls
1720 &&
1721 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1722 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1723 return parts_default_nan(s);
cf07323d 1724 }
9cb4e398
AB
1725 /* Inf / x or 0 / x */
1726 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1727 a.sign = sign;
1728 return a;
1729 }
cf07323d
AB
1730 /* Div 0 => Inf */
1731 if (b.cls == float_class_zero) {
1732 s->float_exception_flags |= float_flag_divbyzero;
1733 a.cls = float_class_inf;
1734 a.sign = sign;
1735 return a;
1736 }
cf07323d
AB
1737 /* Div by Inf */
1738 if (b.cls == float_class_inf) {
1739 a.cls = float_class_zero;
1740 a.sign = sign;
1741 return a;
1742 }
1743 g_assert_not_reached();
1744}
1745
1746float16 float16_div(float16 a, float16 b, float_status *status)
1747{
1748 FloatParts pa = float16_unpack_canonical(a, status);
1749 FloatParts pb = float16_unpack_canonical(b, status);
1750 FloatParts pr = div_floats(pa, pb, status);
1751
1752 return float16_round_pack_canonical(pr, status);
1753}
1754
4a629561
EC
1755static float32 QEMU_SOFTFLOAT_ATTR
1756soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d
AB
1757{
1758 FloatParts pa = float32_unpack_canonical(a, status);
1759 FloatParts pb = float32_unpack_canonical(b, status);
1760 FloatParts pr = div_floats(pa, pb, status);
1761
1762 return float32_round_pack_canonical(pr, status);
1763}
1764
4a629561
EC
1765static float64 QEMU_SOFTFLOAT_ATTR
1766soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d
AB
1767{
1768 FloatParts pa = float64_unpack_canonical(a, status);
1769 FloatParts pb = float64_unpack_canonical(b, status);
1770 FloatParts pr = div_floats(pa, pb, status);
1771
1772 return float64_round_pack_canonical(pr, status);
1773}
1774
4a629561
EC
/* Host single-precision divide; float32_div() passes it as the hard op. */
static float hard_f32_div(float a, float b)
{
    const float quotient = a / b;

    return quotient;
}
1779
/* Host double-precision divide; float64_div() passes it as the hard op. */
static double hard_f64_div(double a, double b)
{
    const double quotient = a / b;

    return quotient;
}
1784
1785static bool f32_div_pre(union_float32 a, union_float32 b)
1786{
1787 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1788 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1789 fpclassify(b.h) == FP_NORMAL;
1790 }
1791 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1792}
1793
1794static bool f64_div_pre(union_float64 a, union_float64 b)
1795{
1796 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1797 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1798 fpclassify(b.h) == FP_NORMAL;
1799 }
1800 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1801}
1802
1803static bool f32_div_post(union_float32 a, union_float32 b)
1804{
1805 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1806 return fpclassify(a.h) != FP_ZERO;
1807 }
1808 return !float32_is_zero(a.s);
1809}
1810
1811static bool f64_div_post(union_float64 a, union_float64 b)
1812{
1813 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1814 return fpclassify(a.h) != FP_ZERO;
1815 }
1816 return !float64_is_zero(a.s);
1817}
1818
1819float32 QEMU_FLATTEN
1820float32_div(float32 a, float32 b, float_status *s)
1821{
1822 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
1823 f32_div_pre, f32_div_post, NULL, NULL);
1824}
1825
1826float64 QEMU_FLATTEN
1827float64_div(float64 a, float64 b, float_status *s)
1828{
1829 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
1830 f64_div_pre, f64_div_post, NULL, NULL);
1831}
1832
6fed16b2
AB
1833/*
1834 * Float to Float conversions
1835 *
1836 * Returns the result of converting one float format to another. The
1837 * conversion is performed according to the IEC/IEEE Standard for
1838 * Binary Floating-Point Arithmetic.
1839 *
1840 * The float_to_float helper only needs to take care of raising
1841 * invalid exceptions and handling the conversion on NaNs.
1842 */
1843
1844static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1845 float_status *s)
1846{
1847 if (dstf->arm_althp) {
1848 switch (a.cls) {
1849 case float_class_qnan:
1850 case float_class_snan:
1851 /* There is no NaN in the destination format. Raise Invalid
1852 * and return a zero with the sign of the input NaN.
1853 */
1854 s->float_exception_flags |= float_flag_invalid;
1855 a.cls = float_class_zero;
1856 a.frac = 0;
1857 a.exp = 0;
1858 break;
1859
1860 case float_class_inf:
1861 /* There is no Inf in the destination format. Raise Invalid
1862 * and return the maximum normal with the correct sign.
1863 */
1864 s->float_exception_flags |= float_flag_invalid;
1865 a.cls = float_class_normal;
1866 a.exp = dstf->exp_max;
1867 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1868 break;
1869
1870 default:
1871 break;
1872 }
1873 } else if (is_nan(a.cls)) {
1874 if (is_snan(a.cls)) {
1875 s->float_exception_flags |= float_flag_invalid;
1876 a = parts_silence_nan(a, s);
1877 }
1878 if (s->default_nan_mode) {
1879 return parts_default_nan(s);
1880 }
1881 }
1882 return a;
1883}
1884
1885float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1886{
1887 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1888 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1889 FloatParts pr = float_to_float(p, &float32_params, s);
1890 return float32_round_pack_canonical(pr, s);
1891}
1892
1893float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1894{
1895 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1896 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1897 FloatParts pr = float_to_float(p, &float64_params, s);
1898 return float64_round_pack_canonical(pr, s);
1899}
1900
1901float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1902{
1903 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1904 FloatParts p = float32_unpack_canonical(a, s);
1905 FloatParts pr = float_to_float(p, fmt16, s);
1906 return float16a_round_pack_canonical(pr, s, fmt16);
1907}
1908
1909float64 float32_to_float64(float32 a, float_status *s)
1910{
1911 FloatParts p = float32_unpack_canonical(a, s);
1912 FloatParts pr = float_to_float(p, &float64_params, s);
1913 return float64_round_pack_canonical(pr, s);
1914}
1915
1916float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1917{
1918 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1919 FloatParts p = float64_unpack_canonical(a, s);
1920 FloatParts pr = float_to_float(p, fmt16, s);
1921 return float16a_round_pack_canonical(pr, s, fmt16);
1922}
1923
1924float32 float64_to_float32(float64 a, float_status *s)
1925{
1926 FloatParts p = float64_unpack_canonical(a, s);
1927 FloatParts pr = float_to_float(p, &float32_params, s);
1928 return float32_round_pack_canonical(pr, s);
1929}
1930
dbe4d53a
AB
1931/*
1932 * Rounds the floating-point value `a' to an integer, and returns the
1933 * result as a floating-point value. The operation is performed
1934 * according to the IEC/IEEE Standard for Binary Floating-Point
1935 * Arithmetic.
1936 */
1937
2f6c74be
RH
1938static FloatParts round_to_int(FloatParts a, int rmode,
1939 int scale, float_status *s)
dbe4d53a 1940{
2f6c74be
RH
1941 switch (a.cls) {
1942 case float_class_qnan:
1943 case float_class_snan:
dbe4d53a 1944 return return_nan(a, s);
dbe4d53a 1945
dbe4d53a
AB
1946 case float_class_zero:
1947 case float_class_inf:
dbe4d53a
AB
1948 /* already "integral" */
1949 break;
2f6c74be 1950
dbe4d53a 1951 case float_class_normal:
2f6c74be
RH
1952 scale = MIN(MAX(scale, -0x10000), 0x10000);
1953 a.exp += scale;
1954
dbe4d53a
AB
1955 if (a.exp >= DECOMPOSED_BINARY_POINT) {
1956 /* already integral */
1957 break;
1958 }
1959 if (a.exp < 0) {
1960 bool one;
1961 /* all fractional */
1962 s->float_exception_flags |= float_flag_inexact;
2f6c74be 1963 switch (rmode) {
dbe4d53a
AB
1964 case float_round_nearest_even:
1965 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
1966 break;
1967 case float_round_ties_away:
1968 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
1969 break;
1970 case float_round_to_zero:
1971 one = false;
1972 break;
1973 case float_round_up:
1974 one = !a.sign;
1975 break;
1976 case float_round_down:
1977 one = a.sign;
1978 break;
1979 default:
1980 g_assert_not_reached();
1981 }
1982
1983 if (one) {
1984 a.frac = DECOMPOSED_IMPLICIT_BIT;
1985 a.exp = 0;
1986 } else {
1987 a.cls = float_class_zero;
1988 }
1989 } else {
1990 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
1991 uint64_t frac_lsbm1 = frac_lsb >> 1;
1992 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
1993 uint64_t rnd_mask = rnd_even_mask >> 1;
1994 uint64_t inc;
1995
2f6c74be 1996 switch (rmode) {
dbe4d53a
AB
1997 case float_round_nearest_even:
1998 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
1999 break;
2000 case float_round_ties_away:
2001 inc = frac_lsbm1;
2002 break;
2003 case float_round_to_zero:
2004 inc = 0;
2005 break;
2006 case float_round_up:
2007 inc = a.sign ? 0 : rnd_mask;
2008 break;
2009 case float_round_down:
2010 inc = a.sign ? rnd_mask : 0;
2011 break;
2012 default:
2013 g_assert_not_reached();
2014 }
2015
2016 if (a.frac & rnd_mask) {
2017 s->float_exception_flags |= float_flag_inexact;
2018 a.frac += inc;
2019 a.frac &= ~rnd_mask;
2020 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
2021 a.frac >>= 1;
2022 a.exp++;
2023 }
2024 }
2025 }
2026 break;
2027 default:
2028 g_assert_not_reached();
2029 }
2030 return a;
2031}
2032
2033float16 float16_round_to_int(float16 a, float_status *s)
2034{
2035 FloatParts pa = float16_unpack_canonical(a, s);
2f6c74be 2036 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2037 return float16_round_pack_canonical(pr, s);
2038}
2039
2040float32 float32_round_to_int(float32 a, float_status *s)
2041{
2042 FloatParts pa = float32_unpack_canonical(a, s);
2f6c74be 2043 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2044 return float32_round_pack_canonical(pr, s);
2045}
2046
2047float64 float64_round_to_int(float64 a, float_status *s)
2048{
2049 FloatParts pa = float64_unpack_canonical(a, s);
2f6c74be 2050 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2051 return float64_round_pack_canonical(pr, s);
2052}
2053
ab52f973
AB
2054/*
2055 * Returns the result of converting the floating-point value `a' to
2056 * the two's complement integer format. The conversion is performed
2057 * according to the IEC/IEEE Standard for Binary Floating-Point
2058 * Arithmetic---which means in particular that the conversion is
2059 * rounded according to the current rounding mode. If `a' is a NaN,
2060 * the largest positive integer is returned. Otherwise, if the
2061 * conversion overflows, the largest integer with the same sign as `a'
2062 * is returned.
2063*/
2064
2f6c74be 2065static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale,
ab52f973
AB
2066 int64_t min, int64_t max,
2067 float_status *s)
2068{
2069 uint64_t r;
2070 int orig_flags = get_float_exception_flags(s);
2f6c74be 2071 FloatParts p = round_to_int(in, rmode, scale, s);
ab52f973
AB
2072
2073 switch (p.cls) {
2074 case float_class_snan:
2075 case float_class_qnan:
801bc563 2076 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2077 return max;
2078 case float_class_inf:
801bc563 2079 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2080 return p.sign ? min : max;
2081 case float_class_zero:
2082 return 0;
2083 case float_class_normal:
2084 if (p.exp < DECOMPOSED_BINARY_POINT) {
2085 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2086 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2087 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2088 } else {
2089 r = UINT64_MAX;
2090 }
2091 if (p.sign) {
33358375 2092 if (r <= -(uint64_t) min) {
ab52f973
AB
2093 return -r;
2094 } else {
2095 s->float_exception_flags = orig_flags | float_flag_invalid;
2096 return min;
2097 }
2098 } else {
33358375 2099 if (r <= max) {
ab52f973
AB
2100 return r;
2101 } else {
2102 s->float_exception_flags = orig_flags | float_flag_invalid;
2103 return max;
2104 }
2105 }
2106 default:
2107 g_assert_not_reached();
2108 }
2109}
2110
2f6c74be
RH
2111int16_t float16_to_int16_scalbn(float16 a, int rmode, int scale,
2112 float_status *s)
2113{
2114 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2115 rmode, scale, INT16_MIN, INT16_MAX, s);
2116}
2117
2118int32_t float16_to_int32_scalbn(float16 a, int rmode, int scale,
2119 float_status *s)
2120{
2121 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2122 rmode, scale, INT32_MIN, INT32_MAX, s);
2123}
2124
2125int64_t float16_to_int64_scalbn(float16 a, int rmode, int scale,
2126 float_status *s)
2127{
2128 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2129 rmode, scale, INT64_MIN, INT64_MAX, s);
2130}
2131
2132int16_t float32_to_int16_scalbn(float32 a, int rmode, int scale,
2133 float_status *s)
2134{
2135 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2136 rmode, scale, INT16_MIN, INT16_MAX, s);
2137}
2138
2139int32_t float32_to_int32_scalbn(float32 a, int rmode, int scale,
2140 float_status *s)
2141{
2142 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2143 rmode, scale, INT32_MIN, INT32_MAX, s);
2144}
2145
2146int64_t float32_to_int64_scalbn(float32 a, int rmode, int scale,
2147 float_status *s)
2148{
2149 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2150 rmode, scale, INT64_MIN, INT64_MAX, s);
2151}
2152
2153int16_t float64_to_int16_scalbn(float64 a, int rmode, int scale,
2154 float_status *s)
2155{
2156 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2157 rmode, scale, INT16_MIN, INT16_MAX, s);
2158}
2159
2160int32_t float64_to_int32_scalbn(float64 a, int rmode, int scale,
2161 float_status *s)
2162{
2163 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2164 rmode, scale, INT32_MIN, INT32_MAX, s);
2165}
2166
2167int64_t float64_to_int64_scalbn(float64 a, int rmode, int scale,
2168 float_status *s)
2169{
2170 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2171 rmode, scale, INT64_MIN, INT64_MAX, s);
2172}
2173
2174int16_t float16_to_int16(float16 a, float_status *s)
2175{
2176 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2177}
2178
2179int32_t float16_to_int32(float16 a, float_status *s)
2180{
2181 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2182}
2183
2184int64_t float16_to_int64(float16 a, float_status *s)
2185{
2186 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2187}
2188
2189int16_t float32_to_int16(float32 a, float_status *s)
2190{
2191 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2192}
2193
2194int32_t float32_to_int32(float32 a, float_status *s)
2195{
2196 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2197}
2198
2199int64_t float32_to_int64(float32 a, float_status *s)
2200{
2201 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2202}
2203
2204int16_t float64_to_int16(float64 a, float_status *s)
2205{
2206 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2207}
2208
2209int32_t float64_to_int32(float64 a, float_status *s)
2210{
2211 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2212}
2213
2214int64_t float64_to_int64(float64 a, float_status *s)
2215{
2216 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2217}
2218
2219int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2220{
2221 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2222}
2223
2224int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2225{
2226 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2227}
2228
2229int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2230{
2231 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2232}
2233
2f6c74be
RH
2234int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2235{
2236 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2237}
ab52f973 2238
2f6c74be
RH
2239int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2240{
2241 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2242}
2243
2244int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2245{
2246 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2247}
2248
2249int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2250{
2251 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2252}
ab52f973 2253
2f6c74be
RH
2254int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2255{
2256 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2257}
ab52f973 2258
2f6c74be
RH
2259int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2260{
2261 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2262}
ab52f973
AB
2263
2264/*
2265 * Returns the result of converting the floating-point value `a' to
2266 * the unsigned integer format. The conversion is performed according
2267 * to the IEC/IEEE Standard for Binary Floating-Point
2268 * Arithmetic---which means in particular that the conversion is
2269 * rounded according to the current rounding mode. If `a' is a NaN,
2270 * the largest unsigned integer is returned. Otherwise, if the
2271 * conversion overflows, the largest unsigned integer is returned. If
2272 * the 'a' is negative, the result is rounded and zero is returned;
2273 * values that do not round to zero will raise the inexact exception
2274 * flag.
2275 */
2276
2f6c74be
RH
2277static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale,
2278 uint64_t max, float_status *s)
ab52f973
AB
2279{
2280 int orig_flags = get_float_exception_flags(s);
2f6c74be
RH
2281 FloatParts p = round_to_int(in, rmode, scale, s);
2282 uint64_t r;
ab52f973
AB
2283
2284 switch (p.cls) {
2285 case float_class_snan:
2286 case float_class_qnan:
2287 s->float_exception_flags = orig_flags | float_flag_invalid;
2288 return max;
2289 case float_class_inf:
801bc563 2290 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2291 return p.sign ? 0 : max;
2292 case float_class_zero:
2293 return 0;
2294 case float_class_normal:
ab52f973
AB
2295 if (p.sign) {
2296 s->float_exception_flags = orig_flags | float_flag_invalid;
2297 return 0;
2298 }
2299
2300 if (p.exp < DECOMPOSED_BINARY_POINT) {
2301 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2302 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2303 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2304 } else {
2305 s->float_exception_flags = orig_flags | float_flag_invalid;
2306 return max;
2307 }
2308
2309 /* For uint64 this will never trip, but if p.exp is too large
2310 * to shift a decomposed fraction we shall have exited via the
2311 * 3rd leg above.
2312 */
2313 if (r > max) {
2314 s->float_exception_flags = orig_flags | float_flag_invalid;
2315 return max;
ab52f973 2316 }
2f6c74be 2317 return r;
ab52f973
AB
2318 default:
2319 g_assert_not_reached();
2320 }
2321}
2322
2f6c74be
RH
2323uint16_t float16_to_uint16_scalbn(float16 a, int rmode, int scale,
2324 float_status *s)
2325{
2326 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2327 rmode, scale, UINT16_MAX, s);
2328}
2329
2330uint32_t float16_to_uint32_scalbn(float16 a, int rmode, int scale,
2331 float_status *s)
2332{
2333 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2334 rmode, scale, UINT32_MAX, s);
2335}
2336
2337uint64_t float16_to_uint64_scalbn(float16 a, int rmode, int scale,
2338 float_status *s)
2339{
2340 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2341 rmode, scale, UINT64_MAX, s);
2342}
2343
2344uint16_t float32_to_uint16_scalbn(float32 a, int rmode, int scale,
2345 float_status *s)
2346{
2347 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2348 rmode, scale, UINT16_MAX, s);
2349}
2350
2351uint32_t float32_to_uint32_scalbn(float32 a, int rmode, int scale,
2352 float_status *s)
2353{
2354 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2355 rmode, scale, UINT32_MAX, s);
2356}
2357
2358uint64_t float32_to_uint64_scalbn(float32 a, int rmode, int scale,
2359 float_status *s)
2360{
2361 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2362 rmode, scale, UINT64_MAX, s);
2363}
2364
2365uint16_t float64_to_uint16_scalbn(float64 a, int rmode, int scale,
2366 float_status *s)
2367{
2368 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2369 rmode, scale, UINT16_MAX, s);
2370}
2371
2372uint32_t float64_to_uint32_scalbn(float64 a, int rmode, int scale,
2373 float_status *s)
2374{
2375 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2376 rmode, scale, UINT32_MAX, s);
2377}
2378
2379uint64_t float64_to_uint64_scalbn(float64 a, int rmode, int scale,
2380 float_status *s)
2381{
2382 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2383 rmode, scale, UINT64_MAX, s);
2384}
2385
2386uint16_t float16_to_uint16(float16 a, float_status *s)
2387{
2388 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2389}
2390
2391uint32_t float16_to_uint32(float16 a, float_status *s)
2392{
2393 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2394}
2395
2396uint64_t float16_to_uint64(float16 a, float_status *s)
2397{
2398 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2399}
2400
2401uint16_t float32_to_uint16(float32 a, float_status *s)
2402{
2403 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2404}
2405
2406uint32_t float32_to_uint32(float32 a, float_status *s)
2407{
2408 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2409}
2410
2411uint64_t float32_to_uint64(float32 a, float_status *s)
2412{
2413 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2414}
2415
2416uint16_t float64_to_uint16(float64 a, float_status *s)
2417{
2418 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2419}
2420
2421uint32_t float64_to_uint32(float64 a, float_status *s)
2422{
2423 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2424}
2425
2426uint64_t float64_to_uint64(float64 a, float_status *s)
2427{
2428 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2429}
2430
2431uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2432{
2433 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2434}
2435
2436uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2437{
2438 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2439}
2440
2441uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2442{
2443 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2444}
2445
2446uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2447{
2448 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2449}
2450
2451uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2452{
2453 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2454}
2455
2456uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2457{
2458 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2459}
2460
2461uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2462{
2463 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2464}
2465
2466uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2467{
2468 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2469}
2470
2471uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2472{
2473 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2474}
ab52f973 2475
c02e1fb8
AB
2476/*
2477 * Integer to float conversions
2478 *
2479 * Returns the result of converting the two's complement integer `a'
2480 * to the floating-point format. The conversion is performed according
2481 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2482 */
2483
2abdfe24 2484static FloatParts int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 2485{
2abdfe24
RH
2486 FloatParts r = { .sign = false };
2487
c02e1fb8
AB
2488 if (a == 0) {
2489 r.cls = float_class_zero;
c02e1fb8 2490 } else {
2abdfe24
RH
2491 uint64_t f = a;
2492 int shift;
2493
2494 r.cls = float_class_normal;
c02e1fb8 2495 if (a < 0) {
2abdfe24 2496 f = -f;
c02e1fb8 2497 r.sign = true;
c02e1fb8 2498 }
2abdfe24
RH
2499 shift = clz64(f) - 1;
2500 scale = MIN(MAX(scale, -0x10000), 0x10000);
2501
2502 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2503 r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
c02e1fb8
AB
2504 }
2505
2506 return r;
2507}
2508
2abdfe24 2509float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2510{
2abdfe24 2511 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2512 return float16_round_pack_canonical(pa, status);
2513}
2514
2abdfe24
RH
2515float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2516{
2517 return int64_to_float16_scalbn(a, scale, status);
2518}
2519
2520float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2521{
2522 return int64_to_float16_scalbn(a, scale, status);
2523}
2524
2525float16 int64_to_float16(int64_t a, float_status *status)
2526{
2527 return int64_to_float16_scalbn(a, 0, status);
2528}
2529
c02e1fb8
AB
2530float16 int32_to_float16(int32_t a, float_status *status)
2531{
2abdfe24 2532 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2533}
2534
2535float16 int16_to_float16(int16_t a, float_status *status)
2536{
2abdfe24 2537 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2538}
2539
2abdfe24 2540float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2541{
2abdfe24 2542 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2543 return float32_round_pack_canonical(pa, status);
2544}
2545
2abdfe24
RH
2546float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
2547{
2548 return int64_to_float32_scalbn(a, scale, status);
2549}
2550
2551float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
2552{
2553 return int64_to_float32_scalbn(a, scale, status);
2554}
2555
2556float32 int64_to_float32(int64_t a, float_status *status)
2557{
2558 return int64_to_float32_scalbn(a, 0, status);
2559}
2560
c02e1fb8
AB
2561float32 int32_to_float32(int32_t a, float_status *status)
2562{
2abdfe24 2563 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2564}
2565
2566float32 int16_to_float32(int16_t a, float_status *status)
2567{
2abdfe24 2568 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2569}
2570
2abdfe24 2571float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2572{
2abdfe24 2573 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2574 return float64_round_pack_canonical(pa, status);
2575}
2576
2abdfe24
RH
2577float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
2578{
2579 return int64_to_float64_scalbn(a, scale, status);
2580}
2581
2582float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
2583{
2584 return int64_to_float64_scalbn(a, scale, status);
2585}
2586
2587float64 int64_to_float64(int64_t a, float_status *status)
2588{
2589 return int64_to_float64_scalbn(a, 0, status);
2590}
2591
c02e1fb8
AB
2592float64 int32_to_float64(int32_t a, float_status *status)
2593{
2abdfe24 2594 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2595}
2596
2597float64 int16_to_float64(int16_t a, float_status *status)
2598{
2abdfe24 2599 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2600}
2601
2602
2603/*
2604 * Unsigned Integer to float conversions
2605 *
2606 * Returns the result of converting the unsigned integer `a' to the
2607 * floating-point format. The conversion is performed according to the
2608 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2609 */
2610
2abdfe24 2611static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 2612{
2abdfe24 2613 FloatParts r = { .sign = false };
c02e1fb8
AB
2614
2615 if (a == 0) {
2616 r.cls = float_class_zero;
2617 } else {
2abdfe24 2618 scale = MIN(MAX(scale, -0x10000), 0x10000);
c02e1fb8 2619 r.cls = float_class_normal;
2abdfe24
RH
2620 if ((int64_t)a < 0) {
2621 r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
2622 shift64RightJamming(a, 1, &a);
c02e1fb8
AB
2623 r.frac = a;
2624 } else {
2abdfe24
RH
2625 int shift = clz64(a) - 1;
2626 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2627 r.frac = a << shift;
c02e1fb8
AB
2628 }
2629 }
2630
2631 return r;
2632}
2633
2abdfe24 2634float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2635{
2abdfe24 2636 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2637 return float16_round_pack_canonical(pa, status);
2638}
2639
2abdfe24
RH
2640float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
2641{
2642 return uint64_to_float16_scalbn(a, scale, status);
2643}
2644
2645float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
2646{
2647 return uint64_to_float16_scalbn(a, scale, status);
2648}
2649
2650float16 uint64_to_float16(uint64_t a, float_status *status)
2651{
2652 return uint64_to_float16_scalbn(a, 0, status);
2653}
2654
c02e1fb8
AB
2655float16 uint32_to_float16(uint32_t a, float_status *status)
2656{
2abdfe24 2657 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2658}
2659
2660float16 uint16_to_float16(uint16_t a, float_status *status)
2661{
2abdfe24 2662 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2663}
2664
2abdfe24 2665float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2666{
2abdfe24 2667 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2668 return float32_round_pack_canonical(pa, status);
2669}
2670
2abdfe24
RH
2671float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
2672{
2673 return uint64_to_float32_scalbn(a, scale, status);
2674}
2675
2676float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
2677{
2678 return uint64_to_float32_scalbn(a, scale, status);
2679}
2680
2681float32 uint64_to_float32(uint64_t a, float_status *status)
2682{
2683 return uint64_to_float32_scalbn(a, 0, status);
2684}
2685
c02e1fb8
AB
2686float32 uint32_to_float32(uint32_t a, float_status *status)
2687{
2abdfe24 2688 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2689}
2690
2691float32 uint16_to_float32(uint16_t a, float_status *status)
2692{
2abdfe24 2693 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2694}
2695
2abdfe24 2696float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2697{
2abdfe24 2698 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2699 return float64_round_pack_canonical(pa, status);
2700}
2701
2abdfe24
RH
2702float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
2703{
2704 return uint64_to_float64_scalbn(a, scale, status);
2705}
2706
2707float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
2708{
2709 return uint64_to_float64_scalbn(a, scale, status);
2710}
2711
2712float64 uint64_to_float64(uint64_t a, float_status *status)
2713{
2714 return uint64_to_float64_scalbn(a, 0, status);
2715}
2716
c02e1fb8
AB
2717float64 uint32_to_float64(uint32_t a, float_status *status)
2718{
2abdfe24 2719 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2720}
2721
2722float64 uint16_to_float64(uint16_t a, float_status *status)
2723{
2abdfe24 2724 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2725}
2726
89360067
AB
2727/* Float Min/Max */
2728/* min() and max() functions. These can't be implemented as
2729 * 'compare and pick one input' because that would mishandle
2730 * NaNs and +0 vs -0.
2731 *
2732 * minnum() and maxnum() functions. These are similar to the min()
2733 * and max() functions but if one of the arguments is a QNaN and
2734 * the other is numerical then the numerical argument is returned.
2735 * SNaNs will get quietened before being returned.
2736 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
2737 * and maxNum() operations. min() and max() are the typical min/max
2738 * semantics provided by many CPUs which predate that specification.
2739 *
2740 * minnummag() and maxnummag() functions correspond to minNumMag()
2741 * and maxNumMag() from the IEEE-754 2008.
2742 */
2743static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
2744 bool ieee, bool ismag, float_status *s)
2745{
2746 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
2747 if (ieee) {
2748 /* Takes two floating-point values `a' and `b', one of
2749 * which is a NaN, and returns the appropriate NaN
2750 * result. If either `a' or `b' is a signaling NaN,
2751 * the invalid exception is raised.
2752 */
2753 if (is_snan(a.cls) || is_snan(b.cls)) {
2754 return pick_nan(a, b, s);
2755 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
2756 return b;
2757 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
2758 return a;
2759 }
2760 }
2761 return pick_nan(a, b, s);
2762 } else {
2763 int a_exp, b_exp;
89360067
AB
2764
2765 switch (a.cls) {
2766 case float_class_normal:
2767 a_exp = a.exp;
2768 break;
2769 case float_class_inf:
2770 a_exp = INT_MAX;
2771 break;
2772 case float_class_zero:
2773 a_exp = INT_MIN;
2774 break;
2775 default:
2776 g_assert_not_reached();
2777 break;
2778 }
2779 switch (b.cls) {
2780 case float_class_normal:
2781 b_exp = b.exp;
2782 break;
2783 case float_class_inf:
2784 b_exp = INT_MAX;
2785 break;
2786 case float_class_zero:
2787 b_exp = INT_MIN;
2788 break;
2789 default:
2790 g_assert_not_reached();
2791 break;
2792 }
2793
6245327a
EC
2794 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
2795 bool a_less = a_exp < b_exp;
2796 if (a_exp == b_exp) {
2797 a_less = a.frac < b.frac;
2798 }
2799 return a_less ^ ismin ? b : a;
89360067
AB
2800 }
2801
6245327a 2802 if (a.sign == b.sign) {
89360067
AB
2803 bool a_less = a_exp < b_exp;
2804 if (a_exp == b_exp) {
2805 a_less = a.frac < b.frac;
2806 }
6245327a 2807 return a.sign ^ a_less ^ ismin ? b : a;
89360067 2808 } else {
6245327a 2809 return a.sign ^ ismin ? b : a;
89360067
AB
2810 }
2811 }
2812}
2813
2814#define MINMAX(sz, name, ismin, isiee, ismag) \
2815float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
2816 float_status *s) \
2817{ \
2818 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2819 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2820 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
2821 \
2822 return float ## sz ## _round_pack_canonical(pr, s); \
2823}
2824
2825MINMAX(16, min, true, false, false)
2826MINMAX(16, minnum, true, true, false)
2827MINMAX(16, minnummag, true, true, true)
2828MINMAX(16, max, false, false, false)
2829MINMAX(16, maxnum, false, true, false)
2830MINMAX(16, maxnummag, false, true, true)
2831
2832MINMAX(32, min, true, false, false)
2833MINMAX(32, minnum, true, true, false)
2834MINMAX(32, minnummag, true, true, true)
2835MINMAX(32, max, false, false, false)
2836MINMAX(32, maxnum, false, true, false)
2837MINMAX(32, maxnummag, false, true, true)
2838
2839MINMAX(64, min, true, false, false)
2840MINMAX(64, minnum, true, true, false)
2841MINMAX(64, minnummag, true, true, true)
2842MINMAX(64, max, false, false, false)
2843MINMAX(64, maxnum, false, true, false)
2844MINMAX(64, maxnummag, false, true, true)
2845
2846#undef MINMAX
2847
0c4c9092
AB
2848/* Floating point compare */
2849static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
2850 float_status *s)
2851{
2852 if (is_nan(a.cls) || is_nan(b.cls)) {
2853 if (!is_quiet ||
2854 a.cls == float_class_snan ||
2855 b.cls == float_class_snan) {
2856 s->float_exception_flags |= float_flag_invalid;
2857 }
2858 return float_relation_unordered;
2859 }
2860
2861 if (a.cls == float_class_zero) {
2862 if (b.cls == float_class_zero) {
2863 return float_relation_equal;
2864 }
2865 return b.sign ? float_relation_greater : float_relation_less;
2866 } else if (b.cls == float_class_zero) {
2867 return a.sign ? float_relation_less : float_relation_greater;
2868 }
2869
2870 /* The only really important thing about infinity is its sign. If
2871 * both are infinities the sign marks the smallest of the two.
2872 */
2873 if (a.cls == float_class_inf) {
2874 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
2875 return float_relation_equal;
2876 }
2877 return a.sign ? float_relation_less : float_relation_greater;
2878 } else if (b.cls == float_class_inf) {
2879 return b.sign ? float_relation_greater : float_relation_less;
2880 }
2881
2882 if (a.sign != b.sign) {
2883 return a.sign ? float_relation_less : float_relation_greater;
2884 }
2885
2886 if (a.exp == b.exp) {
2887 if (a.frac == b.frac) {
2888 return float_relation_equal;
2889 }
2890 if (a.sign) {
2891 return a.frac > b.frac ?
2892 float_relation_less : float_relation_greater;
2893 } else {
2894 return a.frac > b.frac ?
2895 float_relation_greater : float_relation_less;
2896 }
2897 } else {
2898 if (a.sign) {
2899 return a.exp > b.exp ? float_relation_less : float_relation_greater;
2900 } else {
2901 return a.exp > b.exp ? float_relation_greater : float_relation_less;
2902 }
2903 }
2904}
2905
2906#define COMPARE(sz) \
2907int float ## sz ## _compare(float ## sz a, float ## sz b, \
2908 float_status *s) \
2909{ \
2910 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2911 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2912 return compare_floats(pa, pb, false, s); \
2913} \
2914int float ## sz ## _compare_quiet(float ## sz a, float ## sz b, \
2915 float_status *s) \
2916{ \
2917 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2918 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2919 return compare_floats(pa, pb, true, s); \
2920}
2921
2922COMPARE(16)
2923COMPARE(32)
2924COMPARE(64)
2925
2926#undef COMPARE
2927
0bfc9f19
AB
2928/* Multiply A by 2 raised to the power N. */
2929static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
2930{
2931 if (unlikely(is_nan(a.cls))) {
2932 return return_nan(a, s);
2933 }
2934 if (a.cls == float_class_normal) {
ce8d4082
RH
2935 /* The largest float type (even though not supported by FloatParts)
2936 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
2937 * still allows rounding to infinity, without allowing overflow
2938 * within the int32_t that backs FloatParts.exp.
2939 */
2940 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
2941 a.exp += n;
2942 }
2943 return a;
2944}
2945
2946float16 float16_scalbn(float16 a, int n, float_status *status)
2947{
2948 FloatParts pa = float16_unpack_canonical(a, status);
2949 FloatParts pr = scalbn_decomposed(pa, n, status);
2950 return float16_round_pack_canonical(pr, status);
2951}
2952
2953float32 float32_scalbn(float32 a, int n, float_status *status)
2954{
2955 FloatParts pa = float32_unpack_canonical(a, status);
2956 FloatParts pr = scalbn_decomposed(pa, n, status);
2957 return float32_round_pack_canonical(pr, status);
2958}
2959
2960float64 float64_scalbn(float64 a, int n, float_status *status)
2961{
2962 FloatParts pa = float64_unpack_canonical(a, status);
2963 FloatParts pr = scalbn_decomposed(pa, n, status);
2964 return float64_round_pack_canonical(pr, status);
2965}
2966
c13bb2da
AB
2967/*
2968 * Square Root
2969 *
2970 * The old softfloat code did an approximation step before zeroing in
2971 * on the final result. However for simpleness we just compute the
2972 * square root by iterating down from the implicit bit to enough extra
2973 * bits to ensure we get a correctly rounded result.
2974 *
2975 * This does mean however the calculation is slower than before,
2976 * especially for 64 bit floats.
2977 */
2978
2979static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
2980{
2981 uint64_t a_frac, r_frac, s_frac;
2982 int bit, last_bit;
2983
2984 if (is_nan(a.cls)) {
2985 return return_nan(a, s);
2986 }
2987 if (a.cls == float_class_zero) {
2988 return a; /* sqrt(+-0) = +-0 */
2989 }
2990 if (a.sign) {
2991 s->float_exception_flags |= float_flag_invalid;
f7e598e2 2992 return parts_default_nan(s);
c13bb2da
AB
2993 }
2994 if (a.cls == float_class_inf) {
2995 return a; /* sqrt(+inf) = +inf */
2996 }
2997
2998 assert(a.cls == float_class_normal);
2999
3000 /* We need two overflow bits at the top. Adding room for that is a
3001 * right shift. If the exponent is odd, we can discard the low bit
3002 * by multiplying the fraction by 2; that's a left shift. Combine
3003 * those and we shift right if the exponent is even.
3004 */
3005 a_frac = a.frac;
3006 if (!(a.exp & 1)) {
3007 a_frac >>= 1;
3008 }
3009 a.exp >>= 1;
3010
3011 /* Bit-by-bit computation of sqrt. */
3012 r_frac = 0;
3013 s_frac = 0;
3014
3015 /* Iterate from implicit bit down to the 3 extra bits to compute a
3016 * properly rounded result. Remember we've inserted one more bit
3017 * at the top, so these positions are one less.
3018 */
3019 bit = DECOMPOSED_BINARY_POINT - 1;
3020 last_bit = MAX(p->frac_shift - 4, 0);
3021 do {
3022 uint64_t q = 1ULL << bit;
3023 uint64_t t_frac = s_frac + q;
3024 if (t_frac <= a_frac) {
3025 s_frac = t_frac + q;
3026 a_frac -= t_frac;
3027 r_frac += q;
3028 }
3029 a_frac <<= 1;
3030 } while (--bit >= last_bit);
3031
3032 /* Undo the right shift done above. If there is any remaining
3033 * fraction, the result is inexact. Set the sticky bit.
3034 */
3035 a.frac = (r_frac << 1) + (a_frac != 0);
3036
3037 return a;
3038}
3039
97ff87c0 3040float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da
AB
3041{
3042 FloatParts pa = float16_unpack_canonical(a, status);
3043 FloatParts pr = sqrt_float(pa, status, &float16_params);
3044 return float16_round_pack_canonical(pr, status);
3045}
3046
f131bae8
EC
3047static float32 QEMU_SOFTFLOAT_ATTR
3048soft_f32_sqrt(float32 a, float_status *status)
c13bb2da
AB
3049{
3050 FloatParts pa = float32_unpack_canonical(a, status);
3051 FloatParts pr = sqrt_float(pa, status, &float32_params);
3052 return float32_round_pack_canonical(pr, status);
3053}
3054
f131bae8
EC
3055static float64 QEMU_SOFTFLOAT_ATTR
3056soft_f64_sqrt(float64 a, float_status *status)
c13bb2da
AB
3057{
3058 FloatParts pa = float64_unpack_canonical(a, status);
3059 FloatParts pr = sqrt_float(pa, status, &float64_params);
3060 return float64_round_pack_canonical(pr, status);
3061}
3062
f131bae8
EC
3063float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3064{
3065 union_float32 ua, ur;
3066
3067 ua.s = xa;
3068 if (unlikely(!can_use_fpu(s))) {
3069 goto soft;
3070 }
3071
3072 float32_input_flush1(&ua.s, s);
3073 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3074 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3075 fpclassify(ua.h) == FP_ZERO) ||
3076 signbit(ua.h))) {
3077 goto soft;
3078 }
3079 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3080 float32_is_neg(ua.s))) {
3081 goto soft;
3082 }
3083 ur.h = sqrtf(ua.h);
3084 return ur.s;
3085
3086 soft:
3087 return soft_f32_sqrt(ua.s, s);
3088}
3089
3090float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3091{
3092 union_float64 ua, ur;
3093
3094 ua.s = xa;
3095 if (unlikely(!can_use_fpu(s))) {
3096 goto soft;
3097 }
3098
3099 float64_input_flush1(&ua.s, s);
3100 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3101 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3102 fpclassify(ua.h) == FP_ZERO) ||
3103 signbit(ua.h))) {
3104 goto soft;
3105 }
3106 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3107 float64_is_neg(ua.s))) {
3108 goto soft;
3109 }
3110 ur.h = sqrt(ua.h);
3111 return ur.s;
3112
3113 soft:
3114 return soft_f64_sqrt(ua.s, s);
3115}
3116
0218a16e
RH
3117/*----------------------------------------------------------------------------
3118| The pattern for a default generated NaN.
3119*----------------------------------------------------------------------------*/
3120
3121float16 float16_default_nan(float_status *status)
3122{
3123 FloatParts p = parts_default_nan(status);
3124 p.frac >>= float16_params.frac_shift;
3125 return float16_pack_raw(p);
3126}
3127
3128float32 float32_default_nan(float_status *status)
3129{
3130 FloatParts p = parts_default_nan(status);
3131 p.frac >>= float32_params.frac_shift;
3132 return float32_pack_raw(p);
3133}
3134
3135float64 float64_default_nan(float_status *status)
3136{
3137 FloatParts p = parts_default_nan(status);
3138 p.frac >>= float64_params.frac_shift;
3139 return float64_pack_raw(p);
3140}
3141
3142float128 float128_default_nan(float_status *status)
3143{
3144 FloatParts p = parts_default_nan(status);
3145 float128 r;
3146
3147 /* Extrapolate from the choices made by parts_default_nan to fill
3148 * in the quad-floating format. If the low bit is set, assume we
3149 * want to set all non-snan bits.
3150 */
3151 r.low = -(p.frac & 1);
3152 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
3153 r.high |= LIT64(0x7FFF000000000000);
3154 r.high |= (uint64_t)p.sign << 63;
3155
3156 return r;
3157}
c13bb2da 3158
158142c2 3159/*----------------------------------------------------------------------------
377ed926
RH
3160| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3161*----------------------------------------------------------------------------*/
3162
3163float16 float16_silence_nan(float16 a, float_status *status)
3164{
3165 FloatParts p = float16_unpack_raw(a);
3166 p.frac <<= float16_params.frac_shift;
3167 p = parts_silence_nan(p, status);
3168 p.frac >>= float16_params.frac_shift;
3169 return float16_pack_raw(p);
3170}
3171
3172float32 float32_silence_nan(float32 a, float_status *status)
3173{
3174 FloatParts p = float32_unpack_raw(a);
3175 p.frac <<= float32_params.frac_shift;
3176 p = parts_silence_nan(p, status);
3177 p.frac >>= float32_params.frac_shift;
3178 return float32_pack_raw(p);
3179}
3180
3181float64 float64_silence_nan(float64 a, float_status *status)
3182{
3183 FloatParts p = float64_unpack_raw(a);
3184 p.frac <<= float64_params.frac_shift;
3185 p = parts_silence_nan(p, status);
3186 p.frac >>= float64_params.frac_shift;
3187 return float64_pack_raw(p);
3188}
3189
3190/*----------------------------------------------------------------------------
158142c2
FB
3191| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3192| and 7, and returns the properly rounded 32-bit integer corresponding to the
3193| input. If `zSign' is 1, the input is negated before being converted to an
3194| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3195| is simply rounded to an integer, with the inexact exception raised if the
3196| input cannot be represented exactly as an integer. However, if the fixed-
3197| point input is too large, the invalid exception is raised and the largest
3198| positive or negative integer is returned.
3199*----------------------------------------------------------------------------*/
3200
f4014512 3201static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
158142c2 3202{
8f506c70 3203 int8_t roundingMode;
158142c2 3204 flag roundNearestEven;
8f506c70 3205 int8_t roundIncrement, roundBits;
760e1416 3206 int32_t z;
158142c2 3207
a2f2d288 3208 roundingMode = status->float_rounding_mode;
158142c2 3209 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3210 switch (roundingMode) {
3211 case float_round_nearest_even:
f9288a76 3212 case float_round_ties_away:
dc355b76
PM
3213 roundIncrement = 0x40;
3214 break;
3215 case float_round_to_zero:
3216 roundIncrement = 0;
3217 break;
3218 case float_round_up:
3219 roundIncrement = zSign ? 0 : 0x7f;
3220 break;
3221 case float_round_down:
3222 roundIncrement = zSign ? 0x7f : 0;
3223 break;
3224 default:
3225 abort();
158142c2
FB
3226 }
3227 roundBits = absZ & 0x7F;
3228 absZ = ( absZ + roundIncrement )>>7;
3229 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
3230 z = absZ;
3231 if ( zSign ) z = - z;
3232 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 3233 float_raise(float_flag_invalid, status);
bb98fe42 3234 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2 3235 }
a2f2d288
PM
3236 if (roundBits) {
3237 status->float_exception_flags |= float_flag_inexact;
3238 }
158142c2
FB
3239 return z;
3240
3241}
3242
3243/*----------------------------------------------------------------------------
3244| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3245| `absZ1', with binary point between bits 63 and 64 (between the input words),
3246| and returns the properly rounded 64-bit integer corresponding to the input.
3247| If `zSign' is 1, the input is negated before being converted to an integer.
3248| Ordinarily, the fixed-point input is simply rounded to an integer, with
3249| the inexact exception raised if the input cannot be represented exactly as
3250| an integer. However, if the fixed-point input is too large, the invalid
3251| exception is raised and the largest positive or negative integer is
3252| returned.
3253*----------------------------------------------------------------------------*/
3254
f42c2224 3255static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 3256 float_status *status)
158142c2 3257{
8f506c70 3258 int8_t roundingMode;
158142c2 3259 flag roundNearestEven, increment;
760e1416 3260 int64_t z;
158142c2 3261
a2f2d288 3262 roundingMode = status->float_rounding_mode;
158142c2 3263 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3264 switch (roundingMode) {
3265 case float_round_nearest_even:
f9288a76 3266 case float_round_ties_away:
dc355b76
PM
3267 increment = ((int64_t) absZ1 < 0);
3268 break;
3269 case float_round_to_zero:
3270 increment = 0;
3271 break;
3272 case float_round_up:
3273 increment = !zSign && absZ1;
3274 break;
3275 case float_round_down:
3276 increment = zSign && absZ1;
3277 break;
3278 default:
3279 abort();
158142c2
FB
3280 }
3281 if ( increment ) {
3282 ++absZ0;
3283 if ( absZ0 == 0 ) goto overflow;
bb98fe42 3284 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
3285 }
3286 z = absZ0;
3287 if ( zSign ) z = - z;
3288 if ( z && ( ( z < 0 ) ^ zSign ) ) {
3289 overflow:
ff32e16e 3290 float_raise(float_flag_invalid, status);
158142c2 3291 return
bb98fe42 3292 zSign ? (int64_t) LIT64( 0x8000000000000000 )
158142c2
FB
3293 : LIT64( 0x7FFFFFFFFFFFFFFF );
3294 }
a2f2d288
PM
3295 if (absZ1) {
3296 status->float_exception_flags |= float_flag_inexact;
3297 }
158142c2
FB
3298 return z;
3299
3300}
3301
fb3ea83a
TM
3302/*----------------------------------------------------------------------------
3303| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3304| `absZ1', with binary point between bits 63 and 64 (between the input words),
3305| and returns the properly rounded 64-bit unsigned integer corresponding to the
3306| input. Ordinarily, the fixed-point input is simply rounded to an integer,
3307| with the inexact exception raised if the input cannot be represented exactly
3308| as an integer. However, if the fixed-point input is too large, the invalid
3309| exception is raised and the largest unsigned integer is returned.
3310*----------------------------------------------------------------------------*/
3311
f42c2224 3312static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
e5a41ffa 3313 uint64_t absZ1, float_status *status)
fb3ea83a 3314{
8f506c70 3315 int8_t roundingMode;
fb3ea83a
TM
3316 flag roundNearestEven, increment;
3317
a2f2d288 3318 roundingMode = status->float_rounding_mode;
fb3ea83a 3319 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
3320 switch (roundingMode) {
3321 case float_round_nearest_even:
f9288a76 3322 case float_round_ties_away:
dc355b76
PM
3323 increment = ((int64_t)absZ1 < 0);
3324 break;
3325 case float_round_to_zero:
3326 increment = 0;
3327 break;
3328 case float_round_up:
3329 increment = !zSign && absZ1;
3330 break;
3331 case float_round_down:
3332 increment = zSign && absZ1;
3333 break;
3334 default:
3335 abort();
fb3ea83a
TM
3336 }
3337 if (increment) {
3338 ++absZ0;
3339 if (absZ0 == 0) {
ff32e16e 3340 float_raise(float_flag_invalid, status);
fb3ea83a
TM
3341 return LIT64(0xFFFFFFFFFFFFFFFF);
3342 }
3343 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
3344 }
3345
3346 if (zSign && absZ0) {
ff32e16e 3347 float_raise(float_flag_invalid, status);
fb3ea83a
TM
3348 return 0;
3349 }
3350
3351 if (absZ1) {
a2f2d288 3352 status->float_exception_flags |= float_flag_inexact;
fb3ea83a
TM
3353 }
3354 return absZ0;
3355}
3356
37d18660
PM
3357/*----------------------------------------------------------------------------
3358| If `a' is denormal and we are in flush-to-zero mode then set the
3359| input-denormal exception and return zero. Otherwise just return the value.
3360*----------------------------------------------------------------------------*/
e5a41ffa 3361float32 float32_squash_input_denormal(float32 a, float_status *status)
37d18660 3362{
a2f2d288 3363 if (status->flush_inputs_to_zero) {
37d18660 3364 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
ff32e16e 3365 float_raise(float_flag_input_denormal, status);
37d18660
PM
3366 return make_float32(float32_val(a) & 0x80000000);
3367 }
3368 }
3369 return a;
3370}
3371
158142c2
FB
3372/*----------------------------------------------------------------------------
3373| Normalizes the subnormal single-precision floating-point value represented
3374| by the denormalized significand `aSig'. The normalized exponent and
3375| significand are stored at the locations pointed to by `zExpPtr' and
3376| `zSigPtr', respectively.
3377*----------------------------------------------------------------------------*/
3378
static void normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr,
                                      uint32_t *zSigPtr)
{
    /* Left shift that leaves exactly 8 leading zero bits in the result. */
    const int8_t shift = clz32(aSig) - 8;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
3389
158142c2
FB
3390/*----------------------------------------------------------------------------
3391| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3392| and significand `zSig', and returns the proper single-precision floating-
3393| point value corresponding to the abstract input. Ordinarily, the abstract
3394| value is simply rounded and packed into the single-precision format, with
3395| the inexact exception raised if the abstract input cannot be represented
3396| exactly. However, if the abstract value is too large, the overflow and
3397| inexact exceptions are raised and an infinity or maximal finite value is
3398| returned. If the abstract value is too small, the input value is rounded to
3399| a subnormal number, and the underflow and inexact exceptions are raised if
3400| the abstract input cannot be represented exactly as a subnormal single-
3401| precision floating-point number.
3402| The input significand `zSig' has its binary point between bits 30
3403| and 29, which is 7 bits to the left of the usual location. This shifted
3404| significand must be normalized or smaller. If `zSig' is not normalized,
3405| `zExp' must be 0; in that case, the result returned is a subnormal number,
3406| and it must not require rounding. In the usual case that `zSig' is
3407| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3408| The handling of underflow and overflow follows the IEC/IEEE Standard for
3409| Binary Floating-Point Arithmetic.
3410*----------------------------------------------------------------------------*/
3411
0c48262d 3412static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 3413 float_status *status)
158142c2 3414{
8f506c70 3415 int8_t roundingMode;
158142c2 3416 flag roundNearestEven;
8f506c70 3417 int8_t roundIncrement, roundBits;
158142c2
FB
3418 flag isTiny;
3419
a2f2d288 3420 roundingMode = status->float_rounding_mode;
158142c2 3421 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3422 switch (roundingMode) {
3423 case float_round_nearest_even:
f9288a76 3424 case float_round_ties_away:
dc355b76
PM
3425 roundIncrement = 0x40;
3426 break;
3427 case float_round_to_zero:
3428 roundIncrement = 0;
3429 break;
3430 case float_round_up:
3431 roundIncrement = zSign ? 0 : 0x7f;
3432 break;
3433 case float_round_down:
3434 roundIncrement = zSign ? 0x7f : 0;
3435 break;
3436 default:
3437 abort();
3438 break;
158142c2
FB
3439 }
3440 roundBits = zSig & 0x7F;
bb98fe42 3441 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
3442 if ( ( 0xFD < zExp )
3443 || ( ( zExp == 0xFD )
bb98fe42 3444 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 3445 ) {
ff32e16e 3446 float_raise(float_flag_overflow | float_flag_inexact, status);
f090c9d4 3447 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
158142c2
FB
3448 }
3449 if ( zExp < 0 ) {
a2f2d288 3450 if (status->flush_to_zero) {
ff32e16e 3451 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3452 return packFloat32(zSign, 0, 0);
3453 }
158142c2 3454 isTiny =
a2f2d288
PM
3455 (status->float_detect_tininess
3456 == float_tininess_before_rounding)
158142c2
FB
3457 || ( zExp < -1 )
3458 || ( zSig + roundIncrement < 0x80000000 );
3459 shift32RightJamming( zSig, - zExp, &zSig );
3460 zExp = 0;
3461 roundBits = zSig & 0x7F;
ff32e16e
PM
3462 if (isTiny && roundBits) {
3463 float_raise(float_flag_underflow, status);
3464 }
158142c2
FB
3465 }
3466 }
a2f2d288
PM
3467 if (roundBits) {
3468 status->float_exception_flags |= float_flag_inexact;
3469 }
158142c2
FB
3470 zSig = ( zSig + roundIncrement )>>7;
3471 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
3472 if ( zSig == 0 ) zExp = 0;
3473 return packFloat32( zSign, zExp, zSig );
3474
3475}
3476
3477/*----------------------------------------------------------------------------
3478| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3479| and significand `zSig', and returns the proper single-precision floating-
3480| point value corresponding to the abstract input. This routine is just like
3481| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
3482| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
3483| floating-point exponent.
3484*----------------------------------------------------------------------------*/
3485
3486static float32
0c48262d 3487 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 3488 float_status *status)
158142c2 3489{
8f506c70 3490 int8_t shiftCount;
158142c2 3491
0019d5c3 3492 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
3493 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
3494 status);
158142c2
FB
3495
3496}
3497
37d18660
PM
3498/*----------------------------------------------------------------------------
3499| If `a' is denormal and we are in flush-to-zero mode then set the
3500| input-denormal exception and return zero. Otherwise just return the value.
3501*----------------------------------------------------------------------------*/
e5a41ffa 3502float64 float64_squash_input_denormal(float64 a, float_status *status)
37d18660 3503{
a2f2d288 3504 if (status->flush_inputs_to_zero) {
37d18660 3505 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
ff32e16e 3506 float_raise(float_flag_input_denormal, status);
37d18660
PM
3507 return make_float64(float64_val(a) & (1ULL << 63));
3508 }
3509 }
3510 return a;
3511}
3512
158142c2
FB
3513/*----------------------------------------------------------------------------
3514| Normalizes the subnormal double-precision floating-point value represented
3515| by the denormalized significand `aSig'. The normalized exponent and
3516| significand are stored at the locations pointed to by `zExpPtr' and
3517| `zSigPtr', respectively.
3518*----------------------------------------------------------------------------*/
3519
static void normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr,
                                      uint64_t *zSigPtr)
{
    /* Left shift that leaves exactly 11 leading zero bits in the result. */
    const int8_t shift = clz64(aSig) - 11;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
3530
3531/*----------------------------------------------------------------------------
3532| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
3533| double-precision floating-point value, returning the result. After being
3534| shifted into the proper positions, the three fields are simply added
3535| together to form the result. This means that any integer portion of `zSig'
3536| will be added into the exponent. Since a properly normalized significand
3537| will have an integer portion equal to 1, the `zExp' input should be 1 less
3538| than the desired result exponent whenever `zSig' is a complete, normalized
3539| significand.
3540*----------------------------------------------------------------------------*/
3541
0c48262d 3542static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
158142c2
FB
3543{
3544
f090c9d4 3545 return make_float64(
bb98fe42 3546 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
3547
3548}
3549
3550/*----------------------------------------------------------------------------
3551| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3552| and significand `zSig', and returns the proper double-precision floating-
3553| point value corresponding to the abstract input. Ordinarily, the abstract
3554| value is simply rounded and packed into the double-precision format, with
3555| the inexact exception raised if the abstract input cannot be represented
3556| exactly. However, if the abstract value is too large, the overflow and
3557| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
3558| returned. If the abstract value is too small, the input value is rounded to
3559| a subnormal number, and the underflow and inexact exceptions are raised if
3560| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
3561| precision floating-point number.
3562| The input significand `zSig' has its binary point between bits 62
3563| and 61, which is 10 bits to the left of the usual location. This shifted
3564| significand must be normalized or smaller. If `zSig' is not normalized,
3565| `zExp' must be 0; in that case, the result returned is a subnormal number,
3566| and it must not require rounding. In the usual case that `zSig' is
3567| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3568| The handling of underflow and overflow follows the IEC/IEEE Standard for
3569| Binary Floating-Point Arithmetic.
3570*----------------------------------------------------------------------------*/
3571
0c48262d 3572static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 3573 float_status *status)
158142c2 3574{
8f506c70 3575 int8_t roundingMode;
158142c2 3576 flag roundNearestEven;
0c48262d 3577 int roundIncrement, roundBits;
158142c2
FB
3578 flag isTiny;
3579
a2f2d288 3580 roundingMode = status->float_rounding_mode;
158142c2 3581 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3582 switch (roundingMode) {
3583 case float_round_nearest_even:
f9288a76 3584 case float_round_ties_away:
dc355b76
PM
3585 roundIncrement = 0x200;
3586 break;
3587 case float_round_to_zero:
3588 roundIncrement = 0;
3589 break;
3590 case float_round_up:
3591 roundIncrement = zSign ? 0 : 0x3ff;
3592 break;
3593 case float_round_down:
3594 roundIncrement = zSign ? 0x3ff : 0;
3595 break;
9ee6f678
BR
3596 case float_round_to_odd:
3597 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
3598 break;
dc355b76
PM
3599 default:
3600 abort();
158142c2
FB
3601 }
3602 roundBits = zSig & 0x3FF;
bb98fe42 3603 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
3604 if ( ( 0x7FD < zExp )
3605 || ( ( zExp == 0x7FD )
bb98fe42 3606 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 3607 ) {
9ee6f678
BR
3608 bool overflow_to_inf = roundingMode != float_round_to_odd &&
3609 roundIncrement != 0;
ff32e16e 3610 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 3611 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
3612 }
3613 if ( zExp < 0 ) {
a2f2d288 3614 if (status->flush_to_zero) {
ff32e16e 3615 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3616 return packFloat64(zSign, 0, 0);
3617 }
158142c2 3618 isTiny =
a2f2d288
PM
3619 (status->float_detect_tininess
3620 == float_tininess_before_rounding)
158142c2
FB
3621 || ( zExp < -1 )
3622 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
3623 shift64RightJamming( zSig, - zExp, &zSig );
3624 zExp = 0;
3625 roundBits = zSig & 0x3FF;
ff32e16e
PM
3626 if (isTiny && roundBits) {
3627 float_raise(float_flag_underflow, status);
3628 }
9ee6f678
BR
3629 if (roundingMode == float_round_to_odd) {
3630 /*
3631 * For round-to-odd case, the roundIncrement depends on
3632 * zSig which just changed.
3633 */
3634 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
3635 }
158142c2
FB
3636 }
3637 }
a2f2d288
PM
3638 if (roundBits) {
3639 status->float_exception_flags |= float_flag_inexact;
3640 }
158142c2
FB
3641 zSig = ( zSig + roundIncrement )>>10;
3642 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
3643 if ( zSig == 0 ) zExp = 0;
3644 return packFloat64( zSign, zExp, zSig );
3645
3646}
3647
3648/*----------------------------------------------------------------------------
3649| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3650| and significand `zSig', and returns the proper double-precision floating-
3651| point value corresponding to the abstract input. This routine is just like
3652| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
3653| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
3654| floating-point exponent.
3655*----------------------------------------------------------------------------*/
3656
3657static float64
0c48262d 3658 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 3659 float_status *status)
158142c2 3660{
8f506c70 3661 int8_t shiftCount;
158142c2 3662
0019d5c3 3663 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
3664 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
3665 status);
158142c2
FB
3666
3667}
3668
158142c2
FB
3669/*----------------------------------------------------------------------------
3670| Normalizes the subnormal extended double-precision floating-point value
3671| represented by the denormalized significand `aSig'. The normalized exponent
3672| and significand are stored at the locations pointed to by `zExpPtr' and
3673| `zSigPtr', respectively.
3674*----------------------------------------------------------------------------*/
3675
88857aca
LV
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* Left shift that brings the highest set bit of aSig up to bit 63. */
    const int8_t shift = clz64(aSig);

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
3685
3686/*----------------------------------------------------------------------------
3687| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3688| and extended significand formed by the concatenation of `zSig0' and `zSig1',
3689| and returns the proper extended double-precision floating-point value
3690| corresponding to the abstract input. Ordinarily, the abstract value is
3691| rounded and packed into the extended double-precision format, with the
3692| inexact exception raised if the abstract input cannot be represented
3693| exactly. However, if the abstract value is too large, the overflow and
3694| inexact exceptions are raised and an infinity or maximal finite value is
3695| returned. If the abstract value is too small, the input value is rounded to
3696| a subnormal number, and the underflow and inexact exceptions are raised if
3697| the abstract input cannot be represented exactly as a subnormal extended
3698| double-precision floating-point number.
3699| If `roundingPrecision' is 32 or 64, the result is rounded to the same
3700| number of bits as single or double precision, respectively. Otherwise, the
3701| result is rounded to the full precision of the extended double-precision
3702| format.
3703| The input significand must be normalized or smaller. If the input
3704| significand is not normalized, `zExp' must be 0; in that case, the result
3705| returned is a subnormal number, and it must not require rounding. The
3706| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
3707| Floating-Point Arithmetic.
3708*----------------------------------------------------------------------------*/
3709
88857aca
LV
3710floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
3711 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
3712 float_status *status)
158142c2 3713{
8f506c70 3714 int8_t roundingMode;
158142c2 3715 flag roundNearestEven, increment, isTiny;
f42c2224 3716 int64_t roundIncrement, roundMask, roundBits;
158142c2 3717
a2f2d288 3718 roundingMode = status->float_rounding_mode;
158142c2
FB
3719 roundNearestEven = ( roundingMode == float_round_nearest_even );
3720 if ( roundingPrecision == 80 ) goto precision80;
3721 if ( roundingPrecision == 64 ) {
3722 roundIncrement = LIT64( 0x0000000000000400 );
3723 roundMask = LIT64( 0x00000000000007FF );
3724 }
3725 else if ( roundingPrecision == 32 ) {
3726 roundIncrement = LIT64( 0x0000008000000000 );
3727 roundMask = LIT64( 0x000000FFFFFFFFFF );
3728 }
3729 else {
3730 goto precision80;
3731 }
3732 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
3733 switch (roundingMode) {
3734 case float_round_nearest_even:
f9288a76 3735 case float_round_ties_away:
dc355b76
PM
3736 break;
3737 case float_round_to_zero:
3738 roundIncrement = 0;
3739 break;
3740 case float_round_up:
3741 roundIncrement = zSign ? 0 : roundMask;
3742 break;
3743 case float_round_down:
3744 roundIncrement = zSign ? roundMask : 0;
3745 break;
3746 default:
3747 abort();
158142c2
FB
3748 }
3749 roundBits = zSig0 & roundMask;
bb98fe42 3750 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
3751 if ( ( 0x7FFE < zExp )
3752 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
3753 ) {
3754 goto overflow;
3755 }
3756 if ( zExp <= 0 ) {
a2f2d288 3757 if (status->flush_to_zero) {
ff32e16e 3758 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3759 return packFloatx80(zSign, 0, 0);
3760 }
158142c2 3761 isTiny =
a2f2d288
PM
3762 (status->float_detect_tininess
3763 == float_tininess_before_rounding)
158142c2
FB
3764 || ( zExp < 0 )
3765 || ( zSig0 <= zSig0 + roundIncrement );
3766 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
3767 zExp = 0;
3768 roundBits = zSig0 & roundMask;
ff32e16e
PM
3769 if (isTiny && roundBits) {
3770 float_raise(float_flag_underflow, status);
3771 }
a2f2d288
PM
3772 if (roundBits) {
3773 status->float_exception_flags |= float_flag_inexact;
3774 }
158142c2 3775 zSig0 += roundIncrement;
bb98fe42 3776 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
3777 roundIncrement = roundMask + 1;
3778 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3779 roundMask |= roundIncrement;
3780 }
3781 zSig0 &= ~ roundMask;
3782 return packFloatx80( zSign, zExp, zSig0 );
3783 }
3784 }
a2f2d288
PM
3785 if (roundBits) {
3786 status->float_exception_flags |= float_flag_inexact;
3787 }
158142c2
FB
3788 zSig0 += roundIncrement;
3789 if ( zSig0 < roundIncrement ) {
3790 ++zExp;
3791 zSig0 = LIT64( 0x8000000000000000 );
3792 }
3793 roundIncrement = roundMask + 1;
3794 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3795 roundMask |= roundIncrement;
3796 }
3797 zSig0 &= ~ roundMask;
3798 if ( zSig0 == 0 ) zExp = 0;
3799 return packFloatx80( zSign, zExp, zSig0 );
3800 precision80:
dc355b76
PM
3801 switch (roundingMode) {
3802 case float_round_nearest_even:
f9288a76 3803 case float_round_ties_away:
dc355b76
PM
3804 increment = ((int64_t)zSig1 < 0);
3805 break;
3806 case float_round_to_zero:
3807 increment = 0;
3808 break;
3809 case float_round_up:
3810 increment = !zSign && zSig1;
3811 break;
3812 case float_round_down:
3813 increment = zSign && zSig1;
3814 break;
3815 default:
3816 abort();
158142c2 3817 }
bb98fe42 3818 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
3819 if ( ( 0x7FFE < zExp )
3820 || ( ( zExp == 0x7FFE )
3821 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
3822 && increment
3823 )
3824 ) {
3825 roundMask = 0;
3826 overflow:
ff32e16e 3827 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
3828 if ( ( roundingMode == float_round_to_zero )
3829 || ( zSign && ( roundingMode == float_round_up ) )
3830 || ( ! zSign && ( roundingMode == float_round_down ) )
3831 ) {
3832 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
3833 }
0f605c88
LV
3834 return packFloatx80(zSign,
3835 floatx80_infinity_high,
3836 floatx80_infinity_low);
158142c2
FB
3837 }
3838 if ( zExp <= 0 ) {
3839 isTiny =
a2f2d288
PM
3840 (status->float_detect_tininess
3841 == float_tininess_before_rounding)
158142c2
FB
3842 || ( zExp < 0 )
3843 || ! increment
3844 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
3845 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
3846 zExp = 0;
ff32e16e
PM
3847 if (isTiny && zSig1) {
3848 float_raise(float_flag_underflow, status);
3849 }
a2f2d288
PM
3850 if (zSig1) {
3851 status->float_exception_flags |= float_flag_inexact;
3852 }
dc355b76
PM
3853 switch (roundingMode) {
3854 case float_round_nearest_even:
f9288a76 3855 case float_round_ties_away:
dc355b76
PM
3856 increment = ((int64_t)zSig1 < 0);
3857 break;
3858 case float_round_to_zero:
3859 increment = 0;
3860 break;
3861 case float_round_up:
3862 increment = !zSign && zSig1;
3863 break;
3864 case float_round_down:
3865 increment = zSign && zSig1;
3866 break;
3867 default:
3868 abort();
158142c2
FB
3869 }
3870 if ( increment ) {
3871 ++zSig0;
3872 zSig0 &=
bb98fe42
AF
3873 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
3874 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
3875 }
3876 return packFloatx80( zSign, zExp, zSig0 );
3877 }
3878 }
a2f2d288
PM
3879 if (zSig1) {
3880 status->float_exception_flags |= float_flag_inexact;
3881 }
158142c2
FB
3882 if ( increment ) {
3883 ++zSig0;
3884 if ( zSig0 == 0 ) {
3885 ++zExp;
3886 zSig0 = LIT64( 0x8000000000000000 );
3887 }
3888 else {
bb98fe42 3889 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
3890 }
3891 }
3892 else {
3893 if ( zSig0 == 0 ) zExp = 0;
3894 }
3895 return packFloatx80( zSign, zExp, zSig0 );
3896
3897}
3898
3899/*----------------------------------------------------------------------------
3900| Takes an abstract floating-point value having sign `zSign', exponent
3901| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
3902| and returns the proper extended double-precision floating-point value
3903| corresponding to the abstract input. This routine is just like
3904| `roundAndPackFloatx80' except that the input significand does not have to be
3905| normalized.
3906*----------------------------------------------------------------------------*/
3907
88857aca
LV
3908floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
3909 flag zSign, int32_t zExp,
3910 uint64_t zSig0, uint64_t zSig1,
3911 float_status *status)
158142c2 3912{
8f506c70 3913 int8_t shiftCount;
158142c2
FB
3914
3915 if ( zSig0 == 0 ) {
3916 zSig0 = zSig1;
3917 zSig1 = 0;
3918 zExp -= 64;
3919 }
0019d5c3 3920 shiftCount = clz64(zSig0);
158142c2
FB
3921 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3922 zExp -= shiftCount;
ff32e16e
PM
3923 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
3924 zSig0, zSig1, status);
158142c2
FB
3925
3926}
3927
158142c2
FB
3928/*----------------------------------------------------------------------------
3929| Returns the least-significant 64 fraction bits of the quadruple-precision
3930| floating-point value `a'.
3931*----------------------------------------------------------------------------*/
3932
a49db98d 3933static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
3934{
3935
3936 return a.low;
3937
3938}
3939
3940/*----------------------------------------------------------------------------
3941| Returns the most-significant 48 fraction bits of the quadruple-precision
3942| floating-point value `a'.
3943*----------------------------------------------------------------------------*/
3944
a49db98d 3945static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
3946{
3947
3948 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
3949
3950}
3951
3952/*----------------------------------------------------------------------------
3953| Returns the exponent bits of the quadruple-precision floating-point value
3954| `a'.
3955*----------------------------------------------------------------------------*/
3956
f4014512 3957static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
3958{
3959
3960 return ( a.high>>48 ) & 0x7FFF;
3961
3962}
3963
3964/*----------------------------------------------------------------------------
3965| Returns the sign bit of the quadruple-precision floating-point value `a'.
3966*----------------------------------------------------------------------------*/
3967
a49db98d 3968static inline flag extractFloat128Sign( float128 a )
158142c2
FB
3969{
3970
3971 return a.high>>63;
3972
3973}
3974
3975/*----------------------------------------------------------------------------
3976| Normalizes the subnormal quadruple-precision floating-point value
3977| represented by the denormalized significand formed by the concatenation of
3978| `aSig0' and `aSig1'. The normalized exponent is stored at the location
3979| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
3980| significand are stored at the location pointed to by `zSig0Ptr', and the
3981| least significant 64 bits of the normalized significand are stored at the
3982| location pointed to by `zSig1Ptr'.
3983*----------------------------------------------------------------------------*/
3984
static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    /*
     * Shift the subnormal significand aSig0:aSig1 so that its leading 1 ends
     * up 15 bits below the top of the high word, and report the exponent
     * that compensates for the shift.
     */
    int8_t shift;

    if (aSig0 != 0) {
        /* Leading 1 is in the high word: a plain 128-bit left shift. */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /* High word empty: the result is built from the low word alone. */
    shift = clz64(aSig1) - 15;
    if (shift >= 0) {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        /* The low word straddles the boundary; split it across both words. */
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
4015
4016/*----------------------------------------------------------------------------
4017| Packs the sign `zSign', the exponent `zExp', and the significand formed
4018| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4019| floating-point value, returning the result. After being shifted into the
4020| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4021| added together to form the most significant 64 bits of the result. This
4022| means that any integer portion of `zSig0' will be added into the exponent.
4023| Since a properly normalized significand will have an integer portion equal
4024| to 1, the `zExp' input should be 1 less than the desired result exponent
4025| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4026| significand.
4027*----------------------------------------------------------------------------*/
4028
a49db98d 4029static inline float128
f4014512 4030 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
158142c2
FB
4031{
4032 float128 z;
4033
4034 z.low = zSig1;
bb98fe42 4035 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
158142c2
FB
4036 return z;
4037
4038}
4039
4040/*----------------------------------------------------------------------------
4041| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4042| and extended significand formed by the concatenation of `zSig0', `zSig1',
4043| and `zSig2', and returns the proper quadruple-precision floating-point value
4044| corresponding to the abstract input. Ordinarily, the abstract value is
4045| simply rounded and packed into the quadruple-precision format, with the
4046| inexact exception raised if the abstract input cannot be represented
4047| exactly. However, if the abstract value is too large, the overflow and
4048| inexact exceptions are raised and an infinity or maximal finite value is
4049| returned. If the abstract value is too small, the input value is rounded to
4050| a subnormal number, and the underflow and inexact exceptions are raised if
4051| the abstract input cannot be represented exactly as a subnormal quadruple-
4052| precision floating-point number.
4053| The input significand must be normalized or smaller. If the input
4054| significand is not normalized, `zExp' must be 0; in that case, the result
4055| returned is a subnormal number, and it must not require rounding. In the
4056| usual case that the input significand is normalized, `zExp' must be 1 less
4057| than the ``true'' floating-point exponent. The handling of underflow and
4058| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4059*----------------------------------------------------------------------------*/
4060
/* Rounds the significand zSig0:zSig1 (zSig2 holds the bits below the result's
 * least significant bit) according to status->float_rounding_mode, raises the
 * resulting exception flags on `status', and packs the result. */
f4014512 4061static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
4062 uint64_t zSig0, uint64_t zSig1,
4063 uint64_t zSig2, float_status *status)
158142c2 4064{
8f506c70 4065 int8_t roundingMode;
158142c2
FB
4066 flag roundNearestEven, increment, isTiny;
4067
a2f2d288 4068 roundingMode = status->float_rounding_mode;
158142c2 4069 roundNearestEven = ( roundingMode == float_round_nearest_even );
/* Decide whether the significand must be incremented by one unit:
 * nearest modes look at the top bit of zSig2, directed modes at the sign and
 * whether zSig2 is nonzero, round-to-odd at the low bit of zSig1. */
dc355b76
PM
4070 switch (roundingMode) {
4071 case float_round_nearest_even:
f9288a76 4072 case float_round_ties_away:
dc355b76
PM
4073 increment = ((int64_t)zSig2 < 0);
4074 break;
4075 case float_round_to_zero:
4076 increment = 0;
4077 break;
4078 case float_round_up:
4079 increment = !zSign && zSig2;
4080 break;
4081 case float_round_down:
4082 increment = zSign && zSig2;
4083 break;
9ee6f678
BR
4084 case float_round_to_odd:
4085 increment = !(zSig1 & 0x1) && zSig2;
4086 break;
dc355b76
PM
4087 default:
4088 abort();
158142c2 4089 }
/* The unsigned comparison is true both for zExp >= 0x7FFD and for any
 * negative zExp (which wraps to a large unsigned value). */
bb98fe42 4090 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
/* Overflow: exponent too large, or the largest exponent with an all-ones
 * significand that is about to be incremented. */
4091 if ( ( 0x7FFD < zExp )
4092 || ( ( zExp == 0x7FFD )
4093 && eq128(
4094 LIT64( 0x0001FFFFFFFFFFFF ),
4095 LIT64( 0xFFFFFFFFFFFFFFFF ),
4096 zSig0,
4097 zSig1
4098 )
4099 && increment
4100 )
4101 ) {
ff32e16e 4102 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
/* Modes that round toward zero for this sign (and round-to-odd) return the
 * value packed with exponent 0x7FFE and an all-ones fraction; all other
 * modes return the value packed with exponent 0x7FFF and a zero fraction. */
4103 if ( ( roundingMode == float_round_to_zero )
4104 || ( zSign && ( roundingMode == float_round_up ) )
4105 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4106 || (roundingMode == float_round_to_odd)
158142c2
FB
4107 ) {
4108 return
4109 packFloat128(
4110 zSign,
4111 0x7FFE,
4112 LIT64( 0x0000FFFFFFFFFFFF ),
4113 LIT64( 0xFFFFFFFFFFFFFFFF )
4114 );
4115 }
4116 return packFloat128( zSign, 0x7FFF, 0, 0 );
4117 }
/* Negative exponent: the result is subnormal (or flushed to zero). */
4118 if ( zExp < 0 ) {
a2f2d288 4119 if (status->flush_to_zero) {
ff32e16e 4120 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4121 return packFloat128(zSign, 0, 0, 0);
4122 }
/* isTiny must be computed from the unshifted significand and the original
 * `increment', i.e. before the right shift below. */
158142c2 4123 isTiny =
a2f2d288
PM
4124 (status->float_detect_tininess
4125 == float_tininess_before_rounding)
158142c2
FB
4126 || ( zExp < -1 )
4127 || ! increment
4128 || lt128(
4129 zSig0,
4130 zSig1,
4131 LIT64( 0x0001FFFFFFFFFFFF ),
4132 LIT64( 0xFFFFFFFFFFFFFFFF )
4133 );
/* Denormalize: shift right by -zExp bits via shift128ExtraRightJamming, with
 * zSig2 receiving the extra low-order bits. */
4134 shift128ExtraRightJamming(
4135 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4136 zExp = 0;
ff32e16e
PM
4137 if (isTiny && zSig2) {
4138 float_raise(float_flag_underflow, status);
4139 }
/* Re-derive `increment' from the shifted significand and extra bits. */
dc355b76
PM
4140 switch (roundingMode) {
4141 case float_round_nearest_even:
f9288a76 4142 case float_round_ties_away:
dc355b76
PM
4143 increment = ((int64_t)zSig2 < 0);
4144 break;
4145 case float_round_to_zero:
4146 increment = 0;
4147 break;
4148 case float_round_up:
4149 increment = !zSign && zSig2;
4150 break;
4151 case float_round_down:
4152 increment = zSign && zSig2;
4153 break;
9ee6f678
BR
4154 case float_round_to_odd:
4155 increment = !(zSig1 & 0x1) && zSig2;
4156 break;
dc355b76
PM
4157 default:
4158 abort();
158142c2
FB
4159 }
4160 }
4161 }
/* Any nonzero extra bits mean the result is inexact. */
a2f2d288
PM
4162 if (zSig2) {
4163 status->float_exception_flags |= float_flag_inexact;
4164 }
158142c2
FB
4165 if ( increment ) {
4166 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
/* Under round-to-nearest-even, when zSig2 doubled is zero (the top bit was
 * the only bit set, i.e. an exact tie), clear the low bit of zSig1. */
4167 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
4168 }
4169 else {
/* A zero significand is packed with a zero exponent. */
4170 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4171 }
4172 return packFloat128( zSign, zExp, zSig0, zSig1 );
4173
4174}
4175
4176/*----------------------------------------------------------------------------
4177| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4178| and significand formed by the concatenation of `zSig0' and `zSig1', and
4179| returns the proper quadruple-precision floating-point value corresponding
4180| to the abstract input. This routine is just like `roundAndPackFloat128'
4181| except that the input significand has fewer bits and does not have to be
4182| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4183| point exponent.
4184*----------------------------------------------------------------------------*/
4185
f4014512 4186static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
4187 uint64_t zSig0, uint64_t zSig1,
4188 float_status *status)
158142c2 4189{
8f506c70 4190 int8_t shiftCount;
bb98fe42 4191 uint64_t zSig2;
158142c2
FB
4192
4193 if ( zSig0 == 0 ) {
4194 zSig0 = zSig1;
4195 zSig1 = 0;
4196 zExp -= 64;
4197 }
0019d5c3 4198 shiftCount = clz64(zSig0) - 15;
158142c2
FB
4199 if ( 0 <= shiftCount ) {
4200 zSig2 = 0;
4201 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4202 }
4203 else {
4204 shift128ExtraRightJamming(
4205 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4206 }
4207 zExp -= shiftCount;
ff32e16e 4208 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
4209
4210}
4211
158142c2 4212
158142c2
FB
4213/*----------------------------------------------------------------------------
4214| Returns the result of converting the 32-bit two's complement integer `a'
4215| to the extended double-precision floating-point format. The conversion
4216| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4217| Arithmetic.
4218*----------------------------------------------------------------------------*/
4219
e5a41ffa 4220floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2
FB
4221{
4222 flag zSign;
3a87d009 4223 uint32_t absA;
8f506c70 4224 int8_t shiftCount;
bb98fe42 4225 uint64_t zSig;
158142c2
FB
4226
4227 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4228 zSign = ( a < 0 );
4229 absA = zSign ? - a : a;
0019d5c3 4230 shiftCount = clz32(absA) + 32;
158142c2
FB
4231 zSig = absA;
4232 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4233
4234}
4235
158142c2
FB
4236/*----------------------------------------------------------------------------
4237| Returns the result of converting the 32-bit two's complement integer `a' to
4238| the quadruple-precision floating-point format. The conversion is performed
4239| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4240*----------------------------------------------------------------------------*/
4241
e5a41ffa 4242float128 int32_to_float128(int32_t a, float_status *status)
158142c2
FB
4243{
4244 flag zSign;
3a87d009 4245 uint32_t absA;
8f506c70 4246 int8_t shiftCount;
bb98fe42 4247 uint64_t zSig0;
158142c2
FB
4248
4249 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4250 zSign = ( a < 0 );
4251 absA = zSign ? - a : a;
0019d5c3 4252 shiftCount = clz32(absA) + 17;
158142c2
FB
4253 zSig0 = absA;
4254 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
4255
4256}
4257
158142c2
FB
4258/*----------------------------------------------------------------------------
4259| Returns the result of converting the 64-bit two's complement integer `a'
4260| to the extended double-precision floating-point format. The conversion
4261| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4262| Arithmetic.
4263*----------------------------------------------------------------------------*/
4264
e5a41ffa 4265floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2
FB
4266{
4267 flag zSign;
182f42fd 4268 uint64_t absA;
8f506c70 4269 int8_t shiftCount;
158142c2
FB
4270
4271 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4272 zSign = ( a < 0 );
4273 absA = zSign ? - a : a;
0019d5c3 4274 shiftCount = clz64(absA);
158142c2
FB
4275 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4276
4277}
4278
158142c2
FB
4279/*----------------------------------------------------------------------------
4280| Returns the result of converting the 64-bit two's complement integer `a' to
4281| the quadruple-precision floating-point format. The conversion is performed
4282| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4283*----------------------------------------------------------------------------*/
4284
e5a41ffa 4285float128 int64_to_float128(int64_t a, float_status *status)
158142c2
FB
4286{
4287 flag zSign;
182f42fd 4288 uint64_t absA;
8f506c70 4289 int8_t shiftCount;
f4014512 4290 int32_t zExp;
bb98fe42 4291 uint64_t zSig0, zSig1;
158142c2
FB
4292
4293 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4294 zSign = ( a < 0 );
4295 absA = zSign ? - a : a;
0019d5c3 4296 shiftCount = clz64(absA) + 49;
158142c2
FB
4297 zExp = 0x406E - shiftCount;
4298 if ( 64 <= shiftCount ) {
4299 zSig1 = 0;
4300 zSig0 = absA;
4301 shiftCount -= 64;
4302 }
4303 else {
4304 zSig1 = absA;
4305 zSig0 = 0;
4306 }
4307 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4308 return packFloat128( zSign, zExp, zSig0, zSig1 );
4309
4310}
4311
6bb8e0f1
PM
4312/*----------------------------------------------------------------------------
4313| Returns the result of converting the 64-bit unsigned integer `a'
4314| to the quadruple-precision floating-point format. The conversion is performed
4315| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4316*----------------------------------------------------------------------------*/
4317
e5a41ffa 4318float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
4319{
4320 if (a == 0) {
4321 return float128_zero;
4322 }
6603d506 4323 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
4324}
4325
158142c2
FB
4326/*----------------------------------------------------------------------------
4327| Returns the result of converting the single-precision floating-point value
4328| `a' to the extended double-precision floating-point format. The conversion
4329| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4330| Arithmetic.
4331*----------------------------------------------------------------------------*/
4332
e5a41ffa 4333floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2
FB
4334{
4335 flag aSign;
0c48262d 4336 int aExp;
bb98fe42 4337 uint32_t aSig;
158142c2 4338
ff32e16e 4339 a = float32_squash_input_denormal(a, status);
158142c2
FB
4340 aSig = extractFloat32Frac( a );
4341 aExp = extractFloat32Exp( a );
4342 aSign = extractFloat32Sign( a );
4343 if ( aExp == 0xFF ) {
ff32e16e
PM
4344 if (aSig) {
4345 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
4346 }
0f605c88
LV
4347 return packFloatx80(aSign,
4348 floatx80_infinity_high,
4349 floatx80_infinity_low);
158142c2
FB
4350 }
4351 if ( aExp == 0 ) {
4352 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4353 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4354 }
4355 aSig |= 0x00800000;
bb98fe42 4356 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
4357
4358}
4359
158142c2
FB
4360/*----------------------------------------------------------------------------
4361| Returns the result of converting the single-precision floating-point value
4362| `a' to the quadruple-precision floating-point format. The conversion is
4363| performed according to the IEC/IEEE Standard for Binary Floating-Point
4364| Arithmetic.
4365*----------------------------------------------------------------------------*/
4366
e5a41ffa 4367float128 float32_to_float128(float32 a, float_status *status)
158142c2
FB
4368{
4369 flag aSign;
0c48262d 4370 int aExp;
bb98fe42 4371 uint32_t aSig;
158142c2 4372
ff32e16e 4373 a = float32_squash_input_denormal(a, status);
158142c2
FB
4374 aSig = extractFloat32Frac( a );
4375 aExp = extractFloat32Exp( a );
4376 aSign = extractFloat32Sign( a );
4377 if ( aExp == 0xFF ) {
ff32e16e
PM
4378 if (aSig) {
4379 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
4380 }
158142c2
FB
4381 return packFloat128( aSign, 0x7FFF, 0, 0 );
4382 }
4383 if ( aExp == 0 ) {
4384 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4385 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4386 --aExp;
4387 }
bb98fe42 4388 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
4389
4390}
4391
158142c2
FB
4392/*----------------------------------------------------------------------------
4393| Returns the remainder of the single-precision floating-point value `a'
4394| with respect to the corresponding value `b'. The operation is performed
4395| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4396*----------------------------------------------------------------------------*/
4397
/* float32_rem: remainder of `a' with respect to `b'.  Special operands are
 * handled first; the remainder of the significands is then computed in
 * fixed point and packed with b's exponent. */
e5a41ffa 4398float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 4399{
ed086f3d 4400 flag aSign, zSign;
0c48262d 4401 int aExp, bExp, expDiff;
bb98fe42
AF
4402 uint32_t aSig, bSig;
4403 uint32_t q;
4404 uint64_t aSig64, bSig64, q64;
4405 uint32_t alternateASig;
4406 int32_t sigMean;
ff32e16e
PM
4407 a = float32_squash_input_denormal(a, status);
4408 b = float32_squash_input_denormal(b, status);
158142c2
FB
4409
4410 aSig = extractFloat32Frac( a );
4411 aExp = extractFloat32Exp( a );
4412 aSign = extractFloat32Sign( a );
4413 bSig = extractFloat32Frac( b );
4414 bExp = extractFloat32Exp( b );
158142c2
FB
/* a is NaN or infinity: propagate if either operand is a NaN, otherwise
 * (a infinite) raise invalid and return the default NaN. */
4415 if ( aExp == 0xFF ) {
4416 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 4417 return propagateFloat32NaN(a, b, status);
158142c2 4418 }
ff32e16e 4419 float_raise(float_flag_invalid, status);
af39bc8c 4420 return float32_default_nan(status);
158142c2
FB
4421 }
/* b is NaN (propagate) or infinity (a is returned unchanged). */
4422 if ( bExp == 0xFF ) {
ff32e16e
PM
4423 if (bSig) {
4424 return propagateFloat32NaN(a, b, status);
4425 }
158142c2
FB
4426 return a;
4427 }
/* b is zero (invalid, default NaN) or subnormal (normalize it). */
4428 if ( bExp == 0 ) {
4429 if ( bSig == 0 ) {
ff32e16e 4430 float_raise(float_flag_invalid, status);
af39bc8c 4431 return float32_default_nan(status);
158142c2
FB
4432 }
4433 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
4434 }
/* a is zero (returned unchanged) or subnormal (normalize it). */
4435 if ( aExp == 0 ) {
4436 if ( aSig == 0 ) return a;
4437 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4438 }
4439 expDiff = aExp - bExp;
/* Make the implicit integer bit explicit in both significands. */
4440 aSig |= 0x00800000;
4441 bSig |= 0x00800000;
/* Small exponent gap: the reduction fits in 32/64-bit integer arithmetic. */
4442 if ( expDiff < 32 ) {
4443 aSig <<= 8;
4444 bSig <<= 8;
4445 if ( expDiff < 0 ) {
/* a's exponent is more than one below b's: a is returned unchanged. */
4446 if ( expDiff < -1 ) return a;
4447 aSig >>= 1;
4448 }
4449 q = ( bSig <= aSig );
4450 if ( q ) aSig -= bSig;
4451 if ( 0 < expDiff ) {
bb98fe42 4452 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
4453 q >>= 32 - expDiff;
4454 bSig >>= 2;
4455 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4456 }
4457 else {
4458 aSig >>= 2;
4459 bSig >>= 2;
4460 }
4461 }
/* Large exponent gap: reduce iteratively, lowering expDiff by 62 per pass.
 * The quotient estimate is lowered by 2 (floored at 0) before use. */
4462 else {
4463 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
4464 aSig64 = ( (uint64_t) aSig )<<40;
4465 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
4466 expDiff -= 64;
4467 while ( 0 < expDiff ) {
4468 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
4469 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
4470 aSig64 = - ( ( bSig * q64 )<<38 );
4471 expDiff -= 62;
4472 }
4473 expDiff += 64;
4474 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
4475 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
4476 q = q64>>( 64 - expDiff );
4477 bSig <<= 6;
4478 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
4479 }
/* Keep subtracting bSig (counting in q) until aSig, viewed as signed, goes
 * negative; alternateASig holds the value from just before the last
 * subtraction. */
4480 do {
4481 alternateASig = aSig;
4482 ++q;
4483 aSig -= bSig;
bb98fe42 4484 } while ( 0 <= (int32_t) aSig );
158142c2
FB
/* Choose between the two candidates by the sign of their sum; when the sum
 * is zero (equal magnitudes) the low bit of q breaks the tie. */
4485 sigMean = aSig + alternateASig;
4486 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4487 aSig = alternateASig;
4488 }
/* Convert the signed remainder to sign and magnitude; the result sign is
 * a's sign flipped when the chosen remainder is negative. */
bb98fe42 4489 zSign = ( (int32_t) aSig < 0 );
158142c2 4490 if ( zSign ) aSig = - aSig;
ff32e16e 4491 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
4492}
4493
369be8f6 4494
158142c2 4495
8229c991
AJ
4496/*----------------------------------------------------------------------------
4497| Returns the binary exponential of the single-precision floating-point value
4498| `a'. The operation is performed according to the IEC/IEEE Standard for
4499| Binary Floating-Point Arithmetic.
4500|
4501| Uses the following identities:
4502|
4503| 1. -------------------------------------------------------------------------
4504| x x*ln(2)
4505| 2 = e
4506|
4507| 2. -------------------------------------------------------------------------
4508| 2 3 4 5 n
4509| x x x x x x x
4510| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
4511| 1! 2! 3! 4! 5! n!
4512*----------------------------------------------------------------------------*/
4513
/* Taylor-series coefficients for e^x as float64 bit patterns: the entry
 * tagged n holds 1/n! (e.g. 0x3ff0... = 1, 0x3fe0... = 1/2, 0x3fc5... = 1/6).
 * float32_exp2 multiplies entry i by x^(i+1). */
4514static const float64 float32_exp2_coefficients[15] =
4515{
d5138cf4
PM
4516 const_float64( 0x3ff0000000000000ll ), /* 1 */
4517 const_float64( 0x3fe0000000000000ll ), /* 2 */
4518 const_float64( 0x3fc5555555555555ll ), /* 3 */
4519 const_float64( 0x3fa5555555555555ll ), /* 4 */
4520 const_float64( 0x3f81111111111111ll ), /* 5 */
4521 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
4522 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
4523 const_float64( 0x3efa01a01a01a01all ), /* 8 */
4524 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
4525 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
4526 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
4527 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
4528 const_float64( 0x3de6124613a86d09ll ), /* 13 */
4529 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
4530 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
4531};
4532
e5a41ffa 4533float32 float32_exp2(float32 a, float_status *status)
8229c991
AJ
4534{
4535 flag aSign;
0c48262d 4536 int aExp;
bb98fe42 4537 uint32_t aSig;
8229c991
AJ
4538 float64 r, x, xn;
4539 int i;
ff32e16e 4540 a = float32_squash_input_denormal(a, status);
8229c991
AJ
4541
4542 aSig = extractFloat32Frac( a );
4543 aExp = extractFloat32Exp( a );
4544 aSign = extractFloat32Sign( a );
4545
4546 if ( aExp == 0xFF) {
ff32e16e
PM
4547 if (aSig) {
4548 return propagateFloat32NaN(a, float32_zero, status);
4549 }
8229c991
AJ
4550 return (aSign) ? float32_zero : a;
4551 }
4552 if (aExp == 0) {
4553 if (aSig == 0) return float32_one;
4554 }
4555
ff32e16e 4556 float_raise(float_flag_inexact, status);
8229c991
AJ
4557
4558 /* ******************************* */
4559 /* using float64 for approximation */
4560 /* ******************************* */
ff32e16e
PM
4561 x = float32_to_float64(a, status);
4562 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
4563
4564 xn = x;
4565 r = float64_one;
4566 for (i = 0 ; i < 15 ; i++) {
4567 float64 f;
4568
ff32e16e
PM
4569 f = float64_mul(xn, float32_exp2_coefficients[i], status);
4570 r = float64_add(r, f, status);
8229c991 4571
ff32e16e 4572 xn = float64_mul(xn, x, status);
8229c991
AJ
4573 }
4574
4575 return float64_to_float32(r, status);
4576}
4577
374dfc33
AJ
4578/*----------------------------------------------------------------------------
4579| Returns the binary log of the single-precision floating-point value `a'.
4580| The operation is performed according to the IEC/IEEE Standard for Binary
4581| Floating-Point Arithmetic.
4582*----------------------------------------------------------------------------*/
e5a41ffa 4583float32 float32_log2(float32 a, float_status *status)
374dfc33
AJ
4584{
4585 flag aSign, zSign;
0c48262d 4586 int aExp;
bb98fe42 4587 uint32_t aSig, zSig, i;
374dfc33 4588
ff32e16e 4589 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
4590 aSig = extractFloat32Frac( a );
4591 aExp = extractFloat32Exp( a );
4592 aSign = extractFloat32Sign( a );
4593
4594 if ( aExp == 0 ) {
4595 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
4596 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4597 }
4598 if ( aSign ) {
ff32e16e 4599 float_raise(float_flag_invalid, status);
af39bc8c 4600 return float32_default_nan(status);
374dfc33
AJ
4601 }
4602 if ( aExp == 0xFF ) {
ff32e16e
PM
4603 if (aSig) {
4604 return propagateFloat32NaN(a, float32_zero, status);
4605 }
374dfc33
AJ
4606 return a;
4607 }
4608
4609 aExp -= 0x7F;
4610 aSig |= 0x00800000;
4611 zSign = aExp < 0;
4612 zSig = aExp << 23;
4613
4614 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 4615 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
4616 if ( aSig & 0x01000000 ) {
4617 aSig >>= 1;
4618 zSig |= i;
4619 }
4620 }
4621
4622 if ( zSign )
4623 zSig = -zSig;
4624
ff32e16e 4625 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
4626}
4627
158142c2
FB
4628/*----------------------------------------------------------------------------
4629| Returns 1 if the single-precision floating-point value `a' is equal to
b689362d
AJ
4630| the corresponding value `b', and 0 otherwise. The invalid exception is
4631| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
4632| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4633*----------------------------------------------------------------------------*/
4634
e5a41ffa 4635int float32_eq(float32 a, float32 b, float_status *status)
158142c2 4636{
b689362d 4637 uint32_t av, bv;
ff32e16e
PM
4638 a = float32_squash_input_denormal(a, status);
4639 b = float32_squash_input_denormal(b, status);
158142c2
FB
4640
4641 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4642 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4643 ) {
ff32e16e 4644 float_raise(float_flag_invalid, status);
158142c2
FB
4645 return 0;
4646 }
b689362d
AJ
4647 av = float32_val(a);
4648 bv = float32_val(b);
4649 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4650}
4651
4652/*----------------------------------------------------------------------------
4653| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
4654| or equal to the corresponding value `b', and 0 otherwise. The invalid
4655| exception is raised if either operand is a NaN. The comparison is performed
4656| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4657*----------------------------------------------------------------------------*/
4658
e5a41ffa 4659int float32_le(float32 a, float32 b, float_status *status)
158142c2
FB
4660{
4661 flag aSign, bSign;
bb98fe42 4662 uint32_t av, bv;
ff32e16e
PM
4663 a = float32_squash_input_denormal(a, status);
4664 b = float32_squash_input_denormal(b, status);
158142c2
FB
4665
4666 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4667 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4668 ) {
ff32e16e 4669 float_raise(float_flag_invalid, status);
158142c2
FB
4670 return 0;
4671 }
4672 aSign = extractFloat32Sign( a );
4673 bSign = extractFloat32Sign( b );
f090c9d4
PB
4674 av = float32_val(a);
4675 bv = float32_val(b);
bb98fe42 4676 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4677 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4678
4679}
4680
4681/*----------------------------------------------------------------------------
4682| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
4683| the corresponding value `b', and 0 otherwise. The invalid exception is
4684| raised if either operand is a NaN. The comparison is performed according
4685| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4686*----------------------------------------------------------------------------*/
4687
e5a41ffa 4688int float32_lt(float32 a, float32 b, float_status *status)
158142c2
FB
4689{
4690 flag aSign, bSign;
bb98fe42 4691 uint32_t av, bv;
ff32e16e
PM
4692 a = float32_squash_input_denormal(a, status);
4693 b = float32_squash_input_denormal(b, status);
158142c2
FB
4694
4695 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4696 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4697 ) {
ff32e16e 4698 float_raise(float_flag_invalid, status);
158142c2
FB
4699 return 0;
4700 }
4701 aSign = extractFloat32Sign( a );
4702 bSign = extractFloat32Sign( b );
f090c9d4
PB
4703 av = float32_val(a);
4704 bv = float32_val(b);
bb98fe42 4705 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4706 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4707
4708}
4709
67b7861d
AJ
4710/*----------------------------------------------------------------------------
4711| Returns 1 if the single-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4712| be compared, and 0 otherwise. The invalid exception is raised if either
4713| operand is a NaN. The comparison is performed according to the IEC/IEEE
4714| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4715*----------------------------------------------------------------------------*/
4716
e5a41ffa 4717int float32_unordered(float32 a, float32 b, float_status *status)
67b7861d 4718{
ff32e16e
PM
4719 a = float32_squash_input_denormal(a, status);
4720 b = float32_squash_input_denormal(b, status);
67b7861d
AJ
4721
4722 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4723 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4724 ) {
ff32e16e 4725 float_raise(float_flag_invalid, status);
67b7861d
AJ
4726 return 1;
4727 }
4728 return 0;
4729}
b689362d 4730
158142c2
FB
4731/*----------------------------------------------------------------------------
4732| Returns 1 if the single-precision floating-point value `a' is equal to
f5a64251
AJ
4733| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4734| exception. The comparison is performed according to the IEC/IEEE Standard
4735| for Binary Floating-Point Arithmetic.
158142c2
FB
4736*----------------------------------------------------------------------------*/
4737
e5a41ffa 4738int float32_eq_quiet(float32 a, float32 b, float_status *status)
158142c2 4739{
ff32e16e
PM
4740 a = float32_squash_input_denormal(a, status);
4741 b = float32_squash_input_denormal(b, status);
158142c2
FB
4742
4743 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4744 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4745 ) {
af39bc8c
AM
4746 if (float32_is_signaling_nan(a, status)
4747 || float32_is_signaling_nan(b, status)) {
ff32e16e 4748 float_raise(float_flag_invalid, status);
b689362d 4749 }
158142c2
FB
4750 return 0;
4751 }
b689362d
AJ
4752 return ( float32_val(a) == float32_val(b) ) ||
4753 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
158142c2
FB
4754}
4755
4756/*----------------------------------------------------------------------------
4757| Returns 1 if the single-precision floating-point value `a' is less than or
4758| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4759| cause an exception. Otherwise, the comparison is performed according to the
4760| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4761*----------------------------------------------------------------------------*/
4762
e5a41ffa 4763int float32_le_quiet(float32 a, float32 b, float_status *status)
158142c2
FB
4764{
4765 flag aSign, bSign;
bb98fe42 4766 uint32_t av, bv;
ff32e16e
PM
4767 a = float32_squash_input_denormal(a, status);
4768 b = float32_squash_input_denormal(b, status);
158142c2
FB
4769
4770 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4771 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4772 ) {
af39bc8c
AM
4773 if (float32_is_signaling_nan(a, status)
4774 || float32_is_signaling_nan(b, status)) {
ff32e16e 4775 float_raise(float_flag_invalid, status);
158142c2
FB
4776 }
4777 return 0;
4778 }
4779 aSign = extractFloat32Sign( a );
4780 bSign = extractFloat32Sign( b );
f090c9d4
PB
4781 av = float32_val(a);
4782 bv = float32_val(b);
bb98fe42 4783 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4784 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4785
4786}
4787
4788/*----------------------------------------------------------------------------
4789| Returns 1 if the single-precision floating-point value `a' is less than
4790| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4791| exception. Otherwise, the comparison is performed according to the IEC/IEEE
ab52f973 4792| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4793*----------------------------------------------------------------------------*/
4794
ab52f973 4795int float32_lt_quiet(float32 a, float32 b, float_status *status)
158142c2 4796{
ab52f973
AB
4797 flag aSign, bSign;
4798 uint32_t av, bv;
4799 a = float32_squash_input_denormal(a, status);
4800 b = float32_squash_input_denormal(b, status);
158142c2 4801
ab52f973
AB
4802 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4803 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4804 ) {
4805 if (float32_is_signaling_nan(a, status)
4806 || float32_is_signaling_nan(b, status)) {
ff32e16e 4807 float_raise(float_flag_invalid, status);
158142c2 4808 }
ab52f973 4809 return 0;
158142c2 4810 }
ab52f973
AB
4811 aSign = extractFloat32Sign( a );
4812 bSign = extractFloat32Sign( b );
4813 av = float32_val(a);
4814 bv = float32_val(b);
4815 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
4816 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4817
4818}
4819
4820/*----------------------------------------------------------------------------
ab52f973
AB
4821| Returns 1 if the single-precision floating-point values `a' and `b' cannot
4822| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4823| comparison is performed according to the IEC/IEEE Standard for Binary
4824| Floating-Point Arithmetic.
158142c2
FB
4825*----------------------------------------------------------------------------*/
4826
ab52f973 4827int float32_unordered_quiet(float32 a, float32 b, float_status *status)
158142c2 4828{
ab52f973
AB
4829 a = float32_squash_input_denormal(a, status);
4830 b = float32_squash_input_denormal(b, status);
158142c2 4831
ab52f973
AB
4832 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4833 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4834 ) {
4835 if (float32_is_signaling_nan(a, status)
4836 || float32_is_signaling_nan(b, status)) {
4837 float_raise(float_flag_invalid, status);
158142c2 4838 }
ab52f973 4839 return 1;
158142c2 4840 }
ab52f973 4841 return 0;
158142c2
FB
4842}
4843
210cbd49
AB
4844/*----------------------------------------------------------------------------
4845| If `a' is denormal and we are in flush-to-zero mode then set the
4846| input-denormal exception and return zero. Otherwise just return the value.
4847*----------------------------------------------------------------------------*/
4848float16 float16_squash_input_denormal(float16 a, float_status *status)
4849{
4850 if (status->flush_inputs_to_zero) {
4851 if (extractFloat16Exp(a) == 0 && extractFloat16Frac(a) != 0) {
4852 float_raise(float_flag_input_denormal, status);
4853 return make_float16(float16_val(a) & 0x8000);
4854 }
4855 }
4856 return a;
4857}
4858
158142c2
FB
4859/*----------------------------------------------------------------------------
4860| Returns the result of converting the double-precision floating-point value
4861| `a' to the extended double-precision floating-point format. The conversion
4862| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4863| Arithmetic.
4864*----------------------------------------------------------------------------*/
4865
e5a41ffa 4866floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2
FB
4867{
4868 flag aSign;
0c48262d 4869 int aExp;
bb98fe42 4870 uint64_t aSig;
158142c2 4871
ff32e16e 4872 a = float64_squash_input_denormal(a, status);
158142c2
FB
4873 aSig = extractFloat64Frac( a );
4874 aExp = extractFloat64Exp( a );
4875 aSign = extractFloat64Sign( a );
4876 if ( aExp == 0x7FF ) {
ff32e16e
PM
4877 if (aSig) {
4878 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
4879 }
0f605c88
LV
4880 return packFloatx80(aSign,
4881 floatx80_infinity_high,
4882 floatx80_infinity_low);
158142c2
FB
4883 }
4884 if ( aExp == 0 ) {
4885 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4886 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4887 }
4888 return
4889 packFloatx80(
4890 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
4891
4892}
4893
158142c2
FB
4894/*----------------------------------------------------------------------------
4895| Returns the result of converting the double-precision floating-point value
4896| `a' to the quadruple-precision floating-point format. The conversion is
4897| performed according to the IEC/IEEE Standard for Binary Floating-Point
4898| Arithmetic.
4899*----------------------------------------------------------------------------*/
4900
e5a41ffa 4901float128 float64_to_float128(float64 a, float_status *status)
158142c2
FB
4902{
4903 flag aSign;
0c48262d 4904 int aExp;
bb98fe42 4905 uint64_t aSig, zSig0, zSig1;
158142c2 4906
ff32e16e 4907 a = float64_squash_input_denormal(a, status);
158142c2
FB
4908 aSig = extractFloat64Frac( a );
4909 aExp = extractFloat64Exp( a );
4910 aSign = extractFloat64Sign( a );
4911 if ( aExp == 0x7FF ) {
ff32e16e
PM
4912 if (aSig) {
4913 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
4914 }
158142c2
FB
4915 return packFloat128( aSign, 0x7FFF, 0, 0 );
4916 }
4917 if ( aExp == 0 ) {
4918 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4919 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4920 --aExp;
4921 }
4922 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
4923 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
4924
4925}
4926
158142c2
FB
4927
4928/*----------------------------------------------------------------------------
4929| Returns the remainder of the double-precision floating-point value `a'
4930| with respect to the corresponding value `b'. The operation is performed
4931| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4932*----------------------------------------------------------------------------*/
4933
e5a41ffa 4934float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 4935{
ed086f3d 4936 flag aSign, zSign;
0c48262d 4937 int aExp, bExp, expDiff;
bb98fe42
AF
4938 uint64_t aSig, bSig;
4939 uint64_t q, alternateASig;
4940 int64_t sigMean;
158142c2 4941
ff32e16e
PM
4942 a = float64_squash_input_denormal(a, status);
4943 b = float64_squash_input_denormal(b, status);
158142c2
FB
4944 aSig = extractFloat64Frac( a );
4945 aExp = extractFloat64Exp( a );
4946 aSign = extractFloat64Sign( a );
4947 bSig = extractFloat64Frac( b );
4948 bExp = extractFloat64Exp( b );
158142c2
FB
4949 if ( aExp == 0x7FF ) {
4950 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 4951 return propagateFloat64NaN(a, b, status);
158142c2 4952 }
ff32e16e 4953 float_raise(float_flag_invalid, status);
af39bc8c 4954 return float64_default_nan(status);
158142c2
FB
4955 }
4956 if ( bExp == 0x7FF ) {
ff32e16e
PM
4957 if (bSig) {
4958 return propagateFloat64NaN(a, b, status);
4959 }
158142c2
FB
4960 return a;
4961 }
4962 if ( bExp == 0 ) {
4963 if ( bSig == 0 ) {
ff32e16e 4964 float_raise(float_flag_invalid, status);
af39bc8c 4965 return float64_default_nan(status);
158142c2
FB
4966 }
4967 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4968 }
4969 if ( aExp == 0 ) {
4970 if ( aSig == 0 ) return a;
4971 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4972 }
4973 expDiff = aExp - bExp;
4974 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
4975 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4976 if ( expDiff < 0 ) {
4977 if ( expDiff < -1 ) return a;
4978 aSig >>= 1;
4979 }
4980 q = ( bSig <= aSig );
4981 if ( q ) aSig -= bSig;
4982 expDiff -= 64;
4983 while ( 0 < expDiff ) {
4984 q = estimateDiv128To64( aSig, 0, bSig );
4985 q = ( 2 < q ) ? q - 2 : 0;
4986 aSig = - ( ( bSig>>2 ) * q );
4987 expDiff -= 62;
4988 }
4989 expDiff += 64;
4990 if ( 0 < expDiff ) {
4991 q = estimateDiv128To64( aSig, 0, bSig );
4992 q = ( 2 < q ) ? q - 2 : 0;
4993 q >>= 64 - expDiff;
4994 bSig >>= 2;
4995 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4996 }
4997 else {
4998 aSig >>= 2;
4999 bSig >>= 2;
5000 }
5001 do {
5002 alternateASig = aSig;
5003 ++q;
5004 aSig -= bSig;
bb98fe42 5005 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5006 sigMean = aSig + alternateASig;
5007 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5008 aSig = alternateASig;
5009 }
bb98fe42 5010 zSign = ( (int64_t) aSig < 0 );
158142c2 5011 if ( zSign ) aSig = - aSig;
ff32e16e 5012 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5013
5014}
5015
374dfc33
AJ
5016/*----------------------------------------------------------------------------
5017| Returns the binary log of the double-precision floating-point value `a'.
5018| The operation is performed according to the IEC/IEEE Standard for Binary
5019| Floating-Point Arithmetic.
5020*----------------------------------------------------------------------------*/
e5a41ffa 5021float64 float64_log2(float64 a, float_status *status)
374dfc33
AJ
5022{
5023 flag aSign, zSign;
0c48262d 5024 int aExp;
bb98fe42 5025 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5026 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5027
5028 aSig = extractFloat64Frac( a );
5029 aExp = extractFloat64Exp( a );
5030 aSign = extractFloat64Sign( a );
5031
5032 if ( aExp == 0 ) {
5033 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5034 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5035 }
5036 if ( aSign ) {
ff32e16e 5037 float_raise(float_flag_invalid, status);
af39bc8c 5038 return float64_default_nan(status);
374dfc33
AJ
5039 }
5040 if ( aExp == 0x7FF ) {
ff32e16e
PM
5041 if (aSig) {
5042 return propagateFloat64NaN(a, float64_zero, status);
5043 }
374dfc33
AJ
5044 return a;
5045 }
5046
5047 aExp -= 0x3FF;
5048 aSig |= LIT64( 0x0010000000000000 );
5049 zSign = aExp < 0;
bb98fe42 5050 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5051 for (i = 1LL << 51; i > 0; i >>= 1) {
5052 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5053 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
5054 if ( aSig & LIT64( 0x0020000000000000 ) ) {
5055 aSig >>= 1;
5056 zSig |= i;
5057 }
5058 }
5059
5060 if ( zSign )
5061 zSig = -zSig;
ff32e16e 5062 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5063}
5064
158142c2
FB
5065/*----------------------------------------------------------------------------
5066| Returns 1 if the double-precision floating-point value `a' is equal to the
b689362d
AJ
5067| corresponding value `b', and 0 otherwise. The invalid exception is raised
5068| if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
5069| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5070*----------------------------------------------------------------------------*/
5071
e5a41ffa 5072int float64_eq(float64 a, float64 b, float_status *status)
158142c2 5073{
bb98fe42 5074 uint64_t av, bv;
ff32e16e
PM
5075 a = float64_squash_input_denormal(a, status);
5076 b = float64_squash_input_denormal(b, status);
158142c2
FB
5077
5078 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5079 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5080 ) {
ff32e16e 5081 float_raise(float_flag_invalid, status);
158142c2
FB
5082 return 0;
5083 }
f090c9d4 5084 av = float64_val(a);
a1b91bb4 5085 bv = float64_val(b);
bb98fe42 5086 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
5087
5088}
5089
5090/*----------------------------------------------------------------------------
5091| Returns 1 if the double-precision floating-point value `a' is less than or
f5a64251
AJ
5092| equal to the corresponding value `b', and 0 otherwise. The invalid
5093| exception is raised if either operand is a NaN. The comparison is performed
5094| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5095*----------------------------------------------------------------------------*/
5096
e5a41ffa 5097int float64_le(float64 a, float64 b, float_status *status)
158142c2
FB
5098{
5099 flag aSign, bSign;
bb98fe42 5100 uint64_t av, bv;
ff32e16e
PM
5101 a = float64_squash_input_denormal(a, status);
5102 b = float64_squash_input_denormal(b, status);
158142c2
FB
5103
5104 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5105 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5106 ) {
ff32e16e 5107 float_raise(float_flag_invalid, status);
158142c2
FB
5108 return 0;
5109 }
5110 aSign = extractFloat64Sign( a );
5111 bSign = extractFloat64Sign( b );
f090c9d4 5112 av = float64_val(a);
a1b91bb4 5113 bv = float64_val(b);
bb98fe42 5114 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 5115 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
5116
5117}
5118
5119/*----------------------------------------------------------------------------
5120| Returns 1 if the double-precision floating-point value `a' is less than
f5a64251
AJ
5121| the corresponding value `b', and 0 otherwise. The invalid exception is
5122| raised if either operand is a NaN. The comparison is performed according
5123| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5124*----------------------------------------------------------------------------*/
5125
e5a41ffa 5126int float64_lt(float64 a, float64 b, float_status *status)
158142c2
FB
5127{
5128 flag aSign, bSign;
bb98fe42 5129 uint64_t av, bv;
158142c2 5130
ff32e16e
PM
5131 a = float64_squash_input_denormal(a, status);
5132 b = float64_squash_input_denormal(b, status);
158142c2
FB
5133 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5134 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5135 ) {
ff32e16e 5136 float_raise(float_flag_invalid, status);
158142c2
FB
5137 return 0;
5138 }
5139 aSign = extractFloat64Sign( a );
5140 bSign = extractFloat64Sign( b );
f090c9d4 5141 av = float64_val(a);
a1b91bb4 5142 bv = float64_val(b);
bb98fe42 5143 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 5144 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
5145
5146}
5147
67b7861d
AJ
5148/*----------------------------------------------------------------------------
5149| Returns 1 if the double-precision floating-point values `a' and `b' cannot
f5a64251
AJ
5150| be compared, and 0 otherwise. The invalid exception is raised if either
5151| operand is a NaN. The comparison is performed according to the IEC/IEEE
5152| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
5153*----------------------------------------------------------------------------*/
5154
e5a41ffa 5155int float64_unordered(float64 a, float64 b, float_status *status)
67b7861d 5156{
ff32e16e
PM
5157 a = float64_squash_input_denormal(a, status);
5158 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
5159
5160 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5161 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5162 ) {
ff32e16e 5163 float_raise(float_flag_invalid, status);
67b7861d
AJ
5164 return 1;
5165 }
5166 return 0;
5167}
5168
158142c2
FB
5169/*----------------------------------------------------------------------------
5170| Returns 1 if the double-precision floating-point value `a' is equal to the
f5a64251
AJ
5171| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
5172| exception.The comparison is performed according to the IEC/IEEE Standard
5173| for Binary Floating-Point Arithmetic.
158142c2
FB
5174*----------------------------------------------------------------------------*/
5175
e5a41ffa 5176int float64_eq_quiet(float64 a, float64 b, float_status *status)
158142c2 5177{
bb98fe42 5178 uint64_t av, bv;
ff32e16e
PM
5179 a = float64_squash_input_denormal(a, status);
5180 b = float64_squash_input_denormal(b, status);
158142c2
FB
5181
5182 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5183 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5184 ) {
af39bc8c
AM
5185 if (float64_is_signaling_nan(a, status)
5186 || float64_is_signaling_nan(b, status)) {
ff32e16e 5187 float_raise(float_flag_invalid, status);
b689362d 5188 }
158142c2
FB
5189 return 0;
5190 }
f090c9d4 5191 av = float64_val(a);
a1b91bb4 5192 bv = float64_val(b);
bb98fe42 5193 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
5194
5195}
5196
5197/*----------------------------------------------------------------------------
5198| Returns 1 if the double-precision floating-point value `a' is less than or
5199| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5200| cause an exception. Otherwise, the comparison is performed according to the
5201| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5202*----------------------------------------------------------------------------*/
5203
e5a41ffa 5204int float64_le_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
5205{
5206 flag aSign, bSign;
bb98fe42 5207 uint64_t av, bv;
ff32e16e
PM
5208 a = float64_squash_input_denormal(a, status);
5209 b = float64_squash_input_denormal(b, status);
158142c2
FB
5210
5211 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5212 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5213 ) {
af39bc8c
AM
5214 if (float64_is_signaling_nan(a, status)
5215 || float64_is_signaling_nan(b, status)) {
ff32e16e 5216 float_raise(float_flag_invalid, status);
158142c2
FB
5217 }
5218 return 0;
5219 }
5220 aSign = extractFloat64Sign( a );
5221 bSign = extractFloat64Sign( b );
f090c9d4 5222 av = float64_val(a);
a1b91bb4 5223 bv = float64_val(b);
bb98fe42 5224 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 5225 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
5226
5227}
5228
5229/*----------------------------------------------------------------------------
5230| Returns 1 if the double-precision floating-point value `a' is less than
5231| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
5232| exception. Otherwise, the comparison is performed according to the IEC/IEEE
5233| Standard for Binary Floating-Point Arithmetic.
5234*----------------------------------------------------------------------------*/
5235
e5a41ffa 5236int float64_lt_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
5237{
5238 flag aSign, bSign;
bb98fe42 5239 uint64_t av, bv;
ff32e16e
PM
5240 a = float64_squash_input_denormal(a, status);
5241 b = float64_squash_input_denormal(b, status);
158142c2
FB
5242
5243 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5244 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5245 ) {
af39bc8c
AM
5246 if (float64_is_signaling_nan(a, status)
5247 || float64_is_signaling_nan(b, status)) {
ff32e16e 5248 float_raise(float_flag_invalid, status);
158142c2
FB
5249 }
5250 return 0;
5251 }
5252 aSign = extractFloat64Sign( a );
5253 bSign = extractFloat64Sign( b );
f090c9d4 5254 av = float64_val(a);
a1b91bb4 5255 bv = float64_val(b);
bb98fe42 5256 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 5257 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
5258
5259}
5260
67b7861d
AJ
5261/*----------------------------------------------------------------------------
5262| Returns 1 if the double-precision floating-point values `a' and `b' cannot
5263| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
5264| comparison is performed according to the IEC/IEEE Standard for Binary
5265| Floating-Point Arithmetic.
5266*----------------------------------------------------------------------------*/
5267
e5a41ffa 5268int float64_unordered_quiet(float64 a, float64 b, float_status *status)
67b7861d 5269{
ff32e16e
PM
5270 a = float64_squash_input_denormal(a, status);
5271 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
5272
5273 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5274 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5275 ) {
af39bc8c
AM
5276 if (float64_is_signaling_nan(a, status)
5277 || float64_is_signaling_nan(b, status)) {
ff32e16e 5278 float_raise(float_flag_invalid, status);
67b7861d
AJ
5279 }
5280 return 1;
5281 }
5282 return 0;
5283}
5284
158142c2
FB
5285/*----------------------------------------------------------------------------
5286| Returns the result of converting the extended double-precision floating-
5287| point value `a' to the 32-bit two's complement integer format. The
5288| conversion is performed according to the IEC/IEEE Standard for Binary
5289| Floating-Point Arithmetic---which means in particular that the conversion
5290| is rounded according to the current rounding mode. If `a' is a NaN, the
5291| largest positive integer is returned. Otherwise, if the conversion
5292| overflows, the largest integer with the same sign as `a' is returned.
5293*----------------------------------------------------------------------------*/
5294
f4014512 5295int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2
FB
5296{
5297 flag aSign;
f4014512 5298 int32_t aExp, shiftCount;
bb98fe42 5299 uint64_t aSig;
158142c2 5300
d1eb8f2a
AD
5301 if (floatx80_invalid_encoding(a)) {
5302 float_raise(float_flag_invalid, status);
5303 return 1 << 31;
5304 }
158142c2
FB
5305 aSig = extractFloatx80Frac( a );
5306 aExp = extractFloatx80Exp( a );
5307 aSign = extractFloatx80Sign( a );
bb98fe42 5308 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5309 shiftCount = 0x4037 - aExp;
5310 if ( shiftCount <= 0 ) shiftCount = 1;
5311 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5312 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5313
5314}
5315
5316/*----------------------------------------------------------------------------
5317| Returns the result of converting the extended double-precision floating-
5318| point value `a' to the 32-bit two's complement integer format. The
5319| conversion is performed according to the IEC/IEEE Standard for Binary
5320| Floating-Point Arithmetic, except that the conversion is always rounded
5321| toward zero. If `a' is a NaN, the largest positive integer is returned.
5322| Otherwise, if the conversion overflows, the largest integer with the same
5323| sign as `a' is returned.
5324*----------------------------------------------------------------------------*/
5325
f4014512 5326int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
5327{
5328 flag aSign;
f4014512 5329 int32_t aExp, shiftCount;
bb98fe42 5330 uint64_t aSig, savedASig;
b3a6a2e0 5331 int32_t z;
158142c2 5332
d1eb8f2a
AD
5333 if (floatx80_invalid_encoding(a)) {
5334 float_raise(float_flag_invalid, status);
5335 return 1 << 31;
5336 }
158142c2
FB
5337 aSig = extractFloatx80Frac( a );
5338 aExp = extractFloatx80Exp( a );
5339 aSign = extractFloatx80Sign( a );
5340 if ( 0x401E < aExp ) {
bb98fe42 5341 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5342 goto invalid;
5343 }
5344 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5345 if (aExp || aSig) {
5346 status->float_exception_flags |= float_flag_inexact;
5347 }
158142c2
FB
5348 return 0;
5349 }
5350 shiftCount = 0x403E - aExp;
5351 savedASig = aSig;
5352 aSig >>= shiftCount;
5353 z = aSig;
5354 if ( aSign ) z = - z;
5355 if ( ( z < 0 ) ^ aSign ) {
5356 invalid:
ff32e16e 5357 float_raise(float_flag_invalid, status);
bb98fe42 5358 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5359 }
5360 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 5361 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5362 }
5363 return z;
5364
5365}
5366
5367/*----------------------------------------------------------------------------
5368| Returns the result of converting the extended double-precision floating-
5369| point value `a' to the 64-bit two's complement integer format. The
5370| conversion is performed according to the IEC/IEEE Standard for Binary
5371| Floating-Point Arithmetic---which means in particular that the conversion
5372| is rounded according to the current rounding mode. If `a' is a NaN,
5373| the largest positive integer is returned. Otherwise, if the conversion
5374| overflows, the largest integer with the same sign as `a' is returned.
5375*----------------------------------------------------------------------------*/
5376
f42c2224 5377int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2
FB
5378{
5379 flag aSign;
f4014512 5380 int32_t aExp, shiftCount;
bb98fe42 5381 uint64_t aSig, aSigExtra;
158142c2 5382
d1eb8f2a
AD
5383 if (floatx80_invalid_encoding(a)) {
5384 float_raise(float_flag_invalid, status);
5385 return 1ULL << 63;
5386 }
158142c2
FB
5387 aSig = extractFloatx80Frac( a );
5388 aExp = extractFloatx80Exp( a );
5389 aSign = extractFloatx80Sign( a );
5390 shiftCount = 0x403E - aExp;
5391 if ( shiftCount <= 0 ) {
5392 if ( shiftCount ) {
ff32e16e 5393 float_raise(float_flag_invalid, status);
0f605c88 5394 if (!aSign || floatx80_is_any_nan(a)) {
158142c2
FB
5395 return LIT64( 0x7FFFFFFFFFFFFFFF );
5396 }
bb98fe42 5397 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5398 }
5399 aSigExtra = 0;
5400 }
5401 else {
5402 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5403 }
ff32e16e 5404 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5405
5406}
5407
5408/*----------------------------------------------------------------------------
5409| Returns the result of converting the extended double-precision floating-
5410| point value `a' to the 64-bit two's complement integer format. The
5411| conversion is performed according to the IEC/IEEE Standard for Binary
5412| Floating-Point Arithmetic, except that the conversion is always rounded
5413| toward zero. If `a' is a NaN, the largest positive integer is returned.
5414| Otherwise, if the conversion overflows, the largest integer with the same
5415| sign as `a' is returned.
5416*----------------------------------------------------------------------------*/
5417
f42c2224 5418int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
5419{
5420 flag aSign;
f4014512 5421 int32_t aExp, shiftCount;
bb98fe42 5422 uint64_t aSig;
f42c2224 5423 int64_t z;
158142c2 5424
d1eb8f2a
AD
5425 if (floatx80_invalid_encoding(a)) {
5426 float_raise(float_flag_invalid, status);
5427 return 1ULL << 63;
5428 }
158142c2
FB
5429 aSig = extractFloatx80Frac( a );
5430 aExp = extractFloatx80Exp( a );
5431 aSign = extractFloatx80Sign( a );
5432 shiftCount = aExp - 0x403E;
5433 if ( 0 <= shiftCount ) {
5434 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
5435 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5436 float_raise(float_flag_invalid, status);
158142c2
FB
5437 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5438 return LIT64( 0x7FFFFFFFFFFFFFFF );
5439 }
5440 }
bb98fe42 5441 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5442 }
5443 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5444 if (aExp | aSig) {
5445 status->float_exception_flags |= float_flag_inexact;
5446 }
158142c2
FB
5447 return 0;
5448 }
5449 z = aSig>>( - shiftCount );
bb98fe42 5450 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 5451 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5452 }
5453 if ( aSign ) z = - z;
5454 return z;
5455
5456}
5457
5458/*----------------------------------------------------------------------------
5459| Returns the result of converting the extended double-precision floating-
5460| point value `a' to the single-precision floating-point format. The
5461| conversion is performed according to the IEC/IEEE Standard for Binary
5462| Floating-Point Arithmetic.
5463*----------------------------------------------------------------------------*/
5464
e5a41ffa 5465float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2
FB
5466{
5467 flag aSign;
f4014512 5468 int32_t aExp;
bb98fe42 5469 uint64_t aSig;
158142c2 5470
d1eb8f2a
AD
5471 if (floatx80_invalid_encoding(a)) {
5472 float_raise(float_flag_invalid, status);
5473 return float32_default_nan(status);
5474 }
158142c2
FB
5475 aSig = extractFloatx80Frac( a );
5476 aExp = extractFloatx80Exp( a );
5477 aSign = extractFloatx80Sign( a );
5478 if ( aExp == 0x7FFF ) {
bb98fe42 5479 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 5480 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
158142c2
FB
5481 }
5482 return packFloat32( aSign, 0xFF, 0 );
5483 }
5484 shift64RightJamming( aSig, 33, &aSig );
5485 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5486 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5487
5488}
5489
5490/*----------------------------------------------------------------------------
5491| Returns the result of converting the extended double-precision floating-
5492| point value `a' to the double-precision floating-point format. The
5493| conversion is performed according to the IEC/IEEE Standard for Binary
5494| Floating-Point Arithmetic.
5495*----------------------------------------------------------------------------*/
5496
e5a41ffa 5497float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2
FB
5498{
5499 flag aSign;
f4014512 5500 int32_t aExp;
bb98fe42 5501 uint64_t aSig, zSig;
158142c2 5502
d1eb8f2a
AD
5503 if (floatx80_invalid_encoding(a)) {
5504 float_raise(float_flag_invalid, status);
5505 return float64_default_nan(status);
5506 }
158142c2
FB
5507 aSig = extractFloatx80Frac( a );
5508 aExp = extractFloatx80Exp( a );
5509 aSign = extractFloatx80Sign( a );
5510 if ( aExp == 0x7FFF ) {
bb98fe42 5511 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 5512 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
158142c2
FB
5513 }
5514 return packFloat64( aSign, 0x7FF, 0 );
5515 }
5516 shift64RightJamming( aSig, 1, &zSig );
5517 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5518 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5519
5520}
5521
158142c2
FB
5522/*----------------------------------------------------------------------------
5523| Returns the result of converting the extended double-precision floating-
5524| point value `a' to the quadruple-precision floating-point format. The
5525| conversion is performed according to the IEC/IEEE Standard for Binary
5526| Floating-Point Arithmetic.
5527*----------------------------------------------------------------------------*/
5528
e5a41ffa 5529float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2
FB
5530{
5531 flag aSign;
0c48262d 5532 int aExp;
bb98fe42 5533 uint64_t aSig, zSig0, zSig1;
158142c2 5534
d1eb8f2a
AD
5535 if (floatx80_invalid_encoding(a)) {
5536 float_raise(float_flag_invalid, status);
5537 return float128_default_nan(status);
5538 }
158142c2
FB
5539 aSig = extractFloatx80Frac( a );
5540 aExp = extractFloatx80Exp( a );
5541 aSign = extractFloatx80Sign( a );
bb98fe42 5542 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
ff32e16e 5543 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
158142c2
FB
5544 }
5545 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5546 return packFloat128( aSign, aExp, zSig0, zSig1 );
5547
5548}
5549
0f721292
LV
5550/*----------------------------------------------------------------------------
5551| Rounds the extended double-precision floating-point value `a'
5552| to the precision provided by floatx80_rounding_precision and returns the
5553| result as an extended double-precision floating-point value.
5554| The operation is performed according to the IEC/IEEE Standard for Binary
5555| Floating-Point Arithmetic.
5556*----------------------------------------------------------------------------*/
5557
5558floatx80 floatx80_round(floatx80 a, float_status *status)
5559{
5560 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5561 extractFloatx80Sign(a),
5562 extractFloatx80Exp(a),
5563 extractFloatx80Frac(a), 0, status);
5564}
5565
158142c2
FB
5566/*----------------------------------------------------------------------------
5567| Rounds the extended double-precision floating-point value `a' to an integer,
5568| and returns the result as an extended quadruple-precision floating-point
5569| value. The operation is performed according to the IEC/IEEE Standard for
5570| Binary Floating-Point Arithmetic.
5571*----------------------------------------------------------------------------*/
5572
e5a41ffa 5573floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2
FB
5574{
5575 flag aSign;
f4014512 5576 int32_t aExp;
bb98fe42 5577 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5578 floatx80 z;
5579
d1eb8f2a
AD
5580 if (floatx80_invalid_encoding(a)) {
5581 float_raise(float_flag_invalid, status);
5582 return floatx80_default_nan(status);
5583 }
158142c2
FB
5584 aExp = extractFloatx80Exp( a );
5585 if ( 0x403E <= aExp ) {
bb98fe42 5586 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5587 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5588 }
5589 return a;
5590 }
5591 if ( aExp < 0x3FFF ) {
5592 if ( ( aExp == 0 )
bb98fe42 5593 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
158142c2
FB
5594 return a;
5595 }
a2f2d288 5596 status->float_exception_flags |= float_flag_inexact;
158142c2 5597 aSign = extractFloatx80Sign( a );
a2f2d288 5598 switch (status->float_rounding_mode) {
158142c2 5599 case float_round_nearest_even:
bb98fe42 5600 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5601 ) {
5602 return
5603 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
5604 }
5605 break;
f9288a76
PM
5606 case float_round_ties_away:
5607 if (aExp == 0x3FFE) {
5608 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
5609 }
5610 break;
158142c2
FB
5611 case float_round_down:
5612 return
5613 aSign ?
5614 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
5615 : packFloatx80( 0, 0, 0 );
5616 case float_round_up:
5617 return
5618 aSign ? packFloatx80( 1, 0, 0 )
5619 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
5620 }
5621 return packFloatx80( aSign, 0, 0 );
5622 }
5623 lastBitMask = 1;
5624 lastBitMask <<= 0x403E - aExp;
5625 roundBitsMask = lastBitMask - 1;
5626 z = a;
a2f2d288 5627 switch (status->float_rounding_mode) {
dc355b76 5628 case float_round_nearest_even:
158142c2 5629 z.low += lastBitMask>>1;
dc355b76
PM
5630 if ((z.low & roundBitsMask) == 0) {
5631 z.low &= ~lastBitMask;
5632 }
5633 break;
f9288a76
PM
5634 case float_round_ties_away:
5635 z.low += lastBitMask >> 1;
5636 break;
dc355b76
PM
5637 case float_round_to_zero:
5638 break;
5639 case float_round_up:
5640 if (!extractFloatx80Sign(z)) {
5641 z.low += roundBitsMask;
5642 }
5643 break;
5644 case float_round_down:
5645 if (extractFloatx80Sign(z)) {
158142c2
FB
5646 z.low += roundBitsMask;
5647 }
dc355b76
PM
5648 break;
5649 default:
5650 abort();
158142c2
FB
5651 }
5652 z.low &= ~ roundBitsMask;
5653 if ( z.low == 0 ) {
5654 ++z.high;
5655 z.low = LIT64( 0x8000000000000000 );
5656 }
a2f2d288
PM
5657 if (z.low != a.low) {
5658 status->float_exception_flags |= float_flag_inexact;
5659 }
158142c2
FB
5660 return z;
5661
5662}
5663
5664/*----------------------------------------------------------------------------
5665| Returns the result of adding the absolute values of the extended double-
5666| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5667| negated before being returned. `zSign' is ignored if the result is a NaN.
5668| The addition is performed according to the IEC/IEEE Standard for Binary
5669| Floating-Point Arithmetic.
5670*----------------------------------------------------------------------------*/
5671
e5a41ffa
PM
5672static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5673 float_status *status)
158142c2 5674{
f4014512 5675 int32_t aExp, bExp, zExp;
bb98fe42 5676 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5677 int32_t expDiff;
158142c2
FB
5678
5679 aSig = extractFloatx80Frac( a );
5680 aExp = extractFloatx80Exp( a );
5681 bSig = extractFloatx80Frac( b );
5682 bExp = extractFloatx80Exp( b );
5683 expDiff = aExp - bExp;
5684 if ( 0 < expDiff ) {
5685 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5686 if ((uint64_t)(aSig << 1)) {
5687 return propagateFloatx80NaN(a, b, status);
5688 }
158142c2
FB
5689 return a;
5690 }
5691 if ( bExp == 0 ) --expDiff;
5692 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5693 zExp = aExp;
5694 }
5695 else if ( expDiff < 0 ) {
5696 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5697 if ((uint64_t)(bSig << 1)) {
5698 return propagateFloatx80NaN(a, b, status);
5699 }
0f605c88
LV
5700 return packFloatx80(zSign,
5701 floatx80_infinity_high,
5702 floatx80_infinity_low);
158142c2
FB
5703 }
5704 if ( aExp == 0 ) ++expDiff;
5705 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5706 zExp = bExp;
5707 }
5708 else {
5709 if ( aExp == 0x7FFF ) {
bb98fe42 5710 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5711 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5712 }
5713 return a;
5714 }
5715 zSig1 = 0;
5716 zSig0 = aSig + bSig;
5717 if ( aExp == 0 ) {
5718 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5719 goto roundAndPack;
5720 }
5721 zExp = aExp;
5722 goto shiftRight1;
5723 }
5724 zSig0 = aSig + bSig;
bb98fe42 5725 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5726 shiftRight1:
5727 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
5728 zSig0 |= LIT64( 0x8000000000000000 );
5729 ++zExp;
5730 roundAndPack:
a2f2d288 5731 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5732 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5733}
5734
5735/*----------------------------------------------------------------------------
5736| Returns the result of subtracting the absolute values of the extended
5737| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5738| difference is negated before being returned. `zSign' is ignored if the
5739| result is a NaN. The subtraction is performed according to the IEC/IEEE
5740| Standard for Binary Floating-Point Arithmetic.
5741*----------------------------------------------------------------------------*/
5742
e5a41ffa
PM
5743static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5744 float_status *status)
158142c2 5745{
f4014512 5746 int32_t aExp, bExp, zExp;
bb98fe42 5747 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5748 int32_t expDiff;
158142c2
FB
5749
5750 aSig = extractFloatx80Frac( a );
5751 aExp = extractFloatx80Exp( a );
5752 bSig = extractFloatx80Frac( b );
5753 bExp = extractFloatx80Exp( b );
5754 expDiff = aExp - bExp;
5755 if ( 0 < expDiff ) goto aExpBigger;
5756 if ( expDiff < 0 ) goto bExpBigger;
5757 if ( aExp == 0x7FFF ) {
bb98fe42 5758 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5759 return propagateFloatx80NaN(a, b, status);
158142c2 5760 }
ff32e16e 5761 float_raise(float_flag_invalid, status);
af39bc8c 5762 return floatx80_default_nan(status);
158142c2
FB
5763 }
5764 if ( aExp == 0 ) {
5765 aExp = 1;
5766 bExp = 1;
5767 }
5768 zSig1 = 0;
5769 if ( bSig < aSig ) goto aBigger;
5770 if ( aSig < bSig ) goto bBigger;
a2f2d288 5771 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5772 bExpBigger:
5773 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5774 if ((uint64_t)(bSig << 1)) {
5775 return propagateFloatx80NaN(a, b, status);
5776 }
0f605c88
LV
5777 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5778 floatx80_infinity_low);
158142c2
FB
5779 }
5780 if ( aExp == 0 ) ++expDiff;
5781 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5782 bBigger:
5783 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5784 zExp = bExp;
5785 zSign ^= 1;
5786 goto normalizeRoundAndPack;
5787 aExpBigger:
5788 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5789 if ((uint64_t)(aSig << 1)) {
5790 return propagateFloatx80NaN(a, b, status);
5791 }
158142c2
FB
5792 return a;
5793 }
5794 if ( bExp == 0 ) --expDiff;
5795 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5796 aBigger:
5797 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5798 zExp = aExp;
5799 normalizeRoundAndPack:
a2f2d288 5800 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5801 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5802}
5803
5804/*----------------------------------------------------------------------------
5805| Returns the result of adding the extended double-precision floating-point
5806| values `a' and `b'. The operation is performed according to the IEC/IEEE
5807| Standard for Binary Floating-Point Arithmetic.
5808*----------------------------------------------------------------------------*/
5809
e5a41ffa 5810floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5811{
5812 flag aSign, bSign;
5813
d1eb8f2a
AD
5814 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5815 float_raise(float_flag_invalid, status);
5816 return floatx80_default_nan(status);
5817 }
158142c2
FB
5818 aSign = extractFloatx80Sign( a );
5819 bSign = extractFloatx80Sign( b );
5820 if ( aSign == bSign ) {
ff32e16e 5821 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5822 }
5823 else {
ff32e16e 5824 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5825 }
5826
5827}
5828
5829/*----------------------------------------------------------------------------
5830| Returns the result of subtracting the extended double-precision floating-
5831| point values `a' and `b'. The operation is performed according to the
5832| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5833*----------------------------------------------------------------------------*/
5834
e5a41ffa 5835floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5836{
5837 flag aSign, bSign;
5838
d1eb8f2a
AD
5839 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5840 float_raise(float_flag_invalid, status);
5841 return floatx80_default_nan(status);
5842 }
158142c2
FB
5843 aSign = extractFloatx80Sign( a );
5844 bSign = extractFloatx80Sign( b );
5845 if ( aSign == bSign ) {
ff32e16e 5846 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5847 }
5848 else {
ff32e16e 5849 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5850 }
5851
5852}
5853
5854/*----------------------------------------------------------------------------
5855| Returns the result of multiplying the extended double-precision floating-
5856| point values `a' and `b'. The operation is performed according to the
5857| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5858*----------------------------------------------------------------------------*/
5859
e5a41ffa 5860floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5861{
5862 flag aSign, bSign, zSign;
f4014512 5863 int32_t aExp, bExp, zExp;
bb98fe42 5864 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 5865
d1eb8f2a
AD
5866 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5867 float_raise(float_flag_invalid, status);
5868 return floatx80_default_nan(status);
5869 }
158142c2
FB
5870 aSig = extractFloatx80Frac( a );
5871 aExp = extractFloatx80Exp( a );
5872 aSign = extractFloatx80Sign( a );
5873 bSig = extractFloatx80Frac( b );
5874 bExp = extractFloatx80Exp( b );
5875 bSign = extractFloatx80Sign( b );
5876 zSign = aSign ^ bSign;
5877 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5878 if ( (uint64_t) ( aSig<<1 )
5879 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5880 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5881 }
5882 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
5883 return packFloatx80(zSign, floatx80_infinity_high,
5884 floatx80_infinity_low);
158142c2
FB
5885 }
5886 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5887 if ((uint64_t)(bSig << 1)) {
5888 return propagateFloatx80NaN(a, b, status);
5889 }
158142c2
FB
5890 if ( ( aExp | aSig ) == 0 ) {
5891 invalid:
ff32e16e 5892 float_raise(float_flag_invalid, status);
af39bc8c 5893 return floatx80_default_nan(status);
158142c2 5894 }
0f605c88
LV
5895 return packFloatx80(zSign, floatx80_infinity_high,
5896 floatx80_infinity_low);
158142c2
FB
5897 }
5898 if ( aExp == 0 ) {
5899 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5900 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5901 }
5902 if ( bExp == 0 ) {
5903 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5904 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5905 }
5906 zExp = aExp + bExp - 0x3FFE;
5907 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 5908 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
5909 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5910 --zExp;
5911 }
a2f2d288 5912 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5913 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5914}
5915
5916/*----------------------------------------------------------------------------
5917| Returns the result of dividing the extended double-precision floating-point
5918| value `a' by the corresponding value `b'. The operation is performed
5919| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5920*----------------------------------------------------------------------------*/
5921
e5a41ffa 5922floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5923{
5924 flag aSign, bSign, zSign;
f4014512 5925 int32_t aExp, bExp, zExp;
bb98fe42
AF
5926 uint64_t aSig, bSig, zSig0, zSig1;
5927 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 5928
d1eb8f2a
AD
5929 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5930 float_raise(float_flag_invalid, status);
5931 return floatx80_default_nan(status);
5932 }
158142c2
FB
5933 aSig = extractFloatx80Frac( a );
5934 aExp = extractFloatx80Exp( a );
5935 aSign = extractFloatx80Sign( a );
5936 bSig = extractFloatx80Frac( b );
5937 bExp = extractFloatx80Exp( b );
5938 bSign = extractFloatx80Sign( b );
5939 zSign = aSign ^ bSign;
5940 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5941 if ((uint64_t)(aSig << 1)) {
5942 return propagateFloatx80NaN(a, b, status);
5943 }
158142c2 5944 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5945 if ((uint64_t)(bSig << 1)) {
5946 return propagateFloatx80NaN(a, b, status);
5947 }
158142c2
FB
5948 goto invalid;
5949 }
0f605c88
LV
5950 return packFloatx80(zSign, floatx80_infinity_high,
5951 floatx80_infinity_low);
158142c2
FB
5952 }
5953 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5954 if ((uint64_t)(bSig << 1)) {
5955 return propagateFloatx80NaN(a, b, status);
5956 }
158142c2
FB
5957 return packFloatx80( zSign, 0, 0 );
5958 }
5959 if ( bExp == 0 ) {
5960 if ( bSig == 0 ) {
5961 if ( ( aExp | aSig ) == 0 ) {
5962 invalid:
ff32e16e 5963 float_raise(float_flag_invalid, status);
af39bc8c 5964 return floatx80_default_nan(status);
158142c2 5965 }
ff32e16e 5966 float_raise(float_flag_divbyzero, status);
0f605c88
LV
5967 return packFloatx80(zSign, floatx80_infinity_high,
5968 floatx80_infinity_low);
158142c2
FB
5969 }
5970 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5971 }
5972 if ( aExp == 0 ) {
5973 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5974 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5975 }
5976 zExp = aExp - bExp + 0x3FFE;
5977 rem1 = 0;
5978 if ( bSig <= aSig ) {
5979 shift128Right( aSig, 0, 1, &aSig, &rem1 );
5980 ++zExp;
5981 }
5982 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
5983 mul64To128( bSig, zSig0, &term0, &term1 );
5984 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 5985 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5986 --zSig0;
5987 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
5988 }
5989 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 5990 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
5991 mul64To128( bSig, zSig1, &term1, &term2 );
5992 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 5993 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5994 --zSig1;
5995 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
5996 }
5997 zSig1 |= ( ( rem1 | rem2 ) != 0 );
5998 }
a2f2d288 5999 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6000 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6001}
6002
6003/*----------------------------------------------------------------------------
6004| Returns the remainder of the extended double-precision floating-point value
6005| `a' with respect to the corresponding value `b'. The operation is performed
6006| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6007*----------------------------------------------------------------------------*/
6008
e5a41ffa 6009floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
158142c2 6010{
ed086f3d 6011 flag aSign, zSign;
f4014512 6012 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6013 uint64_t aSig0, aSig1, bSig;
6014 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6015
d1eb8f2a
AD
6016 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6017 float_raise(float_flag_invalid, status);
6018 return floatx80_default_nan(status);
6019 }
158142c2
FB
6020 aSig0 = extractFloatx80Frac( a );
6021 aExp = extractFloatx80Exp( a );
6022 aSign = extractFloatx80Sign( a );
6023 bSig = extractFloatx80Frac( b );
6024 bExp = extractFloatx80Exp( b );
158142c2 6025 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6026 if ( (uint64_t) ( aSig0<<1 )
6027 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6028 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6029 }
6030 goto invalid;
6031 }
6032 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6033 if ((uint64_t)(bSig << 1)) {
6034 return propagateFloatx80NaN(a, b, status);
6035 }
158142c2
FB
6036 return a;
6037 }
6038 if ( bExp == 0 ) {
6039 if ( bSig == 0 ) {
6040 invalid:
ff32e16e 6041 float_raise(float_flag_invalid, status);
af39bc8c 6042 return floatx80_default_nan(status);
158142c2
FB
6043 }
6044 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6045 }
6046 if ( aExp == 0 ) {
bb98fe42 6047 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
158142c2
FB
6048 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6049 }
6050 bSig |= LIT64( 0x8000000000000000 );
6051 zSign = aSign;
6052 expDiff = aExp - bExp;
6053 aSig1 = 0;
6054 if ( expDiff < 0 ) {
6055 if ( expDiff < -1 ) return a;
6056 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6057 expDiff = 0;
6058 }
6059 q = ( bSig <= aSig0 );
6060 if ( q ) aSig0 -= bSig;
6061 expDiff -= 64;
6062 while ( 0 < expDiff ) {
6063 q = estimateDiv128To64( aSig0, aSig1, bSig );
6064 q = ( 2 < q ) ? q - 2 : 0;
6065 mul64To128( bSig, q, &term0, &term1 );
6066 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6067 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6068 expDiff -= 62;
6069 }
6070 expDiff += 64;
6071 if ( 0 < expDiff ) {
6072 q = estimateDiv128To64( aSig0, aSig1, bSig );
6073 q = ( 2 < q ) ? q - 2 : 0;
6074 q >>= 64 - expDiff;
6075 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6076 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6077 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6078 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6079 ++q;
6080 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6081 }
6082 }
6083 else {
6084 term1 = 0;
6085 term0 = bSig;
6086 }
6087 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6088 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6089 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6090 && ( q & 1 ) )
6091 ) {
6092 aSig0 = alternateASig0;
6093 aSig1 = alternateASig1;
6094 zSign = ! zSign;
6095 }
6096 return
6097 normalizeRoundAndPackFloatx80(
ff32e16e 6098 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6099
6100}
6101
6102/*----------------------------------------------------------------------------
6103| Returns the square root of the extended double-precision floating-point
6104| value `a'. The operation is performed according to the IEC/IEEE Standard
6105| for Binary Floating-Point Arithmetic.
6106*----------------------------------------------------------------------------*/
6107
e5a41ffa 6108floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2
FB
6109{
6110 flag aSign;
f4014512 6111 int32_t aExp, zExp;
bb98fe42
AF
6112 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6113 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6114
d1eb8f2a
AD
6115 if (floatx80_invalid_encoding(a)) {
6116 float_raise(float_flag_invalid, status);
6117 return floatx80_default_nan(status);
6118 }
158142c2
FB
6119 aSig0 = extractFloatx80Frac( a );
6120 aExp = extractFloatx80Exp( a );
6121 aSign = extractFloatx80Sign( a );
6122 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6123 if ((uint64_t)(aSig0 << 1)) {
6124 return propagateFloatx80NaN(a, a, status);
6125 }
158142c2
FB
6126 if ( ! aSign ) return a;
6127 goto invalid;
6128 }
6129 if ( aSign ) {
6130 if ( ( aExp | aSig0 ) == 0 ) return a;
6131 invalid:
ff32e16e 6132 float_raise(float_flag_invalid, status);
af39bc8c 6133 return floatx80_default_nan(status);
158142c2
FB
6134 }
6135 if ( aExp == 0 ) {
6136 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6137 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6138 }
6139 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6140 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6141 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6142 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6143 doubleZSig0 = zSig0<<1;
6144 mul64To128( zSig0, zSig0, &term0, &term1 );
6145 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6146 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6147 --zSig0;
6148 doubleZSig0 -= 2;
6149 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6150 }
6151 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6152 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
6153 if ( zSig1 == 0 ) zSig1 = 1;
6154 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6155 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6156 mul64To128( zSig1, zSig1, &term2, &term3 );
6157 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6158 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6159 --zSig1;
6160 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6161 term3 |= 1;
6162 term2 |= doubleZSig0;
6163 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6164 }
6165 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6166 }
6167 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6168 zSig0 |= doubleZSig0;
a2f2d288
PM
6169 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6170 0, zExp, zSig0, zSig1, status);
158142c2
FB
6171}
6172
6173/*----------------------------------------------------------------------------
b689362d
AJ
6174| Returns 1 if the extended double-precision floating-point value `a' is equal
6175| to the corresponding value `b', and 0 otherwise. The invalid exception is
6176| raised if either operand is a NaN. Otherwise, the comparison is performed
6177| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6178*----------------------------------------------------------------------------*/
6179
e5a41ffa 6180int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6181{
6182
d1eb8f2a
AD
6183 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6184 || (extractFloatx80Exp(a) == 0x7FFF
6185 && (uint64_t) (extractFloatx80Frac(a) << 1))
6186 || (extractFloatx80Exp(b) == 0x7FFF
6187 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6188 ) {
ff32e16e 6189 float_raise(float_flag_invalid, status);
158142c2
FB
6190 return 0;
6191 }
6192 return
6193 ( a.low == b.low )
6194 && ( ( a.high == b.high )
6195 || ( ( a.low == 0 )
bb98fe42 6196 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6197 );
6198
6199}
6200
6201/*----------------------------------------------------------------------------
6202| Returns 1 if the extended double-precision floating-point value `a' is
6203| less than or equal to the corresponding value `b', and 0 otherwise. The
f5a64251
AJ
6204| invalid exception is raised if either operand is a NaN. The comparison is
6205| performed according to the IEC/IEEE Standard for Binary Floating-Point
6206| Arithmetic.
158142c2
FB
6207*----------------------------------------------------------------------------*/
6208
e5a41ffa 6209int floatx80_le(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6210{
6211 flag aSign, bSign;
6212
d1eb8f2a
AD
6213 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6214 || (extractFloatx80Exp(a) == 0x7FFF
6215 && (uint64_t) (extractFloatx80Frac(a) << 1))
6216 || (extractFloatx80Exp(b) == 0x7FFF
6217 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6218 ) {
ff32e16e 6219 float_raise(float_flag_invalid, status);
158142c2
FB
6220 return 0;
6221 }
6222 aSign = extractFloatx80Sign( a );
6223 bSign = extractFloatx80Sign( b );
6224 if ( aSign != bSign ) {
6225 return
6226 aSign
bb98fe42 6227 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6228 == 0 );
6229 }
6230 return
6231 aSign ? le128( b.high, b.low, a.high, a.low )
6232 : le128( a.high, a.low, b.high, b.low );
6233
6234}
6235
6236/*----------------------------------------------------------------------------
6237| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
6238| less than the corresponding value `b', and 0 otherwise. The invalid
6239| exception is raised if either operand is a NaN. The comparison is performed
6240| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6241*----------------------------------------------------------------------------*/
6242
e5a41ffa 6243int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6244{
6245 flag aSign, bSign;
6246
d1eb8f2a
AD
6247 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6248 || (extractFloatx80Exp(a) == 0x7FFF
6249 && (uint64_t) (extractFloatx80Frac(a) << 1))
6250 || (extractFloatx80Exp(b) == 0x7FFF
6251 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6252 ) {
ff32e16e 6253 float_raise(float_flag_invalid, status);
158142c2
FB
6254 return 0;
6255 }
6256 aSign = extractFloatx80Sign( a );
6257 bSign = extractFloatx80Sign( b );
6258 if ( aSign != bSign ) {
6259 return
6260 aSign
bb98fe42 6261 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6262 != 0 );
6263 }
6264 return
6265 aSign ? lt128( b.high, b.low, a.high, a.low )
6266 : lt128( a.high, a.low, b.high, b.low );
6267
6268}
6269
67b7861d
AJ
6270/*----------------------------------------------------------------------------
6271| Returns 1 if the extended double-precision floating-point values `a' and `b'
f5a64251
AJ
6272| cannot be compared, and 0 otherwise. The invalid exception is raised if
6273| either operand is a NaN. The comparison is performed according to the
6274| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
67b7861d 6275*----------------------------------------------------------------------------*/
e5a41ffa 6276int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
67b7861d 6277{
d1eb8f2a
AD
6278 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6279 || (extractFloatx80Exp(a) == 0x7FFF
6280 && (uint64_t) (extractFloatx80Frac(a) << 1))
6281 || (extractFloatx80Exp(b) == 0x7FFF
6282 && (uint64_t) (extractFloatx80Frac(b) << 1))
67b7861d 6283 ) {
ff32e16e 6284 float_raise(float_flag_invalid, status);
67b7861d
AJ
6285 return 1;
6286 }
6287 return 0;
6288}
6289
158142c2 6290/*----------------------------------------------------------------------------
b689362d 6291| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
6292| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6293| cause an exception. The comparison is performed according to the IEC/IEEE
6294| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6295*----------------------------------------------------------------------------*/
6296
e5a41ffa 6297int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6298{
6299
d1eb8f2a
AD
6300 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6301 float_raise(float_flag_invalid, status);
6302 return 0;
6303 }
158142c2 6304 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6305 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6306 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6307 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6308 ) {
af39bc8c
AM
6309 if (floatx80_is_signaling_nan(a, status)
6310 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6311 float_raise(float_flag_invalid, status);
b689362d 6312 }
158142c2
FB
6313 return 0;
6314 }
6315 return
6316 ( a.low == b.low )
6317 && ( ( a.high == b.high )
6318 || ( ( a.low == 0 )
bb98fe42 6319 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6320 );
6321
6322}
6323
6324/*----------------------------------------------------------------------------
6325| Returns 1 if the extended double-precision floating-point value `a' is less
6326| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
6327| do not cause an exception. Otherwise, the comparison is performed according
6328| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6329*----------------------------------------------------------------------------*/
6330
e5a41ffa 6331int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6332{
6333 flag aSign, bSign;
6334
d1eb8f2a
AD
6335 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6336 float_raise(float_flag_invalid, status);
6337 return 0;
6338 }
158142c2 6339 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6340 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6341 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6342 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6343 ) {
af39bc8c
AM
6344 if (floatx80_is_signaling_nan(a, status)
6345 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6346 float_raise(float_flag_invalid, status);
158142c2
FB
6347 }
6348 return 0;
6349 }
6350 aSign = extractFloatx80Sign( a );
6351 bSign = extractFloatx80Sign( b );
6352 if ( aSign != bSign ) {
6353 return
6354 aSign
bb98fe42 6355 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6356 == 0 );
6357 }
6358 return
6359 aSign ? le128( b.high, b.low, a.high, a.low )
6360 : le128( a.high, a.low, b.high, b.low );
6361
6362}
6363
6364/*----------------------------------------------------------------------------
6365| Returns 1 if the extended double-precision floating-point value `a' is less
6366| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
6367| an exception. Otherwise, the comparison is performed according to the
6368| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6369*----------------------------------------------------------------------------*/
6370
e5a41ffa 6371int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6372{
6373 flag aSign, bSign;
6374
d1eb8f2a
AD
6375 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6376 float_raise(float_flag_invalid, status);
6377 return 0;
6378 }
158142c2 6379 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6380 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6381 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6382 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6383 ) {
af39bc8c
AM
6384 if (floatx80_is_signaling_nan(a, status)
6385 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6386 float_raise(float_flag_invalid, status);
158142c2
FB
6387 }
6388 return 0;
6389 }
6390 aSign = extractFloatx80Sign( a );
6391 bSign = extractFloatx80Sign( b );
6392 if ( aSign != bSign ) {
6393 return
6394 aSign
bb98fe42 6395 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6396 != 0 );
6397 }
6398 return
6399 aSign ? lt128( b.high, b.low, a.high, a.low )
6400 : lt128( a.high, a.low, b.high, b.low );
6401
6402}
6403
67b7861d
AJ
6404/*----------------------------------------------------------------------------
6405| Returns 1 if the extended double-precision floating-point values `a' and `b'
6406| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
6407| The comparison is performed according to the IEC/IEEE Standard for Binary
6408| Floating-Point Arithmetic.
6409*----------------------------------------------------------------------------*/
e5a41ffa 6410int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
67b7861d 6411{
d1eb8f2a
AD
6412 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6413 float_raise(float_flag_invalid, status);
6414 return 1;
6415 }
67b7861d
AJ
6416 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
6417 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
6418 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
6419 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
6420 ) {
af39bc8c
AM
6421 if (floatx80_is_signaling_nan(a, status)
6422 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6423 float_raise(float_flag_invalid, status);
67b7861d
AJ
6424 }
6425 return 1;
6426 }
6427 return 0;
6428}
6429
158142c2
FB
6430/*----------------------------------------------------------------------------
6431| Returns the result of converting the quadruple-precision floating-point
6432| value `a' to the 32-bit two's complement integer format. The conversion
6433| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6434| Arithmetic---which means in particular that the conversion is rounded
6435| according to the current rounding mode. If `a' is a NaN, the largest
6436| positive integer is returned. Otherwise, if the conversion overflows, the
6437| largest integer with the same sign as `a' is returned.
6438*----------------------------------------------------------------------------*/
6439
f4014512 6440int32_t float128_to_int32(float128 a, float_status *status)
158142c2
FB
6441{
6442 flag aSign;
f4014512 6443 int32_t aExp, shiftCount;
bb98fe42 6444 uint64_t aSig0, aSig1;
158142c2
FB
6445
6446 aSig1 = extractFloat128Frac1( a );
6447 aSig0 = extractFloat128Frac0( a );
6448 aExp = extractFloat128Exp( a );
6449 aSign = extractFloat128Sign( a );
6450 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
6451 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
6452 aSig0 |= ( aSig1 != 0 );
6453 shiftCount = 0x4028 - aExp;
6454 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 6455 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
6456
6457}
6458
6459/*----------------------------------------------------------------------------
6460| Returns the result of converting the quadruple-precision floating-point
6461| value `a' to the 32-bit two's complement integer format. The conversion
6462| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6463| Arithmetic, except that the conversion is always rounded toward zero. If
6464| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
6465| conversion overflows, the largest integer with the same sign as `a' is
6466| returned.
6467*----------------------------------------------------------------------------*/
6468
f4014512 6469int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2
FB
6470{
6471 flag aSign;
f4014512 6472 int32_t aExp, shiftCount;
bb98fe42 6473 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 6474 int32_t z;
158142c2
FB
6475
6476 aSig1 = extractFloat128Frac1( a );
6477 aSig0 = extractFloat128Frac0( a );
6478 aExp = extractFloat128Exp( a );
6479 aSign = extractFloat128Sign( a );
6480 aSig0 |= ( aSig1 != 0 );
6481 if ( 0x401E < aExp ) {
6482 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6483 goto invalid;
6484 }
6485 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
6486 if (aExp || aSig0) {
6487 status->float_exception_flags |= float_flag_inexact;
6488 }
158142c2
FB
6489 return 0;
6490 }
6491 aSig0 |= LIT64( 0x0001000000000000 );
6492 shiftCount = 0x402F - aExp;
6493 savedASig = aSig0;
6494 aSig0 >>= shiftCount;
6495 z = aSig0;
6496 if ( aSign ) z = - z;
6497 if ( ( z < 0 ) ^ aSign ) {
6498 invalid:
ff32e16e 6499 float_raise(float_flag_invalid, status);
bb98fe42 6500 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
6501 }
6502 if ( ( aSig0<<shiftCount ) != savedASig ) {
a2f2d288 6503 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6504 }
6505 return z;
6506
6507}
6508
6509/*----------------------------------------------------------------------------
6510| Returns the result of converting the quadruple-precision floating-point
6511| value `a' to the 64-bit two's complement integer format. The conversion
6512| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6513| Arithmetic---which means in particular that the conversion is rounded
6514| according to the current rounding mode. If `a' is a NaN, the largest
6515| positive integer is returned. Otherwise, if the conversion overflows, the
6516| largest integer with the same sign as `a' is returned.
6517*----------------------------------------------------------------------------*/
6518
f42c2224 6519int64_t float128_to_int64(float128 a, float_status *status)
158142c2
FB
6520{
6521 flag aSign;
f4014512 6522 int32_t aExp, shiftCount;
bb98fe42 6523 uint64_t aSig0, aSig1;
158142c2
FB
6524
6525 aSig1 = extractFloat128Frac1( a );
6526 aSig0 = extractFloat128Frac0( a );
6527 aExp = extractFloat128Exp( a );
6528 aSign = extractFloat128Sign( a );
6529 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
6530 shiftCount = 0x402F - aExp;
6531 if ( shiftCount <= 0 ) {
6532 if ( 0x403E < aExp ) {
ff32e16e 6533 float_raise(float_flag_invalid, status);
158142c2
FB
6534 if ( ! aSign
6535 || ( ( aExp == 0x7FFF )
6536 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
6537 )
6538 ) {
6539 return LIT64( 0x7FFFFFFFFFFFFFFF );
6540 }
bb98fe42 6541 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
6542 }
6543 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6544 }
6545 else {
6546 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6547 }
ff32e16e 6548 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
6549
6550}
6551
6552/*----------------------------------------------------------------------------
6553| Returns the result of converting the quadruple-precision floating-point
6554| value `a' to the 64-bit two's complement integer format. The conversion
6555| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6556| Arithmetic, except that the conversion is always rounded toward zero.
6557| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
6558| the conversion overflows, the largest integer with the same sign as `a' is
6559| returned.
6560*----------------------------------------------------------------------------*/
6561
f42c2224 6562int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2
FB
6563{
6564 flag aSign;
f4014512 6565 int32_t aExp, shiftCount;
bb98fe42 6566 uint64_t aSig0, aSig1;
f42c2224 6567 int64_t z;
158142c2
FB
6568
6569 aSig1 = extractFloat128Frac1( a );
6570 aSig0 = extractFloat128Frac0( a );
6571 aExp = extractFloat128Exp( a );
6572 aSign = extractFloat128Sign( a );
6573 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
6574 shiftCount = aExp - 0x402F;
6575 if ( 0 < shiftCount ) {
6576 if ( 0x403E <= aExp ) {
6577 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
6578 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
6579 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
a2f2d288
PM
6580 if (aSig1) {
6581 status->float_exception_flags |= float_flag_inexact;
6582 }
158142c2
FB
6583 }
6584 else {
ff32e16e 6585 float_raise(float_flag_invalid, status);
158142c2
FB
6586 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
6587 return LIT64( 0x7FFFFFFFFFFFFFFF );
6588 }
6589 }
bb98fe42 6590 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
6591 }
6592 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 6593 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
a2f2d288 6594 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6595 }
6596 }
6597 else {
6598 if ( aExp < 0x3FFF ) {
6599 if ( aExp | aSig0 | aSig1 ) {
a2f2d288 6600 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6601 }
6602 return 0;
6603 }
6604 z = aSig0>>( - shiftCount );
6605 if ( aSig1
bb98fe42 6606 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
a2f2d288 6607 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6608 }
6609 }
6610 if ( aSign ) z = - z;
6611 return z;
6612
6613}
6614
2e6d8568
BR
6615/*----------------------------------------------------------------------------
6616| Returns the result of converting the quadruple-precision floating-point value
6617| `a' to the 64-bit unsigned integer format. The conversion is
6618| performed according to the IEC/IEEE Standard for Binary Floating-Point
6619| Arithmetic---which means in particular that the conversion is rounded
6620| according to the current rounding mode. If `a' is a NaN, the largest
6621| positive integer is returned. If the conversion overflows, the
6622| largest unsigned integer is returned. If 'a' is negative, the value is
6623| rounded and zero is returned; negative values that do not round to zero
6624| will raise the inexact exception.
6625*----------------------------------------------------------------------------*/
6626
6627uint64_t float128_to_uint64(float128 a, float_status *status)
6628{
6629 flag aSign;
6630 int aExp;
6631 int shiftCount;
6632 uint64_t aSig0, aSig1;
6633
6634 aSig0 = extractFloat128Frac0(a);
6635 aSig1 = extractFloat128Frac1(a);
6636 aExp = extractFloat128Exp(a);
6637 aSign = extractFloat128Sign(a);
6638 if (aSign && (aExp > 0x3FFE)) {
6639 float_raise(float_flag_invalid, status);
6640 if (float128_is_any_nan(a)) {
6641 return LIT64(0xFFFFFFFFFFFFFFFF);
6642 } else {
6643 return 0;
6644 }
6645 }
6646 if (aExp) {
6647 aSig0 |= LIT64(0x0001000000000000);
6648 }
6649 shiftCount = 0x402F - aExp;
6650 if (shiftCount <= 0) {
6651 if (0x403E < aExp) {
6652 float_raise(float_flag_invalid, status);
6653 return LIT64(0xFFFFFFFFFFFFFFFF);
6654 }
6655 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6656 } else {
6657 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6658 }
6659 return roundAndPackUint64(aSign, aSig0, aSig1, status);
6660}
6661
6662uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6663{
6664 uint64_t v;
6665 signed char current_rounding_mode = status->float_rounding_mode;
6666
6667 set_float_rounding_mode(float_round_to_zero, status);
6668 v = float128_to_uint64(a, status);
6669 set_float_rounding_mode(current_rounding_mode, status);
6670
6671 return v;
6672}
6673
158142c2
FB
6674/*----------------------------------------------------------------------------
6675| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
6676| value `a' to the 32-bit unsigned integer format. The conversion
6677| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6678| Arithmetic except that the conversion is always rounded toward zero.
6679| If `a' is a NaN, the largest positive integer is returned. Otherwise,
6680| if the conversion overflows, the largest unsigned integer is returned.
6681| If 'a' is negative, the value is rounded and zero is returned; negative
6682| values that do not round to zero will raise the inexact exception.
6683*----------------------------------------------------------------------------*/
6684
6685uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6686{
6687 uint64_t v;
6688 uint32_t res;
6689 int old_exc_flags = get_float_exception_flags(status);
6690
6691 v = float128_to_uint64_round_to_zero(a, status);
6692 if (v > 0xffffffff) {
6693 res = 0xffffffff;
6694 } else {
6695 return v;
6696 }
6697 set_float_exception_flags(old_exc_flags, status);
6698 float_raise(float_flag_invalid, status);
6699 return res;
6700}
6701
6702/*----------------------------------------------------------------------------
6703| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
6704| value `a' to the single-precision floating-point format. The conversion
6705| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6706| Arithmetic.
6707*----------------------------------------------------------------------------*/
6708
e5a41ffa 6709float32 float128_to_float32(float128 a, float_status *status)
158142c2
FB
6710{
6711 flag aSign;
f4014512 6712 int32_t aExp;
bb98fe42
AF
6713 uint64_t aSig0, aSig1;
6714 uint32_t zSig;
158142c2
FB
6715
6716 aSig1 = extractFloat128Frac1( a );
6717 aSig0 = extractFloat128Frac0( a );
6718 aExp = extractFloat128Exp( a );
6719 aSign = extractFloat128Sign( a );
6720 if ( aExp == 0x7FFF ) {
6721 if ( aSig0 | aSig1 ) {
ff32e16e 6722 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
6723 }
6724 return packFloat32( aSign, 0xFF, 0 );
6725 }
6726 aSig0 |= ( aSig1 != 0 );
6727 shift64RightJamming( aSig0, 18, &aSig0 );
6728 zSig = aSig0;
6729 if ( aExp || zSig ) {
6730 zSig |= 0x40000000;
6731 aExp -= 0x3F81;
6732 }
ff32e16e 6733 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
6734
6735}
6736
6737/*----------------------------------------------------------------------------
6738| Returns the result of converting the quadruple-precision floating-point
6739| value `a' to the double-precision floating-point format. The conversion
6740| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6741| Arithmetic.
6742*----------------------------------------------------------------------------*/
6743
e5a41ffa 6744float64 float128_to_float64(float128 a, float_status *status)
158142c2
FB
6745{
6746 flag aSign;
f4014512 6747 int32_t aExp;
bb98fe42 6748 uint64_t aSig0, aSig1;
158142c2
FB
6749
6750 aSig1 = extractFloat128Frac1( a );
6751 aSig0 = extractFloat128Frac0( a );
6752 aExp = extractFloat128Exp( a );
6753 aSign = extractFloat128Sign( a );
6754 if ( aExp == 0x7FFF ) {
6755 if ( aSig0 | aSig1 ) {
ff32e16e 6756 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
6757 }
6758 return packFloat64( aSign, 0x7FF, 0 );
6759 }
6760 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6761 aSig0 |= ( aSig1 != 0 );
6762 if ( aExp || aSig0 ) {
6763 aSig0 |= LIT64( 0x4000000000000000 );
6764 aExp -= 0x3C01;
6765 }
ff32e16e 6766 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
6767
6768}
6769
158142c2
FB
6770/*----------------------------------------------------------------------------
6771| Returns the result of converting the quadruple-precision floating-point
6772| value `a' to the extended double-precision floating-point format. The
6773| conversion is performed according to the IEC/IEEE Standard for Binary
6774| Floating-Point Arithmetic.
6775*----------------------------------------------------------------------------*/
6776
e5a41ffa 6777floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2
FB
6778{
6779 flag aSign;
f4014512 6780 int32_t aExp;
bb98fe42 6781 uint64_t aSig0, aSig1;
158142c2
FB
6782
6783 aSig1 = extractFloat128Frac1( a );
6784 aSig0 = extractFloat128Frac0( a );
6785 aExp = extractFloat128Exp( a );
6786 aSign = extractFloat128Sign( a );
6787 if ( aExp == 0x7FFF ) {
6788 if ( aSig0 | aSig1 ) {
ff32e16e 6789 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
158142c2 6790 }
0f605c88
LV
6791 return packFloatx80(aSign, floatx80_infinity_high,
6792 floatx80_infinity_low);
158142c2
FB
6793 }
6794 if ( aExp == 0 ) {
6795 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6796 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6797 }
6798 else {
6799 aSig0 |= LIT64( 0x0001000000000000 );
6800 }
6801 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6802 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6803
6804}
6805
158142c2
FB
6806/*----------------------------------------------------------------------------
6807| Rounds the quadruple-precision floating-point value `a' to an integer, and
6808| returns the result as a quadruple-precision floating-point value. The
6809| operation is performed according to the IEC/IEEE Standard for Binary
6810| Floating-Point Arithmetic.
|
| The rounding direction is taken from status->float_rounding_mode.  The
| code is split by biased exponent into three ranges: 0x402F and above
| (rounding bits, if any, are in the low word), below 0x3FFF (result is a
| signed zero or packFloat128(sign, 0x3FFF, 0, 0)), and the range in between
| (rounding position is in the high word).  The inexact flag is set when the
| returned value differs from `a'.
6811*----------------------------------------------------------------------------*/
6812
e5a41ffa 6813float128 float128_round_to_int(float128 a, float_status *status)
158142c2
FB
6814{
6815 flag aSign;
f4014512 6816 int32_t aExp;
bb98fe42 6817 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
6818 float128 z;
6819
6820 aExp = extractFloat128Exp( a );
/* Large-exponent range: all masking below is applied to z.low only. */
6821 if ( 0x402F <= aExp ) {
/* Exponent 0x406F or more: `a' is returned unchanged, except that a NaN
 * goes through propagateFloat128NaN. */
6822 if ( 0x406F <= aExp ) {
6823 if ( ( aExp == 0x7FFF )
6824 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6825 ) {
ff32e16e 6826 return propagateFloat128NaN(a, a, status);
158142c2
FB
6827 }
6828 return a;
6829 }
/* The shift is done in two steps: for aExp == 0x402F the total shift is 64,
 * which leaves lastBitMask == 0 (and roundBitsMask all ones).  The
 * nearest-even and ties-away cases below test for that value explicitly. */
6830 lastBitMask = 1;
6831 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6832 roundBitsMask = lastBitMask - 1;
6833 z = a;
a2f2d288 6834 switch (status->float_rounding_mode) {
dc355b76 6835 case float_round_nearest_even:
158142c2
FB
/* Add half of the last kept bit; if the rounding bits then come out all
 * zero the input was exactly halfway, so the last kept bit is cleared. */
6836 if ( lastBitMask ) {
6837 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6838 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6839 }
6840 else {
/* lastBitMask == 0: the top bit of z.low decides whether z.high is
 * incremented; when the remaining low bits are zero (a tie) the lowest
 * bit of z.high is cleared. */
bb98fe42 6841 if ( (int64_t) z.low < 0 ) {
158142c2 6842 ++z.high;
bb98fe42 6843 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
6844 }
6845 }
dc355b76 6846 break;
f9288a76
PM
6847 case float_round_ties_away:
/* Same half-bit addition as above, without the tie correction. */
6848 if (lastBitMask) {
6849 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6850 } else {
6851 if ((int64_t) z.low < 0) {
6852 ++z.high;
6853 }
6854 }
6855 break;
dc355b76
PM
6856 case float_round_to_zero:
6857 break;
6858 case float_round_up:
/* Only non-negative values are bumped: adding roundBitsMask carries into
 * the kept bits whenever any rounding bit is set. */
6859 if (!extractFloat128Sign(z)) {
6860 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6861 }
6862 break;
6863 case float_round_down:
/* Mirror of float_round_up: only negative values are bumped. */
6864 if (extractFloat128Sign(z)) {
6865 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 6866 }
dc355b76
PM
6867 break;
6868 default:
6869 abort();
158142c2
FB
6870 }
/* Discard the bits below the rounding position. */
6871 z.low &= ~ roundBitsMask;
6872 }
6873 else {
/* Small-exponent range: the result is a signed zero or
 * packFloat128(sign, 0x3FFF, 0, 0), chosen by the rounding mode. */
6874 if ( aExp < 0x3FFF ) {
/* Every bit other than the sign is zero: return `a' as is. */
bb98fe42 6875 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
a2f2d288 6876 status->float_exception_flags |= float_flag_inexact;
158142c2 6877 aSign = extractFloat128Sign( a );
a2f2d288 6878 switch (status->float_rounding_mode) {
158142c2
FB
6879 case float_round_nearest_even:
/* Exponent 0x3FFE with a nonzero fraction rounds to exponent 0x3FFF. */
6880 if ( ( aExp == 0x3FFE )
6881 && ( extractFloat128Frac0( a )
6882 | extractFloat128Frac1( a ) )
6883 ) {
6884 return packFloat128( aSign, 0x3FFF, 0, 0 );
6885 }
6886 break;
f9288a76
PM
6887 case float_round_ties_away:
/* Unlike nearest-even, a zero fraction at exponent 0x3FFE also rounds
 * to exponent 0x3FFF. */
6888 if (aExp == 0x3FFE) {
6889 return packFloat128(aSign, 0x3FFF, 0, 0);
6890 }
6891 break;
158142c2
FB
6892 case float_round_down:
6893 return
6894 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6895 : packFloat128( 0, 0, 0, 0 );
6896 case float_round_up:
6897 return
6898 aSign ? packFloat128( 1, 0, 0, 0 )
6899 : packFloat128( 0, 0x3FFF, 0, 0 );
6900 }
/* Modes with no case above, and the cases that break out of the switch,
 * yield a zero carrying the sign of `a'. */
6901 return packFloat128( aSign, 0, 0, 0 );
6902 }
/* Middle range (0x3FFF <= aExp < 0x402F): the rounding position is in the
 * high word.  The low word of the result is zero and a.low only contributes
 * as a "nonzero" indication. */
6903 lastBitMask = 1;
6904 lastBitMask <<= 0x402F - aExp;
6905 roundBitsMask = lastBitMask - 1;
6906 z.low = 0;
6907 z.high = a.high;
a2f2d288 6908 switch (status->float_rounding_mode) {
dc355b76 6909 case float_round_nearest_even:
158142c2
FB
/* Tie detection must also see a.low: the input is exactly halfway only if
 * the rounding bits come out zero and a.low is zero. */
6910 z.high += lastBitMask>>1;
6911 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6912 z.high &= ~ lastBitMask;
6913 }
dc355b76 6914 break;
f9288a76
PM
6915 case float_round_ties_away:
6916 z.high += lastBitMask>>1;
6917 break;
dc355b76
PM
6918 case float_round_to_zero:
6919 break;
6920 case float_round_up:
/* A nonzero a.low is folded into the lowest bit of z.high so that adding
 * roundBitsMask produces a carry for it as well. */
6921 if (!extractFloat128Sign(z)) {
158142c2
FB
6922 z.high |= ( a.low != 0 );
6923 z.high += roundBitsMask;
6924 }
dc355b76
PM
6925 break;
6926 case float_round_down:
6927 if (extractFloat128Sign(z)) {
6928 z.high |= (a.low != 0);
6929 z.high += roundBitsMask;
6930 }
6931 break;
6932 default:
6933 abort();
158142c2
FB
6934 }
6935 z.high &= ~ roundBitsMask;
6936 }
/* Any change relative to the input sets the inexact flag. */
6937 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
a2f2d288 6938 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6939 }
6940 return z;
6941
6942}
6943
6944/*----------------------------------------------------------------------------
6945| Returns the result of adding the absolute values of the quadruple-precision
6946| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
6947| before being returned. `zSign' is ignored if the result is a NaN.
6948| The addition is performed according to the IEC/IEEE Standard for Binary
6949| Floating-Point Arithmetic.
|
| The operand with the smaller exponent is shifted right, with the bits
| shifted out collected in zSig2, before the significands are added.  The
| final rounding is done by roundAndPackFloat128.
6950*----------------------------------------------------------------------------*/
6951
e5a41ffa
PM
6952static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
6953 float_status *status)
158142c2 6954{
f4014512 6955 int32_t aExp, bExp, zExp;
bb98fe42 6956 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 6957 int32_t expDiff;
158142c2
FB
6958
6959 aSig1 = extractFloat128Frac1( a );
6960 aSig0 = extractFloat128Frac0( a );
6961 aExp = extractFloat128Exp( a );
6962 bSig1 = extractFloat128Frac1( b );
6963 bSig0 = extractFloat128Frac0( b );
6964 bExp = extractFloat128Exp( b );
6965 expDiff = aExp - bExp;
/* Case 1: a has the larger exponent. */
6966 if ( 0 < expDiff ) {
6967 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6968 if (aSig0 | aSig1) {
6969 return propagateFloat128NaN(a, b, status);
6970 }
158142c2
FB
/* a has exponent 0x7FFF and a zero fraction: it is returned as is. */
6971 return a;
6972 }
/* b with a zero exponent field gets no implicit bit; the shift distance is
 * reduced by one instead. */
6973 if ( bExp == 0 ) {
6974 --expDiff;
6975 }
6976 else {
6977 bSig0 |= LIT64( 0x0001000000000000 );
6978 }
6979 shift128ExtraRightJamming(
6980 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6981 zExp = aExp;
6982 }
/* Case 2: b has the larger exponent (mirror image of case 1). */
6983 else if ( expDiff < 0 ) {
6984 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6985 if (bSig0 | bSig1) {
6986 return propagateFloat128NaN(a, b, status);
6987 }
158142c2
FB
/* b has exponent 0x7FFF and a zero fraction: the result is built with that
 * exponent, a zero fraction and the requested sign. */
6988 return packFloat128( zSign, 0x7FFF, 0, 0 );
6989 }
6990 if ( aExp == 0 ) {
6991 ++expDiff;
6992 }
6993 else {
6994 aSig0 |= LIT64( 0x0001000000000000 );
6995 }
6996 shift128ExtraRightJamming(
6997 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6998 zExp = bExp;
6999 }
/* Case 3: equal exponents, so no alignment shift is needed. */
7000 else {
7001 if ( aExp == 0x7FFF ) {
7002 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 7003 return propagateFloat128NaN(a, b, status);
158142c2
FB
7004 }
7005 return a;
7006 }
7007 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
/* Both exponent fields are zero: the raw sum is packed with exponent 0,
 * unless flush_to_zero is set, in which case a signed zero is returned and
 * float_flag_output_denormal is raised for a nonzero sum. */
fe76d976 7008 if ( aExp == 0 ) {
a2f2d288 7009 if (status->flush_to_zero) {
e6afc87f 7010 if (zSig0 | zSig1) {
ff32e16e 7011 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
7012 }
7013 return packFloat128(zSign, 0, 0, 0);
7014 }
fe76d976
PB
7015 return packFloat128( zSign, 0, zSig0, zSig1 );
7016 }
158142c2
FB
/* Neither fraction had its implicit bit (0x0001000000000000) set; the two of
 * them sum to 0x0002000000000000, so the result always needs the one-bit
 * right shift at shiftRight1. */
7017 zSig2 = 0;
7018 zSig0 |= LIT64( 0x0002000000000000 );
7019 zExp = aExp;
7020 goto shiftRight1;
7021 }
/* Reached from cases 1 and 2 only.  The implicit bit is OR-ed into aSig0 in
 * both cases; NOTE(review): in case 2 aSig0 is the already-shifted smaller
 * operand, so this appears to stand in for the larger operand's implicit
 * bit in the sum -- confirm. */
7022 aSig0 |= LIT64( 0x0001000000000000 );
7023 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
/* zExp is decremented for the path without the one-bit shift; if the sum
 * reached 0x0002000000000000 the decrement is undone and the sum is
 * shifted right by one. */
7024 --zExp;
7025 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
7026 ++zExp;
7027 shiftRight1:
7028 shift128ExtraRightJamming(
7029 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7030 roundAndPack:
ff32e16e 7031 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7032
7033}
7034
7035/*----------------------------------------------------------------------------
7036| Returns the result of subtracting the absolute values of the quadruple-
7037| precision floating-point values `a' and `b'. If `zSign' is 1, the
7038| difference is negated before being returned. `zSign' is ignored if the
7039| result is a NaN. The subtraction is performed according to the IEC/IEEE
7040| Standard for Binary Floating-Point Arithmetic.
|
| Both significands are first shifted left by 14 bits, which puts the
| implicit bit at 0x4000000000000000.  The smaller magnitude is always
| subtracted from the larger one; when `b' is the larger, `zSign' is
| flipped.  Normalization and rounding are done by
| normalizeRoundAndPackFloat128.
7041*----------------------------------------------------------------------------*/
7042
e5a41ffa
PM
7043static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
7044 float_status *status)
158142c2 7045{
f4014512 7046 int32_t aExp, bExp, zExp;
bb98fe42 7047 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 7048 int32_t expDiff;
158142c2
FB
7049
7050 aSig1 = extractFloat128Frac1( a );
7051 aSig0 = extractFloat128Frac0( a );
7052 aExp = extractFloat128Exp( a );
7053 bSig1 = extractFloat128Frac1( b );
7054 bSig0 = extractFloat128Frac0( b );
7055 bExp = extractFloat128Exp( b );
7056 expDiff = aExp - bExp;
7057 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
7058 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
7059 if ( 0 < expDiff ) goto aExpBigger;
7060 if ( expDiff < 0 ) goto bExpBigger;
/* Equal exponents from here on. */
7061 if ( aExp == 0x7FFF ) {
7062 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 7063 return propagateFloat128NaN(a, b, status);
158142c2 7064 }
/* Both operands have exponent 0x7FFF and zero fractions: the difference is
 * invalid and the default NaN is returned. */
ff32e16e 7065 float_raise(float_flag_invalid, status);
af39bc8c 7066 return float128_default_nan(status);
158142c2
FB
7067 }
/* With zero exponent fields, exponent 1 is used for the result. */
7068 if ( aExp == 0 ) {
7069 aExp = 1;
7070 bExp = 1;
7071 }
/* Word-by-word magnitude comparison decides which operand is subtracted
 * from which. */
7072 if ( bSig0 < aSig0 ) goto aBigger;
7073 if ( aSig0 < bSig0 ) goto bBigger;
7074 if ( bSig1 < aSig1 ) goto aBigger;
7075 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
/* Equal magnitudes: the result is a zero whose sign bit is set only when
 * the rounding mode is float_round_down. */
7076 return packFloat128(status->float_rounding_mode == float_round_down,
7077 0, 0, 0);
158142c2
FB
7078 bExpBigger:
7079 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7080 if (bSig0 | bSig1) {
7081 return propagateFloat128NaN(a, b, status);
7082 }
158142c2
FB
/* b has exponent 0x7FFF and a zero fraction: the result has that exponent,
 * a zero fraction and the opposite of the requested sign. */
7083 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
7084 }
/* a with a zero exponent field gets no implicit bit; the shift distance is
 * reduced by one instead. */
7085 if ( aExp == 0 ) {
7086 ++expDiff;
7087 }
7088 else {
7089 aSig0 |= LIT64( 0x4000000000000000 );
7090 }
7091 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7092 bSig0 |= LIT64( 0x4000000000000000 );
/* bExpBigger falls through to bBigger: b - a, with the sign flipped. */
7093 bBigger:
7094 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
7095 zExp = bExp;
7096 zSign ^= 1;
7097 goto normalizeRoundAndPack;
7098 aExpBigger:
7099 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7100 if (aSig0 | aSig1) {
7101 return propagateFloat128NaN(a, b, status);
7102 }
158142c2
FB
7103 return a;
7104 }
7105 if ( bExp == 0 ) {
7106 --expDiff;
7107 }
7108 else {
7109 bSig0 |= LIT64( 0x4000000000000000 );
7110 }
7111 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
7112 aSig0 |= LIT64( 0x4000000000000000 );
/* aExpBigger falls through to aBigger: a - b, sign unchanged. */
7113 aBigger:
7114 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7115 zExp = aExp;
7116 normalizeRoundAndPack:
/* The exponent handed on is reduced by one here and by a further 14 in the
 * call below. */
7117 --zExp;
ff32e16e
PM
7118 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
7119 status);
158142c2
FB
7120
7121}
7122
7123/*----------------------------------------------------------------------------
7124| Returns the result of adding the quadruple-precision floating-point values
7125| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
7126| for Binary Floating-Point Arithmetic.
7127*----------------------------------------------------------------------------*/
7128
e5a41ffa 7129float128 float128_add(float128 a, float128 b, float_status *status)
158142c2
FB
7130{
7131 flag aSign, bSign;
7132
7133 aSign = extractFloat128Sign( a );
7134 bSign = extractFloat128Sign( b );
7135 if ( aSign == bSign ) {
ff32e16e 7136 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7137 }
7138 else {
ff32e16e 7139 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7140 }
7141
7142}
7143
7144/*----------------------------------------------------------------------------
7145| Returns the result of subtracting the quadruple-precision floating-point
7146| values `a' and `b'. The operation is performed according to the IEC/IEEE
7147| Standard for Binary Floating-Point Arithmetic.
7148*----------------------------------------------------------------------------*/
7149
e5a41ffa 7150float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2
FB
7151{
7152 flag aSign, bSign;
7153
7154 aSign = extractFloat128Sign( a );
7155 bSign = extractFloat128Sign( b );
7156 if ( aSign == bSign ) {
ff32e16e 7157 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7158 }
7159 else {
ff32e16e 7160 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7161 }
7162
7163}
7164
7165/*----------------------------------------------------------------------------
7166| Returns the result of multiplying the quadruple-precision floating-point
7167| values `a' and `b'. The operation is performed according to the IEC/IEEE
7168| Standard for Binary Floating-Point Arithmetic.
7169*----------------------------------------------------------------------------*/
7170
e5a41ffa 7171float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2
FB
7172{
7173 flag aSign, bSign, zSign;
f4014512 7174 int32_t aExp, bExp, zExp;
bb98fe42 7175 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
7176
7177 aSig1 = extractFloat128Frac1( a );
7178 aSig0 = extractFloat128Frac0( a );
7179 aExp = extractFloat128Exp( a );
7180 aSign = extractFloat128Sign( a );
7181 bSig1 = extractFloat128Frac1( b );
7182 bSig0 = extractFloat128Frac0( b );
7183 bExp = extractFloat128Exp( b );
7184 bSign = extractFloat128Sign( b );
7185 zSign = aSign ^ bSign;
7186 if ( aExp == 0x7FFF ) {
7187 if ( ( aSig0 | aSig1 )
7188 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7189 return propagateFloat128NaN(a, b, status);
158142c2
FB
7190 }
7191 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
7192 return packFloat128( zSign, 0x7FFF, 0, 0 );
7193 }
7194 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7195 if (bSig0 | bSig1) {
7196 return propagateFloat128NaN(a, b, status);
7197 }
158142c2
FB
7198 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7199 invalid:
ff32e16e 7200 float_raise(float_flag_invalid, status);
af39bc8c 7201 return float128_default_nan(status);
158142c2
FB
7202 }
7203 return packFloat128( zSign, 0x7FFF, 0, 0 );
7204 }
7205 if ( aExp == 0 ) {
7206 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7207 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7208 }
7209 if ( bExp == 0 ) {
7210 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7211 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7212 }
7213 zExp = aExp + bExp - 0x4000;
7214 aSig0 |= LIT64( 0x0001000000000000 );
7215 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
7216 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
7217 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
7218 zSig2 |= ( zSig3 != 0 );
7219 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
7220 shift128ExtraRightJamming(
7221 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7222 ++zExp;
7223 }
ff32e16e 7224 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7225
7226}
7227
7228/*----------------------------------------------------------------------------
7229| Returns the result of dividing the quadruple-precision floating-point value
7230| `a' by the corresponding value `b'. The operation is performed according to
7231| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7232*----------------------------------------------------------------------------*/
7233
e5a41ffa 7234float128 float128_div(float128 a, float128 b, float_status *status)
158142c2
FB
7235{
7236 flag aSign, bSign, zSign;
f4014512 7237 int32_t aExp, bExp, zExp;
bb98fe42
AF
7238 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
7239 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7240
7241 aSig1 = extractFloat128Frac1( a );
7242 aSig0 = extractFloat128Frac0( a );
7243 aExp = extractFloat128Exp( a );
7244 aSign = extractFloat128Sign( a );
7245 bSig1 = extractFloat128Frac1( b );
7246 bSig0 = extractFloat128Frac0( b );
7247 bExp = extractFloat128Exp( b );
7248 bSign = extractFloat128Sign( b );
7249 zSign = aSign ^ bSign;
7250 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7251 if (aSig0 | aSig1) {
7252 return propagateFloat128NaN(a, b, status);
7253 }
158142c2 7254 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7255 if (bSig0 | bSig1) {
7256 return propagateFloat128NaN(a, b, status);
7257 }
158142c2
FB
7258 goto invalid;
7259 }
7260 return packFloat128( zSign, 0x7FFF, 0, 0 );
7261 }
7262 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7263 if (bSig0 | bSig1) {
7264 return propagateFloat128NaN(a, b, status);
7265 }
158142c2
FB
7266 return packFloat128( zSign, 0, 0, 0 );
7267 }
7268 if ( bExp == 0 ) {
7269 if ( ( bSig0 | bSig1 ) == 0 ) {
7270 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7271 invalid:
ff32e16e 7272 float_raise(float_flag_invalid, status);
af39bc8c 7273 return float128_default_nan(status);
158142c2 7274 }
ff32e16e 7275 float_raise(float_flag_divbyzero, status);
158142c2
FB
7276 return packFloat128( zSign, 0x7FFF, 0, 0 );
7277 }
7278 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7279 }
7280 if ( aExp == 0 ) {
7281 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7282 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7283 }
7284 zExp = aExp - bExp + 0x3FFD;
7285 shortShift128Left(
7286 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
7287 shortShift128Left(
7288 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
7289 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
7290 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
7291 ++zExp;
7292 }
7293 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
7294 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
7295 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 7296 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7297 --zSig0;
7298 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
7299 }
7300 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
7301 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
7302 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
7303 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7304 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7305 --zSig1;
7306 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
7307 }
7308 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7309 }
7310 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 7311 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7312
7313}
7314
7315/*----------------------------------------------------------------------------
7316| Returns the remainder of the quadruple-precision floating-point value `a'
7317| with respect to the corresponding value `b'. The operation is performed
7318| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7319*----------------------------------------------------------------------------*/
7320
e5a41ffa 7321float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 7322{
ed086f3d 7323 flag aSign, zSign;
f4014512 7324 int32_t aExp, bExp, expDiff;
bb98fe42
AF
7325 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7326 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
7327 int64_t sigMean0;
158142c2
FB
7328
7329 aSig1 = extractFloat128Frac1( a );
7330 aSig0 = extractFloat128Frac0( a );
7331 aExp = extractFloat128Exp( a );
7332 aSign = extractFloat128Sign( a );
7333 bSig1 = extractFloat128Frac1( b );
7334 bSig0 = extractFloat128Frac0( b );
7335 bExp = extractFloat128Exp( b );
158142c2
FB
7336 if ( aExp == 0x7FFF ) {
7337 if ( ( aSig0 | aSig1 )
7338 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7339 return propagateFloat128NaN(a, b, status);
158142c2
FB
7340 }
7341 goto invalid;
7342 }
7343 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7344 if (bSig0 | bSig1) {
7345 return propagateFloat128NaN(a, b, status);
7346 }
158142c2
FB
7347 return a;
7348 }
7349 if ( bExp == 0 ) {
7350 if ( ( bSig0 | bSig1 ) == 0 ) {
7351 invalid:
ff32e16e 7352 float_raise(float_flag_invalid, status);
af39bc8c 7353 return float128_default_nan(status);
158142c2
FB
7354 }
7355 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7356 }
7357 if ( aExp == 0 ) {
7358 if ( ( aSig0 | aSig1 ) == 0 ) return a;
7359 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7360 }
7361 expDiff = aExp - bExp;
7362 if ( expDiff < -1 ) return a;
7363 shortShift128Left(
7364 aSig0 | LIT64( 0x0001000000000000 ),
7365 aSig1,
7366 15 - ( expDiff < 0 ),
7367 &aSig0,
7368 &aSig1
7369 );
7370 shortShift128Left(
7371 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
7372 q = le128( bSig0, bSig1, aSig0, aSig1 );
7373 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7374 expDiff -= 64;
7375 while ( 0 < expDiff ) {
7376 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7377 q = ( 4 < q ) ? q - 4 : 0;
7378 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7379 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
7380 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
7381 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
7382 expDiff -= 61;
7383 }
7384 if ( -64 < expDiff ) {
7385 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7386 q = ( 4 < q ) ? q - 4 : 0;
7387 q >>= - expDiff;
7388 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7389 expDiff += 52;
7390 if ( expDiff < 0 ) {
7391 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7392 }
7393 else {
7394 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7395 }
7396 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7397 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7398 }
7399 else {
7400 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
7401 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7402 }
7403 do {
7404 alternateASig0 = aSig0;
7405 alternateASig1 = aSig1;
7406 ++q;
7407 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 7408 } while ( 0 <= (int64_t) aSig0 );
158142c2 7409 add128(
bb98fe42 7410 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
7411 if ( ( sigMean0 < 0 )
7412 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
7413 aSig0 = alternateASig0;
7414 aSig1 = alternateASig1;
7415 }
bb98fe42 7416 zSign = ( (int64_t) aSig0 < 0 );
158142c2 7417 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
7418 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
7419 status);
158142c2
FB
7420}
7421
7422/*----------------------------------------------------------------------------
7423| Returns the square root of the quadruple-precision floating-point value `a'.
7424| The operation is performed according to the IEC/IEEE Standard for Binary
7425| Floating-Point Arithmetic.
7426*----------------------------------------------------------------------------*/
7427
e5a41ffa 7428float128 float128_sqrt(float128 a, float_status *status)
158142c2
FB
7429{
7430 flag aSign;
f4014512 7431 int32_t aExp, zExp;
bb98fe42
AF
7432 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7433 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7434
7435 aSig1 = extractFloat128Frac1( a );
7436 aSig0 = extractFloat128Frac0( a );
7437 aExp = extractFloat128Exp( a );
7438 aSign = extractFloat128Sign( a );
7439 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7440 if (aSig0 | aSig1) {
7441 return propagateFloat128NaN(a, a, status);
7442 }
158142c2
FB
7443 if ( ! aSign ) return a;
7444 goto invalid;
7445 }
7446 if ( aSign ) {
7447 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
7448 invalid:
ff32e16e 7449 float_raise(float_flag_invalid, status);
af39bc8c 7450 return float128_default_nan(status);
158142c2
FB
7451 }
7452 if ( aExp == 0 ) {
7453 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
7454 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7455 }
7456 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
7457 aSig0 |= LIT64( 0x0001000000000000 );
7458 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
7459 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
7460 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
7461 doubleZSig0 = zSig0<<1;
7462 mul64To128( zSig0, zSig0, &term0, &term1 );
7463 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 7464 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7465 --zSig0;
7466 doubleZSig0 -= 2;
7467 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
7468 }
7469 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
7470 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7471 if ( zSig1 == 0 ) zSig1 = 1;
7472 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7473 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7474 mul64To128( zSig1, zSig1, &term2, &term3 );
7475 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7476 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7477 --zSig1;
7478 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7479 term3 |= 1;
7480 term2 |= doubleZSig0;
7481 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7482 }
7483 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7484 }
7485 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 7486 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7487
7488}
7489
7490/*----------------------------------------------------------------------------
7491| Returns 1 if the quadruple-precision floating-point value `a' is equal to
b689362d
AJ
7492| the corresponding value `b', and 0 otherwise. The invalid exception is
7493| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
7494| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7495*----------------------------------------------------------------------------*/
7496
e5a41ffa 7497int float128_eq(float128 a, float128 b, float_status *status)
158142c2
FB
7498{
7499
7500 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7501 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7502 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7503 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7504 ) {
ff32e16e 7505 float_raise(float_flag_invalid, status);
158142c2
FB
7506 return 0;
7507 }
7508 return
7509 ( a.low == b.low )
7510 && ( ( a.high == b.high )
7511 || ( ( a.low == 0 )
bb98fe42 7512 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
7513 );
7514
7515}
7516
7517/*----------------------------------------------------------------------------
7518| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
7519| or equal to the corresponding value `b', and 0 otherwise. The invalid
7520| exception is raised if either operand is a NaN. The comparison is performed
7521| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
7522*----------------------------------------------------------------------------*/
7523
e5a41ffa 7524int float128_le(float128 a, float128 b, float_status *status)
158142c2
FB
7525{
7526 flag aSign, bSign;
7527
7528 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7529 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7530 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7531 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7532 ) {
ff32e16e 7533 float_raise(float_flag_invalid, status);
158142c2
FB
7534 return 0;
7535 }
7536 aSign = extractFloat128Sign( a );
7537 bSign = extractFloat128Sign( b );
7538 if ( aSign != bSign ) {
7539 return
7540 aSign
bb98fe42 7541 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7542 == 0 );
7543 }
7544 return
7545 aSign ? le128( b.high, b.low, a.high, a.low )
7546 : le128( a.high, a.low, b.high, b.low );
7547
7548}
7549
7550/*----------------------------------------------------------------------------
7551| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
7552| the corresponding value `b', and 0 otherwise. The invalid exception is
7553| raised if either operand is a NaN. The comparison is performed according
7554| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
7555*----------------------------------------------------------------------------*/
7556
e5a41ffa 7557int float128_lt(float128 a, float128 b, float_status *status)
158142c2
FB
7558{
7559 flag aSign, bSign;
7560
7561 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7562 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7563 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7564 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7565 ) {
ff32e16e 7566 float_raise(float_flag_invalid, status);
158142c2
FB
7567 return 0;
7568 }
7569 aSign = extractFloat128Sign( a );
7570 bSign = extractFloat128Sign( b );
7571 if ( aSign != bSign ) {
7572 return
7573 aSign
bb98fe42 7574 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7575 != 0 );
7576 }
7577 return
7578 aSign ? lt128( b.high, b.low, a.high, a.low )
7579 : lt128( a.high, a.low, b.high, b.low );
7580
7581}
7582
67b7861d
AJ
7583/*----------------------------------------------------------------------------
7584| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
f5a64251
AJ
7585| be compared, and 0 otherwise. The invalid exception is raised if either
7586| operand is a NaN. The comparison is performed according to the IEC/IEEE
7587| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
7588*----------------------------------------------------------------------------*/
7589
e5a41ffa 7590int float128_unordered(float128 a, float128 b, float_status *status)
67b7861d
AJ
7591{
7592 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7593 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7594 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7595 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7596 ) {
ff32e16e 7597 float_raise(float_flag_invalid, status);
67b7861d
AJ
7598 return 1;
7599 }
7600 return 0;
7601}
7602
158142c2
FB
7603/*----------------------------------------------------------------------------
7604| Returns 1 if the quadruple-precision floating-point value `a' is equal to
f5a64251
AJ
7605| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
7606| exception. The comparison is performed according to the IEC/IEEE Standard
7607| for Binary Floating-Point Arithmetic.
158142c2
FB
7608*----------------------------------------------------------------------------*/
7609
e5a41ffa 7610int float128_eq_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7611{
7612
7613 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7614 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7615 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7616 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7617 ) {
af39bc8c
AM
7618 if (float128_is_signaling_nan(a, status)
7619 || float128_is_signaling_nan(b, status)) {
ff32e16e 7620 float_raise(float_flag_invalid, status);
b689362d 7621 }
158142c2
FB
7622 return 0;
7623 }
7624 return
7625 ( a.low == b.low )
7626 && ( ( a.high == b.high )
7627 || ( ( a.low == 0 )
bb98fe42 7628 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
7629 );
7630
7631}
7632
7633/*----------------------------------------------------------------------------
7634| Returns 1 if the quadruple-precision floating-point value `a' is less than
7635| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
7636| cause an exception. Otherwise, the comparison is performed according to the
7637| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7638*----------------------------------------------------------------------------*/
7639
e5a41ffa 7640int float128_le_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7641{
7642 flag aSign, bSign;
7643
7644 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7645 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7646 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7647 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7648 ) {
af39bc8c
AM
7649 if (float128_is_signaling_nan(a, status)
7650 || float128_is_signaling_nan(b, status)) {
ff32e16e 7651 float_raise(float_flag_invalid, status);
158142c2
FB
7652 }
7653 return 0;
7654 }
7655 aSign = extractFloat128Sign( a );
7656 bSign = extractFloat128Sign( b );
7657 if ( aSign != bSign ) {
7658 return
7659 aSign
bb98fe42 7660 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7661 == 0 );
7662 }
7663 return
7664 aSign ? le128( b.high, b.low, a.high, a.low )
7665 : le128( a.high, a.low, b.high, b.low );
7666
7667}
7668
7669/*----------------------------------------------------------------------------
7670| Returns 1 if the quadruple-precision floating-point value `a' is less than
7671| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
7672| exception. Otherwise, the comparison is performed according to the IEC/IEEE
7673| Standard for Binary Floating-Point Arithmetic.
7674*----------------------------------------------------------------------------*/
7675
e5a41ffa 7676int float128_lt_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7677{
7678 flag aSign, bSign;
7679
7680 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7681 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7682 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7683 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7684 ) {
af39bc8c
AM
7685 if (float128_is_signaling_nan(a, status)
7686 || float128_is_signaling_nan(b, status)) {
ff32e16e 7687 float_raise(float_flag_invalid, status);
158142c2
FB
7688 }
7689 return 0;
7690 }
7691 aSign = extractFloat128Sign( a );
7692 bSign = extractFloat128Sign( b );
7693 if ( aSign != bSign ) {
7694 return
7695 aSign
bb98fe42 7696 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7697 != 0 );
7698 }
7699 return
7700 aSign ? lt128( b.high, b.low, a.high, a.low )
7701 : lt128( a.high, a.low, b.high, b.low );
7702
7703}
7704
67b7861d
AJ
7705/*----------------------------------------------------------------------------
7706| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
7707| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
7708| comparison is performed according to the IEC/IEEE Standard for Binary
7709| Floating-Point Arithmetic.
7710*----------------------------------------------------------------------------*/
7711
e5a41ffa 7712int float128_unordered_quiet(float128 a, float128 b, float_status *status)
67b7861d
AJ
7713{
7714 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7715 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7716 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7717 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7718 ) {
af39bc8c
AM
7719 if (float128_is_signaling_nan(a, status)
7720 || float128_is_signaling_nan(b, status)) {
ff32e16e 7721 float_raise(float_flag_invalid, status);
67b7861d
AJ
7722 }
7723 return 1;
7724 }
7725 return 0;
7726}
7727
e5a41ffa
PM
7728static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
7729 int is_quiet, float_status *status)
f6714d36
AJ
7730{
7731 flag aSign, bSign;
7732
d1eb8f2a
AD
7733 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7734 float_raise(float_flag_invalid, status);
7735 return float_relation_unordered;
7736 }
f6714d36
AJ
7737 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7738 ( extractFloatx80Frac( a )<<1 ) ) ||
7739 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7740 ( extractFloatx80Frac( b )<<1 ) )) {
7741 if (!is_quiet ||
af39bc8c
AM
7742 floatx80_is_signaling_nan(a, status) ||
7743 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7744 float_raise(float_flag_invalid, status);
f6714d36
AJ
7745 }
7746 return float_relation_unordered;
7747 }
7748 aSign = extractFloatx80Sign( a );
7749 bSign = extractFloatx80Sign( b );
7750 if ( aSign != bSign ) {
7751
7752 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7753 ( ( a.low | b.low ) == 0 ) ) {
7754 /* zero case */
7755 return float_relation_equal;
7756 } else {
7757 return 1 - (2 * aSign);
7758 }
7759 } else {
7760 if (a.low == b.low && a.high == b.high) {
7761 return float_relation_equal;
7762 } else {
7763 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7764 }
7765 }
7766}
7767
e5a41ffa 7768int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7769{
ff32e16e 7770 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7771}
7772
e5a41ffa 7773int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
f6714d36 7774{
ff32e16e 7775 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7776}
7777
e5a41ffa
PM
7778static inline int float128_compare_internal(float128 a, float128 b,
7779 int is_quiet, float_status *status)
1f587329
BS
7780{
7781 flag aSign, bSign;
7782
7783 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7784 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7785 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7786 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7787 if (!is_quiet ||
af39bc8c
AM
7788 float128_is_signaling_nan(a, status) ||
7789 float128_is_signaling_nan(b, status)) {
ff32e16e 7790 float_raise(float_flag_invalid, status);
1f587329
BS
7791 }
7792 return float_relation_unordered;
7793 }
7794 aSign = extractFloat128Sign( a );
7795 bSign = extractFloat128Sign( b );
7796 if ( aSign != bSign ) {
7797 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7798 /* zero case */
7799 return float_relation_equal;
7800 } else {
7801 return 1 - (2 * aSign);
7802 }
7803 } else {
7804 if (a.low == b.low && a.high == b.high) {
7805 return float_relation_equal;
7806 } else {
7807 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7808 }
7809 }
7810}
7811
e5a41ffa 7812int float128_compare(float128 a, float128 b, float_status *status)
1f587329 7813{
ff32e16e 7814 return float128_compare_internal(a, b, 0, status);
1f587329
BS
7815}
7816
e5a41ffa 7817int float128_compare_quiet(float128 a, float128 b, float_status *status)
1f587329 7818{
ff32e16e 7819 return float128_compare_internal(a, b, 1, status);
1f587329
BS
7820}
7821
e5a41ffa 7822floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb
PB
7823{
7824 flag aSign;
326b9e98 7825 int32_t aExp;
bb98fe42 7826 uint64_t aSig;
9ee6e8bb 7827
d1eb8f2a
AD
7828 if (floatx80_invalid_encoding(a)) {
7829 float_raise(float_flag_invalid, status);
7830 return floatx80_default_nan(status);
7831 }
9ee6e8bb
PB
7832 aSig = extractFloatx80Frac( a );
7833 aExp = extractFloatx80Exp( a );
7834 aSign = extractFloatx80Sign( a );
7835
326b9e98
AJ
7836 if ( aExp == 0x7FFF ) {
7837 if ( aSig<<1 ) {
ff32e16e 7838 return propagateFloatx80NaN(a, a, status);
326b9e98 7839 }
9ee6e8bb
PB
7840 return a;
7841 }
326b9e98 7842
3c85c37f
PM
7843 if (aExp == 0) {
7844 if (aSig == 0) {
7845 return a;
7846 }
7847 aExp++;
7848 }
69397542 7849
326b9e98
AJ
7850 if (n > 0x10000) {
7851 n = 0x10000;
7852 } else if (n < -0x10000) {
7853 n = -0x10000;
7854 }
7855
9ee6e8bb 7856 aExp += n;
a2f2d288
PM
7857 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7858 aSign, aExp, aSig, 0, status);
9ee6e8bb 7859}
9ee6e8bb 7860
e5a41ffa 7861float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb
PB
7862{
7863 flag aSign;
326b9e98 7864 int32_t aExp;
bb98fe42 7865 uint64_t aSig0, aSig1;
9ee6e8bb
PB
7866
7867 aSig1 = extractFloat128Frac1( a );
7868 aSig0 = extractFloat128Frac0( a );
7869 aExp = extractFloat128Exp( a );
7870 aSign = extractFloat128Sign( a );
7871 if ( aExp == 0x7FFF ) {
326b9e98 7872 if ( aSig0 | aSig1 ) {
ff32e16e 7873 return propagateFloat128NaN(a, a, status);
326b9e98 7874 }
9ee6e8bb
PB
7875 return a;
7876 }
3c85c37f 7877 if (aExp != 0) {
69397542 7878 aSig0 |= LIT64( 0x0001000000000000 );
3c85c37f 7879 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 7880 return a;
3c85c37f
PM
7881 } else {
7882 aExp++;
7883 }
69397542 7884
326b9e98
AJ
7885 if (n > 0x10000) {
7886 n = 0x10000;
7887 } else if (n < -0x10000) {
7888 n = -0x10000;
7889 }
7890
69397542
PB
7891 aExp += n - 1;
7892 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 7893 , status);
9ee6e8bb
PB
7894
7895}