4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
85 #include "qemu/osdep.h"
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck
, float32
)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck
, float64
)
141 #undef GEN_INPUT_FLUSH__NOCHECK
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
146 if (likely(!s->flush_inputs_to_zero)) { \
149 soft_t ## _input_flush__nocheck(a, s); \
152 GEN_INPUT_FLUSH1(float32_input_flush1
, float32
)
153 GEN_INPUT_FLUSH1(float64_input_flush1
, float64
)
154 #undef GEN_INPUT_FLUSH1
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
159 if (likely(!s->flush_inputs_to_zero)) { \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
166 GEN_INPUT_FLUSH2(float32_input_flush2
, float32
)
167 GEN_INPUT_FLUSH2(float64_input_flush2
, float64
)
168 #undef GEN_INPUT_FLUSH2
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
173 if (likely(!s->flush_inputs_to_zero)) { \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
181 GEN_INPUT_FLUSH3(float32_input_flush3
, float32
)
182 GEN_INPUT_FLUSH3(float64_input_flush3
, float64
)
183 #undef GEN_INPUT_FLUSH3
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF 1
215 # define QEMU_HARDFLOAT_USE_ISINF 0
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 # warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
235 static inline bool can_use_fpu(const float_status
*s
)
237 if (QEMU_NO_HARDFLOAT
) {
240 return likely(s
->float_exception_flags
& float_flag_inexact
&&
241 s
->float_rounding_mode
== float_round_nearest_even
);
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
266 typedef bool (*f32_check_fn
)(union_float32 a
, union_float32 b
);
267 typedef bool (*f64_check_fn
)(union_float64 a
, union_float64 b
);
269 typedef float32 (*soft_f32_op2_fn
)(float32 a
, float32 b
, float_status
*s
);
270 typedef float64 (*soft_f64_op2_fn
)(float64 a
, float64 b
, float_status
*s
);
271 typedef float (*hard_f32_op2_fn
)(float a
, float b
);
272 typedef double (*hard_f64_op2_fn
)(double a
, double b
);
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a
, union_float32 b
)
277 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
282 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
283 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
285 return float32_is_zero_or_normal(a
.s
) &&
286 float32_is_zero_or_normal(b
.s
);
289 static inline bool f64_is_zon2(union_float64 a
, union_float64 b
)
291 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
292 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
293 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
295 return float64_is_zero_or_normal(a
.s
) &&
296 float64_is_zero_or_normal(b
.s
);
299 /* 3-input is-zero-or-normal */
301 bool f32_is_zon3(union_float32 a
, union_float32 b
, union_float32 c
)
303 if (QEMU_HARDFLOAT_3F32_USE_FP
) {
304 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
305 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
306 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
308 return float32_is_zero_or_normal(a
.s
) &&
309 float32_is_zero_or_normal(b
.s
) &&
310 float32_is_zero_or_normal(c
.s
);
314 bool f64_is_zon3(union_float64 a
, union_float64 b
, union_float64 c
)
316 if (QEMU_HARDFLOAT_3F64_USE_FP
) {
317 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
318 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
319 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
321 return float64_is_zero_or_normal(a
.s
) &&
322 float64_is_zero_or_normal(b
.s
) &&
323 float64_is_zero_or_normal(c
.s
);
326 static inline bool f32_is_inf(union_float32 a
)
328 if (QEMU_HARDFLOAT_USE_ISINF
) {
331 return float32_is_infinity(a
.s
);
334 static inline bool f64_is_inf(union_float64 a
)
336 if (QEMU_HARDFLOAT_USE_ISINF
) {
339 return float64_is_infinity(a
.s
);
342 static inline float32
343 float32_gen2(float32 xa
, float32 xb
, float_status
*s
,
344 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
,
345 f32_check_fn pre
, f32_check_fn post
)
347 union_float32 ua
, ub
, ur
;
352 if (unlikely(!can_use_fpu(s
))) {
356 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
357 if (unlikely(!pre(ua
, ub
))) {
361 ur
.h
= hard(ua
.h
, ub
.h
);
362 if (unlikely(f32_is_inf(ur
))) {
363 float_raise(float_flag_overflow
, s
);
364 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
) && post(ua
, ub
)) {
370 return soft(ua
.s
, ub
.s
, s
);
373 static inline float64
374 float64_gen2(float64 xa
, float64 xb
, float_status
*s
,
375 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
,
376 f64_check_fn pre
, f64_check_fn post
)
378 union_float64 ua
, ub
, ur
;
383 if (unlikely(!can_use_fpu(s
))) {
387 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
388 if (unlikely(!pre(ua
, ub
))) {
392 ur
.h
= hard(ua
.h
, ub
.h
);
393 if (unlikely(f64_is_inf(ur
))) {
394 float_raise(float_flag_overflow
, s
);
395 } else if (unlikely(fabs(ur
.h
) <= DBL_MIN
) && post(ua
, ub
)) {
401 return soft(ua
.s
, ub
.s
, s
);
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
408 static inline uint32_t extractFloat32Frac(float32 a
)
410 return float32_val(a
) & 0x007FFFFF;
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
417 static inline int extractFloat32Exp(float32 a
)
419 return (float32_val(a
) >> 23) & 0xFF;
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
426 static inline bool extractFloat32Sign(float32 a
)
428 return float32_val(a
) >> 31;
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
435 static inline uint64_t extractFloat64Frac(float64 a
)
437 return float64_val(a
) & UINT64_C(0x000FFFFFFFFFFFFF);
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
444 static inline int extractFloat64Exp(float64 a
)
446 return (float64_val(a
) >> 52) & 0x7FF;
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
453 static inline bool extractFloat64Sign(float64 a
)
455 return float64_val(a
) >> 63;
459 * Classify a floating point number. Everything above float_class_qnan
460 * is a NaN so cls >= float_class_qnan is any NaN.
463 typedef enum __attribute__ ((__packed__
)) {
464 float_class_unclassified
,
468 float_class_qnan
, /* all NaNs from here */
472 #define float_cmask(bit) (1u << (bit))
475 float_cmask_zero
= float_cmask(float_class_zero
),
476 float_cmask_normal
= float_cmask(float_class_normal
),
477 float_cmask_inf
= float_cmask(float_class_inf
),
478 float_cmask_qnan
= float_cmask(float_class_qnan
),
479 float_cmask_snan
= float_cmask(float_class_snan
),
481 float_cmask_infzero
= float_cmask_zero
| float_cmask_inf
,
482 float_cmask_anynan
= float_cmask_qnan
| float_cmask_snan
,
485 /* Flags for parts_minmax. */
487 /* Set for minimum; clear for maximum. */
489 /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
491 /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
495 /* Simple helpers for checking if, or what kind of, NaN we have */
496 static inline __attribute__((unused
)) bool is_nan(FloatClass c
)
498 return unlikely(c
>= float_class_qnan
);
501 static inline __attribute__((unused
)) bool is_snan(FloatClass c
)
503 return c
== float_class_snan
;
506 static inline __attribute__((unused
)) bool is_qnan(FloatClass c
)
508 return c
== float_class_qnan
;
512 * Structure holding all of the decomposed parts of a float.
513 * The exponent is unbiased and the fraction is normalized.
515 * The fraction words are stored in big-endian word ordering,
516 * so that truncation from a larger format to a smaller format
517 * can be done simply by ignoring subsequent elements.
525 /* Routines that know the structure may reference the singular name. */
528 * Routines expanded with multiple structures reference "hi" and "lo"
529 * depending on the operation. In FloatParts64, "hi" and "lo" are
530 * both the same word and aliased here.
550 uint64_t frac_hm
; /* high-middle */
551 uint64_t frac_lm
; /* low-middle */
555 /* These apply to the most significant word of each FloatPartsN. */
556 #define DECOMPOSED_BINARY_POINT 63
557 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
559 /* Structure holding all of the relevant parameters for a format.
560 * exp_size: the size of the exponent field
561 * exp_bias: the offset applied to the exponent field
562 * exp_max: the maximum normalised exponent
563 * frac_size: the size of the fraction field
564 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
565 * The following are computed based the size of fraction
566 * round_mask: bits below lsb which must be rounded
567 * The following optional modifiers are available:
568 * arm_althp: handle ARM Alternative Half Precision
580 /* Expand fields based on the size of exponent and fraction */
581 #define FLOAT_PARAMS_(E) \
583 .exp_bias = ((1 << E) - 1) >> 1, \
584 .exp_max = (1 << E) - 1
586 #define FLOAT_PARAMS(E, F) \
589 .frac_shift = (-F - 1) & 63, \
590 .round_mask = (1ull << ((-F - 1) & 63)) - 1
592 static const FloatFmt float16_params
= {
596 static const FloatFmt float16_params_ahp
= {
601 static const FloatFmt bfloat16_params
= {
605 static const FloatFmt float32_params
= {
609 static const FloatFmt float64_params
= {
613 static const FloatFmt float128_params
= {
614 FLOAT_PARAMS(15, 112)
617 #define FLOATX80_PARAMS(R) \
619 .frac_size = R == 64 ? 63 : R, \
621 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
623 static const FloatFmt floatx80_params
[3] = {
624 [floatx80_precision_s
] = { FLOATX80_PARAMS(23) },
625 [floatx80_precision_d
] = { FLOATX80_PARAMS(52) },
626 [floatx80_precision_x
] = { FLOATX80_PARAMS(64) },
629 /* Unpack a float to parts, but do not canonicalize. */
630 static void unpack_raw64(FloatParts64
*r
, const FloatFmt
*fmt
, uint64_t raw
)
632 const int f_size
= fmt
->frac_size
;
633 const int e_size
= fmt
->exp_size
;
635 *r
= (FloatParts64
) {
636 .cls
= float_class_unclassified
,
637 .sign
= extract64(raw
, f_size
+ e_size
, 1),
638 .exp
= extract64(raw
, f_size
, e_size
),
639 .frac
= extract64(raw
, 0, f_size
)
643 static inline void float16_unpack_raw(FloatParts64
*p
, float16 f
)
645 unpack_raw64(p
, &float16_params
, f
);
648 static inline void bfloat16_unpack_raw(FloatParts64
*p
, bfloat16 f
)
650 unpack_raw64(p
, &bfloat16_params
, f
);
653 static inline void float32_unpack_raw(FloatParts64
*p
, float32 f
)
655 unpack_raw64(p
, &float32_params
, f
);
658 static inline void float64_unpack_raw(FloatParts64
*p
, float64 f
)
660 unpack_raw64(p
, &float64_params
, f
);
663 static void floatx80_unpack_raw(FloatParts128
*p
, floatx80 f
)
665 *p
= (FloatParts128
) {
666 .cls
= float_class_unclassified
,
667 .sign
= extract32(f
.high
, 15, 1),
668 .exp
= extract32(f
.high
, 0, 15),
673 static void float128_unpack_raw(FloatParts128
*p
, float128 f
)
675 const int f_size
= float128_params
.frac_size
- 64;
676 const int e_size
= float128_params
.exp_size
;
678 *p
= (FloatParts128
) {
679 .cls
= float_class_unclassified
,
680 .sign
= extract64(f
.high
, f_size
+ e_size
, 1),
681 .exp
= extract64(f
.high
, f_size
, e_size
),
682 .frac_hi
= extract64(f
.high
, 0, f_size
),
687 /* Pack a float from parts, but do not canonicalize. */
688 static uint64_t pack_raw64(const FloatParts64
*p
, const FloatFmt
*fmt
)
690 const int f_size
= fmt
->frac_size
;
691 const int e_size
= fmt
->exp_size
;
694 ret
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
695 ret
= deposit64(ret
, f_size
, e_size
, p
->exp
);
696 ret
= deposit64(ret
, 0, f_size
, p
->frac
);
700 static inline float16
float16_pack_raw(const FloatParts64
*p
)
702 return make_float16(pack_raw64(p
, &float16_params
));
705 static inline bfloat16
bfloat16_pack_raw(const FloatParts64
*p
)
707 return pack_raw64(p
, &bfloat16_params
);
710 static inline float32
float32_pack_raw(const FloatParts64
*p
)
712 return make_float32(pack_raw64(p
, &float32_params
));
715 static inline float64
float64_pack_raw(const FloatParts64
*p
)
717 return make_float64(pack_raw64(p
, &float64_params
));
720 static float128
float128_pack_raw(const FloatParts128
*p
)
722 const int f_size
= float128_params
.frac_size
- 64;
723 const int e_size
= float128_params
.exp_size
;
726 hi
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
727 hi
= deposit64(hi
, f_size
, e_size
, p
->exp
);
728 hi
= deposit64(hi
, 0, f_size
, p
->frac_hi
);
729 return make_float128(hi
, p
->frac_lo
);
732 /*----------------------------------------------------------------------------
733 | Functions and definitions to determine: (1) whether tininess for underflow
734 | is detected before or after rounding by default, (2) what (if anything)
735 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
736 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
737 | are propagated from function inputs to output. These details are target-
739 *----------------------------------------------------------------------------*/
740 #include "softfloat-specialize.c.inc"
742 #define PARTS_GENERIC_64_128(NAME, P) \
743 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
745 #define PARTS_GENERIC_64_128_256(NAME, P) \
746 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
747 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
749 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
750 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
752 static void parts64_return_nan(FloatParts64
*a
, float_status
*s
);
753 static void parts128_return_nan(FloatParts128
*a
, float_status
*s
);
755 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
757 static FloatParts64
*parts64_pick_nan(FloatParts64
*a
, FloatParts64
*b
,
759 static FloatParts128
*parts128_pick_nan(FloatParts128
*a
, FloatParts128
*b
,
762 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
764 static FloatParts64
*parts64_pick_nan_muladd(FloatParts64
*a
, FloatParts64
*b
,
765 FloatParts64
*c
, float_status
*s
,
766 int ab_mask
, int abc_mask
);
767 static FloatParts128
*parts128_pick_nan_muladd(FloatParts128
*a
,
771 int ab_mask
, int abc_mask
);
773 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
774 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
776 static void parts64_canonicalize(FloatParts64
*p
, float_status
*status
,
777 const FloatFmt
*fmt
);
778 static void parts128_canonicalize(FloatParts128
*p
, float_status
*status
,
779 const FloatFmt
*fmt
);
781 #define parts_canonicalize(A, S, F) \
782 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
784 static void parts64_uncanon_normal(FloatParts64
*p
, float_status
*status
,
785 const FloatFmt
*fmt
);
786 static void parts128_uncanon_normal(FloatParts128
*p
, float_status
*status
,
787 const FloatFmt
*fmt
);
789 #define parts_uncanon_normal(A, S, F) \
790 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
792 static void parts64_uncanon(FloatParts64
*p
, float_status
*status
,
793 const FloatFmt
*fmt
);
794 static void parts128_uncanon(FloatParts128
*p
, float_status
*status
,
795 const FloatFmt
*fmt
);
797 #define parts_uncanon(A, S, F) \
798 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
800 static void parts64_add_normal(FloatParts64
*a
, FloatParts64
*b
);
801 static void parts128_add_normal(FloatParts128
*a
, FloatParts128
*b
);
802 static void parts256_add_normal(FloatParts256
*a
, FloatParts256
*b
);
804 #define parts_add_normal(A, B) \
805 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
807 static bool parts64_sub_normal(FloatParts64
*a
, FloatParts64
*b
);
808 static bool parts128_sub_normal(FloatParts128
*a
, FloatParts128
*b
);
809 static bool parts256_sub_normal(FloatParts256
*a
, FloatParts256
*b
);
811 #define parts_sub_normal(A, B) \
812 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
814 static FloatParts64
*parts64_addsub(FloatParts64
*a
, FloatParts64
*b
,
815 float_status
*s
, bool subtract
);
816 static FloatParts128
*parts128_addsub(FloatParts128
*a
, FloatParts128
*b
,
817 float_status
*s
, bool subtract
);
819 #define parts_addsub(A, B, S, Z) \
820 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
822 static FloatParts64
*parts64_mul(FloatParts64
*a
, FloatParts64
*b
,
824 static FloatParts128
*parts128_mul(FloatParts128
*a
, FloatParts128
*b
,
827 #define parts_mul(A, B, S) \
828 PARTS_GENERIC_64_128(mul, A)(A, B, S)
830 static FloatParts64
*parts64_muladd(FloatParts64
*a
, FloatParts64
*b
,
831 FloatParts64
*c
, int flags
,
833 static FloatParts128
*parts128_muladd(FloatParts128
*a
, FloatParts128
*b
,
834 FloatParts128
*c
, int flags
,
837 #define parts_muladd(A, B, C, Z, S) \
838 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
840 static FloatParts64
*parts64_div(FloatParts64
*a
, FloatParts64
*b
,
842 static FloatParts128
*parts128_div(FloatParts128
*a
, FloatParts128
*b
,
845 #define parts_div(A, B, S) \
846 PARTS_GENERIC_64_128(div, A)(A, B, S)
848 static void parts64_sqrt(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
849 static void parts128_sqrt(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
851 #define parts_sqrt(A, S, F) \
852 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
854 static bool parts64_round_to_int_normal(FloatParts64
*a
, FloatRoundMode rm
,
855 int scale
, int frac_size
);
856 static bool parts128_round_to_int_normal(FloatParts128
*a
, FloatRoundMode r
,
857 int scale
, int frac_size
);
859 #define parts_round_to_int_normal(A, R, C, F) \
860 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
862 static void parts64_round_to_int(FloatParts64
*a
, FloatRoundMode rm
,
863 int scale
, float_status
*s
,
864 const FloatFmt
*fmt
);
865 static void parts128_round_to_int(FloatParts128
*a
, FloatRoundMode r
,
866 int scale
, float_status
*s
,
867 const FloatFmt
*fmt
);
869 #define parts_round_to_int(A, R, C, S, F) \
870 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
872 static int64_t parts64_float_to_sint(FloatParts64
*p
, FloatRoundMode rmode
,
873 int scale
, int64_t min
, int64_t max
,
875 static int64_t parts128_float_to_sint(FloatParts128
*p
, FloatRoundMode rmode
,
876 int scale
, int64_t min
, int64_t max
,
879 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
880 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
882 static uint64_t parts64_float_to_uint(FloatParts64
*p
, FloatRoundMode rmode
,
883 int scale
, uint64_t max
,
885 static uint64_t parts128_float_to_uint(FloatParts128
*p
, FloatRoundMode rmode
,
886 int scale
, uint64_t max
,
889 #define parts_float_to_uint(P, R, Z, M, S) \
890 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
892 static void parts64_sint_to_float(FloatParts64
*p
, int64_t a
,
893 int scale
, float_status
*s
);
894 static void parts128_sint_to_float(FloatParts128
*p
, int64_t a
,
895 int scale
, float_status
*s
);
897 #define parts_sint_to_float(P, I, Z, S) \
898 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
900 static void parts64_uint_to_float(FloatParts64
*p
, uint64_t a
,
901 int scale
, float_status
*s
);
902 static void parts128_uint_to_float(FloatParts128
*p
, uint64_t a
,
903 int scale
, float_status
*s
);
905 #define parts_uint_to_float(P, I, Z, S) \
906 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
908 static FloatParts64
*parts64_minmax(FloatParts64
*a
, FloatParts64
*b
,
909 float_status
*s
, int flags
);
910 static FloatParts128
*parts128_minmax(FloatParts128
*a
, FloatParts128
*b
,
911 float_status
*s
, int flags
);
913 #define parts_minmax(A, B, S, F) \
914 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
916 static int parts64_compare(FloatParts64
*a
, FloatParts64
*b
,
917 float_status
*s
, bool q
);
918 static int parts128_compare(FloatParts128
*a
, FloatParts128
*b
,
919 float_status
*s
, bool q
);
921 #define parts_compare(A, B, S, Q) \
922 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
924 static void parts64_scalbn(FloatParts64
*a
, int n
, float_status
*s
);
925 static void parts128_scalbn(FloatParts128
*a
, int n
, float_status
*s
);
927 #define parts_scalbn(A, N, S) \
928 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
931 * Helper functions for softfloat-parts.c.inc, per-size operations.
934 #define FRAC_GENERIC_64_128(NAME, P) \
935 QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
937 #define FRAC_GENERIC_64_128_256(NAME, P) \
938 QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
939 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
941 static bool frac64_add(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
943 return uadd64_overflow(a
->frac
, b
->frac
, &r
->frac
);
946 static bool frac128_add(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
949 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
950 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
954 static bool frac256_add(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
957 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
958 r
->frac_lm
= uadd64_carry(a
->frac_lm
, b
->frac_lm
, &c
);
959 r
->frac_hm
= uadd64_carry(a
->frac_hm
, b
->frac_hm
, &c
);
960 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
964 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
966 static bool frac64_addi(FloatParts64
*r
, FloatParts64
*a
, uint64_t c
)
968 return uadd64_overflow(a
->frac
, c
, &r
->frac
);
971 static bool frac128_addi(FloatParts128
*r
, FloatParts128
*a
, uint64_t c
)
973 c
= uadd64_overflow(a
->frac_lo
, c
, &r
->frac_lo
);
974 return uadd64_overflow(a
->frac_hi
, c
, &r
->frac_hi
);
977 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
979 static void frac64_allones(FloatParts64
*a
)
984 static void frac128_allones(FloatParts128
*a
)
986 a
->frac_hi
= a
->frac_lo
= -1;
989 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
991 static int frac64_cmp(FloatParts64
*a
, FloatParts64
*b
)
993 return a
->frac
== b
->frac
? 0 : a
->frac
< b
->frac
? -1 : 1;
996 static int frac128_cmp(FloatParts128
*a
, FloatParts128
*b
)
998 uint64_t ta
= a
->frac_hi
, tb
= b
->frac_hi
;
1000 ta
= a
->frac_lo
, tb
= b
->frac_lo
;
1005 return ta
< tb
? -1 : 1;
1008 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
1010 static void frac64_clear(FloatParts64
*a
)
1015 static void frac128_clear(FloatParts128
*a
)
1017 a
->frac_hi
= a
->frac_lo
= 0;
1020 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
1022 static bool frac64_div(FloatParts64
*a
, FloatParts64
*b
)
1024 uint64_t n1
, n0
, r
, q
;
1028 * We want a 2*N / N-bit division to produce exactly an N-bit
1029 * result, so that we do not lose any precision and so that we
1030 * do not have to renormalize afterward. If A.frac < B.frac,
1031 * then division would produce an (N-1)-bit result; shift A left
1032 * by one to produce the an N-bit result, and return true to
1033 * decrement the exponent to match.
1035 * The udiv_qrnnd algorithm that we're using requires normalization,
1036 * i.e. the msb of the denominator must be set, which is already true.
1038 ret
= a
->frac
< b
->frac
;
1046 q
= udiv_qrnnd(&r
, n0
, n1
, b
->frac
);
1048 /* Set lsb if there is a remainder, to set inexact. */
1049 a
->frac
= q
| (r
!= 0);
1054 static bool frac128_div(FloatParts128
*a
, FloatParts128
*b
)
1056 uint64_t q0
, q1
, a0
, a1
, b0
, b1
;
1057 uint64_t r0
, r1
, r2
, r3
, t0
, t1
, t2
, t3
;
1060 a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1061 b0
= b
->frac_hi
, b1
= b
->frac_lo
;
1063 ret
= lt128(a0
, a1
, b0
, b1
);
1065 a1
= shr_double(a0
, a1
, 1);
1069 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1070 q0
= estimateDiv128To64(a0
, a1
, b0
);
1073 * Estimate is high because B1 was not included (unless B1 == 0).
1074 * Reduce quotient and increase remainder until remainder is non-negative.
1075 * This loop will execute 0 to 2 times.
1077 mul128By64To192(b0
, b1
, q0
, &t0
, &t1
, &t2
);
1078 sub192(a0
, a1
, 0, t0
, t1
, t2
, &r0
, &r1
, &r2
);
1081 add192(r0
, r1
, r2
, 0, b0
, b1
, &r0
, &r1
, &r2
);
1084 /* Repeat using the remainder, producing a second word of quotient. */
1085 q1
= estimateDiv128To64(r1
, r2
, b0
);
1086 mul128By64To192(b0
, b1
, q1
, &t1
, &t2
, &t3
);
1087 sub192(r1
, r2
, 0, t1
, t2
, t3
, &r1
, &r2
, &r3
);
1090 add192(r1
, r2
, r3
, 0, b0
, b1
, &r1
, &r2
, &r3
);
1093 /* Any remainder indicates inexact; set sticky bit. */
1094 q1
|= (r2
| r3
) != 0;
1101 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1103 static bool frac64_eqz(FloatParts64
*a
)
1105 return a
->frac
== 0;
1108 static bool frac128_eqz(FloatParts128
*a
)
1110 return (a
->frac_hi
| a
->frac_lo
) == 0;
1113 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1115 static void frac64_mulw(FloatParts128
*r
, FloatParts64
*a
, FloatParts64
*b
)
1117 mulu64(&r
->frac_lo
, &r
->frac_hi
, a
->frac
, b
->frac
);
1120 static void frac128_mulw(FloatParts256
*r
, FloatParts128
*a
, FloatParts128
*b
)
1122 mul128To256(a
->frac_hi
, a
->frac_lo
, b
->frac_hi
, b
->frac_lo
,
1123 &r
->frac_hi
, &r
->frac_hm
, &r
->frac_lm
, &r
->frac_lo
);
1126 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1128 static void frac64_neg(FloatParts64
*a
)
1133 static void frac128_neg(FloatParts128
*a
)
1136 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1137 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1140 static void frac256_neg(FloatParts256
*a
)
1143 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1144 a
->frac_lm
= usub64_borrow(0, a
->frac_lm
, &c
);
1145 a
->frac_hm
= usub64_borrow(0, a
->frac_hm
, &c
);
1146 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1149 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1151 static int frac64_normalize(FloatParts64
*a
)
1154 int shift
= clz64(a
->frac
);
1161 static int frac128_normalize(FloatParts128
*a
)
1164 int shl
= clz64(a
->frac_hi
);
1165 a
->frac_hi
= shl_double(a
->frac_hi
, a
->frac_lo
, shl
);
1168 } else if (a
->frac_lo
) {
1169 int shl
= clz64(a
->frac_lo
);
1170 a
->frac_hi
= a
->frac_lo
<< shl
;
1177 static int frac256_normalize(FloatParts256
*a
)
1179 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1180 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1192 a0
= a1
, a1
= a2
, a2
= a3
, a3
= 0;
1195 a0
= a2
, a1
= a3
, a2
= 0, a3
= 0;
1198 a0
= a3
, a1
= 0, a2
= 0, a3
= 0;
1201 a0
= 0, a1
= 0, a2
= 0, a3
= 0;
1211 a0
= shl_double(a0
, a1
, shl
);
1212 a1
= shl_double(a1
, a2
, shl
);
1213 a2
= shl_double(a2
, a3
, shl
);
1224 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1226 static void frac64_shl(FloatParts64
*a
, int c
)
1231 static void frac128_shl(FloatParts128
*a
, int c
)
1233 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1241 a0
= shl_double(a0
, a1
, c
);
1249 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1251 static void frac64_shr(FloatParts64
*a
, int c
)
1256 static void frac128_shr(FloatParts128
*a
, int c
)
1258 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1266 a1
= shr_double(a0
, a1
, c
);
1274 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1276 static void frac64_shrjam(FloatParts64
*a
, int c
)
1278 uint64_t a0
= a
->frac
;
1280 if (likely(c
!= 0)) {
1281 if (likely(c
< 64)) {
1282 a0
= (a0
>> c
) | (shr_double(a0
, 0, c
) != 0);
1290 static void frac128_shrjam(FloatParts128
*a
, int c
)
1292 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1293 uint64_t sticky
= 0;
1295 if (unlikely(c
== 0)) {
1297 } else if (likely(c
< 64)) {
1299 } else if (likely(c
< 128)) {
1313 sticky
|= shr_double(a1
, 0, c
);
1314 a1
= shr_double(a0
, a1
, c
);
1318 a
->frac_lo
= a1
| (sticky
!= 0);
1322 static void frac256_shrjam(FloatParts256
*a
, int c
)
1324 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1325 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1326 uint64_t sticky
= 0;
1328 if (unlikely(c
== 0)) {
1330 } else if (likely(c
< 64)) {
1332 } else if (likely(c
< 256)) {
1333 if (unlikely(c
& 128)) {
1335 a3
= a1
, a2
= a0
, a1
= 0, a0
= 0;
1337 if (unlikely(c
& 64)) {
1339 a3
= a2
, a2
= a1
, a1
= a0
, a0
= 0;
1346 sticky
= a0
| a1
| a2
| a3
;
1347 a0
= a1
= a2
= a3
= 0;
1351 sticky
|= shr_double(a3
, 0, c
);
1352 a3
= shr_double(a2
, a3
, c
);
1353 a2
= shr_double(a1
, a2
, c
);
1354 a1
= shr_double(a0
, a1
, c
);
1358 a
->frac_lo
= a3
| (sticky
!= 0);
1364 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1366 static bool frac64_sub(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
1368 return usub64_overflow(a
->frac
, b
->frac
, &r
->frac
);
1371 static bool frac128_sub(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
1374 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1375 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1379 static bool frac256_sub(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
1382 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1383 r
->frac_lm
= usub64_borrow(a
->frac_lm
, b
->frac_lm
, &c
);
1384 r
->frac_hm
= usub64_borrow(a
->frac_hm
, b
->frac_hm
, &c
);
1385 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1389 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1391 static void frac64_truncjam(FloatParts64
*r
, FloatParts128
*a
)
1393 r
->frac
= a
->frac_hi
| (a
->frac_lo
!= 0);
1396 static void frac128_truncjam(FloatParts128
*r
, FloatParts256
*a
)
1398 r
->frac_hi
= a
->frac_hi
;
1399 r
->frac_lo
= a
->frac_hm
| ((a
->frac_lm
| a
->frac_lo
) != 0);
1402 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1404 static void frac64_widen(FloatParts128
*r
, FloatParts64
*a
)
1406 r
->frac_hi
= a
->frac
;
1410 static void frac128_widen(FloatParts256
*r
, FloatParts128
*a
)
1412 r
->frac_hi
= a
->frac_hi
;
1413 r
->frac_hm
= a
->frac_lo
;
1418 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1421 * Reciprocal sqrt table. 1 bit of exponent, 6-bits of mantessa.
1422 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1423 * and thus MIT licenced.
1425 static const uint16_t rsqrt_tab
[128] = {
1426 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1427 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1428 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1429 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1430 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1431 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1432 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1433 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1434 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1435 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1436 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1437 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1438 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1439 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1440 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1441 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1444 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1445 #define FloatPartsN glue(FloatParts,N)
1446 #define FloatPartsW glue(FloatParts,W)
1451 #include "softfloat-parts-addsub.c.inc"
1452 #include "softfloat-parts.c.inc"
1459 #include "softfloat-parts-addsub.c.inc"
1460 #include "softfloat-parts.c.inc"
1466 #include "softfloat-parts-addsub.c.inc"
1475 * Pack/unpack routines with a specific FloatFmt.
1478 static void float16a_unpack_canonical(FloatParts64
*p
, float16 f
,
1479 float_status
*s
, const FloatFmt
*params
)
1481 float16_unpack_raw(p
, f
);
1482 parts_canonicalize(p
, s
, params
);
1485 static void float16_unpack_canonical(FloatParts64
*p
, float16 f
,
1488 float16a_unpack_canonical(p
, f
, s
, &float16_params
);
1491 static void bfloat16_unpack_canonical(FloatParts64
*p
, bfloat16 f
,
1494 bfloat16_unpack_raw(p
, f
);
1495 parts_canonicalize(p
, s
, &bfloat16_params
);
1498 static float16
float16a_round_pack_canonical(FloatParts64
*p
,
1500 const FloatFmt
*params
)
1502 parts_uncanon(p
, s
, params
);
1503 return float16_pack_raw(p
);
1506 static float16
float16_round_pack_canonical(FloatParts64
*p
,
1509 return float16a_round_pack_canonical(p
, s
, &float16_params
);
1512 static bfloat16
bfloat16_round_pack_canonical(FloatParts64
*p
,
1515 parts_uncanon(p
, s
, &bfloat16_params
);
1516 return bfloat16_pack_raw(p
);
1519 static void float32_unpack_canonical(FloatParts64
*p
, float32 f
,
1522 float32_unpack_raw(p
, f
);
1523 parts_canonicalize(p
, s
, &float32_params
);
1526 static float32
float32_round_pack_canonical(FloatParts64
*p
,
1529 parts_uncanon(p
, s
, &float32_params
);
1530 return float32_pack_raw(p
);
1533 static void float64_unpack_canonical(FloatParts64
*p
, float64 f
,
1536 float64_unpack_raw(p
, f
);
1537 parts_canonicalize(p
, s
, &float64_params
);
1540 static float64
float64_round_pack_canonical(FloatParts64
*p
,
1543 parts_uncanon(p
, s
, &float64_params
);
1544 return float64_pack_raw(p
);
1547 static void float128_unpack_canonical(FloatParts128
*p
, float128 f
,
1550 float128_unpack_raw(p
, f
);
1551 parts_canonicalize(p
, s
, &float128_params
);
1554 static float128
float128_round_pack_canonical(FloatParts128
*p
,
1557 parts_uncanon(p
, s
, &float128_params
);
1558 return float128_pack_raw(p
);
1561 /* Returns false if the encoding is invalid. */
1562 static bool floatx80_unpack_canonical(FloatParts128
*p
, floatx80 f
,
1565 /* Ensure rounding precision is set before beginning. */
1566 switch (s
->floatx80_rounding_precision
) {
1567 case floatx80_precision_x
:
1568 case floatx80_precision_d
:
1569 case floatx80_precision_s
:
1572 g_assert_not_reached();
1575 if (unlikely(floatx80_invalid_encoding(f
))) {
1576 float_raise(float_flag_invalid
, s
);
1580 floatx80_unpack_raw(p
, f
);
1582 if (likely(p
->exp
!= floatx80_params
[floatx80_precision_x
].exp_max
)) {
1583 parts_canonicalize(p
, s
, &floatx80_params
[floatx80_precision_x
]);
1585 /* The explicit integer bit is ignored, after invalid checks. */
1586 p
->frac_hi
&= MAKE_64BIT_MASK(0, 63);
1587 p
->cls
= (p
->frac_hi
== 0 ? float_class_inf
1588 : parts_is_snan_frac(p
->frac_hi
, s
)
1589 ? float_class_snan
: float_class_qnan
);
1594 static floatx80
floatx80_round_pack_canonical(FloatParts128
*p
,
1597 const FloatFmt
*fmt
= &floatx80_params
[s
->floatx80_rounding_precision
];
1602 case float_class_normal
:
1603 if (s
->floatx80_rounding_precision
== floatx80_precision_x
) {
1604 parts_uncanon_normal(p
, s
, fmt
);
1612 frac_truncjam(&p64
, p
);
1613 parts_uncanon_normal(&p64
, s
, fmt
);
1617 if (exp
!= fmt
->exp_max
) {
1620 /* rounded to inf -- fall through to set frac correctly */
1622 case float_class_inf
:
1623 /* x86 and m68k differ in the setting of the integer bit. */
1624 frac
= floatx80_infinity_low
;
1628 case float_class_zero
:
1633 case float_class_snan
:
1634 case float_class_qnan
:
1635 /* NaNs have the integer bit set. */
1636 frac
= p
->frac_hi
| (1ull << 63);
1641 g_assert_not_reached();
1644 return packFloatx80(p
->sign
, exp
, frac
);
1648 * Addition and subtraction
1651 static float16 QEMU_FLATTEN
1652 float16_addsub(float16 a
, float16 b
, float_status
*status
, bool subtract
)
1654 FloatParts64 pa
, pb
, *pr
;
1656 float16_unpack_canonical(&pa
, a
, status
);
1657 float16_unpack_canonical(&pb
, b
, status
);
1658 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1660 return float16_round_pack_canonical(pr
, status
);
1663 float16
float16_add(float16 a
, float16 b
, float_status
*status
)
1665 return float16_addsub(a
, b
, status
, false);
1668 float16
float16_sub(float16 a
, float16 b
, float_status
*status
)
1670 return float16_addsub(a
, b
, status
, true);
1673 static float32 QEMU_SOFTFLOAT_ATTR
1674 soft_f32_addsub(float32 a
, float32 b
, float_status
*status
, bool subtract
)
1676 FloatParts64 pa
, pb
, *pr
;
1678 float32_unpack_canonical(&pa
, a
, status
);
1679 float32_unpack_canonical(&pb
, b
, status
);
1680 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1682 return float32_round_pack_canonical(pr
, status
);
1685 static float32
soft_f32_add(float32 a
, float32 b
, float_status
*status
)
1687 return soft_f32_addsub(a
, b
, status
, false);
1690 static float32
soft_f32_sub(float32 a
, float32 b
, float_status
*status
)
1692 return soft_f32_addsub(a
, b
, status
, true);
1695 static float64 QEMU_SOFTFLOAT_ATTR
1696 soft_f64_addsub(float64 a
, float64 b
, float_status
*status
, bool subtract
)
1698 FloatParts64 pa
, pb
, *pr
;
1700 float64_unpack_canonical(&pa
, a
, status
);
1701 float64_unpack_canonical(&pb
, b
, status
);
1702 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1704 return float64_round_pack_canonical(pr
, status
);
1707 static float64
soft_f64_add(float64 a
, float64 b
, float_status
*status
)
1709 return soft_f64_addsub(a
, b
, status
, false);
1712 static float64
soft_f64_sub(float64 a
, float64 b
, float_status
*status
)
1714 return soft_f64_addsub(a
, b
, status
, true);
1717 static float hard_f32_add(float a
, float b
)
1722 static float hard_f32_sub(float a
, float b
)
1727 static double hard_f64_add(double a
, double b
)
1732 static double hard_f64_sub(double a
, double b
)
1737 static bool f32_addsubmul_post(union_float32 a
, union_float32 b
)
1739 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1740 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1742 return !(float32_is_zero(a
.s
) && float32_is_zero(b
.s
));
1745 static bool f64_addsubmul_post(union_float64 a
, union_float64 b
)
1747 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1748 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1750 return !(float64_is_zero(a
.s
) && float64_is_zero(b
.s
));
1754 static float32
float32_addsub(float32 a
, float32 b
, float_status
*s
,
1755 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
)
1757 return float32_gen2(a
, b
, s
, hard
, soft
,
1758 f32_is_zon2
, f32_addsubmul_post
);
1761 static float64
float64_addsub(float64 a
, float64 b
, float_status
*s
,
1762 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
)
1764 return float64_gen2(a
, b
, s
, hard
, soft
,
1765 f64_is_zon2
, f64_addsubmul_post
);
1768 float32 QEMU_FLATTEN
1769 float32_add(float32 a
, float32 b
, float_status
*s
)
1771 return float32_addsub(a
, b
, s
, hard_f32_add
, soft_f32_add
);
1774 float32 QEMU_FLATTEN
1775 float32_sub(float32 a
, float32 b
, float_status
*s
)
1777 return float32_addsub(a
, b
, s
, hard_f32_sub
, soft_f32_sub
);
1780 float64 QEMU_FLATTEN
1781 float64_add(float64 a
, float64 b
, float_status
*s
)
1783 return float64_addsub(a
, b
, s
, hard_f64_add
, soft_f64_add
);
1786 float64 QEMU_FLATTEN
1787 float64_sub(float64 a
, float64 b
, float_status
*s
)
1789 return float64_addsub(a
, b
, s
, hard_f64_sub
, soft_f64_sub
);
1792 static bfloat16 QEMU_FLATTEN
1793 bfloat16_addsub(bfloat16 a
, bfloat16 b
, float_status
*status
, bool subtract
)
1795 FloatParts64 pa
, pb
, *pr
;
1797 bfloat16_unpack_canonical(&pa
, a
, status
);
1798 bfloat16_unpack_canonical(&pb
, b
, status
);
1799 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1801 return bfloat16_round_pack_canonical(pr
, status
);
1804 bfloat16
bfloat16_add(bfloat16 a
, bfloat16 b
, float_status
*status
)
1806 return bfloat16_addsub(a
, b
, status
, false);
1809 bfloat16
bfloat16_sub(bfloat16 a
, bfloat16 b
, float_status
*status
)
1811 return bfloat16_addsub(a
, b
, status
, true);
1814 static float128 QEMU_FLATTEN
1815 float128_addsub(float128 a
, float128 b
, float_status
*status
, bool subtract
)
1817 FloatParts128 pa
, pb
, *pr
;
1819 float128_unpack_canonical(&pa
, a
, status
);
1820 float128_unpack_canonical(&pb
, b
, status
);
1821 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1823 return float128_round_pack_canonical(pr
, status
);
1826 float128
float128_add(float128 a
, float128 b
, float_status
*status
)
1828 return float128_addsub(a
, b
, status
, false);
1831 float128
float128_sub(float128 a
, float128 b
, float_status
*status
)
1833 return float128_addsub(a
, b
, status
, true);
1836 static floatx80 QEMU_FLATTEN
1837 floatx80_addsub(floatx80 a
, floatx80 b
, float_status
*status
, bool subtract
)
1839 FloatParts128 pa
, pb
, *pr
;
1841 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
1842 !floatx80_unpack_canonical(&pb
, b
, status
)) {
1843 return floatx80_default_nan(status
);
1846 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1847 return floatx80_round_pack_canonical(pr
, status
);
1850 floatx80
floatx80_add(floatx80 a
, floatx80 b
, float_status
*status
)
1852 return floatx80_addsub(a
, b
, status
, false);
1855 floatx80
floatx80_sub(floatx80 a
, floatx80 b
, float_status
*status
)
1857 return floatx80_addsub(a
, b
, status
, true);
1864 float16 QEMU_FLATTEN
float16_mul(float16 a
, float16 b
, float_status
*status
)
1866 FloatParts64 pa
, pb
, *pr
;
1868 float16_unpack_canonical(&pa
, a
, status
);
1869 float16_unpack_canonical(&pb
, b
, status
);
1870 pr
= parts_mul(&pa
, &pb
, status
);
1872 return float16_round_pack_canonical(pr
, status
);
1875 static float32 QEMU_SOFTFLOAT_ATTR
1876 soft_f32_mul(float32 a
, float32 b
, float_status
*status
)
1878 FloatParts64 pa
, pb
, *pr
;
1880 float32_unpack_canonical(&pa
, a
, status
);
1881 float32_unpack_canonical(&pb
, b
, status
);
1882 pr
= parts_mul(&pa
, &pb
, status
);
1884 return float32_round_pack_canonical(pr
, status
);
1887 static float64 QEMU_SOFTFLOAT_ATTR
1888 soft_f64_mul(float64 a
, float64 b
, float_status
*status
)
1890 FloatParts64 pa
, pb
, *pr
;
1892 float64_unpack_canonical(&pa
, a
, status
);
1893 float64_unpack_canonical(&pb
, b
, status
);
1894 pr
= parts_mul(&pa
, &pb
, status
);
1896 return float64_round_pack_canonical(pr
, status
);
1899 static float hard_f32_mul(float a
, float b
)
1904 static double hard_f64_mul(double a
, double b
)
1909 float32 QEMU_FLATTEN
1910 float32_mul(float32 a
, float32 b
, float_status
*s
)
1912 return float32_gen2(a
, b
, s
, hard_f32_mul
, soft_f32_mul
,
1913 f32_is_zon2
, f32_addsubmul_post
);
1916 float64 QEMU_FLATTEN
1917 float64_mul(float64 a
, float64 b
, float_status
*s
)
1919 return float64_gen2(a
, b
, s
, hard_f64_mul
, soft_f64_mul
,
1920 f64_is_zon2
, f64_addsubmul_post
);
1923 bfloat16 QEMU_FLATTEN
1924 bfloat16_mul(bfloat16 a
, bfloat16 b
, float_status
*status
)
1926 FloatParts64 pa
, pb
, *pr
;
1928 bfloat16_unpack_canonical(&pa
, a
, status
);
1929 bfloat16_unpack_canonical(&pb
, b
, status
);
1930 pr
= parts_mul(&pa
, &pb
, status
);
1932 return bfloat16_round_pack_canonical(pr
, status
);
1935 float128 QEMU_FLATTEN
1936 float128_mul(float128 a
, float128 b
, float_status
*status
)
1938 FloatParts128 pa
, pb
, *pr
;
1940 float128_unpack_canonical(&pa
, a
, status
);
1941 float128_unpack_canonical(&pb
, b
, status
);
1942 pr
= parts_mul(&pa
, &pb
, status
);
1944 return float128_round_pack_canonical(pr
, status
);
1947 floatx80 QEMU_FLATTEN
1948 floatx80_mul(floatx80 a
, floatx80 b
, float_status
*status
)
1950 FloatParts128 pa
, pb
, *pr
;
1952 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
1953 !floatx80_unpack_canonical(&pb
, b
, status
)) {
1954 return floatx80_default_nan(status
);
1957 pr
= parts_mul(&pa
, &pb
, status
);
1958 return floatx80_round_pack_canonical(pr
, status
);
1962 * Fused multiply-add
1965 float16 QEMU_FLATTEN
float16_muladd(float16 a
, float16 b
, float16 c
,
1966 int flags
, float_status
*status
)
1968 FloatParts64 pa
, pb
, pc
, *pr
;
1970 float16_unpack_canonical(&pa
, a
, status
);
1971 float16_unpack_canonical(&pb
, b
, status
);
1972 float16_unpack_canonical(&pc
, c
, status
);
1973 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1975 return float16_round_pack_canonical(pr
, status
);
1978 static float32 QEMU_SOFTFLOAT_ATTR
1979 soft_f32_muladd(float32 a
, float32 b
, float32 c
, int flags
,
1980 float_status
*status
)
1982 FloatParts64 pa
, pb
, pc
, *pr
;
1984 float32_unpack_canonical(&pa
, a
, status
);
1985 float32_unpack_canonical(&pb
, b
, status
);
1986 float32_unpack_canonical(&pc
, c
, status
);
1987 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1989 return float32_round_pack_canonical(pr
, status
);
1992 static float64 QEMU_SOFTFLOAT_ATTR
1993 soft_f64_muladd(float64 a
, float64 b
, float64 c
, int flags
,
1994 float_status
*status
)
1996 FloatParts64 pa
, pb
, pc
, *pr
;
1998 float64_unpack_canonical(&pa
, a
, status
);
1999 float64_unpack_canonical(&pb
, b
, status
);
2000 float64_unpack_canonical(&pc
, c
, status
);
2001 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2003 return float64_round_pack_canonical(pr
, status
);
2006 static bool force_soft_fma
;
2008 float32 QEMU_FLATTEN
2009 float32_muladd(float32 xa
, float32 xb
, float32 xc
, int flags
, float_status
*s
)
2011 union_float32 ua
, ub
, uc
, ur
;
2017 if (unlikely(!can_use_fpu(s
))) {
2020 if (unlikely(flags
& float_muladd_halve_result
)) {
2024 float32_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2025 if (unlikely(!f32_is_zon3(ua
, ub
, uc
))) {
2029 if (unlikely(force_soft_fma
)) {
2034 * When (a || b) == 0, there's no need to check for under/over flow,
2035 * since we know the addend is (normal || 0) and the product is 0.
2037 if (float32_is_zero(ua
.s
) || float32_is_zero(ub
.s
)) {
2041 prod_sign
= float32_is_neg(ua
.s
) ^ float32_is_neg(ub
.s
);
2042 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2043 up
.s
= float32_set_sign(float32_zero
, prod_sign
);
2045 if (flags
& float_muladd_negate_c
) {
2050 union_float32 ua_orig
= ua
;
2051 union_float32 uc_orig
= uc
;
2053 if (flags
& float_muladd_negate_product
) {
2056 if (flags
& float_muladd_negate_c
) {
2060 ur
.h
= fmaf(ua
.h
, ub
.h
, uc
.h
);
2062 if (unlikely(f32_is_inf(ur
))) {
2063 float_raise(float_flag_overflow
, s
);
2064 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
)) {
2070 if (flags
& float_muladd_negate_result
) {
2071 return float32_chs(ur
.s
);
2076 return soft_f32_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2079 float64 QEMU_FLATTEN
2080 float64_muladd(float64 xa
, float64 xb
, float64 xc
, int flags
, float_status
*s
)
2082 union_float64 ua
, ub
, uc
, ur
;
2088 if (unlikely(!can_use_fpu(s
))) {
2091 if (unlikely(flags
& float_muladd_halve_result
)) {
2095 float64_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2096 if (unlikely(!f64_is_zon3(ua
, ub
, uc
))) {
2100 if (unlikely(force_soft_fma
)) {
2105 * When (a || b) == 0, there's no need to check for under/over flow,
2106 * since we know the addend is (normal || 0) and the product is 0.
2108 if (float64_is_zero(ua
.s
) || float64_is_zero(ub
.s
)) {
2112 prod_sign
= float64_is_neg(ua
.s
) ^ float64_is_neg(ub
.s
);
2113 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2114 up
.s
= float64_set_sign(float64_zero
, prod_sign
);
2116 if (flags
& float_muladd_negate_c
) {
2121 union_float64 ua_orig
= ua
;
2122 union_float64 uc_orig
= uc
;
2124 if (flags
& float_muladd_negate_product
) {
2127 if (flags
& float_muladd_negate_c
) {
2131 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
2133 if (unlikely(f64_is_inf(ur
))) {
2134 float_raise(float_flag_overflow
, s
);
2135 } else if (unlikely(fabs(ur
.h
) <= FLT_MIN
)) {
2141 if (flags
& float_muladd_negate_result
) {
2142 return float64_chs(ur
.s
);
2147 return soft_f64_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2150 bfloat16 QEMU_FLATTEN
bfloat16_muladd(bfloat16 a
, bfloat16 b
, bfloat16 c
,
2151 int flags
, float_status
*status
)
2153 FloatParts64 pa
, pb
, pc
, *pr
;
2155 bfloat16_unpack_canonical(&pa
, a
, status
);
2156 bfloat16_unpack_canonical(&pb
, b
, status
);
2157 bfloat16_unpack_canonical(&pc
, c
, status
);
2158 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2160 return bfloat16_round_pack_canonical(pr
, status
);
2163 float128 QEMU_FLATTEN
float128_muladd(float128 a
, float128 b
, float128 c
,
2164 int flags
, float_status
*status
)
2166 FloatParts128 pa
, pb
, pc
, *pr
;
2168 float128_unpack_canonical(&pa
, a
, status
);
2169 float128_unpack_canonical(&pb
, b
, status
);
2170 float128_unpack_canonical(&pc
, c
, status
);
2171 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2173 return float128_round_pack_canonical(pr
, status
);
2180 float16
float16_div(float16 a
, float16 b
, float_status
*status
)
2182 FloatParts64 pa
, pb
, *pr
;
2184 float16_unpack_canonical(&pa
, a
, status
);
2185 float16_unpack_canonical(&pb
, b
, status
);
2186 pr
= parts_div(&pa
, &pb
, status
);
2188 return float16_round_pack_canonical(pr
, status
);
2191 static float32 QEMU_SOFTFLOAT_ATTR
2192 soft_f32_div(float32 a
, float32 b
, float_status
*status
)
2194 FloatParts64 pa
, pb
, *pr
;
2196 float32_unpack_canonical(&pa
, a
, status
);
2197 float32_unpack_canonical(&pb
, b
, status
);
2198 pr
= parts_div(&pa
, &pb
, status
);
2200 return float32_round_pack_canonical(pr
, status
);
2203 static float64 QEMU_SOFTFLOAT_ATTR
2204 soft_f64_div(float64 a
, float64 b
, float_status
*status
)
2206 FloatParts64 pa
, pb
, *pr
;
2208 float64_unpack_canonical(&pa
, a
, status
);
2209 float64_unpack_canonical(&pb
, b
, status
);
2210 pr
= parts_div(&pa
, &pb
, status
);
2212 return float64_round_pack_canonical(pr
, status
);
2215 static float hard_f32_div(float a
, float b
)
2220 static double hard_f64_div(double a
, double b
)
2225 static bool f32_div_pre(union_float32 a
, union_float32 b
)
2227 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2228 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2229 fpclassify(b
.h
) == FP_NORMAL
;
2231 return float32_is_zero_or_normal(a
.s
) && float32_is_normal(b
.s
);
2234 static bool f64_div_pre(union_float64 a
, union_float64 b
)
2236 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2237 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2238 fpclassify(b
.h
) == FP_NORMAL
;
2240 return float64_is_zero_or_normal(a
.s
) && float64_is_normal(b
.s
);
2243 static bool f32_div_post(union_float32 a
, union_float32 b
)
2245 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2246 return fpclassify(a
.h
) != FP_ZERO
;
2248 return !float32_is_zero(a
.s
);
2251 static bool f64_div_post(union_float64 a
, union_float64 b
)
2253 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2254 return fpclassify(a
.h
) != FP_ZERO
;
2256 return !float64_is_zero(a
.s
);
2259 float32 QEMU_FLATTEN
2260 float32_div(float32 a
, float32 b
, float_status
*s
)
2262 return float32_gen2(a
, b
, s
, hard_f32_div
, soft_f32_div
,
2263 f32_div_pre
, f32_div_post
);
2266 float64 QEMU_FLATTEN
2267 float64_div(float64 a
, float64 b
, float_status
*s
)
2269 return float64_gen2(a
, b
, s
, hard_f64_div
, soft_f64_div
,
2270 f64_div_pre
, f64_div_post
);
2273 bfloat16 QEMU_FLATTEN
2274 bfloat16_div(bfloat16 a
, bfloat16 b
, float_status
*status
)
2276 FloatParts64 pa
, pb
, *pr
;
2278 bfloat16_unpack_canonical(&pa
, a
, status
);
2279 bfloat16_unpack_canonical(&pb
, b
, status
);
2280 pr
= parts_div(&pa
, &pb
, status
);
2282 return bfloat16_round_pack_canonical(pr
, status
);
2285 float128 QEMU_FLATTEN
2286 float128_div(float128 a
, float128 b
, float_status
*status
)
2288 FloatParts128 pa
, pb
, *pr
;
2290 float128_unpack_canonical(&pa
, a
, status
);
2291 float128_unpack_canonical(&pb
, b
, status
);
2292 pr
= parts_div(&pa
, &pb
, status
);
2294 return float128_round_pack_canonical(pr
, status
);
2297 floatx80
floatx80_div(floatx80 a
, floatx80 b
, float_status
*status
)
2299 FloatParts128 pa
, pb
, *pr
;
2301 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2302 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2303 return floatx80_default_nan(status
);
2306 pr
= parts_div(&pa
, &pb
, status
);
2307 return floatx80_round_pack_canonical(pr
, status
);
2311 * Float to Float conversions
2313 * Returns the result of converting one float format to another. The
2314 * conversion is performed according to the IEC/IEEE Standard for
2315 * Binary Floating-Point Arithmetic.
2317 * Usually this only needs to take care of raising invalid exceptions
2318 * and handling the conversion on NaNs.
2321 static void parts_float_to_ahp(FloatParts64
*a
, float_status
*s
)
2324 case float_class_qnan
:
2325 case float_class_snan
:
2327 * There is no NaN in the destination format. Raise Invalid
2328 * and return a zero with the sign of the input NaN.
2330 float_raise(float_flag_invalid
, s
);
2331 a
->cls
= float_class_zero
;
2334 case float_class_inf
:
2336 * There is no Inf in the destination format. Raise Invalid
2337 * and return the maximum normal with the correct sign.
2339 float_raise(float_flag_invalid
, s
);
2340 a
->cls
= float_class_normal
;
2341 a
->exp
= float16_params_ahp
.exp_max
;
2342 a
->frac
= MAKE_64BIT_MASK(float16_params_ahp
.frac_shift
,
2343 float16_params_ahp
.frac_size
+ 1);
2346 case float_class_normal
:
2347 case float_class_zero
:
2351 g_assert_not_reached();
2355 static void parts64_float_to_float(FloatParts64
*a
, float_status
*s
)
2357 if (is_nan(a
->cls
)) {
2358 parts_return_nan(a
, s
);
2362 static void parts128_float_to_float(FloatParts128
*a
, float_status
*s
)
2364 if (is_nan(a
->cls
)) {
2365 parts_return_nan(a
, s
);
2369 #define parts_float_to_float(P, S) \
2370 PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2372 static void parts_float_to_float_narrow(FloatParts64
*a
, FloatParts128
*b
,
2379 if (a
->cls
== float_class_normal
) {
2380 frac_truncjam(a
, b
);
2381 } else if (is_nan(a
->cls
)) {
2382 /* Discard the low bits of the NaN. */
2383 a
->frac
= b
->frac_hi
;
2384 parts_return_nan(a
, s
);
2388 static void parts_float_to_float_widen(FloatParts128
*a
, FloatParts64
*b
,
2396 if (is_nan(a
->cls
)) {
2397 parts_return_nan(a
, s
);
2401 float32
float16_to_float32(float16 a
, bool ieee
, float_status
*s
)
2403 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2406 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2407 parts_float_to_float(&p
, s
);
2408 return float32_round_pack_canonical(&p
, s
);
2411 float64
float16_to_float64(float16 a
, bool ieee
, float_status
*s
)
2413 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2416 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2417 parts_float_to_float(&p
, s
);
2418 return float64_round_pack_canonical(&p
, s
);
2421 float16
float32_to_float16(float32 a
, bool ieee
, float_status
*s
)
2424 const FloatFmt
*fmt
;
2426 float32_unpack_canonical(&p
, a
, s
);
2428 parts_float_to_float(&p
, s
);
2429 fmt
= &float16_params
;
2431 parts_float_to_ahp(&p
, s
);
2432 fmt
= &float16_params_ahp
;
2434 return float16a_round_pack_canonical(&p
, s
, fmt
);
2437 static float64 QEMU_SOFTFLOAT_ATTR
2438 soft_float32_to_float64(float32 a
, float_status
*s
)
2442 float32_unpack_canonical(&p
, a
, s
);
2443 parts_float_to_float(&p
, s
);
2444 return float64_round_pack_canonical(&p
, s
);
2447 float64
float32_to_float64(float32 a
, float_status
*s
)
2449 if (likely(float32_is_normal(a
))) {
2450 /* Widening conversion can never produce inexact results. */
2456 } else if (float32_is_zero(a
)) {
2457 return float64_set_sign(float64_zero
, float32_is_neg(a
));
2459 return soft_float32_to_float64(a
, s
);
2463 float16
float64_to_float16(float64 a
, bool ieee
, float_status
*s
)
2466 const FloatFmt
*fmt
;
2468 float64_unpack_canonical(&p
, a
, s
);
2470 parts_float_to_float(&p
, s
);
2471 fmt
= &float16_params
;
2473 parts_float_to_ahp(&p
, s
);
2474 fmt
= &float16_params_ahp
;
2476 return float16a_round_pack_canonical(&p
, s
, fmt
);
2479 float32
float64_to_float32(float64 a
, float_status
*s
)
2483 float64_unpack_canonical(&p
, a
, s
);
2484 parts_float_to_float(&p
, s
);
2485 return float32_round_pack_canonical(&p
, s
);
2488 float32
bfloat16_to_float32(bfloat16 a
, float_status
*s
)
2492 bfloat16_unpack_canonical(&p
, a
, s
);
2493 parts_float_to_float(&p
, s
);
2494 return float32_round_pack_canonical(&p
, s
);
2497 float64
bfloat16_to_float64(bfloat16 a
, float_status
*s
)
2501 bfloat16_unpack_canonical(&p
, a
, s
);
2502 parts_float_to_float(&p
, s
);
2503 return float64_round_pack_canonical(&p
, s
);
2506 bfloat16
float32_to_bfloat16(float32 a
, float_status
*s
)
2510 float32_unpack_canonical(&p
, a
, s
);
2511 parts_float_to_float(&p
, s
);
2512 return bfloat16_round_pack_canonical(&p
, s
);
2515 bfloat16
float64_to_bfloat16(float64 a
, float_status
*s
)
2519 float64_unpack_canonical(&p
, a
, s
);
2520 parts_float_to_float(&p
, s
);
2521 return bfloat16_round_pack_canonical(&p
, s
);
2524 float32
float128_to_float32(float128 a
, float_status
*s
)
2529 float128_unpack_canonical(&p128
, a
, s
);
2530 parts_float_to_float_narrow(&p64
, &p128
, s
);
2531 return float32_round_pack_canonical(&p64
, s
);
2534 float64
float128_to_float64(float128 a
, float_status
*s
)
2539 float128_unpack_canonical(&p128
, a
, s
);
2540 parts_float_to_float_narrow(&p64
, &p128
, s
);
2541 return float64_round_pack_canonical(&p64
, s
);
2544 float128
float32_to_float128(float32 a
, float_status
*s
)
2549 float32_unpack_canonical(&p64
, a
, s
);
2550 parts_float_to_float_widen(&p128
, &p64
, s
);
2551 return float128_round_pack_canonical(&p128
, s
);
2554 float128
float64_to_float128(float64 a
, float_status
*s
)
2559 float64_unpack_canonical(&p64
, a
, s
);
2560 parts_float_to_float_widen(&p128
, &p64
, s
);
2561 return float128_round_pack_canonical(&p128
, s
);
2565 * Round to integral value
2568 float16
float16_round_to_int(float16 a
, float_status
*s
)
2572 float16_unpack_canonical(&p
, a
, s
);
2573 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float16_params
);
2574 return float16_round_pack_canonical(&p
, s
);
2577 float32
float32_round_to_int(float32 a
, float_status
*s
)
2581 float32_unpack_canonical(&p
, a
, s
);
2582 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float32_params
);
2583 return float32_round_pack_canonical(&p
, s
);
2586 float64
float64_round_to_int(float64 a
, float_status
*s
)
2590 float64_unpack_canonical(&p
, a
, s
);
2591 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float64_params
);
2592 return float64_round_pack_canonical(&p
, s
);
2595 bfloat16
bfloat16_round_to_int(bfloat16 a
, float_status
*s
)
2599 bfloat16_unpack_canonical(&p
, a
, s
);
2600 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &bfloat16_params
);
2601 return bfloat16_round_pack_canonical(&p
, s
);
2604 float128
float128_round_to_int(float128 a
, float_status
*s
)
2608 float128_unpack_canonical(&p
, a
, s
);
2609 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float128_params
);
2610 return float128_round_pack_canonical(&p
, s
);
2614 * Floating-point to signed integer conversions
2617 int8_t float16_to_int8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2622 float16_unpack_canonical(&p
, a
, s
);
2623 return parts_float_to_sint(&p
, rmode
, scale
, INT8_MIN
, INT8_MAX
, s
);
2626 int16_t float16_to_int16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2631 float16_unpack_canonical(&p
, a
, s
);
2632 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2635 int32_t float16_to_int32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2640 float16_unpack_canonical(&p
, a
, s
);
2641 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2644 int64_t float16_to_int64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2649 float16_unpack_canonical(&p
, a
, s
);
2650 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2653 int16_t float32_to_int16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2658 float32_unpack_canonical(&p
, a
, s
);
2659 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2662 int32_t float32_to_int32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2667 float32_unpack_canonical(&p
, a
, s
);
2668 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2671 int64_t float32_to_int64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2676 float32_unpack_canonical(&p
, a
, s
);
2677 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2680 int16_t float64_to_int16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2685 float64_unpack_canonical(&p
, a
, s
);
2686 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2689 int32_t float64_to_int32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2694 float64_unpack_canonical(&p
, a
, s
);
2695 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2698 int64_t float64_to_int64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2703 float64_unpack_canonical(&p
, a
, s
);
2704 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2707 int16_t bfloat16_to_int16_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2712 bfloat16_unpack_canonical(&p
, a
, s
);
2713 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2716 int32_t bfloat16_to_int32_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2721 bfloat16_unpack_canonical(&p
, a
, s
);
2722 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2725 int64_t bfloat16_to_int64_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2730 bfloat16_unpack_canonical(&p
, a
, s
);
2731 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2734 static int32_t float128_to_int32_scalbn(float128 a
, FloatRoundMode rmode
,
2735 int scale
, float_status
*s
)
2739 float128_unpack_canonical(&p
, a
, s
);
2740 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2743 static int64_t float128_to_int64_scalbn(float128 a
, FloatRoundMode rmode
,
2744 int scale
, float_status
*s
)
2748 float128_unpack_canonical(&p
, a
, s
);
2749 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2752 int8_t float16_to_int8(float16 a
, float_status
*s
)
2754 return float16_to_int8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2757 int16_t float16_to_int16(float16 a
, float_status
*s
)
2759 return float16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2762 int32_t float16_to_int32(float16 a
, float_status
*s
)
2764 return float16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2767 int64_t float16_to_int64(float16 a
, float_status
*s
)
2769 return float16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2772 int16_t float32_to_int16(float32 a
, float_status
*s
)
2774 return float32_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2777 int32_t float32_to_int32(float32 a
, float_status
*s
)
2779 return float32_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2782 int64_t float32_to_int64(float32 a
, float_status
*s
)
2784 return float32_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2787 int16_t float64_to_int16(float64 a
, float_status
*s
)
2789 return float64_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2792 int32_t float64_to_int32(float64 a
, float_status
*s
)
2794 return float64_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2797 int64_t float64_to_int64(float64 a
, float_status
*s
)
2799 return float64_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2802 int32_t float128_to_int32(float128 a
, float_status
*s
)
2804 return float128_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2807 int64_t float128_to_int64(float128 a
, float_status
*s
)
2809 return float128_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2812 int16_t float16_to_int16_round_to_zero(float16 a
, float_status
*s
)
2814 return float16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2817 int32_t float16_to_int32_round_to_zero(float16 a
, float_status
*s
)
2819 return float16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2822 int64_t float16_to_int64_round_to_zero(float16 a
, float_status
*s
)
2824 return float16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2827 int16_t float32_to_int16_round_to_zero(float32 a
, float_status
*s
)
2829 return float32_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2832 int32_t float32_to_int32_round_to_zero(float32 a
, float_status
*s
)
2834 return float32_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2837 int64_t float32_to_int64_round_to_zero(float32 a
, float_status
*s
)
2839 return float32_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2842 int16_t float64_to_int16_round_to_zero(float64 a
, float_status
*s
)
2844 return float64_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2847 int32_t float64_to_int32_round_to_zero(float64 a
, float_status
*s
)
2849 return float64_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2852 int64_t float64_to_int64_round_to_zero(float64 a
, float_status
*s
)
2854 return float64_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2857 int32_t float128_to_int32_round_to_zero(float128 a
, float_status
*s
)
2859 return float128_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2862 int64_t float128_to_int64_round_to_zero(float128 a
, float_status
*s
)
2864 return float128_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2867 int16_t bfloat16_to_int16(bfloat16 a
, float_status
*s
)
2869 return bfloat16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2872 int32_t bfloat16_to_int32(bfloat16 a
, float_status
*s
)
2874 return bfloat16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2877 int64_t bfloat16_to_int64(bfloat16 a
, float_status
*s
)
2879 return bfloat16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2882 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a
, float_status
*s
)
2884 return bfloat16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2887 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a
, float_status
*s
)
2889 return bfloat16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2892 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a
, float_status
*s
)
2894 return bfloat16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2898 * Floating-point to unsigned integer conversions
2901 uint8_t float16_to_uint8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2906 float16_unpack_canonical(&p
, a
, s
);
2907 return parts_float_to_uint(&p
, rmode
, scale
, UINT8_MAX
, s
);
2910 uint16_t float16_to_uint16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2915 float16_unpack_canonical(&p
, a
, s
);
2916 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2919 uint32_t float16_to_uint32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2924 float16_unpack_canonical(&p
, a
, s
);
2925 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2928 uint64_t float16_to_uint64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2933 float16_unpack_canonical(&p
, a
, s
);
2934 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2937 uint16_t float32_to_uint16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2942 float32_unpack_canonical(&p
, a
, s
);
2943 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2946 uint32_t float32_to_uint32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2951 float32_unpack_canonical(&p
, a
, s
);
2952 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2955 uint64_t float32_to_uint64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2960 float32_unpack_canonical(&p
, a
, s
);
2961 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2964 uint16_t float64_to_uint16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2969 float64_unpack_canonical(&p
, a
, s
);
2970 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2973 uint32_t float64_to_uint32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2978 float64_unpack_canonical(&p
, a
, s
);
2979 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2982 uint64_t float64_to_uint64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2987 float64_unpack_canonical(&p
, a
, s
);
2988 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2991 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2992 int scale
, float_status
*s
)
2996 bfloat16_unpack_canonical(&p
, a
, s
);
2997 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3000 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3001 int scale
, float_status
*s
)
3005 bfloat16_unpack_canonical(&p
, a
, s
);
3006 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3009 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3010 int scale
, float_status
*s
)
3014 bfloat16_unpack_canonical(&p
, a
, s
);
3015 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3018 static uint32_t float128_to_uint32_scalbn(float128 a
, FloatRoundMode rmode
,
3019 int scale
, float_status
*s
)
3023 float128_unpack_canonical(&p
, a
, s
);
3024 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3027 static uint64_t float128_to_uint64_scalbn(float128 a
, FloatRoundMode rmode
,
3028 int scale
, float_status
*s
)
3032 float128_unpack_canonical(&p
, a
, s
);
3033 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3036 uint8_t float16_to_uint8(float16 a
, float_status
*s
)
3038 return float16_to_uint8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3041 uint16_t float16_to_uint16(float16 a
, float_status
*s
)
3043 return float16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3046 uint32_t float16_to_uint32(float16 a
, float_status
*s
)
3048 return float16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3051 uint64_t float16_to_uint64(float16 a
, float_status
*s
)
3053 return float16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3056 uint16_t float32_to_uint16(float32 a
, float_status
*s
)
3058 return float32_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3061 uint32_t float32_to_uint32(float32 a
, float_status
*s
)
3063 return float32_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3066 uint64_t float32_to_uint64(float32 a
, float_status
*s
)
3068 return float32_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3071 uint16_t float64_to_uint16(float64 a
, float_status
*s
)
3073 return float64_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3076 uint32_t float64_to_uint32(float64 a
, float_status
*s
)
3078 return float64_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3081 uint64_t float64_to_uint64(float64 a
, float_status
*s
)
3083 return float64_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3086 uint32_t float128_to_uint32(float128 a
, float_status
*s
)
3088 return float128_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3091 uint64_t float128_to_uint64(float128 a
, float_status
*s
)
3093 return float128_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3096 uint16_t float16_to_uint16_round_to_zero(float16 a
, float_status
*s
)
3098 return float16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3101 uint32_t float16_to_uint32_round_to_zero(float16 a
, float_status
*s
)
3103 return float16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3106 uint64_t float16_to_uint64_round_to_zero(float16 a
, float_status
*s
)
3108 return float16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3111 uint16_t float32_to_uint16_round_to_zero(float32 a
, float_status
*s
)
3113 return float32_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3116 uint32_t float32_to_uint32_round_to_zero(float32 a
, float_status
*s
)
3118 return float32_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3121 uint64_t float32_to_uint64_round_to_zero(float32 a
, float_status
*s
)
3123 return float32_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3126 uint16_t float64_to_uint16_round_to_zero(float64 a
, float_status
*s
)
3128 return float64_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3131 uint32_t float64_to_uint32_round_to_zero(float64 a
, float_status
*s
)
3133 return float64_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3136 uint64_t float64_to_uint64_round_to_zero(float64 a
, float_status
*s
)
3138 return float64_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3141 uint32_t float128_to_uint32_round_to_zero(float128 a
, float_status
*s
)
3143 return float128_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3146 uint64_t float128_to_uint64_round_to_zero(float128 a
, float_status
*s
)
3148 return float128_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3151 uint16_t bfloat16_to_uint16(bfloat16 a
, float_status
*s
)
3153 return bfloat16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3156 uint32_t bfloat16_to_uint32(bfloat16 a
, float_status
*s
)
3158 return bfloat16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3161 uint64_t bfloat16_to_uint64(bfloat16 a
, float_status
*s
)
3163 return bfloat16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3166 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a
, float_status
*s
)
3168 return bfloat16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3171 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a
, float_status
*s
)
3173 return bfloat16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3176 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a
, float_status
*s
)
3178 return bfloat16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3182 * Signed integer to floating-point conversions
3185 float16
int64_to_float16_scalbn(int64_t a
, int scale
, float_status
*status
)
3189 parts_sint_to_float(&p
, a
, scale
, status
);
3190 return float16_round_pack_canonical(&p
, status
);
3193 float16
int32_to_float16_scalbn(int32_t a
, int scale
, float_status
*status
)
3195 return int64_to_float16_scalbn(a
, scale
, status
);
3198 float16
int16_to_float16_scalbn(int16_t a
, int scale
, float_status
*status
)
3200 return int64_to_float16_scalbn(a
, scale
, status
);
3203 float16
int64_to_float16(int64_t a
, float_status
*status
)
3205 return int64_to_float16_scalbn(a
, 0, status
);
3208 float16
int32_to_float16(int32_t a
, float_status
*status
)
3210 return int64_to_float16_scalbn(a
, 0, status
);
3213 float16
int16_to_float16(int16_t a
, float_status
*status
)
3215 return int64_to_float16_scalbn(a
, 0, status
);
3218 float16
int8_to_float16(int8_t a
, float_status
*status
)
3220 return int64_to_float16_scalbn(a
, 0, status
);
3223 float32
int64_to_float32_scalbn(int64_t a
, int scale
, float_status
*status
)
3227 parts64_sint_to_float(&p
, a
, scale
, status
);
3228 return float32_round_pack_canonical(&p
, status
);
3231 float32
int32_to_float32_scalbn(int32_t a
, int scale
, float_status
*status
)
3233 return int64_to_float32_scalbn(a
, scale
, status
);
3236 float32
int16_to_float32_scalbn(int16_t a
, int scale
, float_status
*status
)
3238 return int64_to_float32_scalbn(a
, scale
, status
);
3241 float32
int64_to_float32(int64_t a
, float_status
*status
)
3243 return int64_to_float32_scalbn(a
, 0, status
);
3246 float32
int32_to_float32(int32_t a
, float_status
*status
)
3248 return int64_to_float32_scalbn(a
, 0, status
);
3251 float32
int16_to_float32(int16_t a
, float_status
*status
)
3253 return int64_to_float32_scalbn(a
, 0, status
);
3256 float64
int64_to_float64_scalbn(int64_t a
, int scale
, float_status
*status
)
3260 parts_sint_to_float(&p
, a
, scale
, status
);
3261 return float64_round_pack_canonical(&p
, status
);
3264 float64
int32_to_float64_scalbn(int32_t a
, int scale
, float_status
*status
)
3266 return int64_to_float64_scalbn(a
, scale
, status
);
3269 float64
int16_to_float64_scalbn(int16_t a
, int scale
, float_status
*status
)
3271 return int64_to_float64_scalbn(a
, scale
, status
);
3274 float64
int64_to_float64(int64_t a
, float_status
*status
)
3276 return int64_to_float64_scalbn(a
, 0, status
);
3279 float64
int32_to_float64(int32_t a
, float_status
*status
)
3281 return int64_to_float64_scalbn(a
, 0, status
);
3284 float64
int16_to_float64(int16_t a
, float_status
*status
)
3286 return int64_to_float64_scalbn(a
, 0, status
);
3289 bfloat16
int64_to_bfloat16_scalbn(int64_t a
, int scale
, float_status
*status
)
3293 parts_sint_to_float(&p
, a
, scale
, status
);
3294 return bfloat16_round_pack_canonical(&p
, status
);
3297 bfloat16
int32_to_bfloat16_scalbn(int32_t a
, int scale
, float_status
*status
)
3299 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3302 bfloat16
int16_to_bfloat16_scalbn(int16_t a
, int scale
, float_status
*status
)
3304 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3307 bfloat16
int64_to_bfloat16(int64_t a
, float_status
*status
)
3309 return int64_to_bfloat16_scalbn(a
, 0, status
);
3312 bfloat16
int32_to_bfloat16(int32_t a
, float_status
*status
)
3314 return int64_to_bfloat16_scalbn(a
, 0, status
);
3317 bfloat16
int16_to_bfloat16(int16_t a
, float_status
*status
)
3319 return int64_to_bfloat16_scalbn(a
, 0, status
);
3322 float128
int64_to_float128(int64_t a
, float_status
*status
)
3326 parts_sint_to_float(&p
, a
, 0, status
);
3327 return float128_round_pack_canonical(&p
, status
);
3330 float128
int32_to_float128(int32_t a
, float_status
*status
)
3332 return int64_to_float128(a
, status
);
3336 * Unsigned Integer to floating-point conversions
3339 float16
uint64_to_float16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3343 parts_uint_to_float(&p
, a
, scale
, status
);
3344 return float16_round_pack_canonical(&p
, status
);
3347 float16
uint32_to_float16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3349 return uint64_to_float16_scalbn(a
, scale
, status
);
3352 float16
uint16_to_float16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3354 return uint64_to_float16_scalbn(a
, scale
, status
);
3357 float16
uint64_to_float16(uint64_t a
, float_status
*status
)
3359 return uint64_to_float16_scalbn(a
, 0, status
);
3362 float16
uint32_to_float16(uint32_t a
, float_status
*status
)
3364 return uint64_to_float16_scalbn(a
, 0, status
);
3367 float16
uint16_to_float16(uint16_t a
, float_status
*status
)
3369 return uint64_to_float16_scalbn(a
, 0, status
);
3372 float16
uint8_to_float16(uint8_t a
, float_status
*status
)
3374 return uint64_to_float16_scalbn(a
, 0, status
);
3377 float32
uint64_to_float32_scalbn(uint64_t a
, int scale
, float_status
*status
)
3381 parts_uint_to_float(&p
, a
, scale
, status
);
3382 return float32_round_pack_canonical(&p
, status
);
3385 float32
uint32_to_float32_scalbn(uint32_t a
, int scale
, float_status
*status
)
3387 return uint64_to_float32_scalbn(a
, scale
, status
);
3390 float32
uint16_to_float32_scalbn(uint16_t a
, int scale
, float_status
*status
)
3392 return uint64_to_float32_scalbn(a
, scale
, status
);
3395 float32
uint64_to_float32(uint64_t a
, float_status
*status
)
3397 return uint64_to_float32_scalbn(a
, 0, status
);
3400 float32
uint32_to_float32(uint32_t a
, float_status
*status
)
3402 return uint64_to_float32_scalbn(a
, 0, status
);
3405 float32
uint16_to_float32(uint16_t a
, float_status
*status
)
3407 return uint64_to_float32_scalbn(a
, 0, status
);
3410 float64
uint64_to_float64_scalbn(uint64_t a
, int scale
, float_status
*status
)
3414 parts_uint_to_float(&p
, a
, scale
, status
);
3415 return float64_round_pack_canonical(&p
, status
);
3418 float64
uint32_to_float64_scalbn(uint32_t a
, int scale
, float_status
*status
)
3420 return uint64_to_float64_scalbn(a
, scale
, status
);
3423 float64
uint16_to_float64_scalbn(uint16_t a
, int scale
, float_status
*status
)
3425 return uint64_to_float64_scalbn(a
, scale
, status
);
3428 float64
uint64_to_float64(uint64_t a
, float_status
*status
)
3430 return uint64_to_float64_scalbn(a
, 0, status
);
3433 float64
uint32_to_float64(uint32_t a
, float_status
*status
)
3435 return uint64_to_float64_scalbn(a
, 0, status
);
3438 float64
uint16_to_float64(uint16_t a
, float_status
*status
)
3440 return uint64_to_float64_scalbn(a
, 0, status
);
3443 bfloat16
uint64_to_bfloat16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3447 parts_uint_to_float(&p
, a
, scale
, status
);
3448 return bfloat16_round_pack_canonical(&p
, status
);
3451 bfloat16
uint32_to_bfloat16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3453 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3456 bfloat16
uint16_to_bfloat16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3458 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3461 bfloat16
uint64_to_bfloat16(uint64_t a
, float_status
*status
)
3463 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3466 bfloat16
uint32_to_bfloat16(uint32_t a
, float_status
*status
)
3468 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3471 bfloat16
uint16_to_bfloat16(uint16_t a
, float_status
*status
)
3473 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3476 float128
uint64_to_float128(uint64_t a
, float_status
*status
)
3480 parts_uint_to_float(&p
, a
, 0, status
);
3481 return float128_round_pack_canonical(&p
, status
);
3485 * Minimum and maximum
3488 static float16
float16_minmax(float16 a
, float16 b
, float_status
*s
, int flags
)
3490 FloatParts64 pa
, pb
, *pr
;
3492 float16_unpack_canonical(&pa
, a
, s
);
3493 float16_unpack_canonical(&pb
, b
, s
);
3494 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3496 return float16_round_pack_canonical(pr
, s
);
3499 static bfloat16
bfloat16_minmax(bfloat16 a
, bfloat16 b
,
3500 float_status
*s
, int flags
)
3502 FloatParts64 pa
, pb
, *pr
;
3504 bfloat16_unpack_canonical(&pa
, a
, s
);
3505 bfloat16_unpack_canonical(&pb
, b
, s
);
3506 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3508 return bfloat16_round_pack_canonical(pr
, s
);
3511 static float32
float32_minmax(float32 a
, float32 b
, float_status
*s
, int flags
)
3513 FloatParts64 pa
, pb
, *pr
;
3515 float32_unpack_canonical(&pa
, a
, s
);
3516 float32_unpack_canonical(&pb
, b
, s
);
3517 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3519 return float32_round_pack_canonical(pr
, s
);
3522 static float64
float64_minmax(float64 a
, float64 b
, float_status
*s
, int flags
)
3524 FloatParts64 pa
, pb
, *pr
;
3526 float64_unpack_canonical(&pa
, a
, s
);
3527 float64_unpack_canonical(&pb
, b
, s
);
3528 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3530 return float64_round_pack_canonical(pr
, s
);
3533 static float128
float128_minmax(float128 a
, float128 b
,
3534 float_status
*s
, int flags
)
3536 FloatParts128 pa
, pb
, *pr
;
3538 float128_unpack_canonical(&pa
, a
, s
);
3539 float128_unpack_canonical(&pb
, b
, s
);
3540 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3542 return float128_round_pack_canonical(pr
, s
);
3545 #define MINMAX_1(type, name, flags) \
3546 type type##_##name(type a, type b, float_status *s) \
3547 { return type##_minmax(a, b, s, flags); }
3549 #define MINMAX_2(type) \
3550 MINMAX_1(type, max, 0) \
3551 MINMAX_1(type, maxnum, minmax_isnum) \
3552 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
3553 MINMAX_1(type, min, minmax_ismin) \
3554 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
3555 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3567 * Floating point compare
3570 static FloatRelation QEMU_FLATTEN
3571 float16_do_compare(float16 a
, float16 b
, float_status
*s
, bool is_quiet
)
3573 FloatParts64 pa
, pb
;
3575 float16_unpack_canonical(&pa
, a
, s
);
3576 float16_unpack_canonical(&pb
, b
, s
);
3577 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3580 FloatRelation
float16_compare(float16 a
, float16 b
, float_status
*s
)
3582 return float16_do_compare(a
, b
, s
, false);
3585 FloatRelation
float16_compare_quiet(float16 a
, float16 b
, float_status
*s
)
3587 return float16_do_compare(a
, b
, s
, true);
3590 static FloatRelation QEMU_SOFTFLOAT_ATTR
3591 float32_do_compare(float32 a
, float32 b
, float_status
*s
, bool is_quiet
)
3593 FloatParts64 pa
, pb
;
3595 float32_unpack_canonical(&pa
, a
, s
);
3596 float32_unpack_canonical(&pb
, b
, s
);
3597 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3600 static FloatRelation QEMU_FLATTEN
3601 float32_hs_compare(float32 xa
, float32 xb
, float_status
*s
, bool is_quiet
)
3603 union_float32 ua
, ub
;
3608 if (QEMU_NO_HARDFLOAT
) {
3612 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
3613 if (isgreaterequal(ua
.h
, ub
.h
)) {
3614 if (isgreater(ua
.h
, ub
.h
)) {
3615 return float_relation_greater
;
3617 return float_relation_equal
;
3619 if (likely(isless(ua
.h
, ub
.h
))) {
3620 return float_relation_less
;
3623 * The only condition remaining is unordered.
3624 * Fall through to set flags.
3627 return float32_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
3630 FloatRelation
float32_compare(float32 a
, float32 b
, float_status
*s
)
3632 return float32_hs_compare(a
, b
, s
, false);
3635 FloatRelation
float32_compare_quiet(float32 a
, float32 b
, float_status
*s
)
3637 return float32_hs_compare(a
, b
, s
, true);
3640 static FloatRelation QEMU_SOFTFLOAT_ATTR
3641 float64_do_compare(float64 a
, float64 b
, float_status
*s
, bool is_quiet
)
3643 FloatParts64 pa
, pb
;
3645 float64_unpack_canonical(&pa
, a
, s
);
3646 float64_unpack_canonical(&pb
, b
, s
);
3647 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3650 static FloatRelation QEMU_FLATTEN
3651 float64_hs_compare(float64 xa
, float64 xb
, float_status
*s
, bool is_quiet
)
3653 union_float64 ua
, ub
;
3658 if (QEMU_NO_HARDFLOAT
) {
3662 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
3663 if (isgreaterequal(ua
.h
, ub
.h
)) {
3664 if (isgreater(ua
.h
, ub
.h
)) {
3665 return float_relation_greater
;
3667 return float_relation_equal
;
3669 if (likely(isless(ua
.h
, ub
.h
))) {
3670 return float_relation_less
;
3673 * The only condition remaining is unordered.
3674 * Fall through to set flags.
3677 return float64_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
3680 FloatRelation
float64_compare(float64 a
, float64 b
, float_status
*s
)
3682 return float64_hs_compare(a
, b
, s
, false);
3685 FloatRelation
float64_compare_quiet(float64 a
, float64 b
, float_status
*s
)
3687 return float64_hs_compare(a
, b
, s
, true);
3690 static FloatRelation QEMU_FLATTEN
3691 bfloat16_do_compare(bfloat16 a
, bfloat16 b
, float_status
*s
, bool is_quiet
)
3693 FloatParts64 pa
, pb
;
3695 bfloat16_unpack_canonical(&pa
, a
, s
);
3696 bfloat16_unpack_canonical(&pb
, b
, s
);
3697 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3700 FloatRelation
bfloat16_compare(bfloat16 a
, bfloat16 b
, float_status
*s
)
3702 return bfloat16_do_compare(a
, b
, s
, false);
3705 FloatRelation
bfloat16_compare_quiet(bfloat16 a
, bfloat16 b
, float_status
*s
)
3707 return bfloat16_do_compare(a
, b
, s
, true);
3710 static FloatRelation QEMU_FLATTEN
3711 float128_do_compare(float128 a
, float128 b
, float_status
*s
, bool is_quiet
)
3713 FloatParts128 pa
, pb
;
3715 float128_unpack_canonical(&pa
, a
, s
);
3716 float128_unpack_canonical(&pb
, b
, s
);
3717 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3720 FloatRelation
float128_compare(float128 a
, float128 b
, float_status
*s
)
3722 return float128_do_compare(a
, b
, s
, false);
3725 FloatRelation
float128_compare_quiet(float128 a
, float128 b
, float_status
*s
)
3727 return float128_do_compare(a
, b
, s
, true);
3734 float16
float16_scalbn(float16 a
, int n
, float_status
*status
)
3738 float16_unpack_canonical(&p
, a
, status
);
3739 parts_scalbn(&p
, n
, status
);
3740 return float16_round_pack_canonical(&p
, status
);
3743 float32
float32_scalbn(float32 a
, int n
, float_status
*status
)
3747 float32_unpack_canonical(&p
, a
, status
);
3748 parts_scalbn(&p
, n
, status
);
3749 return float32_round_pack_canonical(&p
, status
);
3752 float64
float64_scalbn(float64 a
, int n
, float_status
*status
)
3756 float64_unpack_canonical(&p
, a
, status
);
3757 parts_scalbn(&p
, n
, status
);
3758 return float64_round_pack_canonical(&p
, status
);
3761 bfloat16
bfloat16_scalbn(bfloat16 a
, int n
, float_status
*status
)
3765 bfloat16_unpack_canonical(&p
, a
, status
);
3766 parts_scalbn(&p
, n
, status
);
3767 return bfloat16_round_pack_canonical(&p
, status
);
3770 float128
float128_scalbn(float128 a
, int n
, float_status
*status
)
3774 float128_unpack_canonical(&p
, a
, status
);
3775 parts_scalbn(&p
, n
, status
);
3776 return float128_round_pack_canonical(&p
, status
);
3783 float16 QEMU_FLATTEN
float16_sqrt(float16 a
, float_status
*status
)
3787 float16_unpack_canonical(&p
, a
, status
);
3788 parts_sqrt(&p
, status
, &float16_params
);
3789 return float16_round_pack_canonical(&p
, status
);
3792 static float32 QEMU_SOFTFLOAT_ATTR
3793 soft_f32_sqrt(float32 a
, float_status
*status
)
3797 float32_unpack_canonical(&p
, a
, status
);
3798 parts_sqrt(&p
, status
, &float32_params
);
3799 return float32_round_pack_canonical(&p
, status
);
3802 static float64 QEMU_SOFTFLOAT_ATTR
3803 soft_f64_sqrt(float64 a
, float_status
*status
)
3807 float64_unpack_canonical(&p
, a
, status
);
3808 parts_sqrt(&p
, status
, &float64_params
);
3809 return float64_round_pack_canonical(&p
, status
);
3812 float32 QEMU_FLATTEN
float32_sqrt(float32 xa
, float_status
*s
)
3814 union_float32 ua
, ur
;
3817 if (unlikely(!can_use_fpu(s
))) {
3821 float32_input_flush1(&ua
.s
, s
);
3822 if (QEMU_HARDFLOAT_1F32_USE_FP
) {
3823 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
3824 fpclassify(ua
.h
) == FP_ZERO
) ||
3828 } else if (unlikely(!float32_is_zero_or_normal(ua
.s
) ||
3829 float32_is_neg(ua
.s
))) {
3836 return soft_f32_sqrt(ua
.s
, s
);
3839 float64 QEMU_FLATTEN
float64_sqrt(float64 xa
, float_status
*s
)
3841 union_float64 ua
, ur
;
3844 if (unlikely(!can_use_fpu(s
))) {
3848 float64_input_flush1(&ua
.s
, s
);
3849 if (QEMU_HARDFLOAT_1F64_USE_FP
) {
3850 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
3851 fpclassify(ua
.h
) == FP_ZERO
) ||
3855 } else if (unlikely(!float64_is_zero_or_normal(ua
.s
) ||
3856 float64_is_neg(ua
.s
))) {
3863 return soft_f64_sqrt(ua
.s
, s
);
3866 bfloat16 QEMU_FLATTEN
bfloat16_sqrt(bfloat16 a
, float_status
*status
)
3870 bfloat16_unpack_canonical(&p
, a
, status
);
3871 parts_sqrt(&p
, status
, &bfloat16_params
);
3872 return bfloat16_round_pack_canonical(&p
, status
);
3875 float128 QEMU_FLATTEN
float128_sqrt(float128 a
, float_status
*status
)
3879 float128_unpack_canonical(&p
, a
, status
);
3880 parts_sqrt(&p
, status
, &float128_params
);
3881 return float128_round_pack_canonical(&p
, status
);
3884 floatx80
floatx80_sqrt(floatx80 a
, float_status
*s
)
3888 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
3889 return floatx80_default_nan(s
);
3891 parts_sqrt(&p
, s
, &floatx80_params
[s
->floatx80_rounding_precision
]);
3892 return floatx80_round_pack_canonical(&p
, s
);
3895 /*----------------------------------------------------------------------------
3896 | The pattern for a default generated NaN.
3897 *----------------------------------------------------------------------------*/
3899 float16
float16_default_nan(float_status
*status
)
3903 parts_default_nan(&p
, status
);
3904 p
.frac
>>= float16_params
.frac_shift
;
3905 return float16_pack_raw(&p
);
3908 float32
float32_default_nan(float_status
*status
)
3912 parts_default_nan(&p
, status
);
3913 p
.frac
>>= float32_params
.frac_shift
;
3914 return float32_pack_raw(&p
);
3917 float64
float64_default_nan(float_status
*status
)
3921 parts_default_nan(&p
, status
);
3922 p
.frac
>>= float64_params
.frac_shift
;
3923 return float64_pack_raw(&p
);
3926 float128
float128_default_nan(float_status
*status
)
3930 parts_default_nan(&p
, status
);
3931 frac_shr(&p
, float128_params
.frac_shift
);
3932 return float128_pack_raw(&p
);
3935 bfloat16
bfloat16_default_nan(float_status
*status
)
3939 parts_default_nan(&p
, status
);
3940 p
.frac
>>= bfloat16_params
.frac_shift
;
3941 return bfloat16_pack_raw(&p
);
3944 /*----------------------------------------------------------------------------
3945 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3946 *----------------------------------------------------------------------------*/
3948 float16
float16_silence_nan(float16 a
, float_status
*status
)
3952 float16_unpack_raw(&p
, a
);
3953 p
.frac
<<= float16_params
.frac_shift
;
3954 parts_silence_nan(&p
, status
);
3955 p
.frac
>>= float16_params
.frac_shift
;
3956 return float16_pack_raw(&p
);
3959 float32
float32_silence_nan(float32 a
, float_status
*status
)
3963 float32_unpack_raw(&p
, a
);
3964 p
.frac
<<= float32_params
.frac_shift
;
3965 parts_silence_nan(&p
, status
);
3966 p
.frac
>>= float32_params
.frac_shift
;
3967 return float32_pack_raw(&p
);
3970 float64
float64_silence_nan(float64 a
, float_status
*status
)
3974 float64_unpack_raw(&p
, a
);
3975 p
.frac
<<= float64_params
.frac_shift
;
3976 parts_silence_nan(&p
, status
);
3977 p
.frac
>>= float64_params
.frac_shift
;
3978 return float64_pack_raw(&p
);
3981 bfloat16
bfloat16_silence_nan(bfloat16 a
, float_status
*status
)
3985 bfloat16_unpack_raw(&p
, a
);
3986 p
.frac
<<= bfloat16_params
.frac_shift
;
3987 parts_silence_nan(&p
, status
);
3988 p
.frac
>>= bfloat16_params
.frac_shift
;
3989 return bfloat16_pack_raw(&p
);
3992 float128
float128_silence_nan(float128 a
, float_status
*status
)
3996 float128_unpack_raw(&p
, a
);
3997 frac_shl(&p
, float128_params
.frac_shift
);
3998 parts_silence_nan(&p
, status
);
3999 frac_shr(&p
, float128_params
.frac_shift
);
4000 return float128_pack_raw(&p
);
4003 /*----------------------------------------------------------------------------
4004 | If `a' is denormal and we are in flush-to-zero mode then set the
4005 | input-denormal exception and return zero. Otherwise just return the value.
4006 *----------------------------------------------------------------------------*/
4008 static bool parts_squash_denormal(FloatParts64 p
, float_status
*status
)
4010 if (p
.exp
== 0 && p
.frac
!= 0) {
4011 float_raise(float_flag_input_denormal
, status
);
4018 float16
float16_squash_input_denormal(float16 a
, float_status
*status
)
4020 if (status
->flush_inputs_to_zero
) {
4023 float16_unpack_raw(&p
, a
);
4024 if (parts_squash_denormal(p
, status
)) {
4025 return float16_set_sign(float16_zero
, p
.sign
);
4031 float32
float32_squash_input_denormal(float32 a
, float_status
*status
)
4033 if (status
->flush_inputs_to_zero
) {
4036 float32_unpack_raw(&p
, a
);
4037 if (parts_squash_denormal(p
, status
)) {
4038 return float32_set_sign(float32_zero
, p
.sign
);
4044 float64
float64_squash_input_denormal(float64 a
, float_status
*status
)
4046 if (status
->flush_inputs_to_zero
) {
4049 float64_unpack_raw(&p
, a
);
4050 if (parts_squash_denormal(p
, status
)) {
4051 return float64_set_sign(float64_zero
, p
.sign
);
4057 bfloat16
bfloat16_squash_input_denormal(bfloat16 a
, float_status
*status
)
4059 if (status
->flush_inputs_to_zero
) {
4062 bfloat16_unpack_raw(&p
, a
);
4063 if (parts_squash_denormal(p
, status
)) {
4064 return bfloat16_set_sign(bfloat16_zero
, p
.sign
);
4070 /*----------------------------------------------------------------------------
4071 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
4072 | and 7, and returns the properly rounded 32-bit integer corresponding to the
4073 | input. If `zSign' is 1, the input is negated before being converted to an
4074 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
4075 | is simply rounded to an integer, with the inexact exception raised if the
4076 | input cannot be represented exactly as an integer. However, if the fixed-
4077 | point input is too large, the invalid exception is raised and the largest
4078 | positive or negative integer is returned.
4079 *----------------------------------------------------------------------------*/
4081 static int32_t roundAndPackInt32(bool zSign
, uint64_t absZ
,
4082 float_status
*status
)
4084 int8_t roundingMode
;
4085 bool roundNearestEven
;
4086 int8_t roundIncrement
, roundBits
;
4089 roundingMode
= status
->float_rounding_mode
;
4090 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4091 switch (roundingMode
) {
4092 case float_round_nearest_even
:
4093 case float_round_ties_away
:
4094 roundIncrement
= 0x40;
4096 case float_round_to_zero
:
4099 case float_round_up
:
4100 roundIncrement
= zSign
? 0 : 0x7f;
4102 case float_round_down
:
4103 roundIncrement
= zSign
? 0x7f : 0;
4105 case float_round_to_odd
:
4106 roundIncrement
= absZ
& 0x80 ? 0 : 0x7f;
4111 roundBits
= absZ
& 0x7F;
4112 absZ
= ( absZ
+ roundIncrement
)>>7;
4113 if (!(roundBits
^ 0x40) && roundNearestEven
) {
4117 if ( zSign
) z
= - z
;
4118 if ( ( absZ
>>32 ) || ( z
&& ( ( z
< 0 ) ^ zSign
) ) ) {
4119 float_raise(float_flag_invalid
, status
);
4120 return zSign
? INT32_MIN
: INT32_MAX
;
4123 float_raise(float_flag_inexact
, status
);
4129 /*----------------------------------------------------------------------------
4130 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4131 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4132 | and returns the properly rounded 64-bit integer corresponding to the input.
4133 | If `zSign' is 1, the input is negated before being converted to an integer.
4134 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4135 | the inexact exception raised if the input cannot be represented exactly as
4136 | an integer. However, if the fixed-point input is too large, the invalid
4137 | exception is raised and the largest positive or negative integer is
4139 *----------------------------------------------------------------------------*/
4141 static int64_t roundAndPackInt64(bool zSign
, uint64_t absZ0
, uint64_t absZ1
,
4142 float_status
*status
)
4144 int8_t roundingMode
;
4145 bool roundNearestEven
, increment
;
4148 roundingMode
= status
->float_rounding_mode
;
4149 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4150 switch (roundingMode
) {
4151 case float_round_nearest_even
:
4152 case float_round_ties_away
:
4153 increment
= ((int64_t) absZ1
< 0);
4155 case float_round_to_zero
:
4158 case float_round_up
:
4159 increment
= !zSign
&& absZ1
;
4161 case float_round_down
:
4162 increment
= zSign
&& absZ1
;
4164 case float_round_to_odd
:
4165 increment
= !(absZ0
& 1) && absZ1
;
4172 if ( absZ0
== 0 ) goto overflow
;
4173 if (!(absZ1
<< 1) && roundNearestEven
) {
4178 if ( zSign
) z
= - z
;
4179 if ( z
&& ( ( z
< 0 ) ^ zSign
) ) {
4181 float_raise(float_flag_invalid
, status
);
4182 return zSign
? INT64_MIN
: INT64_MAX
;
4185 float_raise(float_flag_inexact
, status
);
4191 /*----------------------------------------------------------------------------
4192 | Normalizes the subnormal single-precision floating-point value represented
4193 | by the denormalized significand `aSig'. The normalized exponent and
4194 | significand are stored at the locations pointed to by `zExpPtr' and
4195 | `zSigPtr', respectively.
4196 *----------------------------------------------------------------------------*/
4199 normalizeFloat32Subnormal(uint32_t aSig
, int *zExpPtr
, uint32_t *zSigPtr
)
4203 shiftCount
= clz32(aSig
) - 8;
4204 *zSigPtr
= aSig
<<shiftCount
;
4205 *zExpPtr
= 1 - shiftCount
;
4209 /*----------------------------------------------------------------------------
4210 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4211 | and significand `zSig', and returns the proper single-precision floating-
4212 | point value corresponding to the abstract input. Ordinarily, the abstract
4213 | value is simply rounded and packed into the single-precision format, with
4214 | the inexact exception raised if the abstract input cannot be represented
4215 | exactly. However, if the abstract value is too large, the overflow and
4216 | inexact exceptions are raised and an infinity or maximal finite value is
4217 | returned. If the abstract value is too small, the input value is rounded to
4218 | a subnormal number, and the underflow and inexact exceptions are raised if
4219 | the abstract input cannot be represented exactly as a subnormal single-
4220 | precision floating-point number.
4221 | The input significand `zSig' has its binary point between bits 30
4222 | and 29, which is 7 bits to the left of the usual location. This shifted
4223 | significand must be normalized or smaller. If `zSig' is not normalized,
4224 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4225 | and it must not require rounding. In the usual case that `zSig' is
4226 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4227 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4228 | Binary Floating-Point Arithmetic.
4229 *----------------------------------------------------------------------------*/
4231 static float32
roundAndPackFloat32(bool zSign
, int zExp
, uint32_t zSig
,
4232 float_status
*status
)
4234 int8_t roundingMode
;
4235 bool roundNearestEven
;
4236 int8_t roundIncrement
, roundBits
;
4239 roundingMode
= status
->float_rounding_mode
;
4240 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4241 switch (roundingMode
) {
4242 case float_round_nearest_even
:
4243 case float_round_ties_away
:
4244 roundIncrement
= 0x40;
4246 case float_round_to_zero
:
4249 case float_round_up
:
4250 roundIncrement
= zSign
? 0 : 0x7f;
4252 case float_round_down
:
4253 roundIncrement
= zSign
? 0x7f : 0;
4255 case float_round_to_odd
:
4256 roundIncrement
= zSig
& 0x80 ? 0 : 0x7f;
4262 roundBits
= zSig
& 0x7F;
4263 if ( 0xFD <= (uint16_t) zExp
) {
4264 if ( ( 0xFD < zExp
)
4265 || ( ( zExp
== 0xFD )
4266 && ( (int32_t) ( zSig
+ roundIncrement
) < 0 ) )
4268 bool overflow_to_inf
= roundingMode
!= float_round_to_odd
&&
4269 roundIncrement
!= 0;
4270 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4271 return packFloat32(zSign
, 0xFF, -!overflow_to_inf
);
4274 if (status
->flush_to_zero
) {
4275 float_raise(float_flag_output_denormal
, status
);
4276 return packFloat32(zSign
, 0, 0);
4278 isTiny
= status
->tininess_before_rounding
4280 || (zSig
+ roundIncrement
< 0x80000000);
4281 shift32RightJamming( zSig
, - zExp
, &zSig
);
4283 roundBits
= zSig
& 0x7F;
4284 if (isTiny
&& roundBits
) {
4285 float_raise(float_flag_underflow
, status
);
4287 if (roundingMode
== float_round_to_odd
) {
4289 * For round-to-odd case, the roundIncrement depends on
4290 * zSig which just changed.
4292 roundIncrement
= zSig
& 0x80 ? 0 : 0x7f;
4297 float_raise(float_flag_inexact
, status
);
4299 zSig
= ( zSig
+ roundIncrement
)>>7;
4300 if (!(roundBits
^ 0x40) && roundNearestEven
) {
4303 if ( zSig
== 0 ) zExp
= 0;
4304 return packFloat32( zSign
, zExp
, zSig
);
4308 /*----------------------------------------------------------------------------
4309 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4310 | and significand `zSig', and returns the proper single-precision floating-
4311 | point value corresponding to the abstract input. This routine is just like
4312 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4313 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4314 | floating-point exponent.
4315 *----------------------------------------------------------------------------*/
4318 normalizeRoundAndPackFloat32(bool zSign
, int zExp
, uint32_t zSig
,
4319 float_status
*status
)
4323 shiftCount
= clz32(zSig
) - 1;
4324 return roundAndPackFloat32(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
4329 /*----------------------------------------------------------------------------
4330 | Normalizes the subnormal double-precision floating-point value represented
4331 | by the denormalized significand `aSig'. The normalized exponent and
4332 | significand are stored at the locations pointed to by `zExpPtr' and
4333 | `zSigPtr', respectively.
4334 *----------------------------------------------------------------------------*/
4337 normalizeFloat64Subnormal(uint64_t aSig
, int *zExpPtr
, uint64_t *zSigPtr
)
4341 shiftCount
= clz64(aSig
) - 11;
4342 *zSigPtr
= aSig
<<shiftCount
;
4343 *zExpPtr
= 1 - shiftCount
;
4347 /*----------------------------------------------------------------------------
4348 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4349 | double-precision floating-point value, returning the result. After being
4350 | shifted into the proper positions, the three fields are simply added
4351 | together to form the result. This means that any integer portion of `zSig'
4352 | will be added into the exponent. Since a properly normalized significand
4353 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4354 | than the desired result exponent whenever `zSig' is a complete, normalized
4356 *----------------------------------------------------------------------------*/
4358 static inline float64
packFloat64(bool zSign
, int zExp
, uint64_t zSig
)
4361 return make_float64(
4362 ( ( (uint64_t) zSign
)<<63 ) + ( ( (uint64_t) zExp
)<<52 ) + zSig
);
4366 /*----------------------------------------------------------------------------
4367 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4368 | and significand `zSig', and returns the proper double-precision floating-
4369 | point value corresponding to the abstract input. Ordinarily, the abstract
4370 | value is simply rounded and packed into the double-precision format, with
4371 | the inexact exception raised if the abstract input cannot be represented
4372 | exactly. However, if the abstract value is too large, the overflow and
4373 | inexact exceptions are raised and an infinity or maximal finite value is
4374 | returned. If the abstract value is too small, the input value is rounded to
4375 | a subnormal number, and the underflow and inexact exceptions are raised if
4376 | the abstract input cannot be represented exactly as a subnormal double-
4377 | precision floating-point number.
4378 | The input significand `zSig' has its binary point between bits 62
4379 | and 61, which is 10 bits to the left of the usual location. This shifted
4380 | significand must be normalized or smaller. If `zSig' is not normalized,
4381 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4382 | and it must not require rounding. In the usual case that `zSig' is
4383 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4384 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4385 | Binary Floating-Point Arithmetic.
4386 *----------------------------------------------------------------------------*/
4388 static float64
roundAndPackFloat64(bool zSign
, int zExp
, uint64_t zSig
,
4389 float_status
*status
)
4391 int8_t roundingMode
;
4392 bool roundNearestEven
;
4393 int roundIncrement
, roundBits
;
4396 roundingMode
= status
->float_rounding_mode
;
4397 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4398 switch (roundingMode
) {
4399 case float_round_nearest_even
:
4400 case float_round_ties_away
:
4401 roundIncrement
= 0x200;
4403 case float_round_to_zero
:
4406 case float_round_up
:
4407 roundIncrement
= zSign
? 0 : 0x3ff;
4409 case float_round_down
:
4410 roundIncrement
= zSign
? 0x3ff : 0;
4412 case float_round_to_odd
:
4413 roundIncrement
= (zSig
& 0x400) ? 0 : 0x3ff;
4418 roundBits
= zSig
& 0x3FF;
4419 if ( 0x7FD <= (uint16_t) zExp
) {
4420 if ( ( 0x7FD < zExp
)
4421 || ( ( zExp
== 0x7FD )
4422 && ( (int64_t) ( zSig
+ roundIncrement
) < 0 ) )
4424 bool overflow_to_inf
= roundingMode
!= float_round_to_odd
&&
4425 roundIncrement
!= 0;
4426 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4427 return packFloat64(zSign
, 0x7FF, -(!overflow_to_inf
));
4430 if (status
->flush_to_zero
) {
4431 float_raise(float_flag_output_denormal
, status
);
4432 return packFloat64(zSign
, 0, 0);
4434 isTiny
= status
->tininess_before_rounding
4436 || (zSig
+ roundIncrement
< UINT64_C(0x8000000000000000));
4437 shift64RightJamming( zSig
, - zExp
, &zSig
);
4439 roundBits
= zSig
& 0x3FF;
4440 if (isTiny
&& roundBits
) {
4441 float_raise(float_flag_underflow
, status
);
4443 if (roundingMode
== float_round_to_odd
) {
4445 * For round-to-odd case, the roundIncrement depends on
4446 * zSig which just changed.
4448 roundIncrement
= (zSig
& 0x400) ? 0 : 0x3ff;
4453 float_raise(float_flag_inexact
, status
);
4455 zSig
= ( zSig
+ roundIncrement
)>>10;
4456 if (!(roundBits
^ 0x200) && roundNearestEven
) {
4459 if ( zSig
== 0 ) zExp
= 0;
4460 return packFloat64( zSign
, zExp
, zSig
);
4464 /*----------------------------------------------------------------------------
4465 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4466 | and significand `zSig', and returns the proper double-precision floating-
4467 | point value corresponding to the abstract input. This routine is just like
4468 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4469 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4470 | floating-point exponent.
4471 *----------------------------------------------------------------------------*/
4474 normalizeRoundAndPackFloat64(bool zSign
, int zExp
, uint64_t zSig
,
4475 float_status
*status
)
4479 shiftCount
= clz64(zSig
) - 1;
4480 return roundAndPackFloat64(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
4485 /*----------------------------------------------------------------------------
4486 | Normalizes the subnormal extended double-precision floating-point value
4487 | represented by the denormalized significand `aSig'. The normalized exponent
4488 | and significand are stored at the locations pointed to by `zExpPtr' and
4489 | `zSigPtr', respectively.
4490 *----------------------------------------------------------------------------*/
4492 void normalizeFloatx80Subnormal(uint64_t aSig
, int32_t *zExpPtr
,
4497 shiftCount
= clz64(aSig
);
4498 *zSigPtr
= aSig
<<shiftCount
;
4499 *zExpPtr
= 1 - shiftCount
;
4502 /*----------------------------------------------------------------------------
4503 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4504 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4505 | and returns the proper extended double-precision floating-point value
4506 | corresponding to the abstract input. Ordinarily, the abstract value is
4507 | rounded and packed into the extended double-precision format, with the
4508 | inexact exception raised if the abstract input cannot be represented
4509 | exactly. However, if the abstract value is too large, the overflow and
4510 | inexact exceptions are raised and an infinity or maximal finite value is
4511 | returned. If the abstract value is too small, the input value is rounded to
4512 | a subnormal number, and the underflow and inexact exceptions are raised if
4513 | the abstract input cannot be represented exactly as a subnormal extended
4514 | double-precision floating-point number.
4515 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4516 | the result is rounded to the same number of bits as single or double
4517 | precision, respectively. Otherwise, the result is rounded to the full
4518 | precision of the extended double-precision format.
4519 | The input significand must be normalized or smaller. If the input
4520 | significand is not normalized, `zExp' must be 0; in that case, the result
4521 | returned is a subnormal number, and it must not require rounding. The
4522 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4523 | Floating-Point Arithmetic.
4524 *----------------------------------------------------------------------------*/
4526 floatx80
roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
, bool zSign
,
4527 int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
,
4528 float_status
*status
)
4530 FloatRoundMode roundingMode
;
4531 bool roundNearestEven
, increment
, isTiny
;
4532 int64_t roundIncrement
, roundMask
, roundBits
;
4534 roundingMode
= status
->float_rounding_mode
;
4535 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4536 switch (roundingPrecision
) {
4537 case floatx80_precision_x
:
4539 case floatx80_precision_d
:
4540 roundIncrement
= UINT64_C(0x0000000000000400);
4541 roundMask
= UINT64_C(0x00000000000007FF);
4543 case floatx80_precision_s
:
4544 roundIncrement
= UINT64_C(0x0000008000000000);
4545 roundMask
= UINT64_C(0x000000FFFFFFFFFF);
4548 g_assert_not_reached();
4550 zSig0
|= ( zSig1
!= 0 );
4551 switch (roundingMode
) {
4552 case float_round_nearest_even
:
4553 case float_round_ties_away
:
4555 case float_round_to_zero
:
4558 case float_round_up
:
4559 roundIncrement
= zSign
? 0 : roundMask
;
4561 case float_round_down
:
4562 roundIncrement
= zSign
? roundMask
: 0;
4567 roundBits
= zSig0
& roundMask
;
4568 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4569 if ( ( 0x7FFE < zExp
)
4570 || ( ( zExp
== 0x7FFE ) && ( zSig0
+ roundIncrement
< zSig0
) )
4575 if (status
->flush_to_zero
) {
4576 float_raise(float_flag_output_denormal
, status
);
4577 return packFloatx80(zSign
, 0, 0);
4579 isTiny
= status
->tininess_before_rounding
4581 || (zSig0
<= zSig0
+ roundIncrement
);
4582 shift64RightJamming( zSig0
, 1 - zExp
, &zSig0
);
4584 roundBits
= zSig0
& roundMask
;
4585 if (isTiny
&& roundBits
) {
4586 float_raise(float_flag_underflow
, status
);
4589 float_raise(float_flag_inexact
, status
);
4591 zSig0
+= roundIncrement
;
4592 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4593 roundIncrement
= roundMask
+ 1;
4594 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4595 roundMask
|= roundIncrement
;
4597 zSig0
&= ~ roundMask
;
4598 return packFloatx80( zSign
, zExp
, zSig0
);
4602 float_raise(float_flag_inexact
, status
);
4604 zSig0
+= roundIncrement
;
4605 if ( zSig0
< roundIncrement
) {
4607 zSig0
= UINT64_C(0x8000000000000000);
4609 roundIncrement
= roundMask
+ 1;
4610 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4611 roundMask
|= roundIncrement
;
4613 zSig0
&= ~ roundMask
;
4614 if ( zSig0
== 0 ) zExp
= 0;
4615 return packFloatx80( zSign
, zExp
, zSig0
);
4617 switch (roundingMode
) {
4618 case float_round_nearest_even
:
4619 case float_round_ties_away
:
4620 increment
= ((int64_t)zSig1
< 0);
4622 case float_round_to_zero
:
4625 case float_round_up
:
4626 increment
= !zSign
&& zSig1
;
4628 case float_round_down
:
4629 increment
= zSign
&& zSig1
;
4634 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4635 if ( ( 0x7FFE < zExp
)
4636 || ( ( zExp
== 0x7FFE )
4637 && ( zSig0
== UINT64_C(0xFFFFFFFFFFFFFFFF) )
4643 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4644 if ( ( roundingMode
== float_round_to_zero
)
4645 || ( zSign
&& ( roundingMode
== float_round_up
) )
4646 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4648 return packFloatx80( zSign
, 0x7FFE, ~ roundMask
);
4650 return packFloatx80(zSign
,
4651 floatx80_infinity_high
,
4652 floatx80_infinity_low
);
4655 isTiny
= status
->tininess_before_rounding
4658 || (zSig0
< UINT64_C(0xFFFFFFFFFFFFFFFF));
4659 shift64ExtraRightJamming( zSig0
, zSig1
, 1 - zExp
, &zSig0
, &zSig1
);
4661 if (isTiny
&& zSig1
) {
4662 float_raise(float_flag_underflow
, status
);
4665 float_raise(float_flag_inexact
, status
);
4667 switch (roundingMode
) {
4668 case float_round_nearest_even
:
4669 case float_round_ties_away
:
4670 increment
= ((int64_t)zSig1
< 0);
4672 case float_round_to_zero
:
4675 case float_round_up
:
4676 increment
= !zSign
&& zSig1
;
4678 case float_round_down
:
4679 increment
= zSign
&& zSig1
;
4686 if (!(zSig1
<< 1) && roundNearestEven
) {
4689 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4691 return packFloatx80( zSign
, zExp
, zSig0
);
4695 float_raise(float_flag_inexact
, status
);
4701 zSig0
= UINT64_C(0x8000000000000000);
4704 if (!(zSig1
<< 1) && roundNearestEven
) {
4710 if ( zSig0
== 0 ) zExp
= 0;
4712 return packFloatx80( zSign
, zExp
, zSig0
);
4716 /*----------------------------------------------------------------------------
4717 | Takes an abstract floating-point value having sign `zSign', exponent
4718 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4719 | and returns the proper extended double-precision floating-point value
4720 | corresponding to the abstract input. This routine is just like
4721 | `roundAndPackFloatx80' except that the input significand does not have to be
4723 *----------------------------------------------------------------------------*/
4725 floatx80
normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
,
4726 bool zSign
, int32_t zExp
,
4727 uint64_t zSig0
, uint64_t zSig1
,
4728 float_status
*status
)
4737 shiftCount
= clz64(zSig0
);
4738 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
4740 return roundAndPackFloatx80(roundingPrecision
, zSign
, zExp
,
4741 zSig0
, zSig1
, status
);
4745 /*----------------------------------------------------------------------------
4746 | Returns the least-significant 64 fraction bits of the quadruple-precision
4747 | floating-point value `a'.
4748 *----------------------------------------------------------------------------*/
4750 static inline uint64_t extractFloat128Frac1( float128 a
)
4757 /*----------------------------------------------------------------------------
4758 | Returns the most-significant 48 fraction bits of the quadruple-precision
4759 | floating-point value `a'.
4760 *----------------------------------------------------------------------------*/
4762 static inline uint64_t extractFloat128Frac0( float128 a
)
4765 return a
.high
& UINT64_C(0x0000FFFFFFFFFFFF);
4769 /*----------------------------------------------------------------------------
4770 | Returns the exponent bits of the quadruple-precision floating-point value
4772 *----------------------------------------------------------------------------*/
4774 static inline int32_t extractFloat128Exp( float128 a
)
4777 return ( a
.high
>>48 ) & 0x7FFF;
4781 /*----------------------------------------------------------------------------
4782 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4783 *----------------------------------------------------------------------------*/
4785 static inline bool extractFloat128Sign(float128 a
)
4787 return a
.high
>> 63;
4790 /*----------------------------------------------------------------------------
4791 | Normalizes the subnormal quadruple-precision floating-point value
4792 | represented by the denormalized significand formed by the concatenation of
4793 | `aSig0' and `aSig1'. The normalized exponent is stored at the location
4794 | pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4795 | significand are stored at the location pointed to by `zSig0Ptr', and the
4796 | least significant 64 bits of the normalized significand are stored at the
4797 | location pointed to by `zSig1Ptr'.
4798 *----------------------------------------------------------------------------*/
4801 normalizeFloat128Subnormal(
4812 shiftCount
= clz64(aSig1
) - 15;
4813 if ( shiftCount
< 0 ) {
4814 *zSig0Ptr
= aSig1
>>( - shiftCount
);
4815 *zSig1Ptr
= aSig1
<<( shiftCount
& 63 );
4818 *zSig0Ptr
= aSig1
<<shiftCount
;
4821 *zExpPtr
= - shiftCount
- 63;
4824 shiftCount
= clz64(aSig0
) - 15;
4825 shortShift128Left( aSig0
, aSig1
, shiftCount
, zSig0Ptr
, zSig1Ptr
);
4826 *zExpPtr
= 1 - shiftCount
;
4831 /*----------------------------------------------------------------------------
4832 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4833 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4834 | floating-point value, returning the result. After being shifted into the
4835 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4836 | added together to form the most significant 32 bits of the result. This
4837 | means that any integer portion of `zSig0' will be added into the exponent.
4838 | Since a properly normalized significand will have an integer portion equal
4839 | to 1, the `zExp' input should be 1 less than the desired result exponent
4840 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4842 *----------------------------------------------------------------------------*/
4844 static inline float128
4845 packFloat128(bool zSign
, int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
)
4850 z
.high
= ((uint64_t)zSign
<< 63) + ((uint64_t)zExp
<< 48) + zSig0
;
4854 /*----------------------------------------------------------------------------
4855 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4856 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4857 | and `zSig2', and returns the proper quadruple-precision floating-point value
4858 | corresponding to the abstract input. Ordinarily, the abstract value is
4859 | simply rounded and packed into the quadruple-precision format, with the
4860 | inexact exception raised if the abstract input cannot be represented
4861 | exactly. However, if the abstract value is too large, the overflow and
4862 | inexact exceptions are raised and an infinity or maximal finite value is
4863 | returned. If the abstract value is too small, the input value is rounded to
4864 | a subnormal number, and the underflow and inexact exceptions are raised if
4865 | the abstract input cannot be represented exactly as a subnormal quadruple-
4866 | precision floating-point number.
4867 | The input significand must be normalized or smaller. If the input
4868 | significand is not normalized, `zExp' must be 0; in that case, the result
4869 | returned is a subnormal number, and it must not require rounding. In the
4870 | usual case that the input significand is normalized, `zExp' must be 1 less
4871 | than the ``true'' floating-point exponent. The handling of underflow and
4872 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4873 *----------------------------------------------------------------------------*/
4875 static float128
roundAndPackFloat128(bool zSign
, int32_t zExp
,
4876 uint64_t zSig0
, uint64_t zSig1
,
4877 uint64_t zSig2
, float_status
*status
)
4879 int8_t roundingMode
;
4880 bool roundNearestEven
, increment
, isTiny
;
4882 roundingMode
= status
->float_rounding_mode
;
4883 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4884 switch (roundingMode
) {
4885 case float_round_nearest_even
:
4886 case float_round_ties_away
:
4887 increment
= ((int64_t)zSig2
< 0);
4889 case float_round_to_zero
:
4892 case float_round_up
:
4893 increment
= !zSign
&& zSig2
;
4895 case float_round_down
:
4896 increment
= zSign
&& zSig2
;
4898 case float_round_to_odd
:
4899 increment
= !(zSig1
& 0x1) && zSig2
;
4904 if ( 0x7FFD <= (uint32_t) zExp
) {
4905 if ( ( 0x7FFD < zExp
)
4906 || ( ( zExp
== 0x7FFD )
4908 UINT64_C(0x0001FFFFFFFFFFFF),
4909 UINT64_C(0xFFFFFFFFFFFFFFFF),
4916 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4917 if ( ( roundingMode
== float_round_to_zero
)
4918 || ( zSign
&& ( roundingMode
== float_round_up
) )
4919 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4920 || (roundingMode
== float_round_to_odd
)
4926 UINT64_C(0x0000FFFFFFFFFFFF),
4927 UINT64_C(0xFFFFFFFFFFFFFFFF)
4930 return packFloat128( zSign
, 0x7FFF, 0, 0 );
4933 if (status
->flush_to_zero
) {
4934 float_raise(float_flag_output_denormal
, status
);
4935 return packFloat128(zSign
, 0, 0, 0);
4937 isTiny
= status
->tininess_before_rounding
4940 || lt128(zSig0
, zSig1
,
4941 UINT64_C(0x0001FFFFFFFFFFFF),
4942 UINT64_C(0xFFFFFFFFFFFFFFFF));
4943 shift128ExtraRightJamming(
4944 zSig0
, zSig1
, zSig2
, - zExp
, &zSig0
, &zSig1
, &zSig2
);
4946 if (isTiny
&& zSig2
) {
4947 float_raise(float_flag_underflow
, status
);
4949 switch (roundingMode
) {
4950 case float_round_nearest_even
:
4951 case float_round_ties_away
:
4952 increment
= ((int64_t)zSig2
< 0);
4954 case float_round_to_zero
:
4957 case float_round_up
:
4958 increment
= !zSign
&& zSig2
;
4960 case float_round_down
:
4961 increment
= zSign
&& zSig2
;
4963 case float_round_to_odd
:
4964 increment
= !(zSig1
& 0x1) && zSig2
;
4972 float_raise(float_flag_inexact
, status
);
4975 add128( zSig0
, zSig1
, 0, 1, &zSig0
, &zSig1
);
4976 if ((zSig2
+ zSig2
== 0) && roundNearestEven
) {
4981 if ( ( zSig0
| zSig1
) == 0 ) zExp
= 0;
4983 return packFloat128( zSign
, zExp
, zSig0
, zSig1
);
4987 /*----------------------------------------------------------------------------
4988 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4989 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4990 | returns the proper quadruple-precision floating-point value corresponding
4991 | to the abstract input. This routine is just like `roundAndPackFloat128'
4992 | except that the input significand has fewer bits and does not have to be
4993 | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4995 *----------------------------------------------------------------------------*/
4997 static float128
normalizeRoundAndPackFloat128(bool zSign
, int32_t zExp
,
4998 uint64_t zSig0
, uint64_t zSig1
,
4999 float_status
*status
)
5009 shiftCount
= clz64(zSig0
) - 15;
5010 if ( 0 <= shiftCount
) {
5012 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
5015 shift128ExtraRightJamming(
5016 zSig0
, zSig1
, 0, - shiftCount
, &zSig0
, &zSig1
, &zSig2
);
5019 return roundAndPackFloat128(zSign
, zExp
, zSig0
, zSig1
, zSig2
, status
);
5024 /*----------------------------------------------------------------------------
5025 | Returns the result of converting the 32-bit two's complement integer `a'
5026 | to the extended double-precision floating-point format. The conversion
5027 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5029 *----------------------------------------------------------------------------*/
5031 floatx80
int32_to_floatx80(int32_t a
, float_status
*status
)
5038 if ( a
== 0 ) return packFloatx80( 0, 0, 0 );
5040 absA
= zSign
? - a
: a
;
5041 shiftCount
= clz32(absA
) + 32;
5043 return packFloatx80( zSign
, 0x403E - shiftCount
, zSig
<<shiftCount
);
5047 /*----------------------------------------------------------------------------
5048 | Returns the result of converting the 64-bit two's complement integer `a'
5049 | to the extended double-precision floating-point format. The conversion
5050 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5052 *----------------------------------------------------------------------------*/
5054 floatx80
int64_to_floatx80(int64_t a
, float_status
*status
)
5060 if ( a
== 0 ) return packFloatx80( 0, 0, 0 );
5062 absA
= zSign
? - a
: a
;
5063 shiftCount
= clz64(absA
);
5064 return packFloatx80( zSign
, 0x403E - shiftCount
, absA
<<shiftCount
);
5068 /*----------------------------------------------------------------------------
5069 | Returns the result of converting the single-precision floating-point value
5070 | `a' to the extended double-precision floating-point format. The conversion
5071 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5073 *----------------------------------------------------------------------------*/
5075 floatx80
float32_to_floatx80(float32 a
, float_status
*status
)
5081 a
= float32_squash_input_denormal(a
, status
);
5082 aSig
= extractFloat32Frac( a
);
5083 aExp
= extractFloat32Exp( a
);
5084 aSign
= extractFloat32Sign( a
);
5085 if ( aExp
== 0xFF ) {
5087 floatx80 res
= commonNaNToFloatx80(float32ToCommonNaN(a
, status
),
5089 return floatx80_silence_nan(res
, status
);
5091 return packFloatx80(aSign
,
5092 floatx80_infinity_high
,
5093 floatx80_infinity_low
);
5096 if ( aSig
== 0 ) return packFloatx80( aSign
, 0, 0 );
5097 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5100 return packFloatx80( aSign
, aExp
+ 0x3F80, ( (uint64_t) aSig
)<<40 );
5104 /*----------------------------------------------------------------------------
5105 | Returns the remainder of the single-precision floating-point value `a'
5106 | with respect to the corresponding value `b'. The operation is performed
5107 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5108 *----------------------------------------------------------------------------*/
5110 float32
float32_rem(float32 a
, float32 b
, float_status
*status
)
5113 int aExp
, bExp
, expDiff
;
5114 uint32_t aSig
, bSig
;
5116 uint64_t aSig64
, bSig64
, q64
;
5117 uint32_t alternateASig
;
5119 a
= float32_squash_input_denormal(a
, status
);
5120 b
= float32_squash_input_denormal(b
, status
);
5122 aSig
= extractFloat32Frac( a
);
5123 aExp
= extractFloat32Exp( a
);
5124 aSign
= extractFloat32Sign( a
);
5125 bSig
= extractFloat32Frac( b
);
5126 bExp
= extractFloat32Exp( b
);
5127 if ( aExp
== 0xFF ) {
5128 if ( aSig
|| ( ( bExp
== 0xFF ) && bSig
) ) {
5129 return propagateFloat32NaN(a
, b
, status
);
5131 float_raise(float_flag_invalid
, status
);
5132 return float32_default_nan(status
);
5134 if ( bExp
== 0xFF ) {
5136 return propagateFloat32NaN(a
, b
, status
);
5142 float_raise(float_flag_invalid
, status
);
5143 return float32_default_nan(status
);
5145 normalizeFloat32Subnormal( bSig
, &bExp
, &bSig
);
5148 if ( aSig
== 0 ) return a
;
5149 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5151 expDiff
= aExp
- bExp
;
5154 if ( expDiff
< 32 ) {
5157 if ( expDiff
< 0 ) {
5158 if ( expDiff
< -1 ) return a
;
5161 q
= ( bSig
<= aSig
);
5162 if ( q
) aSig
-= bSig
;
5163 if ( 0 < expDiff
) {
5164 q
= ( ( (uint64_t) aSig
)<<32 ) / bSig
;
5167 aSig
= ( ( aSig
>>1 )<<( expDiff
- 1 ) ) - bSig
* q
;
5175 if ( bSig
<= aSig
) aSig
-= bSig
;
5176 aSig64
= ( (uint64_t) aSig
)<<40;
5177 bSig64
= ( (uint64_t) bSig
)<<40;
5179 while ( 0 < expDiff
) {
5180 q64
= estimateDiv128To64( aSig64
, 0, bSig64
);
5181 q64
= ( 2 < q64
) ? q64
- 2 : 0;
5182 aSig64
= - ( ( bSig
* q64
)<<38 );
5186 q64
= estimateDiv128To64( aSig64
, 0, bSig64
);
5187 q64
= ( 2 < q64
) ? q64
- 2 : 0;
5188 q
= q64
>>( 64 - expDiff
);
5190 aSig
= ( ( aSig64
>>33 )<<( expDiff
- 1 ) ) - bSig
* q
;
5193 alternateASig
= aSig
;
5196 } while ( 0 <= (int32_t) aSig
);
5197 sigMean
= aSig
+ alternateASig
;
5198 if ( ( sigMean
< 0 ) || ( ( sigMean
== 0 ) && ( q
& 1 ) ) ) {
5199 aSig
= alternateASig
;
5201 zSign
= ( (int32_t) aSig
< 0 );
5202 if ( zSign
) aSig
= - aSig
;
5203 return normalizeRoundAndPackFloat32(aSign
^ zSign
, bExp
, aSig
, status
);
5208 /*----------------------------------------------------------------------------
5209 | Returns the binary exponential of the single-precision floating-point value
5210 | `a'. The operation is performed according to the IEC/IEEE Standard for
5211 | Binary Floating-Point Arithmetic.
5213 | Uses the following identities:
5215 | 1. -------------------------------------------------------------------------
5219 | 2. -------------------------------------------------------------------------
5222 | e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5224 *----------------------------------------------------------------------------*/
5226 static const float64 float32_exp2_coefficients
[15] =
5228 const_float64( 0x3ff0000000000000ll
), /* 1 */
5229 const_float64( 0x3fe0000000000000ll
), /* 2 */
5230 const_float64( 0x3fc5555555555555ll
), /* 3 */
5231 const_float64( 0x3fa5555555555555ll
), /* 4 */
5232 const_float64( 0x3f81111111111111ll
), /* 5 */
5233 const_float64( 0x3f56c16c16c16c17ll
), /* 6 */
5234 const_float64( 0x3f2a01a01a01a01all
), /* 7 */
5235 const_float64( 0x3efa01a01a01a01all
), /* 8 */
5236 const_float64( 0x3ec71de3a556c734ll
), /* 9 */
5237 const_float64( 0x3e927e4fb7789f5cll
), /* 10 */
5238 const_float64( 0x3e5ae64567f544e4ll
), /* 11 */
5239 const_float64( 0x3e21eed8eff8d898ll
), /* 12 */
5240 const_float64( 0x3de6124613a86d09ll
), /* 13 */
5241 const_float64( 0x3da93974a8c07c9dll
), /* 14 */
5242 const_float64( 0x3d6ae7f3e733b81fll
), /* 15 */
5245 float32
float32_exp2(float32 a
, float_status
*status
)
5252 a
= float32_squash_input_denormal(a
, status
);
5254 aSig
= extractFloat32Frac( a
);
5255 aExp
= extractFloat32Exp( a
);
5256 aSign
= extractFloat32Sign( a
);
5258 if ( aExp
== 0xFF) {
5260 return propagateFloat32NaN(a
, float32_zero
, status
);
5262 return (aSign
) ? float32_zero
: a
;
5265 if (aSig
== 0) return float32_one
;
5268 float_raise(float_flag_inexact
, status
);
5270 /* ******************************* */
5271 /* using float64 for approximation */
5272 /* ******************************* */
5273 x
= float32_to_float64(a
, status
);
5274 x
= float64_mul(x
, float64_ln2
, status
);
5278 for (i
= 0 ; i
< 15 ; i
++) {
5281 f
= float64_mul(xn
, float32_exp2_coefficients
[i
], status
);
5282 r
= float64_add(r
, f
, status
);
5284 xn
= float64_mul(xn
, x
, status
);
5287 return float64_to_float32(r
, status
);
5290 /*----------------------------------------------------------------------------
5291 | Returns the binary log of the single-precision floating-point value `a'.
5292 | The operation is performed according to the IEC/IEEE Standard for Binary
5293 | Floating-Point Arithmetic.
5294 *----------------------------------------------------------------------------*/
5295 float32
float32_log2(float32 a
, float_status
*status
)
5299 uint32_t aSig
, zSig
, i
;
5301 a
= float32_squash_input_denormal(a
, status
);
5302 aSig
= extractFloat32Frac( a
);
5303 aExp
= extractFloat32Exp( a
);
5304 aSign
= extractFloat32Sign( a
);
5307 if ( aSig
== 0 ) return packFloat32( 1, 0xFF, 0 );
5308 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5311 float_raise(float_flag_invalid
, status
);
5312 return float32_default_nan(status
);
5314 if ( aExp
== 0xFF ) {
5316 return propagateFloat32NaN(a
, float32_zero
, status
);
5326 for (i
= 1 << 22; i
> 0; i
>>= 1) {
5327 aSig
= ( (uint64_t)aSig
* aSig
) >> 23;
5328 if ( aSig
& 0x01000000 ) {
5337 return normalizeRoundAndPackFloat32(zSign
, 0x85, zSig
, status
);
5340 /*----------------------------------------------------------------------------
5341 | Returns the result of converting the double-precision floating-point value
5342 | `a' to the extended double-precision floating-point format. The conversion
5343 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5345 *----------------------------------------------------------------------------*/
5347 floatx80
float64_to_floatx80(float64 a
, float_status
*status
)
5353 a
= float64_squash_input_denormal(a
, status
);
5354 aSig
= extractFloat64Frac( a
);
5355 aExp
= extractFloat64Exp( a
);
5356 aSign
= extractFloat64Sign( a
);
5357 if ( aExp
== 0x7FF ) {
5359 floatx80 res
= commonNaNToFloatx80(float64ToCommonNaN(a
, status
),
5361 return floatx80_silence_nan(res
, status
);
5363 return packFloatx80(aSign
,
5364 floatx80_infinity_high
,
5365 floatx80_infinity_low
);
5368 if ( aSig
== 0 ) return packFloatx80( aSign
, 0, 0 );
5369 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5373 aSign
, aExp
+ 0x3C00, (aSig
| UINT64_C(0x0010000000000000)) << 11);
5377 /*----------------------------------------------------------------------------
5378 | Returns the remainder of the double-precision floating-point value `a'
5379 | with respect to the corresponding value `b'. The operation is performed
5380 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5381 *----------------------------------------------------------------------------*/
5383 float64
float64_rem(float64 a
, float64 b
, float_status
*status
)
5386 int aExp
, bExp
, expDiff
;
5387 uint64_t aSig
, bSig
;
5388 uint64_t q
, alternateASig
;
5391 a
= float64_squash_input_denormal(a
, status
);
5392 b
= float64_squash_input_denormal(b
, status
);
5393 aSig
= extractFloat64Frac( a
);
5394 aExp
= extractFloat64Exp( a
);
5395 aSign
= extractFloat64Sign( a
);
5396 bSig
= extractFloat64Frac( b
);
5397 bExp
= extractFloat64Exp( b
);
5398 if ( aExp
== 0x7FF ) {
5399 if ( aSig
|| ( ( bExp
== 0x7FF ) && bSig
) ) {
5400 return propagateFloat64NaN(a
, b
, status
);
5402 float_raise(float_flag_invalid
, status
);
5403 return float64_default_nan(status
);
5405 if ( bExp
== 0x7FF ) {
5407 return propagateFloat64NaN(a
, b
, status
);
5413 float_raise(float_flag_invalid
, status
);
5414 return float64_default_nan(status
);
5416 normalizeFloat64Subnormal( bSig
, &bExp
, &bSig
);
5419 if ( aSig
== 0 ) return a
;
5420 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5422 expDiff
= aExp
- bExp
;
5423 aSig
= (aSig
| UINT64_C(0x0010000000000000)) << 11;
5424 bSig
= (bSig
| UINT64_C(0x0010000000000000)) << 11;
5425 if ( expDiff
< 0 ) {
5426 if ( expDiff
< -1 ) return a
;
5429 q
= ( bSig
<= aSig
);
5430 if ( q
) aSig
-= bSig
;
5432 while ( 0 < expDiff
) {
5433 q
= estimateDiv128To64( aSig
, 0, bSig
);
5434 q
= ( 2 < q
) ? q
- 2 : 0;
5435 aSig
= - ( ( bSig
>>2 ) * q
);
5439 if ( 0 < expDiff
) {
5440 q
= estimateDiv128To64( aSig
, 0, bSig
);
5441 q
= ( 2 < q
) ? q
- 2 : 0;
5444 aSig
= ( ( aSig
>>1 )<<( expDiff
- 1 ) ) - bSig
* q
;
5451 alternateASig
= aSig
;
5454 } while ( 0 <= (int64_t) aSig
);
5455 sigMean
= aSig
+ alternateASig
;
5456 if ( ( sigMean
< 0 ) || ( ( sigMean
== 0 ) && ( q
& 1 ) ) ) {
5457 aSig
= alternateASig
;
5459 zSign
= ( (int64_t) aSig
< 0 );
5460 if ( zSign
) aSig
= - aSig
;
5461 return normalizeRoundAndPackFloat64(aSign
^ zSign
, bExp
, aSig
, status
);
5465 /*----------------------------------------------------------------------------
5466 | Returns the binary log of the double-precision floating-point value `a'.
5467 | The operation is performed according to the IEC/IEEE Standard for Binary
5468 | Floating-Point Arithmetic.
5469 *----------------------------------------------------------------------------*/
5470 float64
float64_log2(float64 a
, float_status
*status
)
5474 uint64_t aSig
, aSig0
, aSig1
, zSig
, i
;
5475 a
= float64_squash_input_denormal(a
, status
);
5477 aSig
= extractFloat64Frac( a
);
5478 aExp
= extractFloat64Exp( a
);
5479 aSign
= extractFloat64Sign( a
);
5482 if ( aSig
== 0 ) return packFloat64( 1, 0x7FF, 0 );
5483 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5486 float_raise(float_flag_invalid
, status
);
5487 return float64_default_nan(status
);
5489 if ( aExp
== 0x7FF ) {
5491 return propagateFloat64NaN(a
, float64_zero
, status
);
5497 aSig
|= UINT64_C(0x0010000000000000);
5499 zSig
= (uint64_t)aExp
<< 52;
5500 for (i
= 1LL << 51; i
> 0; i
>>= 1) {
5501 mul64To128( aSig
, aSig
, &aSig0
, &aSig1
);
5502 aSig
= ( aSig0
<< 12 ) | ( aSig1
>> 52 );
5503 if ( aSig
& UINT64_C(0x0020000000000000) ) {
5511 return normalizeRoundAndPackFloat64(zSign
, 0x408, zSig
, status
);
5514 /*----------------------------------------------------------------------------
5515 | Returns the result of converting the extended double-precision floating-
5516 | point value `a' to the 32-bit two's complement integer format. The
5517 | conversion is performed according to the IEC/IEEE Standard for Binary
5518 | Floating-Point Arithmetic---which means in particular that the conversion
5519 | is rounded according to the current rounding mode. If `a' is a NaN, the
5520 | largest positive integer is returned. Otherwise, if the conversion
5521 | overflows, the largest integer with the same sign as `a' is returned.
5522 *----------------------------------------------------------------------------*/
5524 int32_t floatx80_to_int32(floatx80 a
, float_status
*status
)
5527 int32_t aExp
, shiftCount
;
5530 if (floatx80_invalid_encoding(a
)) {
5531 float_raise(float_flag_invalid
, status
);
5534 aSig
= extractFloatx80Frac( a
);
5535 aExp
= extractFloatx80Exp( a
);
5536 aSign
= extractFloatx80Sign( a
);
5537 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) aSign
= 0;
5538 shiftCount
= 0x4037 - aExp
;
5539 if ( shiftCount
<= 0 ) shiftCount
= 1;
5540 shift64RightJamming( aSig
, shiftCount
, &aSig
);
5541 return roundAndPackInt32(aSign
, aSig
, status
);
5545 /*----------------------------------------------------------------------------
5546 | Returns the result of converting the extended double-precision floating-
5547 | point value `a' to the 32-bit two's complement integer format. The
5548 | conversion is performed according to the IEC/IEEE Standard for Binary
5549 | Floating-Point Arithmetic, except that the conversion is always rounded
5550 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5551 | Otherwise, if the conversion overflows, the largest integer with the same
5552 | sign as `a' is returned.
5553 *----------------------------------------------------------------------------*/
5555 int32_t floatx80_to_int32_round_to_zero(floatx80 a
, float_status
*status
)
5558 int32_t aExp
, shiftCount
;
5559 uint64_t aSig
, savedASig
;
5562 if (floatx80_invalid_encoding(a
)) {
5563 float_raise(float_flag_invalid
, status
);
5566 aSig
= extractFloatx80Frac( a
);
5567 aExp
= extractFloatx80Exp( a
);
5568 aSign
= extractFloatx80Sign( a
);
5569 if ( 0x401E < aExp
) {
5570 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) aSign
= 0;
5573 else if ( aExp
< 0x3FFF ) {
5575 float_raise(float_flag_inexact
, status
);
5579 shiftCount
= 0x403E - aExp
;
5581 aSig
>>= shiftCount
;
5583 if ( aSign
) z
= - z
;
5584 if ( ( z
< 0 ) ^ aSign
) {
5586 float_raise(float_flag_invalid
, status
);
5587 return aSign
? (int32_t) 0x80000000 : 0x7FFFFFFF;
5589 if ( ( aSig
<<shiftCount
) != savedASig
) {
5590 float_raise(float_flag_inexact
, status
);
5596 /*----------------------------------------------------------------------------
5597 | Returns the result of converting the extended double-precision floating-
5598 | point value `a' to the 64-bit two's complement integer format. The
5599 | conversion is performed according to the IEC/IEEE Standard for Binary
5600 | Floating-Point Arithmetic---which means in particular that the conversion
5601 | is rounded according to the current rounding mode. If `a' is a NaN,
5602 | the largest positive integer is returned. Otherwise, if the conversion
5603 | overflows, the largest integer with the same sign as `a' is returned.
5604 *----------------------------------------------------------------------------*/
5606 int64_t floatx80_to_int64(floatx80 a
, float_status
*status
)
5609 int32_t aExp
, shiftCount
;
5610 uint64_t aSig
, aSigExtra
;
5612 if (floatx80_invalid_encoding(a
)) {
5613 float_raise(float_flag_invalid
, status
);
5616 aSig
= extractFloatx80Frac( a
);
5617 aExp
= extractFloatx80Exp( a
);
5618 aSign
= extractFloatx80Sign( a
);
5619 shiftCount
= 0x403E - aExp
;
5620 if ( shiftCount
<= 0 ) {
5622 float_raise(float_flag_invalid
, status
);
5623 if (!aSign
|| floatx80_is_any_nan(a
)) {
5631 shift64ExtraRightJamming( aSig
, 0, shiftCount
, &aSig
, &aSigExtra
);
5633 return roundAndPackInt64(aSign
, aSig
, aSigExtra
, status
);
5637 /*----------------------------------------------------------------------------
5638 | Returns the result of converting the extended double-precision floating-
5639 | point value `a' to the 64-bit two's complement integer format. The
5640 | conversion is performed according to the IEC/IEEE Standard for Binary
5641 | Floating-Point Arithmetic, except that the conversion is always rounded
5642 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5643 | Otherwise, if the conversion overflows, the largest integer with the same
5644 | sign as `a' is returned.
5645 *----------------------------------------------------------------------------*/
5647 int64_t floatx80_to_int64_round_to_zero(floatx80 a
, float_status
*status
)
5650 int32_t aExp
, shiftCount
;
5654 if (floatx80_invalid_encoding(a
)) {
5655 float_raise(float_flag_invalid
, status
);
5658 aSig
= extractFloatx80Frac( a
);
5659 aExp
= extractFloatx80Exp( a
);
5660 aSign
= extractFloatx80Sign( a
);
5661 shiftCount
= aExp
- 0x403E;
5662 if ( 0 <= shiftCount
) {
5663 aSig
&= UINT64_C(0x7FFFFFFFFFFFFFFF);
5664 if ( ( a
.high
!= 0xC03E ) || aSig
) {
5665 float_raise(float_flag_invalid
, status
);
5666 if ( ! aSign
|| ( ( aExp
== 0x7FFF ) && aSig
) ) {
5672 else if ( aExp
< 0x3FFF ) {
5674 float_raise(float_flag_inexact
, status
);
5678 z
= aSig
>>( - shiftCount
);
5679 if ( (uint64_t) ( aSig
<<( shiftCount
& 63 ) ) ) {
5680 float_raise(float_flag_inexact
, status
);
5682 if ( aSign
) z
= - z
;
5687 /*----------------------------------------------------------------------------
5688 | Returns the result of converting the extended double-precision floating-
5689 | point value `a' to the single-precision floating-point format. The
5690 | conversion is performed according to the IEC/IEEE Standard for Binary
5691 | Floating-Point Arithmetic.
5692 *----------------------------------------------------------------------------*/
5694 float32
floatx80_to_float32(floatx80 a
, float_status
*status
)
5700 if (floatx80_invalid_encoding(a
)) {
5701 float_raise(float_flag_invalid
, status
);
5702 return float32_default_nan(status
);
5704 aSig
= extractFloatx80Frac( a
);
5705 aExp
= extractFloatx80Exp( a
);
5706 aSign
= extractFloatx80Sign( a
);
5707 if ( aExp
== 0x7FFF ) {
5708 if ( (uint64_t) ( aSig
<<1 ) ) {
5709 float32 res
= commonNaNToFloat32(floatx80ToCommonNaN(a
, status
),
5711 return float32_silence_nan(res
, status
);
5713 return packFloat32( aSign
, 0xFF, 0 );
5715 shift64RightJamming( aSig
, 33, &aSig
);
5716 if ( aExp
|| aSig
) aExp
-= 0x3F81;
5717 return roundAndPackFloat32(aSign
, aExp
, aSig
, status
);
5721 /*----------------------------------------------------------------------------
5722 | Returns the result of converting the extended double-precision floating-
5723 | point value `a' to the double-precision floating-point format. The
5724 | conversion is performed according to the IEC/IEEE Standard for Binary
5725 | Floating-Point Arithmetic.
5726 *----------------------------------------------------------------------------*/
5728 float64
floatx80_to_float64(floatx80 a
, float_status
*status
)
5732 uint64_t aSig
, zSig
;
5734 if (floatx80_invalid_encoding(a
)) {
5735 float_raise(float_flag_invalid
, status
);
5736 return float64_default_nan(status
);
5738 aSig
= extractFloatx80Frac( a
);
5739 aExp
= extractFloatx80Exp( a
);
5740 aSign
= extractFloatx80Sign( a
);
5741 if ( aExp
== 0x7FFF ) {
5742 if ( (uint64_t) ( aSig
<<1 ) ) {
5743 float64 res
= commonNaNToFloat64(floatx80ToCommonNaN(a
, status
),
5745 return float64_silence_nan(res
, status
);
5747 return packFloat64( aSign
, 0x7FF, 0 );
5749 shift64RightJamming( aSig
, 1, &zSig
);
5750 if ( aExp
|| aSig
) aExp
-= 0x3C01;
5751 return roundAndPackFloat64(aSign
, aExp
, zSig
, status
);
5755 /*----------------------------------------------------------------------------
5756 | Returns the result of converting the extended double-precision floating-
5757 | point value `a' to the quadruple-precision floating-point format. The
5758 | conversion is performed according to the IEC/IEEE Standard for Binary
5759 | Floating-Point Arithmetic.
5760 *----------------------------------------------------------------------------*/
5762 float128
floatx80_to_float128(floatx80 a
, float_status
*status
)
5766 uint64_t aSig
, zSig0
, zSig1
;
5768 if (floatx80_invalid_encoding(a
)) {
5769 float_raise(float_flag_invalid
, status
);
5770 return float128_default_nan(status
);
5772 aSig
= extractFloatx80Frac( a
);
5773 aExp
= extractFloatx80Exp( a
);
5774 aSign
= extractFloatx80Sign( a
);
5775 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) {
5776 float128 res
= commonNaNToFloat128(floatx80ToCommonNaN(a
, status
),
5778 return float128_silence_nan(res
, status
);
5780 shift128Right( aSig
<<1, 0, 16, &zSig0
, &zSig1
);
5781 return packFloat128( aSign
, aExp
, zSig0
, zSig1
);
5785 /*----------------------------------------------------------------------------
5786 | Rounds the extended double-precision floating-point value `a'
5787 | to the precision provided by floatx80_rounding_precision and returns the
5788 | result as an extended double-precision floating-point value.
5789 | The operation is performed according to the IEC/IEEE Standard for Binary
5790 | Floating-Point Arithmetic.
5791 *----------------------------------------------------------------------------*/
5793 floatx80
floatx80_round(floatx80 a
, float_status
*status
)
5797 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
5798 return floatx80_default_nan(status
);
5800 return floatx80_round_pack_canonical(&p
, status
);
5803 /*----------------------------------------------------------------------------
5804 | Rounds the extended double-precision floating-point value `a' to an integer,
5805 | and returns the result as an extended quadruple-precision floating-point
5806 | value. The operation is performed according to the IEC/IEEE Standard for
5807 | Binary Floating-Point Arithmetic.
5808 *----------------------------------------------------------------------------*/
5810 floatx80
floatx80_round_to_int(floatx80 a
, float_status
*status
)
5814 uint64_t lastBitMask
, roundBitsMask
;
5817 if (floatx80_invalid_encoding(a
)) {
5818 float_raise(float_flag_invalid
, status
);
5819 return floatx80_default_nan(status
);
5821 aExp
= extractFloatx80Exp( a
);
5822 if ( 0x403E <= aExp
) {
5823 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a
)<<1 ) ) {
5824 return propagateFloatx80NaN(a
, a
, status
);
5828 if ( aExp
< 0x3FFF ) {
5830 && ( (uint64_t) ( extractFloatx80Frac( a
) ) == 0 ) ) {
5833 float_raise(float_flag_inexact
, status
);
5834 aSign
= extractFloatx80Sign( a
);
5835 switch (status
->float_rounding_mode
) {
5836 case float_round_nearest_even
:
5837 if ( ( aExp
== 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a
)<<1 )
5840 packFloatx80( aSign
, 0x3FFF, UINT64_C(0x8000000000000000));
5843 case float_round_ties_away
:
5844 if (aExp
== 0x3FFE) {
5845 return packFloatx80(aSign
, 0x3FFF, UINT64_C(0x8000000000000000));
5848 case float_round_down
:
5851 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5852 : packFloatx80( 0, 0, 0 );
5853 case float_round_up
:
5855 aSign
? packFloatx80( 1, 0, 0 )
5856 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5858 case float_round_to_zero
:
5861 g_assert_not_reached();
5863 return packFloatx80( aSign
, 0, 0 );
5866 lastBitMask
<<= 0x403E - aExp
;
5867 roundBitsMask
= lastBitMask
- 1;
5869 switch (status
->float_rounding_mode
) {
5870 case float_round_nearest_even
:
5871 z
.low
+= lastBitMask
>>1;
5872 if ((z
.low
& roundBitsMask
) == 0) {
5873 z
.low
&= ~lastBitMask
;
5876 case float_round_ties_away
:
5877 z
.low
+= lastBitMask
>> 1;
5879 case float_round_to_zero
:
5881 case float_round_up
:
5882 if (!extractFloatx80Sign(z
)) {
5883 z
.low
+= roundBitsMask
;
5886 case float_round_down
:
5887 if (extractFloatx80Sign(z
)) {
5888 z
.low
+= roundBitsMask
;
5894 z
.low
&= ~ roundBitsMask
;
5897 z
.low
= UINT64_C(0x8000000000000000);
5899 if (z
.low
!= a
.low
) {
5900 float_raise(float_flag_inexact
, status
);
5906 /*----------------------------------------------------------------------------
5907 | Returns the remainder of the extended double-precision floating-point value
5908 | `a' with respect to the corresponding value `b'. The operation is performed
5909 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
5910 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
5911 | the quotient toward zero instead. '*quotient' is set to the low 64 bits of
5912 | the absolute value of the integer quotient.
5913 *----------------------------------------------------------------------------*/
5915 floatx80
floatx80_modrem(floatx80 a
, floatx80 b
, bool mod
, uint64_t *quotient
,
5916 float_status
*status
)
5919 int32_t aExp
, bExp
, expDiff
, aExpOrig
;
5920 uint64_t aSig0
, aSig1
, bSig
;
5921 uint64_t q
, term0
, term1
, alternateASig0
, alternateASig1
;
5924 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
5925 float_raise(float_flag_invalid
, status
);
5926 return floatx80_default_nan(status
);
5928 aSig0
= extractFloatx80Frac( a
);
5929 aExpOrig
= aExp
= extractFloatx80Exp( a
);
5930 aSign
= extractFloatx80Sign( a
);
5931 bSig
= extractFloatx80Frac( b
);
5932 bExp
= extractFloatx80Exp( b
);
5933 if ( aExp
== 0x7FFF ) {
5934 if ( (uint64_t) ( aSig0
<<1 )
5935 || ( ( bExp
== 0x7FFF ) && (uint64_t) ( bSig
<<1 ) ) ) {
5936 return propagateFloatx80NaN(a
, b
, status
);
5940 if ( bExp
== 0x7FFF ) {
5941 if ((uint64_t)(bSig
<< 1)) {
5942 return propagateFloatx80NaN(a
, b
, status
);
5944 if (aExp
== 0 && aSig0
>> 63) {
5946 * Pseudo-denormal argument must be returned in normalized
5949 return packFloatx80(aSign
, 1, aSig0
);
5956 float_raise(float_flag_invalid
, status
);
5957 return floatx80_default_nan(status
);
5959 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
5962 if ( aSig0
== 0 ) return a
;
5963 normalizeFloatx80Subnormal( aSig0
, &aExp
, &aSig0
);
5966 expDiff
= aExp
- bExp
;
5968 if ( expDiff
< 0 ) {
5969 if ( mod
|| expDiff
< -1 ) {
5970 if (aExp
== 1 && aExpOrig
== 0) {
5972 * Pseudo-denormal argument must be returned in
5975 return packFloatx80(aSign
, aExp
, aSig0
);
5979 shift128Right( aSig0
, 0, 1, &aSig0
, &aSig1
);
5982 *quotient
= q
= ( bSig
<= aSig0
);
5983 if ( q
) aSig0
-= bSig
;
5985 while ( 0 < expDiff
) {
5986 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
5987 q
= ( 2 < q
) ? q
- 2 : 0;
5988 mul64To128( bSig
, q
, &term0
, &term1
);
5989 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
5990 shortShift128Left( aSig0
, aSig1
, 62, &aSig0
, &aSig1
);
5996 if ( 0 < expDiff
) {
5997 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
5998 q
= ( 2 < q
) ? q
- 2 : 0;
6000 mul64To128( bSig
, q
<<( 64 - expDiff
), &term0
, &term1
);
6001 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6002 shortShift128Left( 0, bSig
, 64 - expDiff
, &term0
, &term1
);
6003 while ( le128( term0
, term1
, aSig0
, aSig1
) ) {
6005 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6008 *quotient
<<= expDiff
;
6019 sub128( term0
, term1
, aSig0
, aSig1
, &alternateASig0
, &alternateASig1
);
6020 if ( lt128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
6021 || ( eq128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
6024 aSig0
= alternateASig0
;
6025 aSig1
= alternateASig1
;
6031 normalizeRoundAndPackFloatx80(
6032 floatx80_precision_x
, zSign
, bExp
+ expDiff
, aSig0
, aSig1
, status
);
6036 /*----------------------------------------------------------------------------
6037 | Returns the remainder of the extended double-precision floating-point value
6038 | `a' with respect to the corresponding value `b'. The operation is performed
6039 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6040 *----------------------------------------------------------------------------*/
6042 floatx80
floatx80_rem(floatx80 a
, floatx80 b
, float_status
*status
)
6045 return floatx80_modrem(a
, b
, false, "ient
, status
);
6048 /*----------------------------------------------------------------------------
6049 | Returns the remainder of the extended double-precision floating-point value
6050 | `a' with respect to the corresponding value `b', with the quotient truncated
6052 *----------------------------------------------------------------------------*/
6054 floatx80
floatx80_mod(floatx80 a
, floatx80 b
, float_status
*status
)
6057 return floatx80_modrem(a
, b
, true, "ient
, status
);
6060 /*----------------------------------------------------------------------------
6061 | Returns the result of converting the quadruple-precision floating-point
6062 | value `a' to the extended double-precision floating-point format. The
6063 | conversion is performed according to the IEC/IEEE Standard for Binary
6064 | Floating-Point Arithmetic.
6065 *----------------------------------------------------------------------------*/
6067 floatx80
float128_to_floatx80(float128 a
, float_status
*status
)
6071 uint64_t aSig0
, aSig1
;
6073 aSig1
= extractFloat128Frac1( a
);
6074 aSig0
= extractFloat128Frac0( a
);
6075 aExp
= extractFloat128Exp( a
);
6076 aSign
= extractFloat128Sign( a
);
6077 if ( aExp
== 0x7FFF ) {
6078 if ( aSig0
| aSig1
) {
6079 floatx80 res
= commonNaNToFloatx80(float128ToCommonNaN(a
, status
),
6081 return floatx80_silence_nan(res
, status
);
6083 return packFloatx80(aSign
, floatx80_infinity_high
,
6084 floatx80_infinity_low
);
6087 if ( ( aSig0
| aSig1
) == 0 ) return packFloatx80( aSign
, 0, 0 );
6088 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
6091 aSig0
|= UINT64_C(0x0001000000000000);
6093 shortShift128Left( aSig0
, aSig1
, 15, &aSig0
, &aSig1
);
6094 return roundAndPackFloatx80(80, aSign
, aExp
, aSig0
, aSig1
, status
);
6098 /*----------------------------------------------------------------------------
6099 | Returns the remainder of the quadruple-precision floating-point value `a'
6100 | with respect to the corresponding value `b'. The operation is performed
6101 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6102 *----------------------------------------------------------------------------*/
6104 float128
float128_rem(float128 a
, float128 b
, float_status
*status
)
6107 int32_t aExp
, bExp
, expDiff
;
6108 uint64_t aSig0
, aSig1
, bSig0
, bSig1
, q
, term0
, term1
, term2
;
6109 uint64_t allZero
, alternateASig0
, alternateASig1
, sigMean1
;
6112 aSig1
= extractFloat128Frac1( a
);
6113 aSig0
= extractFloat128Frac0( a
);
6114 aExp
= extractFloat128Exp( a
);
6115 aSign
= extractFloat128Sign( a
);
6116 bSig1
= extractFloat128Frac1( b
);
6117 bSig0
= extractFloat128Frac0( b
);
6118 bExp
= extractFloat128Exp( b
);
6119 if ( aExp
== 0x7FFF ) {
6120 if ( ( aSig0
| aSig1
)
6121 || ( ( bExp
== 0x7FFF ) && ( bSig0
| bSig1
) ) ) {
6122 return propagateFloat128NaN(a
, b
, status
);
6126 if ( bExp
== 0x7FFF ) {
6127 if (bSig0
| bSig1
) {
6128 return propagateFloat128NaN(a
, b
, status
);
6133 if ( ( bSig0
| bSig1
) == 0 ) {
6135 float_raise(float_flag_invalid
, status
);
6136 return float128_default_nan(status
);
6138 normalizeFloat128Subnormal( bSig0
, bSig1
, &bExp
, &bSig0
, &bSig1
);
6141 if ( ( aSig0
| aSig1
) == 0 ) return a
;
6142 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
6144 expDiff
= aExp
- bExp
;
6145 if ( expDiff
< -1 ) return a
;
6147 aSig0
| UINT64_C(0x0001000000000000),
6149 15 - ( expDiff
< 0 ),
6154 bSig0
| UINT64_C(0x0001000000000000), bSig1
, 15, &bSig0
, &bSig1
);
6155 q
= le128( bSig0
, bSig1
, aSig0
, aSig1
);
6156 if ( q
) sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
6158 while ( 0 < expDiff
) {
6159 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
6160 q
= ( 4 < q
) ? q
- 4 : 0;
6161 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
6162 shortShift192Left( term0
, term1
, term2
, 61, &term1
, &term2
, &allZero
);
6163 shortShift128Left( aSig0
, aSig1
, 61, &aSig0
, &allZero
);
6164 sub128( aSig0
, 0, term1
, term2
, &aSig0
, &aSig1
);
6167 if ( -64 < expDiff
) {
6168 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
6169 q
= ( 4 < q
) ? q
- 4 : 0;
6171 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
6173 if ( expDiff
< 0 ) {
6174 shift128Right( aSig0
, aSig1
, - expDiff
, &aSig0
, &aSig1
);
6177 shortShift128Left( aSig0
, aSig1
, expDiff
, &aSig0
, &aSig1
);
6179 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
6180 sub128( aSig0
, aSig1
, term1
, term2
, &aSig0
, &aSig1
);
6183 shift128Right( aSig0
, aSig1
, 12, &aSig0
, &aSig1
);
6184 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
6187 alternateASig0
= aSig0
;
6188 alternateASig1
= aSig1
;
6190 sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
6191 } while ( 0 <= (int64_t) aSig0
);
6193 aSig0
, aSig1
, alternateASig0
, alternateASig1
, (uint64_t *)&sigMean0
, &sigMean1
);
6194 if ( ( sigMean0
< 0 )
6195 || ( ( ( sigMean0
| sigMean1
) == 0 ) && ( q
& 1 ) ) ) {
6196 aSig0
= alternateASig0
;
6197 aSig1
= alternateASig1
;
6199 zSign
= ( (int64_t) aSig0
< 0 );
6200 if ( zSign
) sub128( 0, 0, aSig0
, aSig1
, &aSig0
, &aSig1
);
6201 return normalizeRoundAndPackFloat128(aSign
^ zSign
, bExp
- 4, aSig0
, aSig1
,
6205 static inline FloatRelation
6206 floatx80_compare_internal(floatx80 a
, floatx80 b
, bool is_quiet
,
6207 float_status
*status
)
6211 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
6212 float_raise(float_flag_invalid
, status
);
6213 return float_relation_unordered
;
6215 if (( ( extractFloatx80Exp( a
) == 0x7fff ) &&
6216 ( extractFloatx80Frac( a
)<<1 ) ) ||
6217 ( ( extractFloatx80Exp( b
) == 0x7fff ) &&
6218 ( extractFloatx80Frac( b
)<<1 ) )) {
6220 floatx80_is_signaling_nan(a
, status
) ||
6221 floatx80_is_signaling_nan(b
, status
)) {
6222 float_raise(float_flag_invalid
, status
);
6224 return float_relation_unordered
;
6226 aSign
= extractFloatx80Sign( a
);
6227 bSign
= extractFloatx80Sign( b
);
6228 if ( aSign
!= bSign
) {
6230 if ( ( ( (uint16_t) ( ( a
.high
| b
.high
) << 1 ) ) == 0) &&
6231 ( ( a
.low
| b
.low
) == 0 ) ) {
6233 return float_relation_equal
;
6235 return 1 - (2 * aSign
);
6238 /* Normalize pseudo-denormals before comparison. */
6239 if ((a
.high
& 0x7fff) == 0 && a
.low
& UINT64_C(0x8000000000000000)) {
6242 if ((b
.high
& 0x7fff) == 0 && b
.low
& UINT64_C(0x8000000000000000)) {
6245 if (a
.low
== b
.low
&& a
.high
== b
.high
) {
6246 return float_relation_equal
;
6248 return 1 - 2 * (aSign
^ ( lt128( a
.high
, a
.low
, b
.high
, b
.low
) ));
6253 FloatRelation
floatx80_compare(floatx80 a
, floatx80 b
, float_status
*status
)
6255 return floatx80_compare_internal(a
, b
, 0, status
);
6258 FloatRelation
floatx80_compare_quiet(floatx80 a
, floatx80 b
,
6259 float_status
*status
)
6261 return floatx80_compare_internal(a
, b
, 1, status
);
6264 floatx80
floatx80_scalbn(floatx80 a
, int n
, float_status
*status
)
6270 if (floatx80_invalid_encoding(a
)) {
6271 float_raise(float_flag_invalid
, status
);
6272 return floatx80_default_nan(status
);
6274 aSig
= extractFloatx80Frac( a
);
6275 aExp
= extractFloatx80Exp( a
);
6276 aSign
= extractFloatx80Sign( a
);
6278 if ( aExp
== 0x7FFF ) {
6280 return propagateFloatx80NaN(a
, a
, status
);
6294 } else if (n
< -0x10000) {
6299 return normalizeRoundAndPackFloatx80(status
->floatx80_rounding_precision
,
6300 aSign
, aExp
, aSig
, 0, status
);
6303 static void __attribute__((constructor
)) softfloat_init(void)
6305 union_float64 ua
, ub
, uc
, ur
;
6307 if (QEMU_NO_HARDFLOAT
) {
6311 * Test that the host's FMA is not obviously broken. For example,
6312 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6313 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6315 ua
.s
= 0x0020000000000001ULL
;
6316 ub
.s
= 0x3ca0000000000000ULL
;
6317 uc
.s
= 0x0020000000000000ULL
;
6318 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
6319 if (ur
.s
!= 0x0020000000000001ULL
) {
6320 force_soft_fma
= true;