4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
85 #include "qemu/osdep.h"
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
/*
 * Input-flush helper generators.  GEN_INPUT_FLUSH__NOCHECK(name, soft_t)
 * defines a helper that replaces a denormal *a with a like-signed zero and
 * raises float_flag_input_denormal.  The FLUSH1/2/3 variants flush one,
 * two or three inputs, and do nothing unless s->flush_inputs_to_zero.
 * NOTE(review): this view of the file is mangled — the macro-body brace
 * lines are missing; verify against upstream before building.
 */
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck
, float32
)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck
, float64
)
141 #undef GEN_INPUT_FLUSH__NOCHECK
/* One-input flush, gated on s->flush_inputs_to_zero. */
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
146 if (likely(!s->flush_inputs_to_zero)) { \
149 soft_t ## _input_flush__nocheck(a, s); \
152 GEN_INPUT_FLUSH1(float32_input_flush1
, float32
)
153 GEN_INPUT_FLUSH1(float64_input_flush1
, float64
)
154 #undef GEN_INPUT_FLUSH1
/* Two-input flush, gated on s->flush_inputs_to_zero. */
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
159 if (likely(!s->flush_inputs_to_zero)) { \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
166 GEN_INPUT_FLUSH2(float32_input_flush2
, float32
)
167 GEN_INPUT_FLUSH2(float64_input_flush2
, float64
)
168 #undef GEN_INPUT_FLUSH2
/* Three-input flush, gated on s->flush_inputs_to_zero. */
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
173 if (likely(!s->flush_inputs_to_zero)) { \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
181 GEN_INPUT_FLUSH3(float32_input_flush3
, float32
)
182 GEN_INPUT_FLUSH3(float64_input_flush3
, float64
)
183 #undef GEN_INPUT_FLUSH3
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF 1
215 # define QEMU_HARDFLOAT_USE_ISINF 0
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 # warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
/*
 * can_use_fpu: true when the hardfloat fast path is usable — the inexact
 * flag must already be set and rounding must be to nearest-even.
 * NOTE(review): brace lines are missing from this mangled view.
 */
235 static inline bool can_use_fpu(const float_status
*s
)
237 if (QEMU_NO_HARDFLOAT
) {
240 return likely(s
->float_exception_flags
& float_flag_inexact
&&
241 s
->float_rounding_mode
== float_round_nearest_even
);
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
/*
 * Function-pointer types for the two-input hardfloat dispatchers:
 * *_check_fn are the pre/post operand checks, soft_* the softfloat
 * fallbacks, hard_* the native host-FPU implementations.
 */
266 typedef bool (*f32_check_fn
)(union_float32 a
, union_float32 b
);
267 typedef bool (*f64_check_fn
)(union_float64 a
, union_float64 b
);
269 typedef float32 (*soft_f32_op2_fn
)(float32 a
, float32 b
, float_status
*s
);
270 typedef float64 (*soft_f64_op2_fn
)(float64 a
, float64 b
, float_status
*s
);
271 typedef float (*hard_f32_op2_fn
)(float a
, float b
);
272 typedef double (*hard_f64_op2_fn
)(double a
, double b
);
274 /* 2-input is-zero-or-normal */
/* Both operands are zero or normal; uses fpclassify() when USE_FP. */
275 static inline bool f32_is_zon2(union_float32 a
, union_float32 b
)
277 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
282 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
283 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
285 return float32_is_zero_or_normal(a
.s
) &&
286 float32_is_zero_or_normal(b
.s
);
/* double-precision variant of the 2-input check */
289 static inline bool f64_is_zon2(union_float64 a
, union_float64 b
)
291 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
292 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
293 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
295 return float64_is_zero_or_normal(a
.s
) &&
296 float64_is_zero_or_normal(b
.s
);
299 /* 3-input is-zero-or-normal */
301 bool f32_is_zon3(union_float32 a
, union_float32 b
, union_float32 c
)
303 if (QEMU_HARDFLOAT_3F32_USE_FP
) {
304 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
305 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
306 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
308 return float32_is_zero_or_normal(a
.s
) &&
309 float32_is_zero_or_normal(b
.s
) &&
310 float32_is_zero_or_normal(c
.s
);
/* double-precision variant of the 3-input check */
314 bool f64_is_zon3(union_float64 a
, union_float64 b
, union_float64 c
)
316 if (QEMU_HARDFLOAT_3F64_USE_FP
) {
317 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
318 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
319 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
321 return float64_is_zero_or_normal(a
.s
) &&
322 float64_is_zero_or_normal(b
.s
) &&
323 float64_is_zero_or_normal(c
.s
);
/*
 * Infinity checks.  NOTE(review): the QEMU_HARDFLOAT_USE_ISINF fast-path
 * body (presumably `return isinf(a.h);`) is missing from this mangled
 * view — confirm against upstream.
 */
326 static inline bool f32_is_inf(union_float32 a
)
328 if (QEMU_HARDFLOAT_USE_ISINF
) {
331 return float32_is_infinity(a
.s
);
/* double-precision variant */
334 static inline bool f64_is_inf(union_float64 a
)
336 if (QEMU_HARDFLOAT_USE_ISINF
) {
339 return float64_is_infinity(a
.s
);
/*
 * float32_gen2: generic 2-operand single-precision dispatcher.  Runs the
 * native `hard' op when the status allows it and `pre' accepts the
 * operands; raises overflow if the hard result is infinite; otherwise
 * falls back to the softfloat `soft' implementation.
 * NOTE(review): several lines (braces, union initialisation from xa/xb,
 * and the hardfloat return) are missing from this mangled view.
 */
342 static inline float32
343 float32_gen2(float32 xa
, float32 xb
, float_status
*s
,
344 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
,
345 f32_check_fn pre
, f32_check_fn post
)
347 union_float32 ua
, ub
, ur
;
352 if (unlikely(!can_use_fpu(s
))) {
356 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
357 if (unlikely(!pre(ua
, ub
))) {
361 ur
.h
= hard(ua
.h
, ub
.h
);
362 if (unlikely(f32_is_inf(ur
))) {
363 float_raise(float_flag_overflow
, s
);
364 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
) && post(ua
, ub
)) {
370 return soft(ua
.s
, ub
.s
, s
);
/*
 * float64_gen2: double-precision twin of float32_gen2 (same dispatch
 * strategy; uses fabs()/DBL_MIN instead of fabsf()/FLT_MIN).
 * NOTE(review): same missing lines as float32_gen2 in this view.
 */
373 static inline float64
374 float64_gen2(float64 xa
, float64 xb
, float_status
*s
,
375 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
,
376 f64_check_fn pre
, f64_check_fn post
)
378 union_float64 ua
, ub
, ur
;
383 if (unlikely(!can_use_fpu(s
))) {
387 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
388 if (unlikely(!pre(ua
, ub
))) {
392 ur
.h
= hard(ua
.h
, ub
.h
);
393 if (unlikely(f64_is_inf(ur
))) {
394 float_raise(float_flag_overflow
, s
);
395 } else if (unlikely(fabs(ur
.h
) <= DBL_MIN
) && post(ua
, ub
)) {
401 return soft(ua
.s
, ub
.s
, s
);
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
408 static inline uint32_t extractFloat32Frac(float32 a
)
410 return float32_val(a
) & 0x007FFFFF;
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
417 static inline int extractFloat32Exp(float32 a
)
419 return (float32_val(a
) >> 23) & 0xFF;
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
426 static inline bool extractFloat32Sign(float32 a
)
428 return float32_val(a
) >> 31;
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
435 static inline uint64_t extractFloat64Frac(float64 a
)
437 return float64_val(a
) & UINT64_C(0x000FFFFFFFFFFFFF);
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
444 static inline int extractFloat64Exp(float64 a
)
446 return (float64_val(a
) >> 52) & 0x7FF;
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
453 static inline bool extractFloat64Sign(float64 a
)
455 return float64_val(a
) >> 63;
459 * Classify a floating point number. Everything above float_class_qnan
460 * is a NaN so cls >= float_class_qnan is any NaN.
463 typedef enum __attribute__ ((__packed__
)) {
464 float_class_unclassified
,
468 float_class_qnan
, /* all NaNs from here */
472 #define float_cmask(bit) (1u << (bit))
475 float_cmask_zero
= float_cmask(float_class_zero
),
476 float_cmask_normal
= float_cmask(float_class_normal
),
477 float_cmask_inf
= float_cmask(float_class_inf
),
478 float_cmask_qnan
= float_cmask(float_class_qnan
),
479 float_cmask_snan
= float_cmask(float_class_snan
),
481 float_cmask_infzero
= float_cmask_zero
| float_cmask_inf
,
482 float_cmask_anynan
= float_cmask_qnan
| float_cmask_snan
,
485 /* Flags for parts_minmax. */
487 /* Set for minimum; clear for maximum. */
489 /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
491 /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
495 /* Simple helpers for checking if, or what kind of, NaN we have */
496 static inline __attribute__((unused
)) bool is_nan(FloatClass c
)
498 return unlikely(c
>= float_class_qnan
);
501 static inline __attribute__((unused
)) bool is_snan(FloatClass c
)
503 return c
== float_class_snan
;
506 static inline __attribute__((unused
)) bool is_qnan(FloatClass c
)
508 return c
== float_class_qnan
;
512 * Structure holding all of the decomposed parts of a float.
513 * The exponent is unbiased and the fraction is normalized.
515 * The fraction words are stored in big-endian word ordering,
516 * so that truncation from a larger format to a smaller format
517 * can be done simply by ignoring subsequent elements.
525 /* Routines that know the structure may reference the singular name. */
528 * Routines expanded with multiple structures reference "hi" and "lo"
529 * depending on the operation. In FloatParts64, "hi" and "lo" are
530 * both the same word and aliased here.
550 uint64_t frac_hm
; /* high-middle */
551 uint64_t frac_lm
; /* low-middle */
555 /* These apply to the most significant word of each FloatPartsN. */
556 #define DECOMPOSED_BINARY_POINT 63
557 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
559 /* Structure holding all of the relevant parameters for a format.
560 * exp_size: the size of the exponent field
561 * exp_bias: the offset applied to the exponent field
562 * exp_max: the maximum normalised exponent
563 * frac_size: the size of the fraction field
564 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
565 * The following are computed based the size of fraction
566 * round_mask: bits below lsb which must be rounded
567 * The following optional modifiers are available:
568 * arm_althp: handle ARM Alternative Half Precision
580 /* Expand fields based on the size of exponent and fraction */
581 #define FLOAT_PARAMS_(E) \
583 .exp_bias = ((1 << E) - 1) >> 1, \
584 .exp_max = (1 << E) - 1
586 #define FLOAT_PARAMS(E, F) \
589 .frac_shift = (-F - 1) & 63, \
590 .round_mask = (1ull << ((-F - 1) & 63)) - 1
592 static const FloatFmt float16_params
= {
596 static const FloatFmt float16_params_ahp
= {
601 static const FloatFmt bfloat16_params
= {
605 static const FloatFmt float32_params
= {
609 static const FloatFmt float64_params
= {
613 static const FloatFmt float128_params
= {
614 FLOAT_PARAMS(15, 112)
617 #define FLOATX80_PARAMS(R) \
619 .frac_size = R == 64 ? 63 : R, \
621 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
623 static const FloatFmt floatx80_params
[3] = {
624 [floatx80_precision_s
] = { FLOATX80_PARAMS(23) },
625 [floatx80_precision_d
] = { FLOATX80_PARAMS(52) },
626 [floatx80_precision_x
] = { FLOATX80_PARAMS(64) },
629 /* Unpack a float to parts, but do not canonicalize. */
630 static void unpack_raw64(FloatParts64
*r
, const FloatFmt
*fmt
, uint64_t raw
)
632 const int f_size
= fmt
->frac_size
;
633 const int e_size
= fmt
->exp_size
;
635 *r
= (FloatParts64
) {
636 .cls
= float_class_unclassified
,
637 .sign
= extract64(raw
, f_size
+ e_size
, 1),
638 .exp
= extract64(raw
, f_size
, e_size
),
639 .frac
= extract64(raw
, 0, f_size
)
643 static inline void float16_unpack_raw(FloatParts64
*p
, float16 f
)
645 unpack_raw64(p
, &float16_params
, f
);
648 static inline void bfloat16_unpack_raw(FloatParts64
*p
, bfloat16 f
)
650 unpack_raw64(p
, &bfloat16_params
, f
);
653 static inline void float32_unpack_raw(FloatParts64
*p
, float32 f
)
655 unpack_raw64(p
, &float32_params
, f
);
658 static inline void float64_unpack_raw(FloatParts64
*p
, float64 f
)
660 unpack_raw64(p
, &float64_params
, f
);
/*
 * Unpack an 80-bit extended float without canonicalizing: sign and 15-bit
 * exponent come from f.high.  NOTE(review): the fraction initializer
 * (presumably .frac_hi = f.low) and closing braces are missing from this
 * mangled view — confirm against upstream.
 */
663 static void floatx80_unpack_raw(FloatParts128
*p
, floatx80 f
)
665 *p
= (FloatParts128
) {
666 .cls
= float_class_unclassified
,
667 .sign
= extract32(f
.high
, 15, 1),
668 .exp
= extract32(f
.high
, 0, 15),
/*
 * Unpack a float128 without canonicalizing: the high doubleword holds
 * sign, exponent, and the top fraction bits.  NOTE(review): the low
 * fraction initializer (presumably .frac_lo = f.low) and closing braces
 * are missing from this mangled view — confirm against upstream.
 */
673 static void float128_unpack_raw(FloatParts128
*p
, float128 f
)
675 const int f_size
= float128_params
.frac_size
- 64;
676 const int e_size
= float128_params
.exp_size
;
678 *p
= (FloatParts128
) {
679 .cls
= float_class_unclassified
,
680 .sign
= extract64(f
.high
, f_size
+ e_size
, 1),
681 .exp
= extract64(f
.high
, f_size
, e_size
),
682 .frac_hi
= extract64(f
.high
, 0, f_size
),
687 /* Pack a float from parts, but do not canonicalize. */
688 static uint64_t pack_raw64(const FloatParts64
*p
, const FloatFmt
*fmt
)
690 const int f_size
= fmt
->frac_size
;
691 const int e_size
= fmt
->exp_size
;
694 ret
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
695 ret
= deposit64(ret
, f_size
, e_size
, p
->exp
);
696 ret
= deposit64(ret
, 0, f_size
, p
->frac
);
700 static inline float16
float16_pack_raw(const FloatParts64
*p
)
702 return make_float16(pack_raw64(p
, &float16_params
));
705 static inline bfloat16
bfloat16_pack_raw(const FloatParts64
*p
)
707 return pack_raw64(p
, &bfloat16_params
);
710 static inline float32
float32_pack_raw(const FloatParts64
*p
)
712 return make_float32(pack_raw64(p
, &float32_params
));
715 static inline float64
float64_pack_raw(const FloatParts64
*p
)
717 return make_float64(pack_raw64(p
, &float64_params
));
720 static float128
float128_pack_raw(const FloatParts128
*p
)
722 const int f_size
= float128_params
.frac_size
- 64;
723 const int e_size
= float128_params
.exp_size
;
726 hi
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
727 hi
= deposit64(hi
, f_size
, e_size
, p
->exp
);
728 hi
= deposit64(hi
, 0, f_size
, p
->frac_hi
);
729 return make_float128(hi
, p
->frac_lo
);
732 /*----------------------------------------------------------------------------
733 | Functions and definitions to determine: (1) whether tininess for underflow
734 | is detected before or after rounding by default, (2) what (if anything)
735 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
736 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
737 | are propagated from function inputs to output. These details are target-
739 *----------------------------------------------------------------------------*/
740 #include "softfloat-specialize.c.inc"
742 #define PARTS_GENERIC_64_128(NAME, P) \
743 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
745 #define PARTS_GENERIC_64_128_256(NAME, P) \
746 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
747 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
749 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
750 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
752 static void parts64_return_nan(FloatParts64
*a
, float_status
*s
);
753 static void parts128_return_nan(FloatParts128
*a
, float_status
*s
);
755 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
757 static FloatParts64
*parts64_pick_nan(FloatParts64
*a
, FloatParts64
*b
,
759 static FloatParts128
*parts128_pick_nan(FloatParts128
*a
, FloatParts128
*b
,
762 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
764 static FloatParts64
*parts64_pick_nan_muladd(FloatParts64
*a
, FloatParts64
*b
,
765 FloatParts64
*c
, float_status
*s
,
766 int ab_mask
, int abc_mask
);
767 static FloatParts128
*parts128_pick_nan_muladd(FloatParts128
*a
,
771 int ab_mask
, int abc_mask
);
773 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
774 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
776 static void parts64_canonicalize(FloatParts64
*p
, float_status
*status
,
777 const FloatFmt
*fmt
);
778 static void parts128_canonicalize(FloatParts128
*p
, float_status
*status
,
779 const FloatFmt
*fmt
);
781 #define parts_canonicalize(A, S, F) \
782 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
784 static void parts64_uncanon_normal(FloatParts64
*p
, float_status
*status
,
785 const FloatFmt
*fmt
);
786 static void parts128_uncanon_normal(FloatParts128
*p
, float_status
*status
,
787 const FloatFmt
*fmt
);
789 #define parts_uncanon_normal(A, S, F) \
790 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
792 static void parts64_uncanon(FloatParts64
*p
, float_status
*status
,
793 const FloatFmt
*fmt
);
794 static void parts128_uncanon(FloatParts128
*p
, float_status
*status
,
795 const FloatFmt
*fmt
);
797 #define parts_uncanon(A, S, F) \
798 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
800 static void parts64_add_normal(FloatParts64
*a
, FloatParts64
*b
);
801 static void parts128_add_normal(FloatParts128
*a
, FloatParts128
*b
);
802 static void parts256_add_normal(FloatParts256
*a
, FloatParts256
*b
);
804 #define parts_add_normal(A, B) \
805 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
807 static bool parts64_sub_normal(FloatParts64
*a
, FloatParts64
*b
);
808 static bool parts128_sub_normal(FloatParts128
*a
, FloatParts128
*b
);
809 static bool parts256_sub_normal(FloatParts256
*a
, FloatParts256
*b
);
811 #define parts_sub_normal(A, B) \
812 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
814 static FloatParts64
*parts64_addsub(FloatParts64
*a
, FloatParts64
*b
,
815 float_status
*s
, bool subtract
);
816 static FloatParts128
*parts128_addsub(FloatParts128
*a
, FloatParts128
*b
,
817 float_status
*s
, bool subtract
);
819 #define parts_addsub(A, B, S, Z) \
820 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
822 static FloatParts64
*parts64_mul(FloatParts64
*a
, FloatParts64
*b
,
824 static FloatParts128
*parts128_mul(FloatParts128
*a
, FloatParts128
*b
,
827 #define parts_mul(A, B, S) \
828 PARTS_GENERIC_64_128(mul, A)(A, B, S)
830 static FloatParts64
*parts64_muladd(FloatParts64
*a
, FloatParts64
*b
,
831 FloatParts64
*c
, int flags
,
833 static FloatParts128
*parts128_muladd(FloatParts128
*a
, FloatParts128
*b
,
834 FloatParts128
*c
, int flags
,
837 #define parts_muladd(A, B, C, Z, S) \
838 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
840 static FloatParts64
*parts64_div(FloatParts64
*a
, FloatParts64
*b
,
842 static FloatParts128
*parts128_div(FloatParts128
*a
, FloatParts128
*b
,
845 #define parts_div(A, B, S) \
846 PARTS_GENERIC_64_128(div, A)(A, B, S)
848 static void parts64_sqrt(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
849 static void parts128_sqrt(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
851 #define parts_sqrt(A, S, F) \
852 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
854 static bool parts64_round_to_int_normal(FloatParts64
*a
, FloatRoundMode rm
,
855 int scale
, int frac_size
);
856 static bool parts128_round_to_int_normal(FloatParts128
*a
, FloatRoundMode r
,
857 int scale
, int frac_size
);
859 #define parts_round_to_int_normal(A, R, C, F) \
860 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
862 static void parts64_round_to_int(FloatParts64
*a
, FloatRoundMode rm
,
863 int scale
, float_status
*s
,
864 const FloatFmt
*fmt
);
865 static void parts128_round_to_int(FloatParts128
*a
, FloatRoundMode r
,
866 int scale
, float_status
*s
,
867 const FloatFmt
*fmt
);
869 #define parts_round_to_int(A, R, C, S, F) \
870 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
872 static int64_t parts64_float_to_sint(FloatParts64
*p
, FloatRoundMode rmode
,
873 int scale
, int64_t min
, int64_t max
,
875 static int64_t parts128_float_to_sint(FloatParts128
*p
, FloatRoundMode rmode
,
876 int scale
, int64_t min
, int64_t max
,
879 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
880 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
882 static uint64_t parts64_float_to_uint(FloatParts64
*p
, FloatRoundMode rmode
,
883 int scale
, uint64_t max
,
885 static uint64_t parts128_float_to_uint(FloatParts128
*p
, FloatRoundMode rmode
,
886 int scale
, uint64_t max
,
889 #define parts_float_to_uint(P, R, Z, M, S) \
890 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
892 static void parts64_sint_to_float(FloatParts64
*p
, int64_t a
,
893 int scale
, float_status
*s
);
894 static void parts128_sint_to_float(FloatParts128
*p
, int64_t a
,
895 int scale
, float_status
*s
);
897 #define parts_sint_to_float(P, I, Z, S) \
898 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
900 static void parts64_uint_to_float(FloatParts64
*p
, uint64_t a
,
901 int scale
, float_status
*s
);
902 static void parts128_uint_to_float(FloatParts128
*p
, uint64_t a
,
903 int scale
, float_status
*s
);
905 #define parts_uint_to_float(P, I, Z, S) \
906 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
908 static FloatParts64
*parts64_minmax(FloatParts64
*a
, FloatParts64
*b
,
909 float_status
*s
, int flags
);
910 static FloatParts128
*parts128_minmax(FloatParts128
*a
, FloatParts128
*b
,
911 float_status
*s
, int flags
);
913 #define parts_minmax(A, B, S, F) \
914 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
916 static int parts64_compare(FloatParts64
*a
, FloatParts64
*b
,
917 float_status
*s
, bool q
);
918 static int parts128_compare(FloatParts128
*a
, FloatParts128
*b
,
919 float_status
*s
, bool q
);
921 #define parts_compare(A, B, S, Q) \
922 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
924 static void parts64_scalbn(FloatParts64
*a
, int n
, float_status
*s
);
925 static void parts128_scalbn(FloatParts128
*a
, int n
, float_status
*s
);
927 #define parts_scalbn(A, N, S) \
928 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
931 * Helper functions for softfloat-parts.c.inc, per-size operations.
934 #define FRAC_GENERIC_64_128(NAME, P) \
935 QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
937 #define FRAC_GENERIC_64_128_256(NAME, P) \
938 QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
939 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
941 static bool frac64_add(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
943 return uadd64_overflow(a
->frac
, b
->frac
, &r
->frac
);
/*
 * Add the 128-bit fractions, chaining the carry through `c'.
 * NOTE(review): the declaration of the carry variable and the final
 * `return c;' are missing from this mangled view — confirm upstream.
 */
946 static bool frac128_add(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
949 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
950 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
/*
 * Add the 256-bit fractions word by word, low to high, chaining the
 * carry through `c'.  NOTE(review): carry declaration and final return
 * are missing from this mangled view — confirm upstream.
 */
954 static bool frac256_add(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
957 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
958 r
->frac_lm
= uadd64_carry(a
->frac_lm
, b
->frac_lm
, &c
);
959 r
->frac_hm
= uadd64_carry(a
->frac_hm
, b
->frac_hm
, &c
);
960 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
964 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
966 static bool frac64_addi(FloatParts64
*r
, FloatParts64
*a
, uint64_t c
)
968 return uadd64_overflow(a
->frac
, c
, &r
->frac
);
971 static bool frac128_addi(FloatParts128
*r
, FloatParts128
*a
, uint64_t c
)
973 c
= uadd64_overflow(a
->frac_lo
, c
, &r
->frac_lo
);
974 return uadd64_overflow(a
->frac_hi
, c
, &r
->frac_hi
);
977 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
/* Set all fraction bits.  NOTE(review): the body is missing from this
 * mangled view (presumably a->frac = -1; — confirm upstream). */
979 static void frac64_allones(FloatParts64
*a
)
984 static void frac128_allones(FloatParts128
*a
)
986 a
->frac_hi
= a
->frac_lo
= -1;
989 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
991 static int frac64_cmp(FloatParts64
*a
, FloatParts64
*b
)
993 return a
->frac
== b
->frac
? 0 : a
->frac
< b
->frac
? -1 : 1;
/*
 * Three-way comparison of 128-bit fractions: compare high words first,
 * then low words.  NOTE(review): the equality tests and early return 0
 * are missing from this mangled view — confirm against upstream.
 */
996 static int frac128_cmp(FloatParts128
*a
, FloatParts128
*b
)
998 uint64_t ta
= a
->frac_hi
, tb
= b
->frac_hi
;
1000 ta
= a
->frac_lo
, tb
= b
->frac_lo
;
1005 return ta
< tb
? -1 : 1;
1008 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
/* Clear the fraction.  NOTE(review): the body is missing from this
 * mangled view (presumably a->frac = 0; — confirm upstream). */
1010 static void frac64_clear(FloatParts64
*a
)
1015 static void frac128_clear(FloatParts128
*a
)
1017 a
->frac_hi
= a
->frac_lo
= 0;
1020 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
1022 static bool frac64_div(FloatParts64
*a
, FloatParts64
*b
)
1024 uint64_t n1
, n0
, r
, q
;
1028 * We want a 2*N / N-bit division to produce exactly an N-bit
1029 * result, so that we do not lose any precision and so that we
1030 * do not have to renormalize afterward. If A.frac < B.frac,
1031 * then division would produce an (N-1)-bit result; shift A left
1032 * by one to produce the an N-bit result, and return true to
1033 * decrement the exponent to match.
1035 * The udiv_qrnnd algorithm that we're using requires normalization,
1036 * i.e. the msb of the denominator must be set, which is already true.
1038 ret
= a
->frac
< b
->frac
;
1046 q
= udiv_qrnnd(&r
, n0
, n1
, b
->frac
);
1048 /* Set lsb if there is a remainder, to set inexact. */
1049 a
->frac
= q
| (r
!= 0);
1054 static bool frac128_div(FloatParts128
*a
, FloatParts128
*b
)
1056 uint64_t q0
, q1
, a0
, a1
, b0
, b1
;
1057 uint64_t r0
, r1
, r2
, r3
, t0
, t1
, t2
, t3
;
1060 a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1061 b0
= b
->frac_hi
, b1
= b
->frac_lo
;
1063 ret
= lt128(a0
, a1
, b0
, b1
);
1065 a1
= shr_double(a0
, a1
, 1);
1069 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1070 q0
= estimateDiv128To64(a0
, a1
, b0
);
1073 * Estimate is high because B1 was not included (unless B1 == 0).
1074 * Reduce quotient and increase remainder until remainder is non-negative.
1075 * This loop will execute 0 to 2 times.
1077 mul128By64To192(b0
, b1
, q0
, &t0
, &t1
, &t2
);
1078 sub192(a0
, a1
, 0, t0
, t1
, t2
, &r0
, &r1
, &r2
);
1081 add192(r0
, r1
, r2
, 0, b0
, b1
, &r0
, &r1
, &r2
);
1084 /* Repeat using the remainder, producing a second word of quotient. */
1085 q1
= estimateDiv128To64(r1
, r2
, b0
);
1086 mul128By64To192(b0
, b1
, q1
, &t1
, &t2
, &t3
);
1087 sub192(r1
, r2
, 0, t1
, t2
, t3
, &r1
, &r2
, &r3
);
1090 add192(r1
, r2
, r3
, 0, b0
, b1
, &r1
, &r2
, &r3
);
1093 /* Any remainder indicates inexact; set sticky bit. */
1094 q1
|= (r2
| r3
) != 0;
1101 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1103 static bool frac64_eqz(FloatParts64
*a
)
1105 return a
->frac
== 0;
1108 static bool frac128_eqz(FloatParts128
*a
)
1110 return (a
->frac_hi
| a
->frac_lo
) == 0;
1113 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1115 static void frac64_mulw(FloatParts128
*r
, FloatParts64
*a
, FloatParts64
*b
)
1117 mulu64(&r
->frac_lo
, &r
->frac_hi
, a
->frac
, b
->frac
);
1120 static void frac128_mulw(FloatParts256
*r
, FloatParts128
*a
, FloatParts128
*b
)
1122 mul128To256(a
->frac_hi
, a
->frac_lo
, b
->frac_hi
, b
->frac_lo
,
1123 &r
->frac_hi
, &r
->frac_hm
, &r
->frac_lm
, &r
->frac_lo
);
1126 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1128 static void frac64_neg(FloatParts64
*a
)
1133 static void frac128_neg(FloatParts128
*a
)
1136 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1137 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1140 static void frac256_neg(FloatParts256
*a
)
1143 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1144 a
->frac_lm
= usub64_borrow(0, a
->frac_lm
, &c
);
1145 a
->frac_hm
= usub64_borrow(0, a
->frac_hm
, &c
);
1146 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1149 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1151 static int frac64_normalize(FloatParts64
*a
)
1154 int shift
= clz64(a
->frac
);
1161 static int frac128_normalize(FloatParts128
*a
)
1164 int shl
= clz64(a
->frac_hi
);
1165 a
->frac_hi
= shl_double(a
->frac_hi
, a
->frac_lo
, shl
);
1168 } else if (a
->frac_lo
) {
1169 int shl
= clz64(a
->frac_lo
);
1170 a
->frac_hi
= a
->frac_lo
<< shl
;
1177 static int frac256_normalize(FloatParts256
*a
)
1179 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1180 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1192 a0
= a1
, a1
= a2
, a2
= a3
, a3
= 0;
1195 a0
= a2
, a1
= a3
, a2
= 0, a3
= 0;
1198 a0
= a3
, a1
= 0, a2
= 0, a3
= 0;
1201 a0
= 0, a1
= 0, a2
= 0, a3
= 0;
1211 a0
= shl_double(a0
, a1
, shl
);
1212 a1
= shl_double(a1
, a2
, shl
);
1213 a2
= shl_double(a2
, a3
, shl
);
1224 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1226 static void frac64_shl(FloatParts64
*a
, int c
)
1231 static void frac128_shl(FloatParts128
*a
, int c
)
1233 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1241 a0
= shl_double(a0
, a1
, c
);
1249 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1251 static void frac64_shr(FloatParts64
*a
, int c
)
1256 static void frac128_shr(FloatParts128
*a
, int c
)
1258 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1266 a1
= shr_double(a0
, a1
, c
);
1274 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1276 static void frac64_shrjam(FloatParts64
*a
, int c
)
1278 uint64_t a0
= a
->frac
;
1280 if (likely(c
!= 0)) {
1281 if (likely(c
< 64)) {
1282 a0
= (a0
>> c
) | (shr_double(a0
, 0, c
) != 0);
1290 static void frac128_shrjam(FloatParts128
*a
, int c
)
1292 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1293 uint64_t sticky
= 0;
1295 if (unlikely(c
== 0)) {
1297 } else if (likely(c
< 64)) {
1299 } else if (likely(c
< 128)) {
1313 sticky
|= shr_double(a1
, 0, c
);
1314 a1
= shr_double(a0
, a1
, c
);
1318 a
->frac_lo
= a1
| (sticky
!= 0);
1322 static void frac256_shrjam(FloatParts256
*a
, int c
)
1324 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1325 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1326 uint64_t sticky
= 0;
1328 if (unlikely(c
== 0)) {
1330 } else if (likely(c
< 64)) {
1332 } else if (likely(c
< 256)) {
1333 if (unlikely(c
& 128)) {
1335 a3
= a1
, a2
= a0
, a1
= 0, a0
= 0;
1337 if (unlikely(c
& 64)) {
1339 a3
= a2
, a2
= a1
, a1
= a0
, a0
= 0;
1346 sticky
= a0
| a1
| a2
| a3
;
1347 a0
= a1
= a2
= a3
= 0;
1351 sticky
|= shr_double(a3
, 0, c
);
1352 a3
= shr_double(a2
, a3
, c
);
1353 a2
= shr_double(a1
, a2
, c
);
1354 a1
= shr_double(a0
, a1
, c
);
1358 a
->frac_lo
= a3
| (sticky
!= 0);
1364 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1366 static bool frac64_sub(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
1368 return usub64_overflow(a
->frac
, b
->frac
, &r
->frac
);
1371 static bool frac128_sub(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
1374 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1375 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1379 static bool frac256_sub(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
1382 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1383 r
->frac_lm
= usub64_borrow(a
->frac_lm
, b
->frac_lm
, &c
);
1384 r
->frac_hm
= usub64_borrow(a
->frac_hm
, b
->frac_hm
, &c
);
1385 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1389 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1391 static void frac64_truncjam(FloatParts64
*r
, FloatParts128
*a
)
1393 r
->frac
= a
->frac_hi
| (a
->frac_lo
!= 0);
1396 static void frac128_truncjam(FloatParts128
*r
, FloatParts256
*a
)
1398 r
->frac_hi
= a
->frac_hi
;
1399 r
->frac_lo
= a
->frac_hm
| ((a
->frac_lm
| a
->frac_lo
) != 0);
1402 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1404 static void frac64_widen(FloatParts128
*r
, FloatParts64
*a
)
1406 r
->frac_hi
= a
->frac
;
1410 static void frac128_widen(FloatParts256
*r
, FloatParts128
*a
)
1412 r
->frac_hi
= a
->frac_hi
;
1413 r
->frac_hm
= a
->frac_lo
;
1418 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
1422 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1423 * and thus MIT licenced.
/* Table data unchanged; restored the split declaration and closing brace. */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
1444 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1445 #define FloatPartsN glue(FloatParts,N)
1446 #define FloatPartsW glue(FloatParts,W)
1451 #include "softfloat-parts-addsub.c.inc"
1452 #include "softfloat-parts.c.inc"
1459 #include "softfloat-parts-addsub.c.inc"
1460 #include "softfloat-parts.c.inc"
1466 #include "softfloat-parts-addsub.c.inc"
1475 * Pack/unpack routines with a specific FloatFmt.
1478 static void float16a_unpack_canonical(FloatParts64
*p
, float16 f
,
1479 float_status
*s
, const FloatFmt
*params
)
1481 float16_unpack_raw(p
, f
);
1482 parts_canonicalize(p
, s
, params
);
1485 static void float16_unpack_canonical(FloatParts64
*p
, float16 f
,
1488 float16a_unpack_canonical(p
, f
, s
, &float16_params
);
1491 static void bfloat16_unpack_canonical(FloatParts64
*p
, bfloat16 f
,
1494 bfloat16_unpack_raw(p
, f
);
1495 parts_canonicalize(p
, s
, &bfloat16_params
);
1498 static float16
float16a_round_pack_canonical(FloatParts64
*p
,
1500 const FloatFmt
*params
)
1502 parts_uncanon(p
, s
, params
);
1503 return float16_pack_raw(p
);
1506 static float16
float16_round_pack_canonical(FloatParts64
*p
,
1509 return float16a_round_pack_canonical(p
, s
, &float16_params
);
1512 static bfloat16
bfloat16_round_pack_canonical(FloatParts64
*p
,
1515 parts_uncanon(p
, s
, &bfloat16_params
);
1516 return bfloat16_pack_raw(p
);
1519 static void float32_unpack_canonical(FloatParts64
*p
, float32 f
,
1522 float32_unpack_raw(p
, f
);
1523 parts_canonicalize(p
, s
, &float32_params
);
1526 static float32
float32_round_pack_canonical(FloatParts64
*p
,
1529 parts_uncanon(p
, s
, &float32_params
);
1530 return float32_pack_raw(p
);
1533 static void float64_unpack_canonical(FloatParts64
*p
, float64 f
,
1536 float64_unpack_raw(p
, f
);
1537 parts_canonicalize(p
, s
, &float64_params
);
1540 static float64
float64_round_pack_canonical(FloatParts64
*p
,
1543 parts_uncanon(p
, s
, &float64_params
);
1544 return float64_pack_raw(p
);
1547 static void float128_unpack_canonical(FloatParts128
*p
, float128 f
,
1550 float128_unpack_raw(p
, f
);
1551 parts_canonicalize(p
, s
, &float128_params
);
1554 static float128
float128_round_pack_canonical(FloatParts128
*p
,
1557 parts_uncanon(p
, s
, &float128_params
);
1558 return float128_pack_raw(p
);
1561 /* Returns false if the encoding is invalid. */
1562 static bool floatx80_unpack_canonical(FloatParts128
*p
, floatx80 f
,
1565 /* Ensure rounding precision is set before beginning. */
1566 switch (s
->floatx80_rounding_precision
) {
1567 case floatx80_precision_x
:
1568 case floatx80_precision_d
:
1569 case floatx80_precision_s
:
1572 g_assert_not_reached();
1575 if (unlikely(floatx80_invalid_encoding(f
))) {
1576 float_raise(float_flag_invalid
, s
);
1580 floatx80_unpack_raw(p
, f
);
1582 if (likely(p
->exp
!= floatx80_params
[floatx80_precision_x
].exp_max
)) {
1583 parts_canonicalize(p
, s
, &floatx80_params
[floatx80_precision_x
]);
1585 /* The explicit integer bit is ignored, after invalid checks. */
1586 p
->frac_hi
&= MAKE_64BIT_MASK(0, 63);
1587 p
->cls
= (p
->frac_hi
== 0 ? float_class_inf
1588 : parts_is_snan_frac(p
->frac_hi
, s
)
1589 ? float_class_snan
: float_class_qnan
);
1594 static floatx80
floatx80_round_pack_canonical(FloatParts128
*p
,
1597 const FloatFmt
*fmt
= &floatx80_params
[s
->floatx80_rounding_precision
];
1602 case float_class_normal
:
1603 if (s
->floatx80_rounding_precision
== floatx80_precision_x
) {
1604 parts_uncanon_normal(p
, s
, fmt
);
1612 frac_truncjam(&p64
, p
);
1613 parts_uncanon_normal(&p64
, s
, fmt
);
1617 if (exp
!= fmt
->exp_max
) {
1620 /* rounded to inf -- fall through to set frac correctly */
1622 case float_class_inf
:
1623 /* x86 and m68k differ in the setting of the integer bit. */
1624 frac
= floatx80_infinity_low
;
1628 case float_class_zero
:
1633 case float_class_snan
:
1634 case float_class_qnan
:
1635 /* NaNs have the integer bit set. */
1636 frac
= p
->frac_hi
| (1ull << 63);
1641 g_assert_not_reached();
1644 return packFloatx80(p
->sign
, exp
, frac
);
1648 * Addition and subtraction
1651 static float16 QEMU_FLATTEN
1652 float16_addsub(float16 a
, float16 b
, float_status
*status
, bool subtract
)
1654 FloatParts64 pa
, pb
, *pr
;
1656 float16_unpack_canonical(&pa
, a
, status
);
1657 float16_unpack_canonical(&pb
, b
, status
);
1658 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1660 return float16_round_pack_canonical(pr
, status
);
1663 float16
float16_add(float16 a
, float16 b
, float_status
*status
)
1665 return float16_addsub(a
, b
, status
, false);
1668 float16
float16_sub(float16 a
, float16 b
, float_status
*status
)
1670 return float16_addsub(a
, b
, status
, true);
1673 static float32 QEMU_SOFTFLOAT_ATTR
1674 soft_f32_addsub(float32 a
, float32 b
, float_status
*status
, bool subtract
)
1676 FloatParts64 pa
, pb
, *pr
;
1678 float32_unpack_canonical(&pa
, a
, status
);
1679 float32_unpack_canonical(&pb
, b
, status
);
1680 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1682 return float32_round_pack_canonical(pr
, status
);
1685 static float32
soft_f32_add(float32 a
, float32 b
, float_status
*status
)
1687 return soft_f32_addsub(a
, b
, status
, false);
1690 static float32
soft_f32_sub(float32 a
, float32 b
, float_status
*status
)
1692 return soft_f32_addsub(a
, b
, status
, true);
1695 static float64 QEMU_SOFTFLOAT_ATTR
1696 soft_f64_addsub(float64 a
, float64 b
, float_status
*status
, bool subtract
)
1698 FloatParts64 pa
, pb
, *pr
;
1700 float64_unpack_canonical(&pa
, a
, status
);
1701 float64_unpack_canonical(&pb
, b
, status
);
1702 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1704 return float64_round_pack_canonical(pr
, status
);
1707 static float64
soft_f64_add(float64 a
, float64 b
, float_status
*status
)
1709 return soft_f64_addsub(a
, b
, status
, false);
1712 static float64
soft_f64_sub(float64 a
, float64 b
, float_status
*status
)
1714 return soft_f64_addsub(a
, b
, status
, true);
/* Host-FPU fast path for float32 addition. */
static float hard_f32_add(float a, float b)
{
    return a + b;
}
/* Host-FPU fast path for float32 subtraction. */
static float hard_f32_sub(float a, float b)
{
    return a - b;
}
/* Host-FPU fast path for float64 addition. */
static double hard_f64_add(double a, double b)
{
    return a + b;
}
/* Host-FPU fast path for float64 subtraction. */
static double hard_f64_sub(double a, double b)
{
    return a - b;
}
1737 static bool f32_addsubmul_post(union_float32 a
, union_float32 b
)
1739 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1740 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1742 return !(float32_is_zero(a
.s
) && float32_is_zero(b
.s
));
1745 static bool f64_addsubmul_post(union_float64 a
, union_float64 b
)
1747 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1748 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1750 return !(float64_is_zero(a
.s
) && float64_is_zero(b
.s
));
1754 static float32
float32_addsub(float32 a
, float32 b
, float_status
*s
,
1755 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
)
1757 return float32_gen2(a
, b
, s
, hard
, soft
,
1758 f32_is_zon2
, f32_addsubmul_post
);
1761 static float64
float64_addsub(float64 a
, float64 b
, float_status
*s
,
1762 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
)
1764 return float64_gen2(a
, b
, s
, hard
, soft
,
1765 f64_is_zon2
, f64_addsubmul_post
);
1768 float32 QEMU_FLATTEN
1769 float32_add(float32 a
, float32 b
, float_status
*s
)
1771 return float32_addsub(a
, b
, s
, hard_f32_add
, soft_f32_add
);
1774 float32 QEMU_FLATTEN
1775 float32_sub(float32 a
, float32 b
, float_status
*s
)
1777 return float32_addsub(a
, b
, s
, hard_f32_sub
, soft_f32_sub
);
1780 float64 QEMU_FLATTEN
1781 float64_add(float64 a
, float64 b
, float_status
*s
)
1783 return float64_addsub(a
, b
, s
, hard_f64_add
, soft_f64_add
);
1786 float64 QEMU_FLATTEN
1787 float64_sub(float64 a
, float64 b
, float_status
*s
)
1789 return float64_addsub(a
, b
, s
, hard_f64_sub
, soft_f64_sub
);
1792 static bfloat16 QEMU_FLATTEN
1793 bfloat16_addsub(bfloat16 a
, bfloat16 b
, float_status
*status
, bool subtract
)
1795 FloatParts64 pa
, pb
, *pr
;
1797 bfloat16_unpack_canonical(&pa
, a
, status
);
1798 bfloat16_unpack_canonical(&pb
, b
, status
);
1799 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1801 return bfloat16_round_pack_canonical(pr
, status
);
1804 bfloat16
bfloat16_add(bfloat16 a
, bfloat16 b
, float_status
*status
)
1806 return bfloat16_addsub(a
, b
, status
, false);
1809 bfloat16
bfloat16_sub(bfloat16 a
, bfloat16 b
, float_status
*status
)
1811 return bfloat16_addsub(a
, b
, status
, true);
1814 static float128 QEMU_FLATTEN
1815 float128_addsub(float128 a
, float128 b
, float_status
*status
, bool subtract
)
1817 FloatParts128 pa
, pb
, *pr
;
1819 float128_unpack_canonical(&pa
, a
, status
);
1820 float128_unpack_canonical(&pb
, b
, status
);
1821 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1823 return float128_round_pack_canonical(pr
, status
);
1826 float128
float128_add(float128 a
, float128 b
, float_status
*status
)
1828 return float128_addsub(a
, b
, status
, false);
1831 float128
float128_sub(float128 a
, float128 b
, float_status
*status
)
1833 return float128_addsub(a
, b
, status
, true);
1836 static floatx80 QEMU_FLATTEN
1837 floatx80_addsub(floatx80 a
, floatx80 b
, float_status
*status
, bool subtract
)
1839 FloatParts128 pa
, pb
, *pr
;
1841 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
1842 !floatx80_unpack_canonical(&pb
, b
, status
)) {
1843 return floatx80_default_nan(status
);
1846 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1847 return floatx80_round_pack_canonical(pr
, status
);
1850 floatx80
floatx80_add(floatx80 a
, floatx80 b
, float_status
*status
)
1852 return floatx80_addsub(a
, b
, status
, false);
1855 floatx80
floatx80_sub(floatx80 a
, floatx80 b
, float_status
*status
)
1857 return floatx80_addsub(a
, b
, status
, true);
1864 float16 QEMU_FLATTEN
float16_mul(float16 a
, float16 b
, float_status
*status
)
1866 FloatParts64 pa
, pb
, *pr
;
1868 float16_unpack_canonical(&pa
, a
, status
);
1869 float16_unpack_canonical(&pb
, b
, status
);
1870 pr
= parts_mul(&pa
, &pb
, status
);
1872 return float16_round_pack_canonical(pr
, status
);
1875 static float32 QEMU_SOFTFLOAT_ATTR
1876 soft_f32_mul(float32 a
, float32 b
, float_status
*status
)
1878 FloatParts64 pa
, pb
, *pr
;
1880 float32_unpack_canonical(&pa
, a
, status
);
1881 float32_unpack_canonical(&pb
, b
, status
);
1882 pr
= parts_mul(&pa
, &pb
, status
);
1884 return float32_round_pack_canonical(pr
, status
);
1887 static float64 QEMU_SOFTFLOAT_ATTR
1888 soft_f64_mul(float64 a
, float64 b
, float_status
*status
)
1890 FloatParts64 pa
, pb
, *pr
;
1892 float64_unpack_canonical(&pa
, a
, status
);
1893 float64_unpack_canonical(&pb
, b
, status
);
1894 pr
= parts_mul(&pa
, &pb
, status
);
1896 return float64_round_pack_canonical(pr
, status
);
/* Host-FPU fast path for float32 multiplication. */
static float hard_f32_mul(float a, float b)
{
    return a * b;
}
/* Host-FPU fast path for float64 multiplication. */
static double hard_f64_mul(double a, double b)
{
    return a * b;
}
1909 float32 QEMU_FLATTEN
1910 float32_mul(float32 a
, float32 b
, float_status
*s
)
1912 return float32_gen2(a
, b
, s
, hard_f32_mul
, soft_f32_mul
,
1913 f32_is_zon2
, f32_addsubmul_post
);
1916 float64 QEMU_FLATTEN
1917 float64_mul(float64 a
, float64 b
, float_status
*s
)
1919 return float64_gen2(a
, b
, s
, hard_f64_mul
, soft_f64_mul
,
1920 f64_is_zon2
, f64_addsubmul_post
);
1923 bfloat16 QEMU_FLATTEN
1924 bfloat16_mul(bfloat16 a
, bfloat16 b
, float_status
*status
)
1926 FloatParts64 pa
, pb
, *pr
;
1928 bfloat16_unpack_canonical(&pa
, a
, status
);
1929 bfloat16_unpack_canonical(&pb
, b
, status
);
1930 pr
= parts_mul(&pa
, &pb
, status
);
1932 return bfloat16_round_pack_canonical(pr
, status
);
1935 float128 QEMU_FLATTEN
1936 float128_mul(float128 a
, float128 b
, float_status
*status
)
1938 FloatParts128 pa
, pb
, *pr
;
1940 float128_unpack_canonical(&pa
, a
, status
);
1941 float128_unpack_canonical(&pb
, b
, status
);
1942 pr
= parts_mul(&pa
, &pb
, status
);
1944 return float128_round_pack_canonical(pr
, status
);
1947 floatx80 QEMU_FLATTEN
1948 floatx80_mul(floatx80 a
, floatx80 b
, float_status
*status
)
1950 FloatParts128 pa
, pb
, *pr
;
1952 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
1953 !floatx80_unpack_canonical(&pb
, b
, status
)) {
1954 return floatx80_default_nan(status
);
1957 pr
= parts_mul(&pa
, &pb
, status
);
1958 return floatx80_round_pack_canonical(pr
, status
);
1962 * Fused multiply-add
1965 float16 QEMU_FLATTEN
float16_muladd(float16 a
, float16 b
, float16 c
,
1966 int flags
, float_status
*status
)
1968 FloatParts64 pa
, pb
, pc
, *pr
;
1970 float16_unpack_canonical(&pa
, a
, status
);
1971 float16_unpack_canonical(&pb
, b
, status
);
1972 float16_unpack_canonical(&pc
, c
, status
);
1973 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1975 return float16_round_pack_canonical(pr
, status
);
1978 static float32 QEMU_SOFTFLOAT_ATTR
1979 soft_f32_muladd(float32 a
, float32 b
, float32 c
, int flags
,
1980 float_status
*status
)
1982 FloatParts64 pa
, pb
, pc
, *pr
;
1984 float32_unpack_canonical(&pa
, a
, status
);
1985 float32_unpack_canonical(&pb
, b
, status
);
1986 float32_unpack_canonical(&pc
, c
, status
);
1987 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1989 return float32_round_pack_canonical(pr
, status
);
1992 static float64 QEMU_SOFTFLOAT_ATTR
1993 soft_f64_muladd(float64 a
, float64 b
, float64 c
, int flags
,
1994 float_status
*status
)
1996 FloatParts64 pa
, pb
, pc
, *pr
;
1998 float64_unpack_canonical(&pa
, a
, status
);
1999 float64_unpack_canonical(&pb
, b
, status
);
2000 float64_unpack_canonical(&pc
, c
, status
);
2001 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2003 return float64_round_pack_canonical(pr
, status
);
/* When set, the hardfloat muladd fast path is bypassed (testing knob). */
static bool force_soft_fma;
2008 float32 QEMU_FLATTEN
2009 float32_muladd(float32 xa
, float32 xb
, float32 xc
, int flags
, float_status
*s
)
2011 union_float32 ua
, ub
, uc
, ur
;
2017 if (unlikely(!can_use_fpu(s
))) {
2020 if (unlikely(flags
& float_muladd_halve_result
)) {
2024 float32_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2025 if (unlikely(!f32_is_zon3(ua
, ub
, uc
))) {
2029 if (unlikely(force_soft_fma
)) {
2034 * When (a || b) == 0, there's no need to check for under/over flow,
2035 * since we know the addend is (normal || 0) and the product is 0.
2037 if (float32_is_zero(ua
.s
) || float32_is_zero(ub
.s
)) {
2041 prod_sign
= float32_is_neg(ua
.s
) ^ float32_is_neg(ub
.s
);
2042 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2043 up
.s
= float32_set_sign(float32_zero
, prod_sign
);
2045 if (flags
& float_muladd_negate_c
) {
2050 union_float32 ua_orig
= ua
;
2051 union_float32 uc_orig
= uc
;
2053 if (flags
& float_muladd_negate_product
) {
2056 if (flags
& float_muladd_negate_c
) {
2060 ur
.h
= fmaf(ua
.h
, ub
.h
, uc
.h
);
2062 if (unlikely(f32_is_inf(ur
))) {
2063 float_raise(float_flag_overflow
, s
);
2064 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
)) {
2070 if (flags
& float_muladd_negate_result
) {
2071 return float32_chs(ur
.s
);
2076 return soft_f32_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2079 float64 QEMU_FLATTEN
2080 float64_muladd(float64 xa
, float64 xb
, float64 xc
, int flags
, float_status
*s
)
2082 union_float64 ua
, ub
, uc
, ur
;
2088 if (unlikely(!can_use_fpu(s
))) {
2091 if (unlikely(flags
& float_muladd_halve_result
)) {
2095 float64_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2096 if (unlikely(!f64_is_zon3(ua
, ub
, uc
))) {
2100 if (unlikely(force_soft_fma
)) {
2105 * When (a || b) == 0, there's no need to check for under/over flow,
2106 * since we know the addend is (normal || 0) and the product is 0.
2108 if (float64_is_zero(ua
.s
) || float64_is_zero(ub
.s
)) {
2112 prod_sign
= float64_is_neg(ua
.s
) ^ float64_is_neg(ub
.s
);
2113 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2114 up
.s
= float64_set_sign(float64_zero
, prod_sign
);
2116 if (flags
& float_muladd_negate_c
) {
2121 union_float64 ua_orig
= ua
;
2122 union_float64 uc_orig
= uc
;
2124 if (flags
& float_muladd_negate_product
) {
2127 if (flags
& float_muladd_negate_c
) {
2131 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
2133 if (unlikely(f64_is_inf(ur
))) {
2134 float_raise(float_flag_overflow
, s
);
2135 } else if (unlikely(fabs(ur
.h
) <= FLT_MIN
)) {
2141 if (flags
& float_muladd_negate_result
) {
2142 return float64_chs(ur
.s
);
2147 return soft_f64_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2150 bfloat16 QEMU_FLATTEN
bfloat16_muladd(bfloat16 a
, bfloat16 b
, bfloat16 c
,
2151 int flags
, float_status
*status
)
2153 FloatParts64 pa
, pb
, pc
, *pr
;
2155 bfloat16_unpack_canonical(&pa
, a
, status
);
2156 bfloat16_unpack_canonical(&pb
, b
, status
);
2157 bfloat16_unpack_canonical(&pc
, c
, status
);
2158 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2160 return bfloat16_round_pack_canonical(pr
, status
);
2163 float128 QEMU_FLATTEN
float128_muladd(float128 a
, float128 b
, float128 c
,
2164 int flags
, float_status
*status
)
2166 FloatParts128 pa
, pb
, pc
, *pr
;
2168 float128_unpack_canonical(&pa
, a
, status
);
2169 float128_unpack_canonical(&pb
, b
, status
);
2170 float128_unpack_canonical(&pc
, c
, status
);
2171 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2173 return float128_round_pack_canonical(pr
, status
);
2180 float16
float16_div(float16 a
, float16 b
, float_status
*status
)
2182 FloatParts64 pa
, pb
, *pr
;
2184 float16_unpack_canonical(&pa
, a
, status
);
2185 float16_unpack_canonical(&pb
, b
, status
);
2186 pr
= parts_div(&pa
, &pb
, status
);
2188 return float16_round_pack_canonical(pr
, status
);
2191 static float32 QEMU_SOFTFLOAT_ATTR
2192 soft_f32_div(float32 a
, float32 b
, float_status
*status
)
2194 FloatParts64 pa
, pb
, *pr
;
2196 float32_unpack_canonical(&pa
, a
, status
);
2197 float32_unpack_canonical(&pb
, b
, status
);
2198 pr
= parts_div(&pa
, &pb
, status
);
2200 return float32_round_pack_canonical(pr
, status
);
2203 static float64 QEMU_SOFTFLOAT_ATTR
2204 soft_f64_div(float64 a
, float64 b
, float_status
*status
)
2206 FloatParts64 pa
, pb
, *pr
;
2208 float64_unpack_canonical(&pa
, a
, status
);
2209 float64_unpack_canonical(&pb
, b
, status
);
2210 pr
= parts_div(&pa
, &pb
, status
);
2212 return float64_round_pack_canonical(pr
, status
);
/* Host-FPU fast path for float32 division. */
static float hard_f32_div(float a, float b)
{
    return a / b;
}
/* Host-FPU fast path for float64 division. */
static double hard_f64_div(double a, double b)
{
    return a / b;
}
2225 static bool f32_div_pre(union_float32 a
, union_float32 b
)
2227 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2228 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2229 fpclassify(b
.h
) == FP_NORMAL
;
2231 return float32_is_zero_or_normal(a
.s
) && float32_is_normal(b
.s
);
2234 static bool f64_div_pre(union_float64 a
, union_float64 b
)
2236 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2237 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2238 fpclassify(b
.h
) == FP_NORMAL
;
2240 return float64_is_zero_or_normal(a
.s
) && float64_is_normal(b
.s
);
2243 static bool f32_div_post(union_float32 a
, union_float32 b
)
2245 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2246 return fpclassify(a
.h
) != FP_ZERO
;
2248 return !float32_is_zero(a
.s
);
2251 static bool f64_div_post(union_float64 a
, union_float64 b
)
2253 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2254 return fpclassify(a
.h
) != FP_ZERO
;
2256 return !float64_is_zero(a
.s
);
2259 float32 QEMU_FLATTEN
2260 float32_div(float32 a
, float32 b
, float_status
*s
)
2262 return float32_gen2(a
, b
, s
, hard_f32_div
, soft_f32_div
,
2263 f32_div_pre
, f32_div_post
);
2266 float64 QEMU_FLATTEN
2267 float64_div(float64 a
, float64 b
, float_status
*s
)
2269 return float64_gen2(a
, b
, s
, hard_f64_div
, soft_f64_div
,
2270 f64_div_pre
, f64_div_post
);
2273 bfloat16 QEMU_FLATTEN
2274 bfloat16_div(bfloat16 a
, bfloat16 b
, float_status
*status
)
2276 FloatParts64 pa
, pb
, *pr
;
2278 bfloat16_unpack_canonical(&pa
, a
, status
);
2279 bfloat16_unpack_canonical(&pb
, b
, status
);
2280 pr
= parts_div(&pa
, &pb
, status
);
2282 return bfloat16_round_pack_canonical(pr
, status
);
2285 float128 QEMU_FLATTEN
2286 float128_div(float128 a
, float128 b
, float_status
*status
)
2288 FloatParts128 pa
, pb
, *pr
;
2290 float128_unpack_canonical(&pa
, a
, status
);
2291 float128_unpack_canonical(&pb
, b
, status
);
2292 pr
= parts_div(&pa
, &pb
, status
);
2294 return float128_round_pack_canonical(pr
, status
);
2298 * Float to Float conversions
2300 * Returns the result of converting one float format to another. The
2301 * conversion is performed according to the IEC/IEEE Standard for
2302 * Binary Floating-Point Arithmetic.
2304 * Usually this only needs to take care of raising invalid exceptions
2305 * and handling the conversion on NaNs.
2308 static void parts_float_to_ahp(FloatParts64
*a
, float_status
*s
)
2311 case float_class_qnan
:
2312 case float_class_snan
:
2314 * There is no NaN in the destination format. Raise Invalid
2315 * and return a zero with the sign of the input NaN.
2317 float_raise(float_flag_invalid
, s
);
2318 a
->cls
= float_class_zero
;
2321 case float_class_inf
:
2323 * There is no Inf in the destination format. Raise Invalid
2324 * and return the maximum normal with the correct sign.
2326 float_raise(float_flag_invalid
, s
);
2327 a
->cls
= float_class_normal
;
2328 a
->exp
= float16_params_ahp
.exp_max
;
2329 a
->frac
= MAKE_64BIT_MASK(float16_params_ahp
.frac_shift
,
2330 float16_params_ahp
.frac_size
+ 1);
2333 case float_class_normal
:
2334 case float_class_zero
:
2338 g_assert_not_reached();
2342 static void parts64_float_to_float(FloatParts64
*a
, float_status
*s
)
2344 if (is_nan(a
->cls
)) {
2345 parts_return_nan(a
, s
);
2349 static void parts128_float_to_float(FloatParts128
*a
, float_status
*s
)
2351 if (is_nan(a
->cls
)) {
2352 parts_return_nan(a
, s
);
2356 #define parts_float_to_float(P, S) \
2357 PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2359 static void parts_float_to_float_narrow(FloatParts64
*a
, FloatParts128
*b
,
2366 if (a
->cls
== float_class_normal
) {
2367 frac_truncjam(a
, b
);
2368 } else if (is_nan(a
->cls
)) {
2369 /* Discard the low bits of the NaN. */
2370 a
->frac
= b
->frac_hi
;
2371 parts_return_nan(a
, s
);
2375 static void parts_float_to_float_widen(FloatParts128
*a
, FloatParts64
*b
,
2383 if (is_nan(a
->cls
)) {
2384 parts_return_nan(a
, s
);
2388 float32
float16_to_float32(float16 a
, bool ieee
, float_status
*s
)
2390 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2393 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2394 parts_float_to_float(&p
, s
);
2395 return float32_round_pack_canonical(&p
, s
);
2398 float64
float16_to_float64(float16 a
, bool ieee
, float_status
*s
)
2400 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2403 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2404 parts_float_to_float(&p
, s
);
2405 return float64_round_pack_canonical(&p
, s
);
2408 float16
float32_to_float16(float32 a
, bool ieee
, float_status
*s
)
2411 const FloatFmt
*fmt
;
2413 float32_unpack_canonical(&p
, a
, s
);
2415 parts_float_to_float(&p
, s
);
2416 fmt
= &float16_params
;
2418 parts_float_to_ahp(&p
, s
);
2419 fmt
= &float16_params_ahp
;
2421 return float16a_round_pack_canonical(&p
, s
, fmt
);
2424 static float64 QEMU_SOFTFLOAT_ATTR
2425 soft_float32_to_float64(float32 a
, float_status
*s
)
2429 float32_unpack_canonical(&p
, a
, s
);
2430 parts_float_to_float(&p
, s
);
2431 return float64_round_pack_canonical(&p
, s
);
2434 float64
float32_to_float64(float32 a
, float_status
*s
)
2436 if (likely(float32_is_normal(a
))) {
2437 /* Widening conversion can never produce inexact results. */
2443 } else if (float32_is_zero(a
)) {
2444 return float64_set_sign(float64_zero
, float32_is_neg(a
));
2446 return soft_float32_to_float64(a
, s
);
2450 float16
float64_to_float16(float64 a
, bool ieee
, float_status
*s
)
2453 const FloatFmt
*fmt
;
2455 float64_unpack_canonical(&p
, a
, s
);
2457 parts_float_to_float(&p
, s
);
2458 fmt
= &float16_params
;
2460 parts_float_to_ahp(&p
, s
);
2461 fmt
= &float16_params_ahp
;
2463 return float16a_round_pack_canonical(&p
, s
, fmt
);
2466 float32
float64_to_float32(float64 a
, float_status
*s
)
2470 float64_unpack_canonical(&p
, a
, s
);
2471 parts_float_to_float(&p
, s
);
2472 return float32_round_pack_canonical(&p
, s
);
2475 float32
bfloat16_to_float32(bfloat16 a
, float_status
*s
)
2479 bfloat16_unpack_canonical(&p
, a
, s
);
2480 parts_float_to_float(&p
, s
);
2481 return float32_round_pack_canonical(&p
, s
);
2484 float64
bfloat16_to_float64(bfloat16 a
, float_status
*s
)
2488 bfloat16_unpack_canonical(&p
, a
, s
);
2489 parts_float_to_float(&p
, s
);
2490 return float64_round_pack_canonical(&p
, s
);
2493 bfloat16
float32_to_bfloat16(float32 a
, float_status
*s
)
2497 float32_unpack_canonical(&p
, a
, s
);
2498 parts_float_to_float(&p
, s
);
2499 return bfloat16_round_pack_canonical(&p
, s
);
2502 bfloat16
float64_to_bfloat16(float64 a
, float_status
*s
)
2506 float64_unpack_canonical(&p
, a
, s
);
2507 parts_float_to_float(&p
, s
);
2508 return bfloat16_round_pack_canonical(&p
, s
);
2511 float32
float128_to_float32(float128 a
, float_status
*s
)
2516 float128_unpack_canonical(&p128
, a
, s
);
2517 parts_float_to_float_narrow(&p64
, &p128
, s
);
2518 return float32_round_pack_canonical(&p64
, s
);
2521 float64
float128_to_float64(float128 a
, float_status
*s
)
2526 float128_unpack_canonical(&p128
, a
, s
);
2527 parts_float_to_float_narrow(&p64
, &p128
, s
);
2528 return float64_round_pack_canonical(&p64
, s
);
2531 float128
float32_to_float128(float32 a
, float_status
*s
)
2536 float32_unpack_canonical(&p64
, a
, s
);
2537 parts_float_to_float_widen(&p128
, &p64
, s
);
2538 return float128_round_pack_canonical(&p128
, s
);
2541 float128
float64_to_float128(float64 a
, float_status
*s
)
2546 float64_unpack_canonical(&p64
, a
, s
);
2547 parts_float_to_float_widen(&p128
, &p64
, s
);
2548 return float128_round_pack_canonical(&p128
, s
);
2552 * Round to integral value
2555 float16
float16_round_to_int(float16 a
, float_status
*s
)
2559 float16_unpack_canonical(&p
, a
, s
);
2560 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float16_params
);
2561 return float16_round_pack_canonical(&p
, s
);
2564 float32
float32_round_to_int(float32 a
, float_status
*s
)
2568 float32_unpack_canonical(&p
, a
, s
);
2569 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float32_params
);
2570 return float32_round_pack_canonical(&p
, s
);
2573 float64
float64_round_to_int(float64 a
, float_status
*s
)
2577 float64_unpack_canonical(&p
, a
, s
);
2578 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float64_params
);
2579 return float64_round_pack_canonical(&p
, s
);
2582 bfloat16
bfloat16_round_to_int(bfloat16 a
, float_status
*s
)
2586 bfloat16_unpack_canonical(&p
, a
, s
);
2587 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &bfloat16_params
);
2588 return bfloat16_round_pack_canonical(&p
, s
);
2591 float128
float128_round_to_int(float128 a
, float_status
*s
)
2595 float128_unpack_canonical(&p
, a
, s
);
2596 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float128_params
);
2597 return float128_round_pack_canonical(&p
, s
);
2601 * Floating-point to signed integer conversions
2604 int8_t float16_to_int8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2609 float16_unpack_canonical(&p
, a
, s
);
2610 return parts_float_to_sint(&p
, rmode
, scale
, INT8_MIN
, INT8_MAX
, s
);
2613 int16_t float16_to_int16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2618 float16_unpack_canonical(&p
, a
, s
);
2619 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2622 int32_t float16_to_int32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2627 float16_unpack_canonical(&p
, a
, s
);
2628 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2631 int64_t float16_to_int64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2636 float16_unpack_canonical(&p
, a
, s
);
2637 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2640 int16_t float32_to_int16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2645 float32_unpack_canonical(&p
, a
, s
);
2646 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2649 int32_t float32_to_int32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2654 float32_unpack_canonical(&p
, a
, s
);
2655 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2658 int64_t float32_to_int64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2663 float32_unpack_canonical(&p
, a
, s
);
2664 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2667 int16_t float64_to_int16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2672 float64_unpack_canonical(&p
, a
, s
);
2673 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2676 int32_t float64_to_int32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2681 float64_unpack_canonical(&p
, a
, s
);
2682 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2685 int64_t float64_to_int64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2690 float64_unpack_canonical(&p
, a
, s
);
2691 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2694 int16_t bfloat16_to_int16_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2699 bfloat16_unpack_canonical(&p
, a
, s
);
2700 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2703 int32_t bfloat16_to_int32_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2708 bfloat16_unpack_canonical(&p
, a
, s
);
2709 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2712 int64_t bfloat16_to_int64_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2717 bfloat16_unpack_canonical(&p
, a
, s
);
2718 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2721 static int32_t float128_to_int32_scalbn(float128 a
, FloatRoundMode rmode
,
2722 int scale
, float_status
*s
)
2726 float128_unpack_canonical(&p
, a
, s
);
2727 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2730 static int64_t float128_to_int64_scalbn(float128 a
, FloatRoundMode rmode
,
2731 int scale
, float_status
*s
)
2735 float128_unpack_canonical(&p
, a
, s
);
2736 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2739 int8_t float16_to_int8(float16 a
, float_status
*s
)
2741 return float16_to_int8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2744 int16_t float16_to_int16(float16 a
, float_status
*s
)
2746 return float16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2749 int32_t float16_to_int32(float16 a
, float_status
*s
)
2751 return float16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2754 int64_t float16_to_int64(float16 a
, float_status
*s
)
2756 return float16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2759 int16_t float32_to_int16(float32 a
, float_status
*s
)
2761 return float32_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2764 int32_t float32_to_int32(float32 a
, float_status
*s
)
2766 return float32_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2769 int64_t float32_to_int64(float32 a
, float_status
*s
)
2771 return float32_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2774 int16_t float64_to_int16(float64 a
, float_status
*s
)
2776 return float64_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2779 int32_t float64_to_int32(float64 a
, float_status
*s
)
2781 return float64_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2784 int64_t float64_to_int64(float64 a
, float_status
*s
)
2786 return float64_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2789 int32_t float128_to_int32(float128 a
, float_status
*s
)
2791 return float128_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2794 int64_t float128_to_int64(float128 a
, float_status
*s
)
2796 return float128_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2799 int16_t float16_to_int16_round_to_zero(float16 a
, float_status
*s
)
2801 return float16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2804 int32_t float16_to_int32_round_to_zero(float16 a
, float_status
*s
)
2806 return float16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2809 int64_t float16_to_int64_round_to_zero(float16 a
, float_status
*s
)
2811 return float16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2814 int16_t float32_to_int16_round_to_zero(float32 a
, float_status
*s
)
2816 return float32_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2819 int32_t float32_to_int32_round_to_zero(float32 a
, float_status
*s
)
2821 return float32_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2824 int64_t float32_to_int64_round_to_zero(float32 a
, float_status
*s
)
2826 return float32_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2829 int16_t float64_to_int16_round_to_zero(float64 a
, float_status
*s
)
2831 return float64_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2834 int32_t float64_to_int32_round_to_zero(float64 a
, float_status
*s
)
2836 return float64_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2839 int64_t float64_to_int64_round_to_zero(float64 a
, float_status
*s
)
2841 return float64_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2844 int32_t float128_to_int32_round_to_zero(float128 a
, float_status
*s
)
2846 return float128_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2849 int64_t float128_to_int64_round_to_zero(float128 a
, float_status
*s
)
2851 return float128_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2854 int16_t bfloat16_to_int16(bfloat16 a
, float_status
*s
)
2856 return bfloat16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2859 int32_t bfloat16_to_int32(bfloat16 a
, float_status
*s
)
2861 return bfloat16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2864 int64_t bfloat16_to_int64(bfloat16 a
, float_status
*s
)
2866 return bfloat16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2869 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a
, float_status
*s
)
2871 return bfloat16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2874 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a
, float_status
*s
)
2876 return bfloat16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2879 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a
, float_status
*s
)
2881 return bfloat16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2885 * Floating-point to unsigned integer conversions
2888 uint8_t float16_to_uint8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2893 float16_unpack_canonical(&p
, a
, s
);
2894 return parts_float_to_uint(&p
, rmode
, scale
, UINT8_MAX
, s
);
2897 uint16_t float16_to_uint16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2902 float16_unpack_canonical(&p
, a
, s
);
2903 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2906 uint32_t float16_to_uint32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2911 float16_unpack_canonical(&p
, a
, s
);
2912 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2915 uint64_t float16_to_uint64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2920 float16_unpack_canonical(&p
, a
, s
);
2921 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2924 uint16_t float32_to_uint16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2929 float32_unpack_canonical(&p
, a
, s
);
2930 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2933 uint32_t float32_to_uint32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2938 float32_unpack_canonical(&p
, a
, s
);
2939 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2942 uint64_t float32_to_uint64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2947 float32_unpack_canonical(&p
, a
, s
);
2948 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2951 uint16_t float64_to_uint16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2956 float64_unpack_canonical(&p
, a
, s
);
2957 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2960 uint32_t float64_to_uint32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2965 float64_unpack_canonical(&p
, a
, s
);
2966 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2969 uint64_t float64_to_uint64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2974 float64_unpack_canonical(&p
, a
, s
);
2975 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2978 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2979 int scale
, float_status
*s
)
2983 bfloat16_unpack_canonical(&p
, a
, s
);
2984 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2987 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2988 int scale
, float_status
*s
)
2992 bfloat16_unpack_canonical(&p
, a
, s
);
2993 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2996 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2997 int scale
, float_status
*s
)
3001 bfloat16_unpack_canonical(&p
, a
, s
);
3002 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3005 static uint32_t float128_to_uint32_scalbn(float128 a
, FloatRoundMode rmode
,
3006 int scale
, float_status
*s
)
3010 float128_unpack_canonical(&p
, a
, s
);
3011 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3014 static uint64_t float128_to_uint64_scalbn(float128 a
, FloatRoundMode rmode
,
3015 int scale
, float_status
*s
)
3019 float128_unpack_canonical(&p
, a
, s
);
3020 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3023 uint8_t float16_to_uint8(float16 a
, float_status
*s
)
3025 return float16_to_uint8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3028 uint16_t float16_to_uint16(float16 a
, float_status
*s
)
3030 return float16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3033 uint32_t float16_to_uint32(float16 a
, float_status
*s
)
3035 return float16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3038 uint64_t float16_to_uint64(float16 a
, float_status
*s
)
3040 return float16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3043 uint16_t float32_to_uint16(float32 a
, float_status
*s
)
3045 return float32_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3048 uint32_t float32_to_uint32(float32 a
, float_status
*s
)
3050 return float32_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3053 uint64_t float32_to_uint64(float32 a
, float_status
*s
)
3055 return float32_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3058 uint16_t float64_to_uint16(float64 a
, float_status
*s
)
3060 return float64_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3063 uint32_t float64_to_uint32(float64 a
, float_status
*s
)
3065 return float64_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3068 uint64_t float64_to_uint64(float64 a
, float_status
*s
)
3070 return float64_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3073 uint32_t float128_to_uint32(float128 a
, float_status
*s
)
3075 return float128_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3078 uint64_t float128_to_uint64(float128 a
, float_status
*s
)
3080 return float128_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3083 uint16_t float16_to_uint16_round_to_zero(float16 a
, float_status
*s
)
3085 return float16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3088 uint32_t float16_to_uint32_round_to_zero(float16 a
, float_status
*s
)
3090 return float16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3093 uint64_t float16_to_uint64_round_to_zero(float16 a
, float_status
*s
)
3095 return float16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3098 uint16_t float32_to_uint16_round_to_zero(float32 a
, float_status
*s
)
3100 return float32_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3103 uint32_t float32_to_uint32_round_to_zero(float32 a
, float_status
*s
)
3105 return float32_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3108 uint64_t float32_to_uint64_round_to_zero(float32 a
, float_status
*s
)
3110 return float32_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3113 uint16_t float64_to_uint16_round_to_zero(float64 a
, float_status
*s
)
3115 return float64_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3118 uint32_t float64_to_uint32_round_to_zero(float64 a
, float_status
*s
)
3120 return float64_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3123 uint64_t float64_to_uint64_round_to_zero(float64 a
, float_status
*s
)
3125 return float64_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3128 uint32_t float128_to_uint32_round_to_zero(float128 a
, float_status
*s
)
3130 return float128_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3133 uint64_t float128_to_uint64_round_to_zero(float128 a
, float_status
*s
)
3135 return float128_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3138 uint16_t bfloat16_to_uint16(bfloat16 a
, float_status
*s
)
3140 return bfloat16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3143 uint32_t bfloat16_to_uint32(bfloat16 a
, float_status
*s
)
3145 return bfloat16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3148 uint64_t bfloat16_to_uint64(bfloat16 a
, float_status
*s
)
3150 return bfloat16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3153 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a
, float_status
*s
)
3155 return bfloat16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3158 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a
, float_status
*s
)
3160 return bfloat16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3163 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a
, float_status
*s
)
3165 return bfloat16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3169 * Signed integer to floating-point conversions
3172 float16
int64_to_float16_scalbn(int64_t a
, int scale
, float_status
*status
)
3176 parts_sint_to_float(&p
, a
, scale
, status
);
3177 return float16_round_pack_canonical(&p
, status
);
3180 float16
int32_to_float16_scalbn(int32_t a
, int scale
, float_status
*status
)
3182 return int64_to_float16_scalbn(a
, scale
, status
);
3185 float16
int16_to_float16_scalbn(int16_t a
, int scale
, float_status
*status
)
3187 return int64_to_float16_scalbn(a
, scale
, status
);
3190 float16
int64_to_float16(int64_t a
, float_status
*status
)
3192 return int64_to_float16_scalbn(a
, 0, status
);
3195 float16
int32_to_float16(int32_t a
, float_status
*status
)
3197 return int64_to_float16_scalbn(a
, 0, status
);
3200 float16
int16_to_float16(int16_t a
, float_status
*status
)
3202 return int64_to_float16_scalbn(a
, 0, status
);
3205 float16
int8_to_float16(int8_t a
, float_status
*status
)
3207 return int64_to_float16_scalbn(a
, 0, status
);
3210 float32
int64_to_float32_scalbn(int64_t a
, int scale
, float_status
*status
)
3214 parts64_sint_to_float(&p
, a
, scale
, status
);
3215 return float32_round_pack_canonical(&p
, status
);
3218 float32
int32_to_float32_scalbn(int32_t a
, int scale
, float_status
*status
)
3220 return int64_to_float32_scalbn(a
, scale
, status
);
3223 float32
int16_to_float32_scalbn(int16_t a
, int scale
, float_status
*status
)
3225 return int64_to_float32_scalbn(a
, scale
, status
);
3228 float32
int64_to_float32(int64_t a
, float_status
*status
)
3230 return int64_to_float32_scalbn(a
, 0, status
);
3233 float32
int32_to_float32(int32_t a
, float_status
*status
)
3235 return int64_to_float32_scalbn(a
, 0, status
);
3238 float32
int16_to_float32(int16_t a
, float_status
*status
)
3240 return int64_to_float32_scalbn(a
, 0, status
);
3243 float64
int64_to_float64_scalbn(int64_t a
, int scale
, float_status
*status
)
3247 parts_sint_to_float(&p
, a
, scale
, status
);
3248 return float64_round_pack_canonical(&p
, status
);
3251 float64
int32_to_float64_scalbn(int32_t a
, int scale
, float_status
*status
)
3253 return int64_to_float64_scalbn(a
, scale
, status
);
3256 float64
int16_to_float64_scalbn(int16_t a
, int scale
, float_status
*status
)
3258 return int64_to_float64_scalbn(a
, scale
, status
);
3261 float64
int64_to_float64(int64_t a
, float_status
*status
)
3263 return int64_to_float64_scalbn(a
, 0, status
);
3266 float64
int32_to_float64(int32_t a
, float_status
*status
)
3268 return int64_to_float64_scalbn(a
, 0, status
);
3271 float64
int16_to_float64(int16_t a
, float_status
*status
)
3273 return int64_to_float64_scalbn(a
, 0, status
);
3276 bfloat16
int64_to_bfloat16_scalbn(int64_t a
, int scale
, float_status
*status
)
3280 parts_sint_to_float(&p
, a
, scale
, status
);
3281 return bfloat16_round_pack_canonical(&p
, status
);
3284 bfloat16
int32_to_bfloat16_scalbn(int32_t a
, int scale
, float_status
*status
)
3286 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3289 bfloat16
int16_to_bfloat16_scalbn(int16_t a
, int scale
, float_status
*status
)
3291 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3294 bfloat16
int64_to_bfloat16(int64_t a
, float_status
*status
)
3296 return int64_to_bfloat16_scalbn(a
, 0, status
);
3299 bfloat16
int32_to_bfloat16(int32_t a
, float_status
*status
)
3301 return int64_to_bfloat16_scalbn(a
, 0, status
);
3304 bfloat16
int16_to_bfloat16(int16_t a
, float_status
*status
)
3306 return int64_to_bfloat16_scalbn(a
, 0, status
);
3309 float128
int64_to_float128(int64_t a
, float_status
*status
)
3313 parts_sint_to_float(&p
, a
, 0, status
);
3314 return float128_round_pack_canonical(&p
, status
);
3317 float128
int32_to_float128(int32_t a
, float_status
*status
)
3319 return int64_to_float128(a
, status
);
3323 * Unsigned Integer to floating-point conversions
3326 float16
uint64_to_float16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3330 parts_uint_to_float(&p
, a
, scale
, status
);
3331 return float16_round_pack_canonical(&p
, status
);
3334 float16
uint32_to_float16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3336 return uint64_to_float16_scalbn(a
, scale
, status
);
3339 float16
uint16_to_float16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3341 return uint64_to_float16_scalbn(a
, scale
, status
);
3344 float16
uint64_to_float16(uint64_t a
, float_status
*status
)
3346 return uint64_to_float16_scalbn(a
, 0, status
);
3349 float16
uint32_to_float16(uint32_t a
, float_status
*status
)
3351 return uint64_to_float16_scalbn(a
, 0, status
);
3354 float16
uint16_to_float16(uint16_t a
, float_status
*status
)
3356 return uint64_to_float16_scalbn(a
, 0, status
);
3359 float16
uint8_to_float16(uint8_t a
, float_status
*status
)
3361 return uint64_to_float16_scalbn(a
, 0, status
);
3364 float32
uint64_to_float32_scalbn(uint64_t a
, int scale
, float_status
*status
)
3368 parts_uint_to_float(&p
, a
, scale
, status
);
3369 return float32_round_pack_canonical(&p
, status
);
3372 float32
uint32_to_float32_scalbn(uint32_t a
, int scale
, float_status
*status
)
3374 return uint64_to_float32_scalbn(a
, scale
, status
);
3377 float32
uint16_to_float32_scalbn(uint16_t a
, int scale
, float_status
*status
)
3379 return uint64_to_float32_scalbn(a
, scale
, status
);
3382 float32
uint64_to_float32(uint64_t a
, float_status
*status
)
3384 return uint64_to_float32_scalbn(a
, 0, status
);
3387 float32
uint32_to_float32(uint32_t a
, float_status
*status
)
3389 return uint64_to_float32_scalbn(a
, 0, status
);
3392 float32
uint16_to_float32(uint16_t a
, float_status
*status
)
3394 return uint64_to_float32_scalbn(a
, 0, status
);
3397 float64
uint64_to_float64_scalbn(uint64_t a
, int scale
, float_status
*status
)
3401 parts_uint_to_float(&p
, a
, scale
, status
);
3402 return float64_round_pack_canonical(&p
, status
);
3405 float64
uint32_to_float64_scalbn(uint32_t a
, int scale
, float_status
*status
)
3407 return uint64_to_float64_scalbn(a
, scale
, status
);
3410 float64
uint16_to_float64_scalbn(uint16_t a
, int scale
, float_status
*status
)
3412 return uint64_to_float64_scalbn(a
, scale
, status
);
3415 float64
uint64_to_float64(uint64_t a
, float_status
*status
)
3417 return uint64_to_float64_scalbn(a
, 0, status
);
3420 float64
uint32_to_float64(uint32_t a
, float_status
*status
)
3422 return uint64_to_float64_scalbn(a
, 0, status
);
3425 float64
uint16_to_float64(uint16_t a
, float_status
*status
)
3427 return uint64_to_float64_scalbn(a
, 0, status
);
3430 bfloat16
uint64_to_bfloat16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3434 parts_uint_to_float(&p
, a
, scale
, status
);
3435 return bfloat16_round_pack_canonical(&p
, status
);
3438 bfloat16
uint32_to_bfloat16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3440 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3443 bfloat16
uint16_to_bfloat16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3445 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3448 bfloat16
uint64_to_bfloat16(uint64_t a
, float_status
*status
)
3450 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3453 bfloat16
uint32_to_bfloat16(uint32_t a
, float_status
*status
)
3455 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3458 bfloat16
uint16_to_bfloat16(uint16_t a
, float_status
*status
)
3460 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3463 float128
uint64_to_float128(uint64_t a
, float_status
*status
)
3467 parts_uint_to_float(&p
, a
, 0, status
);
3468 return float128_round_pack_canonical(&p
, status
);
3472 * Minimum and maximum
3475 static float16
float16_minmax(float16 a
, float16 b
, float_status
*s
, int flags
)
3477 FloatParts64 pa
, pb
, *pr
;
3479 float16_unpack_canonical(&pa
, a
, s
);
3480 float16_unpack_canonical(&pb
, b
, s
);
3481 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3483 return float16_round_pack_canonical(pr
, s
);
3486 static bfloat16
bfloat16_minmax(bfloat16 a
, bfloat16 b
,
3487 float_status
*s
, int flags
)
3489 FloatParts64 pa
, pb
, *pr
;
3491 bfloat16_unpack_canonical(&pa
, a
, s
);
3492 bfloat16_unpack_canonical(&pb
, b
, s
);
3493 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3495 return bfloat16_round_pack_canonical(pr
, s
);
3498 static float32
float32_minmax(float32 a
, float32 b
, float_status
*s
, int flags
)
3500 FloatParts64 pa
, pb
, *pr
;
3502 float32_unpack_canonical(&pa
, a
, s
);
3503 float32_unpack_canonical(&pb
, b
, s
);
3504 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3506 return float32_round_pack_canonical(pr
, s
);
3509 static float64
float64_minmax(float64 a
, float64 b
, float_status
*s
, int flags
)
3511 FloatParts64 pa
, pb
, *pr
;
3513 float64_unpack_canonical(&pa
, a
, s
);
3514 float64_unpack_canonical(&pb
, b
, s
);
3515 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3517 return float64_round_pack_canonical(pr
, s
);
3520 static float128
float128_minmax(float128 a
, float128 b
,
3521 float_status
*s
, int flags
)
3523 FloatParts128 pa
, pb
, *pr
;
3525 float128_unpack_canonical(&pa
, a
, s
);
3526 float128_unpack_canonical(&pb
, b
, s
);
3527 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3529 return float128_round_pack_canonical(pr
, s
);
3532 #define MINMAX_1(type, name, flags) \
3533 type type##_##name(type a, type b, float_status *s) \
3534 { return type##_minmax(a, b, s, flags); }
3536 #define MINMAX_2(type) \
3537 MINMAX_1(type, max, 0) \
3538 MINMAX_1(type, maxnum, minmax_isnum) \
3539 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
3540 MINMAX_1(type, min, minmax_ismin) \
3541 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
3542 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3554 * Floating point compare
3557 static FloatRelation QEMU_FLATTEN
3558 float16_do_compare(float16 a
, float16 b
, float_status
*s
, bool is_quiet
)
3560 FloatParts64 pa
, pb
;
3562 float16_unpack_canonical(&pa
, a
, s
);
3563 float16_unpack_canonical(&pb
, b
, s
);
3564 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3567 FloatRelation
float16_compare(float16 a
, float16 b
, float_status
*s
)
3569 return float16_do_compare(a
, b
, s
, false);
3572 FloatRelation
float16_compare_quiet(float16 a
, float16 b
, float_status
*s
)
3574 return float16_do_compare(a
, b
, s
, true);
3577 static FloatRelation QEMU_SOFTFLOAT_ATTR
3578 float32_do_compare(float32 a
, float32 b
, float_status
*s
, bool is_quiet
)
3580 FloatParts64 pa
, pb
;
3582 float32_unpack_canonical(&pa
, a
, s
);
3583 float32_unpack_canonical(&pb
, b
, s
);
3584 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3587 static FloatRelation QEMU_FLATTEN
3588 float32_hs_compare(float32 xa
, float32 xb
, float_status
*s
, bool is_quiet
)
3590 union_float32 ua
, ub
;
3595 if (QEMU_NO_HARDFLOAT
) {
3599 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
3600 if (isgreaterequal(ua
.h
, ub
.h
)) {
3601 if (isgreater(ua
.h
, ub
.h
)) {
3602 return float_relation_greater
;
3604 return float_relation_equal
;
3606 if (likely(isless(ua
.h
, ub
.h
))) {
3607 return float_relation_less
;
3610 * The only condition remaining is unordered.
3611 * Fall through to set flags.
3614 return float32_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
3617 FloatRelation
float32_compare(float32 a
, float32 b
, float_status
*s
)
3619 return float32_hs_compare(a
, b
, s
, false);
3622 FloatRelation
float32_compare_quiet(float32 a
, float32 b
, float_status
*s
)
3624 return float32_hs_compare(a
, b
, s
, true);
3627 static FloatRelation QEMU_SOFTFLOAT_ATTR
3628 float64_do_compare(float64 a
, float64 b
, float_status
*s
, bool is_quiet
)
3630 FloatParts64 pa
, pb
;
3632 float64_unpack_canonical(&pa
, a
, s
);
3633 float64_unpack_canonical(&pb
, b
, s
);
3634 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3637 static FloatRelation QEMU_FLATTEN
3638 float64_hs_compare(float64 xa
, float64 xb
, float_status
*s
, bool is_quiet
)
3640 union_float64 ua
, ub
;
3645 if (QEMU_NO_HARDFLOAT
) {
3649 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
3650 if (isgreaterequal(ua
.h
, ub
.h
)) {
3651 if (isgreater(ua
.h
, ub
.h
)) {
3652 return float_relation_greater
;
3654 return float_relation_equal
;
3656 if (likely(isless(ua
.h
, ub
.h
))) {
3657 return float_relation_less
;
3660 * The only condition remaining is unordered.
3661 * Fall through to set flags.
3664 return float64_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
3667 FloatRelation
float64_compare(float64 a
, float64 b
, float_status
*s
)
3669 return float64_hs_compare(a
, b
, s
, false);
3672 FloatRelation
float64_compare_quiet(float64 a
, float64 b
, float_status
*s
)
3674 return float64_hs_compare(a
, b
, s
, true);
3677 static FloatRelation QEMU_FLATTEN
3678 bfloat16_do_compare(bfloat16 a
, bfloat16 b
, float_status
*s
, bool is_quiet
)
3680 FloatParts64 pa
, pb
;
3682 bfloat16_unpack_canonical(&pa
, a
, s
);
3683 bfloat16_unpack_canonical(&pb
, b
, s
);
3684 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3687 FloatRelation
bfloat16_compare(bfloat16 a
, bfloat16 b
, float_status
*s
)
3689 return bfloat16_do_compare(a
, b
, s
, false);
3692 FloatRelation
bfloat16_compare_quiet(bfloat16 a
, bfloat16 b
, float_status
*s
)
3694 return bfloat16_do_compare(a
, b
, s
, true);
3697 static FloatRelation QEMU_FLATTEN
3698 float128_do_compare(float128 a
, float128 b
, float_status
*s
, bool is_quiet
)
3700 FloatParts128 pa
, pb
;
3702 float128_unpack_canonical(&pa
, a
, s
);
3703 float128_unpack_canonical(&pb
, b
, s
);
3704 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3707 FloatRelation
float128_compare(float128 a
, float128 b
, float_status
*s
)
3709 return float128_do_compare(a
, b
, s
, false);
3712 FloatRelation
float128_compare_quiet(float128 a
, float128 b
, float_status
*s
)
3714 return float128_do_compare(a
, b
, s
, true);
3721 float16
float16_scalbn(float16 a
, int n
, float_status
*status
)
3725 float16_unpack_canonical(&p
, a
, status
);
3726 parts_scalbn(&p
, n
, status
);
3727 return float16_round_pack_canonical(&p
, status
);
3730 float32
float32_scalbn(float32 a
, int n
, float_status
*status
)
3734 float32_unpack_canonical(&p
, a
, status
);
3735 parts_scalbn(&p
, n
, status
);
3736 return float32_round_pack_canonical(&p
, status
);
3739 float64
float64_scalbn(float64 a
, int n
, float_status
*status
)
3743 float64_unpack_canonical(&p
, a
, status
);
3744 parts_scalbn(&p
, n
, status
);
3745 return float64_round_pack_canonical(&p
, status
);
3748 bfloat16
bfloat16_scalbn(bfloat16 a
, int n
, float_status
*status
)
3752 bfloat16_unpack_canonical(&p
, a
, status
);
3753 parts_scalbn(&p
, n
, status
);
3754 return bfloat16_round_pack_canonical(&p
, status
);
3757 float128
float128_scalbn(float128 a
, int n
, float_status
*status
)
3761 float128_unpack_canonical(&p
, a
, status
);
3762 parts_scalbn(&p
, n
, status
);
3763 return float128_round_pack_canonical(&p
, status
);
3770 float16 QEMU_FLATTEN
float16_sqrt(float16 a
, float_status
*status
)
3774 float16_unpack_canonical(&p
, a
, status
);
3775 parts_sqrt(&p
, status
, &float16_params
);
3776 return float16_round_pack_canonical(&p
, status
);
3779 static float32 QEMU_SOFTFLOAT_ATTR
3780 soft_f32_sqrt(float32 a
, float_status
*status
)
3784 float32_unpack_canonical(&p
, a
, status
);
3785 parts_sqrt(&p
, status
, &float32_params
);
3786 return float32_round_pack_canonical(&p
, status
);
3789 static float64 QEMU_SOFTFLOAT_ATTR
3790 soft_f64_sqrt(float64 a
, float_status
*status
)
3794 float64_unpack_canonical(&p
, a
, status
);
3795 parts_sqrt(&p
, status
, &float64_params
);
3796 return float64_round_pack_canonical(&p
, status
);
3799 float32 QEMU_FLATTEN
float32_sqrt(float32 xa
, float_status
*s
)
3801 union_float32 ua
, ur
;
3804 if (unlikely(!can_use_fpu(s
))) {
3808 float32_input_flush1(&ua
.s
, s
);
3809 if (QEMU_HARDFLOAT_1F32_USE_FP
) {
3810 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
3811 fpclassify(ua
.h
) == FP_ZERO
) ||
3815 } else if (unlikely(!float32_is_zero_or_normal(ua
.s
) ||
3816 float32_is_neg(ua
.s
))) {
3823 return soft_f32_sqrt(ua
.s
, s
);
3826 float64 QEMU_FLATTEN
float64_sqrt(float64 xa
, float_status
*s
)
3828 union_float64 ua
, ur
;
3831 if (unlikely(!can_use_fpu(s
))) {
3835 float64_input_flush1(&ua
.s
, s
);
3836 if (QEMU_HARDFLOAT_1F64_USE_FP
) {
3837 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
3838 fpclassify(ua
.h
) == FP_ZERO
) ||
3842 } else if (unlikely(!float64_is_zero_or_normal(ua
.s
) ||
3843 float64_is_neg(ua
.s
))) {
3850 return soft_f64_sqrt(ua
.s
, s
);
3853 bfloat16 QEMU_FLATTEN
bfloat16_sqrt(bfloat16 a
, float_status
*status
)
3857 bfloat16_unpack_canonical(&p
, a
, status
);
3858 parts_sqrt(&p
, status
, &bfloat16_params
);
3859 return bfloat16_round_pack_canonical(&p
, status
);
3862 float128 QEMU_FLATTEN
float128_sqrt(float128 a
, float_status
*status
)
3866 float128_unpack_canonical(&p
, a
, status
);
3867 parts_sqrt(&p
, status
, &float128_params
);
3868 return float128_round_pack_canonical(&p
, status
);
3871 /*----------------------------------------------------------------------------
3872 | The pattern for a default generated NaN.
3873 *----------------------------------------------------------------------------*/
3875 float16
float16_default_nan(float_status
*status
)
3879 parts_default_nan(&p
, status
);
3880 p
.frac
>>= float16_params
.frac_shift
;
3881 return float16_pack_raw(&p
);
3884 float32
float32_default_nan(float_status
*status
)
3888 parts_default_nan(&p
, status
);
3889 p
.frac
>>= float32_params
.frac_shift
;
3890 return float32_pack_raw(&p
);
3893 float64
float64_default_nan(float_status
*status
)
3897 parts_default_nan(&p
, status
);
3898 p
.frac
>>= float64_params
.frac_shift
;
3899 return float64_pack_raw(&p
);
3902 float128
float128_default_nan(float_status
*status
)
3906 parts_default_nan(&p
, status
);
3907 frac_shr(&p
, float128_params
.frac_shift
);
3908 return float128_pack_raw(&p
);
3911 bfloat16
bfloat16_default_nan(float_status
*status
)
3915 parts_default_nan(&p
, status
);
3916 p
.frac
>>= bfloat16_params
.frac_shift
;
3917 return bfloat16_pack_raw(&p
);
3920 /*----------------------------------------------------------------------------
3921 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3922 *----------------------------------------------------------------------------*/
3924 float16
float16_silence_nan(float16 a
, float_status
*status
)
3928 float16_unpack_raw(&p
, a
);
3929 p
.frac
<<= float16_params
.frac_shift
;
3930 parts_silence_nan(&p
, status
);
3931 p
.frac
>>= float16_params
.frac_shift
;
3932 return float16_pack_raw(&p
);
3935 float32
float32_silence_nan(float32 a
, float_status
*status
)
3939 float32_unpack_raw(&p
, a
);
3940 p
.frac
<<= float32_params
.frac_shift
;
3941 parts_silence_nan(&p
, status
);
3942 p
.frac
>>= float32_params
.frac_shift
;
3943 return float32_pack_raw(&p
);
3946 float64
float64_silence_nan(float64 a
, float_status
*status
)
3950 float64_unpack_raw(&p
, a
);
3951 p
.frac
<<= float64_params
.frac_shift
;
3952 parts_silence_nan(&p
, status
);
3953 p
.frac
>>= float64_params
.frac_shift
;
3954 return float64_pack_raw(&p
);
3957 bfloat16
bfloat16_silence_nan(bfloat16 a
, float_status
*status
)
3961 bfloat16_unpack_raw(&p
, a
);
3962 p
.frac
<<= bfloat16_params
.frac_shift
;
3963 parts_silence_nan(&p
, status
);
3964 p
.frac
>>= bfloat16_params
.frac_shift
;
3965 return bfloat16_pack_raw(&p
);
3968 float128
float128_silence_nan(float128 a
, float_status
*status
)
3972 float128_unpack_raw(&p
, a
);
3973 frac_shl(&p
, float128_params
.frac_shift
);
3974 parts_silence_nan(&p
, status
);
3975 frac_shr(&p
, float128_params
.frac_shift
);
3976 return float128_pack_raw(&p
);
3979 /*----------------------------------------------------------------------------
3980 | If `a' is denormal and we are in flush-to-zero mode then set the
3981 | input-denormal exception and return zero. Otherwise just return the value.
3982 *----------------------------------------------------------------------------*/
3984 static bool parts_squash_denormal(FloatParts64 p
, float_status
*status
)
3986 if (p
.exp
== 0 && p
.frac
!= 0) {
3987 float_raise(float_flag_input_denormal
, status
);
3994 float16
float16_squash_input_denormal(float16 a
, float_status
*status
)
3996 if (status
->flush_inputs_to_zero
) {
3999 float16_unpack_raw(&p
, a
);
4000 if (parts_squash_denormal(p
, status
)) {
4001 return float16_set_sign(float16_zero
, p
.sign
);
4007 float32
float32_squash_input_denormal(float32 a
, float_status
*status
)
4009 if (status
->flush_inputs_to_zero
) {
4012 float32_unpack_raw(&p
, a
);
4013 if (parts_squash_denormal(p
, status
)) {
4014 return float32_set_sign(float32_zero
, p
.sign
);
4020 float64
float64_squash_input_denormal(float64 a
, float_status
*status
)
4022 if (status
->flush_inputs_to_zero
) {
4025 float64_unpack_raw(&p
, a
);
4026 if (parts_squash_denormal(p
, status
)) {
4027 return float64_set_sign(float64_zero
, p
.sign
);
4033 bfloat16
bfloat16_squash_input_denormal(bfloat16 a
, float_status
*status
)
4035 if (status
->flush_inputs_to_zero
) {
4038 bfloat16_unpack_raw(&p
, a
);
4039 if (parts_squash_denormal(p
, status
)) {
4040 return bfloat16_set_sign(bfloat16_zero
, p
.sign
);
4046 /*----------------------------------------------------------------------------
4047 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
4048 | and 7, and returns the properly rounded 32-bit integer corresponding to the
4049 | input. If `zSign' is 1, the input is negated before being converted to an
4050 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
4051 | is simply rounded to an integer, with the inexact exception raised if the
4052 | input cannot be represented exactly as an integer. However, if the fixed-
4053 | point input is too large, the invalid exception is raised and the largest
4054 | positive or negative integer is returned.
4055 *----------------------------------------------------------------------------*/
4057 static int32_t roundAndPackInt32(bool zSign
, uint64_t absZ
,
4058 float_status
*status
)
4060 int8_t roundingMode
;
4061 bool roundNearestEven
;
4062 int8_t roundIncrement
, roundBits
;
4065 roundingMode
= status
->float_rounding_mode
;
4066 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4067 switch (roundingMode
) {
4068 case float_round_nearest_even
:
4069 case float_round_ties_away
:
4070 roundIncrement
= 0x40;
4072 case float_round_to_zero
:
4075 case float_round_up
:
4076 roundIncrement
= zSign
? 0 : 0x7f;
4078 case float_round_down
:
4079 roundIncrement
= zSign
? 0x7f : 0;
4081 case float_round_to_odd
:
4082 roundIncrement
= absZ
& 0x80 ? 0 : 0x7f;
4087 roundBits
= absZ
& 0x7F;
4088 absZ
= ( absZ
+ roundIncrement
)>>7;
4089 if (!(roundBits
^ 0x40) && roundNearestEven
) {
4093 if ( zSign
) z
= - z
;
4094 if ( ( absZ
>>32 ) || ( z
&& ( ( z
< 0 ) ^ zSign
) ) ) {
4095 float_raise(float_flag_invalid
, status
);
4096 return zSign
? INT32_MIN
: INT32_MAX
;
4099 float_raise(float_flag_inexact
, status
);
/*----------------------------------------------------------------------------
| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
| `absZ1', with binary point between bits 63 and 64 (between the input words),
| and returns the properly rounded 64-bit integer corresponding to the input.
| If `zSign' is 1, the input is negated before being converted to an integer.
| Ordinarily, the fixed-point input is simply rounded to an integer, with
| the inexact exception raised if the input cannot be represented exactly as
| an integer.  However, if the fixed-point input is too large, the invalid
| exception is raised and the largest positive or negative integer is
| returned.
*----------------------------------------------------------------------------*/
4117 static int64_t roundAndPackInt64(bool zSign
, uint64_t absZ0
, uint64_t absZ1
,
4118 float_status
*status
)
4120 int8_t roundingMode
;
4121 bool roundNearestEven
, increment
;
4124 roundingMode
= status
->float_rounding_mode
;
4125 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4126 switch (roundingMode
) {
4127 case float_round_nearest_even
:
4128 case float_round_ties_away
:
4129 increment
= ((int64_t) absZ1
< 0);
4131 case float_round_to_zero
:
4134 case float_round_up
:
4135 increment
= !zSign
&& absZ1
;
4137 case float_round_down
:
4138 increment
= zSign
&& absZ1
;
4140 case float_round_to_odd
:
4141 increment
= !(absZ0
& 1) && absZ1
;
4148 if ( absZ0
== 0 ) goto overflow
;
4149 if (!(absZ1
<< 1) && roundNearestEven
) {
4154 if ( zSign
) z
= - z
;
4155 if ( z
&& ( ( z
< 0 ) ^ zSign
) ) {
4157 float_raise(float_flag_invalid
, status
);
4158 return zSign
? INT64_MIN
: INT64_MAX
;
4161 float_raise(float_flag_inexact
, status
);
4167 /*----------------------------------------------------------------------------
4168 | Normalizes the subnormal single-precision floating-point value represented
4169 | by the denormalized significand `aSig'. The normalized exponent and
4170 | significand are stored at the locations pointed to by `zExpPtr' and
4171 | `zSigPtr', respectively.
4172 *----------------------------------------------------------------------------*/
/*
 * Normalize the subnormal float32 significand `aSig'; the normalized
 * exponent and significand are written through `zExpPtr' and `zSigPtr'.
 */
static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    int8_t shiftCount;

    /* Shift the most-significant set bit of aSig up to bit 23. */
    shiftCount = clz32(aSig) - 8;
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4185 /*----------------------------------------------------------------------------
4186 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4187 | and significand `zSig', and returns the proper single-precision floating-
4188 | point value corresponding to the abstract input. Ordinarily, the abstract
4189 | value is simply rounded and packed into the single-precision format, with
4190 | the inexact exception raised if the abstract input cannot be represented
4191 | exactly. However, if the abstract value is too large, the overflow and
4192 | inexact exceptions are raised and an infinity or maximal finite value is
4193 | returned. If the abstract value is too small, the input value is rounded to
4194 | a subnormal number, and the underflow and inexact exceptions are raised if
4195 | the abstract input cannot be represented exactly as a subnormal single-
4196 | precision floating-point number.
4197 | The input significand `zSig' has its binary point between bits 30
4198 | and 29, which is 7 bits to the left of the usual location. This shifted
4199 | significand must be normalized or smaller. If `zSig' is not normalized,
4200 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4201 | and it must not require rounding. In the usual case that `zSig' is
4202 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4203 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4204 | Binary Floating-Point Arithmetic.
4205 *----------------------------------------------------------------------------*/
4207 static float32
roundAndPackFloat32(bool zSign
, int zExp
, uint32_t zSig
,
4208 float_status
*status
)
4210 int8_t roundingMode
;
4211 bool roundNearestEven
;
4212 int8_t roundIncrement
, roundBits
;
4215 roundingMode
= status
->float_rounding_mode
;
4216 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4217 switch (roundingMode
) {
4218 case float_round_nearest_even
:
4219 case float_round_ties_away
:
4220 roundIncrement
= 0x40;
4222 case float_round_to_zero
:
4225 case float_round_up
:
4226 roundIncrement
= zSign
? 0 : 0x7f;
4228 case float_round_down
:
4229 roundIncrement
= zSign
? 0x7f : 0;
4231 case float_round_to_odd
:
4232 roundIncrement
= zSig
& 0x80 ? 0 : 0x7f;
4238 roundBits
= zSig
& 0x7F;
4239 if ( 0xFD <= (uint16_t) zExp
) {
4240 if ( ( 0xFD < zExp
)
4241 || ( ( zExp
== 0xFD )
4242 && ( (int32_t) ( zSig
+ roundIncrement
) < 0 ) )
4244 bool overflow_to_inf
= roundingMode
!= float_round_to_odd
&&
4245 roundIncrement
!= 0;
4246 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4247 return packFloat32(zSign
, 0xFF, -!overflow_to_inf
);
4250 if (status
->flush_to_zero
) {
4251 float_raise(float_flag_output_denormal
, status
);
4252 return packFloat32(zSign
, 0, 0);
4254 isTiny
= status
->tininess_before_rounding
4256 || (zSig
+ roundIncrement
< 0x80000000);
4257 shift32RightJamming( zSig
, - zExp
, &zSig
);
4259 roundBits
= zSig
& 0x7F;
4260 if (isTiny
&& roundBits
) {
4261 float_raise(float_flag_underflow
, status
);
4263 if (roundingMode
== float_round_to_odd
) {
4265 * For round-to-odd case, the roundIncrement depends on
4266 * zSig which just changed.
4268 roundIncrement
= zSig
& 0x80 ? 0 : 0x7f;
4273 float_raise(float_flag_inexact
, status
);
4275 zSig
= ( zSig
+ roundIncrement
)>>7;
4276 if (!(roundBits
^ 0x40) && roundNearestEven
) {
4279 if ( zSig
== 0 ) zExp
= 0;
4280 return packFloat32( zSign
, zExp
, zSig
);
4284 /*----------------------------------------------------------------------------
4285 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4286 | and significand `zSig', and returns the proper single-precision floating-
4287 | point value corresponding to the abstract input. This routine is just like
4288 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4289 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4290 | floating-point exponent.
4291 *----------------------------------------------------------------------------*/
4294 normalizeRoundAndPackFloat32(bool zSign
, int zExp
, uint32_t zSig
,
4295 float_status
*status
)
4299 shiftCount
= clz32(zSig
) - 1;
4300 return roundAndPackFloat32(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
4305 /*----------------------------------------------------------------------------
4306 | Normalizes the subnormal double-precision floating-point value represented
4307 | by the denormalized significand `aSig'. The normalized exponent and
4308 | significand are stored at the locations pointed to by `zExpPtr' and
4309 | `zSigPtr', respectively.
4310 *----------------------------------------------------------------------------*/
/*
 * Normalize the subnormal float64 significand `aSig'; the normalized
 * exponent and significand are written through `zExpPtr' and `zSigPtr'.
 */
static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    int8_t shiftCount;

    /* Shift the most-significant set bit of aSig up to bit 52. */
    shiftCount = clz64(aSig) - 11;
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4323 /*----------------------------------------------------------------------------
4324 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4325 | double-precision floating-point value, returning the result. After being
4326 | shifted into the proper positions, the three fields are simply added
4327 | together to form the result. This means that any integer portion of `zSig'
4328 | will be added into the exponent. Since a properly normalized significand
4329 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4330 | than the desired result exponent whenever `zSig' is a complete, normalized
4332 *----------------------------------------------------------------------------*/
4334 static inline float64
packFloat64(bool zSign
, int zExp
, uint64_t zSig
)
4337 return make_float64(
4338 ( ( (uint64_t) zSign
)<<63 ) + ( ( (uint64_t) zExp
)<<52 ) + zSig
);
4342 /*----------------------------------------------------------------------------
4343 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4344 | and significand `zSig', and returns the proper double-precision floating-
4345 | point value corresponding to the abstract input. Ordinarily, the abstract
4346 | value is simply rounded and packed into the double-precision format, with
4347 | the inexact exception raised if the abstract input cannot be represented
4348 | exactly. However, if the abstract value is too large, the overflow and
4349 | inexact exceptions are raised and an infinity or maximal finite value is
4350 | returned. If the abstract value is too small, the input value is rounded to
4351 | a subnormal number, and the underflow and inexact exceptions are raised if
4352 | the abstract input cannot be represented exactly as a subnormal double-
4353 | precision floating-point number.
4354 | The input significand `zSig' has its binary point between bits 62
4355 | and 61, which is 10 bits to the left of the usual location. This shifted
4356 | significand must be normalized or smaller. If `zSig' is not normalized,
4357 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4358 | and it must not require rounding. In the usual case that `zSig' is
4359 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4360 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4361 | Binary Floating-Point Arithmetic.
4362 *----------------------------------------------------------------------------*/
4364 static float64
roundAndPackFloat64(bool zSign
, int zExp
, uint64_t zSig
,
4365 float_status
*status
)
4367 int8_t roundingMode
;
4368 bool roundNearestEven
;
4369 int roundIncrement
, roundBits
;
4372 roundingMode
= status
->float_rounding_mode
;
4373 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4374 switch (roundingMode
) {
4375 case float_round_nearest_even
:
4376 case float_round_ties_away
:
4377 roundIncrement
= 0x200;
4379 case float_round_to_zero
:
4382 case float_round_up
:
4383 roundIncrement
= zSign
? 0 : 0x3ff;
4385 case float_round_down
:
4386 roundIncrement
= zSign
? 0x3ff : 0;
4388 case float_round_to_odd
:
4389 roundIncrement
= (zSig
& 0x400) ? 0 : 0x3ff;
4394 roundBits
= zSig
& 0x3FF;
4395 if ( 0x7FD <= (uint16_t) zExp
) {
4396 if ( ( 0x7FD < zExp
)
4397 || ( ( zExp
== 0x7FD )
4398 && ( (int64_t) ( zSig
+ roundIncrement
) < 0 ) )
4400 bool overflow_to_inf
= roundingMode
!= float_round_to_odd
&&
4401 roundIncrement
!= 0;
4402 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4403 return packFloat64(zSign
, 0x7FF, -(!overflow_to_inf
));
4406 if (status
->flush_to_zero
) {
4407 float_raise(float_flag_output_denormal
, status
);
4408 return packFloat64(zSign
, 0, 0);
4410 isTiny
= status
->tininess_before_rounding
4412 || (zSig
+ roundIncrement
< UINT64_C(0x8000000000000000));
4413 shift64RightJamming( zSig
, - zExp
, &zSig
);
4415 roundBits
= zSig
& 0x3FF;
4416 if (isTiny
&& roundBits
) {
4417 float_raise(float_flag_underflow
, status
);
4419 if (roundingMode
== float_round_to_odd
) {
4421 * For round-to-odd case, the roundIncrement depends on
4422 * zSig which just changed.
4424 roundIncrement
= (zSig
& 0x400) ? 0 : 0x3ff;
4429 float_raise(float_flag_inexact
, status
);
4431 zSig
= ( zSig
+ roundIncrement
)>>10;
4432 if (!(roundBits
^ 0x200) && roundNearestEven
) {
4435 if ( zSig
== 0 ) zExp
= 0;
4436 return packFloat64( zSign
, zExp
, zSig
);
4440 /*----------------------------------------------------------------------------
4441 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4442 | and significand `zSig', and returns the proper double-precision floating-
4443 | point value corresponding to the abstract input. This routine is just like
4444 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4445 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4446 | floating-point exponent.
4447 *----------------------------------------------------------------------------*/
4450 normalizeRoundAndPackFloat64(bool zSign
, int zExp
, uint64_t zSig
,
4451 float_status
*status
)
4455 shiftCount
= clz64(zSig
) - 1;
4456 return roundAndPackFloat64(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
4461 /*----------------------------------------------------------------------------
4462 | Normalizes the subnormal extended double-precision floating-point value
4463 | represented by the denormalized significand `aSig'. The normalized exponent
4464 | and significand are stored at the locations pointed to by `zExpPtr' and
4465 | `zSigPtr', respectively.
4466 *----------------------------------------------------------------------------*/
/*
 * Normalize the subnormal floatx80 significand `aSig'; the normalized
 * exponent and significand are written through `zExpPtr' and `zSigPtr'.
 */
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shiftCount;

    /* floatx80 has an explicit integer bit, so shift the MSB to bit 63. */
    shiftCount = clz64(aSig);
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4478 /*----------------------------------------------------------------------------
4479 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4480 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4481 | and returns the proper extended double-precision floating-point value
4482 | corresponding to the abstract input. Ordinarily, the abstract value is
4483 | rounded and packed into the extended double-precision format, with the
4484 | inexact exception raised if the abstract input cannot be represented
4485 | exactly. However, if the abstract value is too large, the overflow and
4486 | inexact exceptions are raised and an infinity or maximal finite value is
4487 | returned. If the abstract value is too small, the input value is rounded to
4488 | a subnormal number, and the underflow and inexact exceptions are raised if
4489 | the abstract input cannot be represented exactly as a subnormal extended
4490 | double-precision floating-point number.
4491 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4492 | the result is rounded to the same number of bits as single or double
4493 | precision, respectively. Otherwise, the result is rounded to the full
4494 | precision of the extended double-precision format.
4495 | The input significand must be normalized or smaller. If the input
4496 | significand is not normalized, `zExp' must be 0; in that case, the result
4497 | returned is a subnormal number, and it must not require rounding. The
4498 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4499 | Floating-Point Arithmetic.
4500 *----------------------------------------------------------------------------*/
4502 floatx80
roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
, bool zSign
,
4503 int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
,
4504 float_status
*status
)
4506 FloatRoundMode roundingMode
;
4507 bool roundNearestEven
, increment
, isTiny
;
4508 int64_t roundIncrement
, roundMask
, roundBits
;
4510 roundingMode
= status
->float_rounding_mode
;
4511 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4512 switch (roundingPrecision
) {
4513 case floatx80_precision_x
:
4515 case floatx80_precision_d
:
4516 roundIncrement
= UINT64_C(0x0000000000000400);
4517 roundMask
= UINT64_C(0x00000000000007FF);
4519 case floatx80_precision_s
:
4520 roundIncrement
= UINT64_C(0x0000008000000000);
4521 roundMask
= UINT64_C(0x000000FFFFFFFFFF);
4524 g_assert_not_reached();
4526 zSig0
|= ( zSig1
!= 0 );
4527 switch (roundingMode
) {
4528 case float_round_nearest_even
:
4529 case float_round_ties_away
:
4531 case float_round_to_zero
:
4534 case float_round_up
:
4535 roundIncrement
= zSign
? 0 : roundMask
;
4537 case float_round_down
:
4538 roundIncrement
= zSign
? roundMask
: 0;
4543 roundBits
= zSig0
& roundMask
;
4544 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4545 if ( ( 0x7FFE < zExp
)
4546 || ( ( zExp
== 0x7FFE ) && ( zSig0
+ roundIncrement
< zSig0
) )
4551 if (status
->flush_to_zero
) {
4552 float_raise(float_flag_output_denormal
, status
);
4553 return packFloatx80(zSign
, 0, 0);
4555 isTiny
= status
->tininess_before_rounding
4557 || (zSig0
<= zSig0
+ roundIncrement
);
4558 shift64RightJamming( zSig0
, 1 - zExp
, &zSig0
);
4560 roundBits
= zSig0
& roundMask
;
4561 if (isTiny
&& roundBits
) {
4562 float_raise(float_flag_underflow
, status
);
4565 float_raise(float_flag_inexact
, status
);
4567 zSig0
+= roundIncrement
;
4568 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4569 roundIncrement
= roundMask
+ 1;
4570 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4571 roundMask
|= roundIncrement
;
4573 zSig0
&= ~ roundMask
;
4574 return packFloatx80( zSign
, zExp
, zSig0
);
4578 float_raise(float_flag_inexact
, status
);
4580 zSig0
+= roundIncrement
;
4581 if ( zSig0
< roundIncrement
) {
4583 zSig0
= UINT64_C(0x8000000000000000);
4585 roundIncrement
= roundMask
+ 1;
4586 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4587 roundMask
|= roundIncrement
;
4589 zSig0
&= ~ roundMask
;
4590 if ( zSig0
== 0 ) zExp
= 0;
4591 return packFloatx80( zSign
, zExp
, zSig0
);
4593 switch (roundingMode
) {
4594 case float_round_nearest_even
:
4595 case float_round_ties_away
:
4596 increment
= ((int64_t)zSig1
< 0);
4598 case float_round_to_zero
:
4601 case float_round_up
:
4602 increment
= !zSign
&& zSig1
;
4604 case float_round_down
:
4605 increment
= zSign
&& zSig1
;
4610 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4611 if ( ( 0x7FFE < zExp
)
4612 || ( ( zExp
== 0x7FFE )
4613 && ( zSig0
== UINT64_C(0xFFFFFFFFFFFFFFFF) )
4619 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4620 if ( ( roundingMode
== float_round_to_zero
)
4621 || ( zSign
&& ( roundingMode
== float_round_up
) )
4622 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4624 return packFloatx80( zSign
, 0x7FFE, ~ roundMask
);
4626 return packFloatx80(zSign
,
4627 floatx80_infinity_high
,
4628 floatx80_infinity_low
);
4631 isTiny
= status
->tininess_before_rounding
4634 || (zSig0
< UINT64_C(0xFFFFFFFFFFFFFFFF));
4635 shift64ExtraRightJamming( zSig0
, zSig1
, 1 - zExp
, &zSig0
, &zSig1
);
4637 if (isTiny
&& zSig1
) {
4638 float_raise(float_flag_underflow
, status
);
4641 float_raise(float_flag_inexact
, status
);
4643 switch (roundingMode
) {
4644 case float_round_nearest_even
:
4645 case float_round_ties_away
:
4646 increment
= ((int64_t)zSig1
< 0);
4648 case float_round_to_zero
:
4651 case float_round_up
:
4652 increment
= !zSign
&& zSig1
;
4654 case float_round_down
:
4655 increment
= zSign
&& zSig1
;
4662 if (!(zSig1
<< 1) && roundNearestEven
) {
4665 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4667 return packFloatx80( zSign
, zExp
, zSig0
);
4671 float_raise(float_flag_inexact
, status
);
4677 zSig0
= UINT64_C(0x8000000000000000);
4680 if (!(zSig1
<< 1) && roundNearestEven
) {
4686 if ( zSig0
== 0 ) zExp
= 0;
4688 return packFloatx80( zSign
, zExp
, zSig0
);
4692 /*----------------------------------------------------------------------------
4693 | Takes an abstract floating-point value having sign `zSign', exponent
4694 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4695 | and returns the proper extended double-precision floating-point value
4696 | corresponding to the abstract input. This routine is just like
4697 | `roundAndPackFloatx80' except that the input significand does not have to be
4699 *----------------------------------------------------------------------------*/
4701 floatx80
normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
,
4702 bool zSign
, int32_t zExp
,
4703 uint64_t zSig0
, uint64_t zSig1
,
4704 float_status
*status
)
4713 shiftCount
= clz64(zSig0
);
4714 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
4716 return roundAndPackFloatx80(roundingPrecision
, zSign
, zExp
,
4717 zSig0
, zSig1
, status
);
4721 /*----------------------------------------------------------------------------
4722 | Returns the least-significant 64 fraction bits of the quadruple-precision
4723 | floating-point value `a'.
4724 *----------------------------------------------------------------------------*/
4726 static inline uint64_t extractFloat128Frac1( float128 a
)
4733 /*----------------------------------------------------------------------------
4734 | Returns the most-significant 48 fraction bits of the quadruple-precision
4735 | floating-point value `a'.
4736 *----------------------------------------------------------------------------*/
4738 static inline uint64_t extractFloat128Frac0( float128 a
)
4741 return a
.high
& UINT64_C(0x0000FFFFFFFFFFFF);
/*----------------------------------------------------------------------------
| Returns the exponent bits of the quadruple-precision floating-point value
| `a'.
*----------------------------------------------------------------------------*/
4750 static inline int32_t extractFloat128Exp( float128 a
)
4753 return ( a
.high
>>48 ) & 0x7FFF;
4757 /*----------------------------------------------------------------------------
4758 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4759 *----------------------------------------------------------------------------*/
4761 static inline bool extractFloat128Sign(float128 a
)
4763 return a
.high
>> 63;
4766 /*----------------------------------------------------------------------------
4767 | Normalizes the subnormal quadruple-precision floating-point value
4768 | represented by the denormalized significand formed by the concatenation of
4769 | `aSig0' and `aSig1'. The normalized exponent is stored at the location
4770 | pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4771 | significand are stored at the location pointed to by `zSig0Ptr', and the
4772 | least significant 64 bits of the normalized significand are stored at the
4773 | location pointed to by `zSig1Ptr'.
4774 *----------------------------------------------------------------------------*/
/*
 * Normalize the subnormal float128 significand aSig0:aSig1.  The
 * normalized exponent is written through `zExpPtr'; the high 49 bits of
 * the normalized significand through `zSig0Ptr' and the low 64 bits
 * through `zSig1Ptr'.
 */
static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shiftCount;

    if (aSig0 == 0) {
        /* All significant bits are in the low word. */
        shiftCount = clz64(aSig1) - 15;
        if (shiftCount < 0) {
            *zSig0Ptr = aSig1 >> (-shiftCount);
            *zSig1Ptr = aSig1 << (shiftCount & 63);
        } else {
            *zSig0Ptr = aSig1 << shiftCount;
            *zSig1Ptr = 0;
        }
        *zExpPtr = -shiftCount - 63;
    } else {
        shiftCount = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shiftCount;
    }
}
4807 /*----------------------------------------------------------------------------
4808 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4809 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4810 | floating-point value, returning the result. After being shifted into the
4811 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4812 | added together to form the most significant 32 bits of the result. This
4813 | means that any integer portion of `zSig0' will be added into the exponent.
4814 | Since a properly normalized significand will have an integer portion equal
4815 | to 1, the `zExp' input should be 1 less than the desired result exponent
4816 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4818 *----------------------------------------------------------------------------*/
4820 static inline float128
4821 packFloat128(bool zSign
, int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
)
4826 z
.high
= ((uint64_t)zSign
<< 63) + ((uint64_t)zExp
<< 48) + zSig0
;
4830 /*----------------------------------------------------------------------------
4831 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4832 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4833 | and `zSig2', and returns the proper quadruple-precision floating-point value
4834 | corresponding to the abstract input. Ordinarily, the abstract value is
4835 | simply rounded and packed into the quadruple-precision format, with the
4836 | inexact exception raised if the abstract input cannot be represented
4837 | exactly. However, if the abstract value is too large, the overflow and
4838 | inexact exceptions are raised and an infinity or maximal finite value is
4839 | returned. If the abstract value is too small, the input value is rounded to
4840 | a subnormal number, and the underflow and inexact exceptions are raised if
4841 | the abstract input cannot be represented exactly as a subnormal quadruple-
4842 | precision floating-point number.
4843 | The input significand must be normalized or smaller. If the input
4844 | significand is not normalized, `zExp' must be 0; in that case, the result
4845 | returned is a subnormal number, and it must not require rounding. In the
4846 | usual case that the input significand is normalized, `zExp' must be 1 less
4847 | than the ``true'' floating-point exponent. The handling of underflow and
4848 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4849 *----------------------------------------------------------------------------*/
4851 static float128
roundAndPackFloat128(bool zSign
, int32_t zExp
,
4852 uint64_t zSig0
, uint64_t zSig1
,
4853 uint64_t zSig2
, float_status
*status
)
4855 int8_t roundingMode
;
4856 bool roundNearestEven
, increment
, isTiny
;
4858 roundingMode
= status
->float_rounding_mode
;
4859 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4860 switch (roundingMode
) {
4861 case float_round_nearest_even
:
4862 case float_round_ties_away
:
4863 increment
= ((int64_t)zSig2
< 0);
4865 case float_round_to_zero
:
4868 case float_round_up
:
4869 increment
= !zSign
&& zSig2
;
4871 case float_round_down
:
4872 increment
= zSign
&& zSig2
;
4874 case float_round_to_odd
:
4875 increment
= !(zSig1
& 0x1) && zSig2
;
4880 if ( 0x7FFD <= (uint32_t) zExp
) {
4881 if ( ( 0x7FFD < zExp
)
4882 || ( ( zExp
== 0x7FFD )
4884 UINT64_C(0x0001FFFFFFFFFFFF),
4885 UINT64_C(0xFFFFFFFFFFFFFFFF),
4892 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4893 if ( ( roundingMode
== float_round_to_zero
)
4894 || ( zSign
&& ( roundingMode
== float_round_up
) )
4895 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4896 || (roundingMode
== float_round_to_odd
)
4902 UINT64_C(0x0000FFFFFFFFFFFF),
4903 UINT64_C(0xFFFFFFFFFFFFFFFF)
4906 return packFloat128( zSign
, 0x7FFF, 0, 0 );
4909 if (status
->flush_to_zero
) {
4910 float_raise(float_flag_output_denormal
, status
);
4911 return packFloat128(zSign
, 0, 0, 0);
4913 isTiny
= status
->tininess_before_rounding
4916 || lt128(zSig0
, zSig1
,
4917 UINT64_C(0x0001FFFFFFFFFFFF),
4918 UINT64_C(0xFFFFFFFFFFFFFFFF));
4919 shift128ExtraRightJamming(
4920 zSig0
, zSig1
, zSig2
, - zExp
, &zSig0
, &zSig1
, &zSig2
);
4922 if (isTiny
&& zSig2
) {
4923 float_raise(float_flag_underflow
, status
);
4925 switch (roundingMode
) {
4926 case float_round_nearest_even
:
4927 case float_round_ties_away
:
4928 increment
= ((int64_t)zSig2
< 0);
4930 case float_round_to_zero
:
4933 case float_round_up
:
4934 increment
= !zSign
&& zSig2
;
4936 case float_round_down
:
4937 increment
= zSign
&& zSig2
;
4939 case float_round_to_odd
:
4940 increment
= !(zSig1
& 0x1) && zSig2
;
4948 float_raise(float_flag_inexact
, status
);
4951 add128( zSig0
, zSig1
, 0, 1, &zSig0
, &zSig1
);
4952 if ((zSig2
+ zSig2
== 0) && roundNearestEven
) {
4957 if ( ( zSig0
| zSig1
) == 0 ) zExp
= 0;
4959 return packFloat128( zSign
, zExp
, zSig0
, zSig1
);
4963 /*----------------------------------------------------------------------------
4964 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4965 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4966 | returns the proper quadruple-precision floating-point value corresponding
4967 | to the abstract input. This routine is just like `roundAndPackFloat128'
4968 | except that the input significand has fewer bits and does not have to be
4969 | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4971 *----------------------------------------------------------------------------*/
4973 static float128
normalizeRoundAndPackFloat128(bool zSign
, int32_t zExp
,
4974 uint64_t zSig0
, uint64_t zSig1
,
4975 float_status
*status
)
4985 shiftCount
= clz64(zSig0
) - 15;
4986 if ( 0 <= shiftCount
) {
4988 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
4991 shift128ExtraRightJamming(
4992 zSig0
, zSig1
, 0, - shiftCount
, &zSig0
, &zSig1
, &zSig2
);
4995 return roundAndPackFloat128(zSign
, zExp
, zSig0
, zSig1
, zSig2
, status
);
5000 /*----------------------------------------------------------------------------
5001 | Returns the result of converting the 32-bit two's complement integer `a'
5002 | to the extended double-precision floating-point format. The conversion
5003 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5005 *----------------------------------------------------------------------------*/
5007 floatx80
int32_to_floatx80(int32_t a
, float_status
*status
)
5014 if ( a
== 0 ) return packFloatx80( 0, 0, 0 );
5016 absA
= zSign
? - a
: a
;
5017 shiftCount
= clz32(absA
) + 32;
5019 return packFloatx80( zSign
, 0x403E - shiftCount
, zSig
<<shiftCount
);
5023 /*----------------------------------------------------------------------------
5024 | Returns the result of converting the 64-bit two's complement integer `a'
5025 | to the extended double-precision floating-point format. The conversion
5026 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5028 *----------------------------------------------------------------------------*/
5030 floatx80
int64_to_floatx80(int64_t a
, float_status
*status
)
5036 if ( a
== 0 ) return packFloatx80( 0, 0, 0 );
5038 absA
= zSign
? - a
: a
;
5039 shiftCount
= clz64(absA
);
5040 return packFloatx80( zSign
, 0x403E - shiftCount
, absA
<<shiftCount
);
5044 /*----------------------------------------------------------------------------
5045 | Returns the result of converting the single-precision floating-point value
5046 | `a' to the extended double-precision floating-point format. The conversion
5047 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5049 *----------------------------------------------------------------------------*/
5051 floatx80
float32_to_floatx80(float32 a
, float_status
*status
)
5057 a
= float32_squash_input_denormal(a
, status
);
5058 aSig
= extractFloat32Frac( a
);
5059 aExp
= extractFloat32Exp( a
);
5060 aSign
= extractFloat32Sign( a
);
5061 if ( aExp
== 0xFF ) {
5063 floatx80 res
= commonNaNToFloatx80(float32ToCommonNaN(a
, status
),
5065 return floatx80_silence_nan(res
, status
);
5067 return packFloatx80(aSign
,
5068 floatx80_infinity_high
,
5069 floatx80_infinity_low
);
5072 if ( aSig
== 0 ) return packFloatx80( aSign
, 0, 0 );
5073 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5076 return packFloatx80( aSign
, aExp
+ 0x3F80, ( (uint64_t) aSig
)<<40 );
5080 /*----------------------------------------------------------------------------
5081 | Returns the remainder of the single-precision floating-point value `a'
5082 | with respect to the corresponding value `b'. The operation is performed
5083 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5084 *----------------------------------------------------------------------------*/
5086 float32
float32_rem(float32 a
, float32 b
, float_status
*status
)
5089 int aExp
, bExp
, expDiff
;
5090 uint32_t aSig
, bSig
;
5092 uint64_t aSig64
, bSig64
, q64
;
5093 uint32_t alternateASig
;
5095 a
= float32_squash_input_denormal(a
, status
);
5096 b
= float32_squash_input_denormal(b
, status
);
5098 aSig
= extractFloat32Frac( a
);
5099 aExp
= extractFloat32Exp( a
);
5100 aSign
= extractFloat32Sign( a
);
5101 bSig
= extractFloat32Frac( b
);
5102 bExp
= extractFloat32Exp( b
);
5103 if ( aExp
== 0xFF ) {
5104 if ( aSig
|| ( ( bExp
== 0xFF ) && bSig
) ) {
5105 return propagateFloat32NaN(a
, b
, status
);
5107 float_raise(float_flag_invalid
, status
);
5108 return float32_default_nan(status
);
5110 if ( bExp
== 0xFF ) {
5112 return propagateFloat32NaN(a
, b
, status
);
5118 float_raise(float_flag_invalid
, status
);
5119 return float32_default_nan(status
);
5121 normalizeFloat32Subnormal( bSig
, &bExp
, &bSig
);
5124 if ( aSig
== 0 ) return a
;
5125 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5127 expDiff
= aExp
- bExp
;
5130 if ( expDiff
< 32 ) {
5133 if ( expDiff
< 0 ) {
5134 if ( expDiff
< -1 ) return a
;
5137 q
= ( bSig
<= aSig
);
5138 if ( q
) aSig
-= bSig
;
5139 if ( 0 < expDiff
) {
5140 q
= ( ( (uint64_t) aSig
)<<32 ) / bSig
;
5143 aSig
= ( ( aSig
>>1 )<<( expDiff
- 1 ) ) - bSig
* q
;
5151 if ( bSig
<= aSig
) aSig
-= bSig
;
5152 aSig64
= ( (uint64_t) aSig
)<<40;
5153 bSig64
= ( (uint64_t) bSig
)<<40;
5155 while ( 0 < expDiff
) {
5156 q64
= estimateDiv128To64( aSig64
, 0, bSig64
);
5157 q64
= ( 2 < q64
) ? q64
- 2 : 0;
5158 aSig64
= - ( ( bSig
* q64
)<<38 );
5162 q64
= estimateDiv128To64( aSig64
, 0, bSig64
);
5163 q64
= ( 2 < q64
) ? q64
- 2 : 0;
5164 q
= q64
>>( 64 - expDiff
);
5166 aSig
= ( ( aSig64
>>33 )<<( expDiff
- 1 ) ) - bSig
* q
;
5169 alternateASig
= aSig
;
5172 } while ( 0 <= (int32_t) aSig
);
5173 sigMean
= aSig
+ alternateASig
;
5174 if ( ( sigMean
< 0 ) || ( ( sigMean
== 0 ) && ( q
& 1 ) ) ) {
5175 aSig
= alternateASig
;
5177 zSign
= ( (int32_t) aSig
< 0 );
5178 if ( zSign
) aSig
= - aSig
;
5179 return normalizeRoundAndPackFloat32(aSign
^ zSign
, bExp
, aSig
, status
);
5184 /*----------------------------------------------------------------------------
5185 | Returns the binary exponential of the single-precision floating-point value
5186 | `a'. The operation is performed according to the IEC/IEEE Standard for
5187 | Binary Floating-Point Arithmetic.
5189 | Uses the following identities:
5191 | 1. -------------------------------------------------------------------------
5195 | 2. -------------------------------------------------------------------------
5198 | e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5200 *----------------------------------------------------------------------------*/
5202 static const float64 float32_exp2_coefficients
[15] =
5204 const_float64( 0x3ff0000000000000ll
), /* 1 */
5205 const_float64( 0x3fe0000000000000ll
), /* 2 */
5206 const_float64( 0x3fc5555555555555ll
), /* 3 */
5207 const_float64( 0x3fa5555555555555ll
), /* 4 */
5208 const_float64( 0x3f81111111111111ll
), /* 5 */
5209 const_float64( 0x3f56c16c16c16c17ll
), /* 6 */
5210 const_float64( 0x3f2a01a01a01a01all
), /* 7 */
5211 const_float64( 0x3efa01a01a01a01all
), /* 8 */
5212 const_float64( 0x3ec71de3a556c734ll
), /* 9 */
5213 const_float64( 0x3e927e4fb7789f5cll
), /* 10 */
5214 const_float64( 0x3e5ae64567f544e4ll
), /* 11 */
5215 const_float64( 0x3e21eed8eff8d898ll
), /* 12 */
5216 const_float64( 0x3de6124613a86d09ll
), /* 13 */
5217 const_float64( 0x3da93974a8c07c9dll
), /* 14 */
5218 const_float64( 0x3d6ae7f3e733b81fll
), /* 15 */
5221 float32
float32_exp2(float32 a
, float_status
*status
)
5228 a
= float32_squash_input_denormal(a
, status
);
5230 aSig
= extractFloat32Frac( a
);
5231 aExp
= extractFloat32Exp( a
);
5232 aSign
= extractFloat32Sign( a
);
5234 if ( aExp
== 0xFF) {
5236 return propagateFloat32NaN(a
, float32_zero
, status
);
5238 return (aSign
) ? float32_zero
: a
;
5241 if (aSig
== 0) return float32_one
;
5244 float_raise(float_flag_inexact
, status
);
5246 /* ******************************* */
5247 /* using float64 for approximation */
5248 /* ******************************* */
5249 x
= float32_to_float64(a
, status
);
5250 x
= float64_mul(x
, float64_ln2
, status
);
5254 for (i
= 0 ; i
< 15 ; i
++) {
5257 f
= float64_mul(xn
, float32_exp2_coefficients
[i
], status
);
5258 r
= float64_add(r
, f
, status
);
5260 xn
= float64_mul(xn
, x
, status
);
5263 return float64_to_float32(r
, status
);
5266 /*----------------------------------------------------------------------------
5267 | Returns the binary log of the single-precision floating-point value `a'.
5268 | The operation is performed according to the IEC/IEEE Standard for Binary
5269 | Floating-Point Arithmetic.
5270 *----------------------------------------------------------------------------*/
5271 float32
float32_log2(float32 a
, float_status
*status
)
5275 uint32_t aSig
, zSig
, i
;
5277 a
= float32_squash_input_denormal(a
, status
);
5278 aSig
= extractFloat32Frac( a
);
5279 aExp
= extractFloat32Exp( a
);
5280 aSign
= extractFloat32Sign( a
);
5283 if ( aSig
== 0 ) return packFloat32( 1, 0xFF, 0 );
5284 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5287 float_raise(float_flag_invalid
, status
);
5288 return float32_default_nan(status
);
5290 if ( aExp
== 0xFF ) {
5292 return propagateFloat32NaN(a
, float32_zero
, status
);
5302 for (i
= 1 << 22; i
> 0; i
>>= 1) {
5303 aSig
= ( (uint64_t)aSig
* aSig
) >> 23;
5304 if ( aSig
& 0x01000000 ) {
5313 return normalizeRoundAndPackFloat32(zSign
, 0x85, zSig
, status
);
5316 /*----------------------------------------------------------------------------
5317 | Returns the result of converting the double-precision floating-point value
5318 | `a' to the extended double-precision floating-point format. The conversion
5319 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5321 *----------------------------------------------------------------------------*/
5323 floatx80
float64_to_floatx80(float64 a
, float_status
*status
)
5329 a
= float64_squash_input_denormal(a
, status
);
5330 aSig
= extractFloat64Frac( a
);
5331 aExp
= extractFloat64Exp( a
);
5332 aSign
= extractFloat64Sign( a
);
5333 if ( aExp
== 0x7FF ) {
5335 floatx80 res
= commonNaNToFloatx80(float64ToCommonNaN(a
, status
),
5337 return floatx80_silence_nan(res
, status
);
5339 return packFloatx80(aSign
,
5340 floatx80_infinity_high
,
5341 floatx80_infinity_low
);
5344 if ( aSig
== 0 ) return packFloatx80( aSign
, 0, 0 );
5345 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5349 aSign
, aExp
+ 0x3C00, (aSig
| UINT64_C(0x0010000000000000)) << 11);
5353 /*----------------------------------------------------------------------------
5354 | Returns the remainder of the double-precision floating-point value `a'
5355 | with respect to the corresponding value `b'. The operation is performed
5356 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5357 *----------------------------------------------------------------------------*/
5359 float64
float64_rem(float64 a
, float64 b
, float_status
*status
)
5362 int aExp
, bExp
, expDiff
;
5363 uint64_t aSig
, bSig
;
5364 uint64_t q
, alternateASig
;
5367 a
= float64_squash_input_denormal(a
, status
);
5368 b
= float64_squash_input_denormal(b
, status
);
5369 aSig
= extractFloat64Frac( a
);
5370 aExp
= extractFloat64Exp( a
);
5371 aSign
= extractFloat64Sign( a
);
5372 bSig
= extractFloat64Frac( b
);
5373 bExp
= extractFloat64Exp( b
);
5374 if ( aExp
== 0x7FF ) {
5375 if ( aSig
|| ( ( bExp
== 0x7FF ) && bSig
) ) {
5376 return propagateFloat64NaN(a
, b
, status
);
5378 float_raise(float_flag_invalid
, status
);
5379 return float64_default_nan(status
);
5381 if ( bExp
== 0x7FF ) {
5383 return propagateFloat64NaN(a
, b
, status
);
5389 float_raise(float_flag_invalid
, status
);
5390 return float64_default_nan(status
);
5392 normalizeFloat64Subnormal( bSig
, &bExp
, &bSig
);
5395 if ( aSig
== 0 ) return a
;
5396 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5398 expDiff
= aExp
- bExp
;
5399 aSig
= (aSig
| UINT64_C(0x0010000000000000)) << 11;
5400 bSig
= (bSig
| UINT64_C(0x0010000000000000)) << 11;
5401 if ( expDiff
< 0 ) {
5402 if ( expDiff
< -1 ) return a
;
5405 q
= ( bSig
<= aSig
);
5406 if ( q
) aSig
-= bSig
;
5408 while ( 0 < expDiff
) {
5409 q
= estimateDiv128To64( aSig
, 0, bSig
);
5410 q
= ( 2 < q
) ? q
- 2 : 0;
5411 aSig
= - ( ( bSig
>>2 ) * q
);
5415 if ( 0 < expDiff
) {
5416 q
= estimateDiv128To64( aSig
, 0, bSig
);
5417 q
= ( 2 < q
) ? q
- 2 : 0;
5420 aSig
= ( ( aSig
>>1 )<<( expDiff
- 1 ) ) - bSig
* q
;
5427 alternateASig
= aSig
;
5430 } while ( 0 <= (int64_t) aSig
);
5431 sigMean
= aSig
+ alternateASig
;
5432 if ( ( sigMean
< 0 ) || ( ( sigMean
== 0 ) && ( q
& 1 ) ) ) {
5433 aSig
= alternateASig
;
5435 zSign
= ( (int64_t) aSig
< 0 );
5436 if ( zSign
) aSig
= - aSig
;
5437 return normalizeRoundAndPackFloat64(aSign
^ zSign
, bExp
, aSig
, status
);
5441 /*----------------------------------------------------------------------------
5442 | Returns the binary log of the double-precision floating-point value `a'.
5443 | The operation is performed according to the IEC/IEEE Standard for Binary
5444 | Floating-Point Arithmetic.
5445 *----------------------------------------------------------------------------*/
5446 float64
float64_log2(float64 a
, float_status
*status
)
5450 uint64_t aSig
, aSig0
, aSig1
, zSig
, i
;
5451 a
= float64_squash_input_denormal(a
, status
);
5453 aSig
= extractFloat64Frac( a
);
5454 aExp
= extractFloat64Exp( a
);
5455 aSign
= extractFloat64Sign( a
);
5458 if ( aSig
== 0 ) return packFloat64( 1, 0x7FF, 0 );
5459 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5462 float_raise(float_flag_invalid
, status
);
5463 return float64_default_nan(status
);
5465 if ( aExp
== 0x7FF ) {
5467 return propagateFloat64NaN(a
, float64_zero
, status
);
5473 aSig
|= UINT64_C(0x0010000000000000);
5475 zSig
= (uint64_t)aExp
<< 52;
5476 for (i
= 1LL << 51; i
> 0; i
>>= 1) {
5477 mul64To128( aSig
, aSig
, &aSig0
, &aSig1
);
5478 aSig
= ( aSig0
<< 12 ) | ( aSig1
>> 52 );
5479 if ( aSig
& UINT64_C(0x0020000000000000) ) {
5487 return normalizeRoundAndPackFloat64(zSign
, 0x408, zSig
, status
);
5490 /*----------------------------------------------------------------------------
5491 | Returns the result of converting the extended double-precision floating-
5492 | point value `a' to the 32-bit two's complement integer format. The
5493 | conversion is performed according to the IEC/IEEE Standard for Binary
5494 | Floating-Point Arithmetic---which means in particular that the conversion
5495 | is rounded according to the current rounding mode. If `a' is a NaN, the
5496 | largest positive integer is returned. Otherwise, if the conversion
5497 | overflows, the largest integer with the same sign as `a' is returned.
5498 *----------------------------------------------------------------------------*/
5500 int32_t floatx80_to_int32(floatx80 a
, float_status
*status
)
5503 int32_t aExp
, shiftCount
;
5506 if (floatx80_invalid_encoding(a
)) {
5507 float_raise(float_flag_invalid
, status
);
5510 aSig
= extractFloatx80Frac( a
);
5511 aExp
= extractFloatx80Exp( a
);
5512 aSign
= extractFloatx80Sign( a
);
5513 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) aSign
= 0;
5514 shiftCount
= 0x4037 - aExp
;
5515 if ( shiftCount
<= 0 ) shiftCount
= 1;
5516 shift64RightJamming( aSig
, shiftCount
, &aSig
);
5517 return roundAndPackInt32(aSign
, aSig
, status
);
5521 /*----------------------------------------------------------------------------
5522 | Returns the result of converting the extended double-precision floating-
5523 | point value `a' to the 32-bit two's complement integer format. The
5524 | conversion is performed according to the IEC/IEEE Standard for Binary
5525 | Floating-Point Arithmetic, except that the conversion is always rounded
5526 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5527 | Otherwise, if the conversion overflows, the largest integer with the same
5528 | sign as `a' is returned.
5529 *----------------------------------------------------------------------------*/
5531 int32_t floatx80_to_int32_round_to_zero(floatx80 a
, float_status
*status
)
5534 int32_t aExp
, shiftCount
;
5535 uint64_t aSig
, savedASig
;
5538 if (floatx80_invalid_encoding(a
)) {
5539 float_raise(float_flag_invalid
, status
);
5542 aSig
= extractFloatx80Frac( a
);
5543 aExp
= extractFloatx80Exp( a
);
5544 aSign
= extractFloatx80Sign( a
);
5545 if ( 0x401E < aExp
) {
5546 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) aSign
= 0;
5549 else if ( aExp
< 0x3FFF ) {
5551 float_raise(float_flag_inexact
, status
);
5555 shiftCount
= 0x403E - aExp
;
5557 aSig
>>= shiftCount
;
5559 if ( aSign
) z
= - z
;
5560 if ( ( z
< 0 ) ^ aSign
) {
5562 float_raise(float_flag_invalid
, status
);
5563 return aSign
? (int32_t) 0x80000000 : 0x7FFFFFFF;
5565 if ( ( aSig
<<shiftCount
) != savedASig
) {
5566 float_raise(float_flag_inexact
, status
);
5572 /*----------------------------------------------------------------------------
5573 | Returns the result of converting the extended double-precision floating-
5574 | point value `a' to the 64-bit two's complement integer format. The
5575 | conversion is performed according to the IEC/IEEE Standard for Binary
5576 | Floating-Point Arithmetic---which means in particular that the conversion
5577 | is rounded according to the current rounding mode. If `a' is a NaN,
5578 | the largest positive integer is returned. Otherwise, if the conversion
5579 | overflows, the largest integer with the same sign as `a' is returned.
5580 *----------------------------------------------------------------------------*/
5582 int64_t floatx80_to_int64(floatx80 a
, float_status
*status
)
5585 int32_t aExp
, shiftCount
;
5586 uint64_t aSig
, aSigExtra
;
5588 if (floatx80_invalid_encoding(a
)) {
5589 float_raise(float_flag_invalid
, status
);
5592 aSig
= extractFloatx80Frac( a
);
5593 aExp
= extractFloatx80Exp( a
);
5594 aSign
= extractFloatx80Sign( a
);
5595 shiftCount
= 0x403E - aExp
;
5596 if ( shiftCount
<= 0 ) {
5598 float_raise(float_flag_invalid
, status
);
5599 if (!aSign
|| floatx80_is_any_nan(a
)) {
5607 shift64ExtraRightJamming( aSig
, 0, shiftCount
, &aSig
, &aSigExtra
);
5609 return roundAndPackInt64(aSign
, aSig
, aSigExtra
, status
);
5613 /*----------------------------------------------------------------------------
5614 | Returns the result of converting the extended double-precision floating-
5615 | point value `a' to the 64-bit two's complement integer format. The
5616 | conversion is performed according to the IEC/IEEE Standard for Binary
5617 | Floating-Point Arithmetic, except that the conversion is always rounded
5618 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5619 | Otherwise, if the conversion overflows, the largest integer with the same
5620 | sign as `a' is returned.
5621 *----------------------------------------------------------------------------*/
5623 int64_t floatx80_to_int64_round_to_zero(floatx80 a
, float_status
*status
)
5626 int32_t aExp
, shiftCount
;
5630 if (floatx80_invalid_encoding(a
)) {
5631 float_raise(float_flag_invalid
, status
);
5634 aSig
= extractFloatx80Frac( a
);
5635 aExp
= extractFloatx80Exp( a
);
5636 aSign
= extractFloatx80Sign( a
);
5637 shiftCount
= aExp
- 0x403E;
5638 if ( 0 <= shiftCount
) {
5639 aSig
&= UINT64_C(0x7FFFFFFFFFFFFFFF);
5640 if ( ( a
.high
!= 0xC03E ) || aSig
) {
5641 float_raise(float_flag_invalid
, status
);
5642 if ( ! aSign
|| ( ( aExp
== 0x7FFF ) && aSig
) ) {
5648 else if ( aExp
< 0x3FFF ) {
5650 float_raise(float_flag_inexact
, status
);
5654 z
= aSig
>>( - shiftCount
);
5655 if ( (uint64_t) ( aSig
<<( shiftCount
& 63 ) ) ) {
5656 float_raise(float_flag_inexact
, status
);
5658 if ( aSign
) z
= - z
;
5663 /*----------------------------------------------------------------------------
5664 | Returns the result of converting the extended double-precision floating-
5665 | point value `a' to the single-precision floating-point format. The
5666 | conversion is performed according to the IEC/IEEE Standard for Binary
5667 | Floating-Point Arithmetic.
5668 *----------------------------------------------------------------------------*/
5670 float32
floatx80_to_float32(floatx80 a
, float_status
*status
)
5676 if (floatx80_invalid_encoding(a
)) {
5677 float_raise(float_flag_invalid
, status
);
5678 return float32_default_nan(status
);
5680 aSig
= extractFloatx80Frac( a
);
5681 aExp
= extractFloatx80Exp( a
);
5682 aSign
= extractFloatx80Sign( a
);
5683 if ( aExp
== 0x7FFF ) {
5684 if ( (uint64_t) ( aSig
<<1 ) ) {
5685 float32 res
= commonNaNToFloat32(floatx80ToCommonNaN(a
, status
),
5687 return float32_silence_nan(res
, status
);
5689 return packFloat32( aSign
, 0xFF, 0 );
5691 shift64RightJamming( aSig
, 33, &aSig
);
5692 if ( aExp
|| aSig
) aExp
-= 0x3F81;
5693 return roundAndPackFloat32(aSign
, aExp
, aSig
, status
);
5697 /*----------------------------------------------------------------------------
5698 | Returns the result of converting the extended double-precision floating-
5699 | point value `a' to the double-precision floating-point format. The
5700 | conversion is performed according to the IEC/IEEE Standard for Binary
5701 | Floating-Point Arithmetic.
5702 *----------------------------------------------------------------------------*/
5704 float64
floatx80_to_float64(floatx80 a
, float_status
*status
)
5708 uint64_t aSig
, zSig
;
5710 if (floatx80_invalid_encoding(a
)) {
5711 float_raise(float_flag_invalid
, status
);
5712 return float64_default_nan(status
);
5714 aSig
= extractFloatx80Frac( a
);
5715 aExp
= extractFloatx80Exp( a
);
5716 aSign
= extractFloatx80Sign( a
);
5717 if ( aExp
== 0x7FFF ) {
5718 if ( (uint64_t) ( aSig
<<1 ) ) {
5719 float64 res
= commonNaNToFloat64(floatx80ToCommonNaN(a
, status
),
5721 return float64_silence_nan(res
, status
);
5723 return packFloat64( aSign
, 0x7FF, 0 );
5725 shift64RightJamming( aSig
, 1, &zSig
);
5726 if ( aExp
|| aSig
) aExp
-= 0x3C01;
5727 return roundAndPackFloat64(aSign
, aExp
, zSig
, status
);
5731 /*----------------------------------------------------------------------------
5732 | Returns the result of converting the extended double-precision floating-
5733 | point value `a' to the quadruple-precision floating-point format. The
5734 | conversion is performed according to the IEC/IEEE Standard for Binary
5735 | Floating-Point Arithmetic.
5736 *----------------------------------------------------------------------------*/
5738 float128
floatx80_to_float128(floatx80 a
, float_status
*status
)
5742 uint64_t aSig
, zSig0
, zSig1
;
5744 if (floatx80_invalid_encoding(a
)) {
5745 float_raise(float_flag_invalid
, status
);
5746 return float128_default_nan(status
);
5748 aSig
= extractFloatx80Frac( a
);
5749 aExp
= extractFloatx80Exp( a
);
5750 aSign
= extractFloatx80Sign( a
);
5751 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) {
5752 float128 res
= commonNaNToFloat128(floatx80ToCommonNaN(a
, status
),
5754 return float128_silence_nan(res
, status
);
5756 shift128Right( aSig
<<1, 0, 16, &zSig0
, &zSig1
);
5757 return packFloat128( aSign
, aExp
, zSig0
, zSig1
);
5761 /*----------------------------------------------------------------------------
5762 | Rounds the extended double-precision floating-point value `a'
5763 | to the precision provided by floatx80_rounding_precision and returns the
5764 | result as an extended double-precision floating-point value.
5765 | The operation is performed according to the IEC/IEEE Standard for Binary
5766 | Floating-Point Arithmetic.
5767 *----------------------------------------------------------------------------*/
5769 floatx80
floatx80_round(floatx80 a
, float_status
*status
)
5771 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
5772 extractFloatx80Sign(a
),
5773 extractFloatx80Exp(a
),
5774 extractFloatx80Frac(a
), 0, status
);
5777 /*----------------------------------------------------------------------------
5778 | Rounds the extended double-precision floating-point value `a' to an integer,
5779 | and returns the result as an extended quadruple-precision floating-point
5780 | value. The operation is performed according to the IEC/IEEE Standard for
5781 | Binary Floating-Point Arithmetic.
5782 *----------------------------------------------------------------------------*/
5784 floatx80
floatx80_round_to_int(floatx80 a
, float_status
*status
)
5788 uint64_t lastBitMask
, roundBitsMask
;
5791 if (floatx80_invalid_encoding(a
)) {
5792 float_raise(float_flag_invalid
, status
);
5793 return floatx80_default_nan(status
);
5795 aExp
= extractFloatx80Exp( a
);
5796 if ( 0x403E <= aExp
) {
5797 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a
)<<1 ) ) {
5798 return propagateFloatx80NaN(a
, a
, status
);
5802 if ( aExp
< 0x3FFF ) {
5804 && ( (uint64_t) ( extractFloatx80Frac( a
) ) == 0 ) ) {
5807 float_raise(float_flag_inexact
, status
);
5808 aSign
= extractFloatx80Sign( a
);
5809 switch (status
->float_rounding_mode
) {
5810 case float_round_nearest_even
:
5811 if ( ( aExp
== 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a
)<<1 )
5814 packFloatx80( aSign
, 0x3FFF, UINT64_C(0x8000000000000000));
5817 case float_round_ties_away
:
5818 if (aExp
== 0x3FFE) {
5819 return packFloatx80(aSign
, 0x3FFF, UINT64_C(0x8000000000000000));
5822 case float_round_down
:
5825 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5826 : packFloatx80( 0, 0, 0 );
5827 case float_round_up
:
5829 aSign
? packFloatx80( 1, 0, 0 )
5830 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5832 case float_round_to_zero
:
5835 g_assert_not_reached();
5837 return packFloatx80( aSign
, 0, 0 );
5840 lastBitMask
<<= 0x403E - aExp
;
5841 roundBitsMask
= lastBitMask
- 1;
5843 switch (status
->float_rounding_mode
) {
5844 case float_round_nearest_even
:
5845 z
.low
+= lastBitMask
>>1;
5846 if ((z
.low
& roundBitsMask
) == 0) {
5847 z
.low
&= ~lastBitMask
;
5850 case float_round_ties_away
:
5851 z
.low
+= lastBitMask
>> 1;
5853 case float_round_to_zero
:
5855 case float_round_up
:
5856 if (!extractFloatx80Sign(z
)) {
5857 z
.low
+= roundBitsMask
;
5860 case float_round_down
:
5861 if (extractFloatx80Sign(z
)) {
5862 z
.low
+= roundBitsMask
;
5868 z
.low
&= ~ roundBitsMask
;
5871 z
.low
= UINT64_C(0x8000000000000000);
5873 if (z
.low
!= a
.low
) {
5874 float_raise(float_flag_inexact
, status
);
5880 /*----------------------------------------------------------------------------
5881 | Returns the result of dividing the extended double-precision floating-point
5882 | value `a' by the corresponding value `b'. The operation is performed
5883 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5884 *----------------------------------------------------------------------------*/
5886 floatx80
floatx80_div(floatx80 a
, floatx80 b
, float_status
*status
)
5888 bool aSign
, bSign
, zSign
;
5889 int32_t aExp
, bExp
, zExp
;
5890 uint64_t aSig
, bSig
, zSig0
, zSig1
;
5891 uint64_t rem0
, rem1
, rem2
, term0
, term1
, term2
;
5893 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
5894 float_raise(float_flag_invalid
, status
);
5895 return floatx80_default_nan(status
);
5897 aSig
= extractFloatx80Frac( a
);
5898 aExp
= extractFloatx80Exp( a
);
5899 aSign
= extractFloatx80Sign( a
);
5900 bSig
= extractFloatx80Frac( b
);
5901 bExp
= extractFloatx80Exp( b
);
5902 bSign
= extractFloatx80Sign( b
);
5903 zSign
= aSign
^ bSign
;
5904 if ( aExp
== 0x7FFF ) {
5905 if ((uint64_t)(aSig
<< 1)) {
5906 return propagateFloatx80NaN(a
, b
, status
);
5908 if ( bExp
== 0x7FFF ) {
5909 if ((uint64_t)(bSig
<< 1)) {
5910 return propagateFloatx80NaN(a
, b
, status
);
5914 return packFloatx80(zSign
, floatx80_infinity_high
,
5915 floatx80_infinity_low
);
5917 if ( bExp
== 0x7FFF ) {
5918 if ((uint64_t)(bSig
<< 1)) {
5919 return propagateFloatx80NaN(a
, b
, status
);
5921 return packFloatx80( zSign
, 0, 0 );
5925 if ( ( aExp
| aSig
) == 0 ) {
5927 float_raise(float_flag_invalid
, status
);
5928 return floatx80_default_nan(status
);
5930 float_raise(float_flag_divbyzero
, status
);
5931 return packFloatx80(zSign
, floatx80_infinity_high
,
5932 floatx80_infinity_low
);
5934 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
5937 if ( aSig
== 0 ) return packFloatx80( zSign
, 0, 0 );
5938 normalizeFloatx80Subnormal( aSig
, &aExp
, &aSig
);
5940 zExp
= aExp
- bExp
+ 0x3FFE;
5942 if ( bSig
<= aSig
) {
5943 shift128Right( aSig
, 0, 1, &aSig
, &rem1
);
5946 zSig0
= estimateDiv128To64( aSig
, rem1
, bSig
);
5947 mul64To128( bSig
, zSig0
, &term0
, &term1
);
5948 sub128( aSig
, rem1
, term0
, term1
, &rem0
, &rem1
);
5949 while ( (int64_t) rem0
< 0 ) {
5951 add128( rem0
, rem1
, 0, bSig
, &rem0
, &rem1
);
5953 zSig1
= estimateDiv128To64( rem1
, 0, bSig
);
5954 if ( (uint64_t) ( zSig1
<<1 ) <= 8 ) {
5955 mul64To128( bSig
, zSig1
, &term1
, &term2
);
5956 sub128( rem1
, 0, term1
, term2
, &rem1
, &rem2
);
5957 while ( (int64_t) rem1
< 0 ) {
5959 add128( rem1
, rem2
, 0, bSig
, &rem1
, &rem2
);
5961 zSig1
|= ( ( rem1
| rem2
) != 0 );
5963 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
5964 zSign
, zExp
, zSig0
, zSig1
, status
);
5967 /*----------------------------------------------------------------------------
5968 | Returns the remainder of the extended double-precision floating-point value
5969 | `a' with respect to the corresponding value `b'. The operation is performed
5970 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
5971 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
5972 | the quotient toward zero instead. '*quotient' is set to the low 64 bits of
5973 | the absolute value of the integer quotient.
5974 *----------------------------------------------------------------------------*/
5976 floatx80
floatx80_modrem(floatx80 a
, floatx80 b
, bool mod
, uint64_t *quotient
,
5977 float_status
*status
)
5980 int32_t aExp
, bExp
, expDiff
, aExpOrig
;
5981 uint64_t aSig0
, aSig1
, bSig
;
5982 uint64_t q
, term0
, term1
, alternateASig0
, alternateASig1
;
5985 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
5986 float_raise(float_flag_invalid
, status
);
5987 return floatx80_default_nan(status
);
5989 aSig0
= extractFloatx80Frac( a
);
5990 aExpOrig
= aExp
= extractFloatx80Exp( a
);
5991 aSign
= extractFloatx80Sign( a
);
5992 bSig
= extractFloatx80Frac( b
);
5993 bExp
= extractFloatx80Exp( b
);
5994 if ( aExp
== 0x7FFF ) {
5995 if ( (uint64_t) ( aSig0
<<1 )
5996 || ( ( bExp
== 0x7FFF ) && (uint64_t) ( bSig
<<1 ) ) ) {
5997 return propagateFloatx80NaN(a
, b
, status
);
6001 if ( bExp
== 0x7FFF ) {
6002 if ((uint64_t)(bSig
<< 1)) {
6003 return propagateFloatx80NaN(a
, b
, status
);
6005 if (aExp
== 0 && aSig0
>> 63) {
6007 * Pseudo-denormal argument must be returned in normalized
6010 return packFloatx80(aSign
, 1, aSig0
);
6017 float_raise(float_flag_invalid
, status
);
6018 return floatx80_default_nan(status
);
6020 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
6023 if ( aSig0
== 0 ) return a
;
6024 normalizeFloatx80Subnormal( aSig0
, &aExp
, &aSig0
);
6027 expDiff
= aExp
- bExp
;
6029 if ( expDiff
< 0 ) {
6030 if ( mod
|| expDiff
< -1 ) {
6031 if (aExp
== 1 && aExpOrig
== 0) {
6033 * Pseudo-denormal argument must be returned in
6036 return packFloatx80(aSign
, aExp
, aSig0
);
6040 shift128Right( aSig0
, 0, 1, &aSig0
, &aSig1
);
6043 *quotient
= q
= ( bSig
<= aSig0
);
6044 if ( q
) aSig0
-= bSig
;
6046 while ( 0 < expDiff
) {
6047 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
6048 q
= ( 2 < q
) ? q
- 2 : 0;
6049 mul64To128( bSig
, q
, &term0
, &term1
);
6050 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6051 shortShift128Left( aSig0
, aSig1
, 62, &aSig0
, &aSig1
);
6057 if ( 0 < expDiff
) {
6058 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
6059 q
= ( 2 < q
) ? q
- 2 : 0;
6061 mul64To128( bSig
, q
<<( 64 - expDiff
), &term0
, &term1
);
6062 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6063 shortShift128Left( 0, bSig
, 64 - expDiff
, &term0
, &term1
);
6064 while ( le128( term0
, term1
, aSig0
, aSig1
) ) {
6066 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6069 *quotient
<<= expDiff
;
6080 sub128( term0
, term1
, aSig0
, aSig1
, &alternateASig0
, &alternateASig1
);
6081 if ( lt128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
6082 || ( eq128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
6085 aSig0
= alternateASig0
;
6086 aSig1
= alternateASig1
;
6092 normalizeRoundAndPackFloatx80(
6093 floatx80_precision_x
, zSign
, bExp
+ expDiff
, aSig0
, aSig1
, status
);
6097 /*----------------------------------------------------------------------------
6098 | Returns the remainder of the extended double-precision floating-point value
6099 | `a' with respect to the corresponding value `b'. The operation is performed
6100 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6101 *----------------------------------------------------------------------------*/
6103 floatx80
floatx80_rem(floatx80 a
, floatx80 b
, float_status
*status
)
6106 return floatx80_modrem(a
, b
, false, "ient
, status
);
6109 /*----------------------------------------------------------------------------
6110 | Returns the remainder of the extended double-precision floating-point value
6111 | `a' with respect to the corresponding value `b', with the quotient truncated
6113 *----------------------------------------------------------------------------*/
6115 floatx80
floatx80_mod(floatx80 a
, floatx80 b
, float_status
*status
)
6118 return floatx80_modrem(a
, b
, true, "ient
, status
);
6121 /*----------------------------------------------------------------------------
6122 | Returns the square root of the extended double-precision floating-point
6123 | value `a'. The operation is performed according to the IEC/IEEE Standard
6124 | for Binary Floating-Point Arithmetic.
6125 *----------------------------------------------------------------------------*/
6127 floatx80
floatx80_sqrt(floatx80 a
, float_status
*status
)
6131 uint64_t aSig0
, aSig1
, zSig0
, zSig1
, doubleZSig0
;
6132 uint64_t rem0
, rem1
, rem2
, rem3
, term0
, term1
, term2
, term3
;
6134 if (floatx80_invalid_encoding(a
)) {
6135 float_raise(float_flag_invalid
, status
);
6136 return floatx80_default_nan(status
);
6138 aSig0
= extractFloatx80Frac( a
);
6139 aExp
= extractFloatx80Exp( a
);
6140 aSign
= extractFloatx80Sign( a
);
6141 if ( aExp
== 0x7FFF ) {
6142 if ((uint64_t)(aSig0
<< 1)) {
6143 return propagateFloatx80NaN(a
, a
, status
);
6145 if ( ! aSign
) return a
;
6149 if ( ( aExp
| aSig0
) == 0 ) return a
;
6151 float_raise(float_flag_invalid
, status
);
6152 return floatx80_default_nan(status
);
6155 if ( aSig0
== 0 ) return packFloatx80( 0, 0, 0 );
6156 normalizeFloatx80Subnormal( aSig0
, &aExp
, &aSig0
);
6158 zExp
= ( ( aExp
- 0x3FFF )>>1 ) + 0x3FFF;
6159 zSig0
= estimateSqrt32( aExp
, aSig0
>>32 );
6160 shift128Right( aSig0
, 0, 2 + ( aExp
& 1 ), &aSig0
, &aSig1
);
6161 zSig0
= estimateDiv128To64( aSig0
, aSig1
, zSig0
<<32 ) + ( zSig0
<<30 );
6162 doubleZSig0
= zSig0
<<1;
6163 mul64To128( zSig0
, zSig0
, &term0
, &term1
);
6164 sub128( aSig0
, aSig1
, term0
, term1
, &rem0
, &rem1
);
6165 while ( (int64_t) rem0
< 0 ) {
6168 add128( rem0
, rem1
, zSig0
>>63, doubleZSig0
| 1, &rem0
, &rem1
);
6170 zSig1
= estimateDiv128To64( rem1
, 0, doubleZSig0
);
6171 if ( ( zSig1
& UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
6172 if ( zSig1
== 0 ) zSig1
= 1;
6173 mul64To128( doubleZSig0
, zSig1
, &term1
, &term2
);
6174 sub128( rem1
, 0, term1
, term2
, &rem1
, &rem2
);
6175 mul64To128( zSig1
, zSig1
, &term2
, &term3
);
6176 sub192( rem1
, rem2
, 0, 0, term2
, term3
, &rem1
, &rem2
, &rem3
);
6177 while ( (int64_t) rem1
< 0 ) {
6179 shortShift128Left( 0, zSig1
, 1, &term2
, &term3
);
6181 term2
|= doubleZSig0
;
6182 add192( rem1
, rem2
, rem3
, 0, term2
, term3
, &rem1
, &rem2
, &rem3
);
6184 zSig1
|= ( ( rem1
| rem2
| rem3
) != 0 );
6186 shortShift128Left( 0, zSig1
, 1, &zSig0
, &zSig1
);
6187 zSig0
|= doubleZSig0
;
6188 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
6189 0, zExp
, zSig0
, zSig1
, status
);
6192 /*----------------------------------------------------------------------------
6193 | Returns the result of converting the quadruple-precision floating-point
6194 | value `a' to the extended double-precision floating-point format. The
6195 | conversion is performed according to the IEC/IEEE Standard for Binary
6196 | Floating-Point Arithmetic.
6197 *----------------------------------------------------------------------------*/
6199 floatx80
float128_to_floatx80(float128 a
, float_status
*status
)
6203 uint64_t aSig0
, aSig1
;
6205 aSig1
= extractFloat128Frac1( a
);
6206 aSig0
= extractFloat128Frac0( a
);
6207 aExp
= extractFloat128Exp( a
);
6208 aSign
= extractFloat128Sign( a
);
6209 if ( aExp
== 0x7FFF ) {
6210 if ( aSig0
| aSig1
) {
6211 floatx80 res
= commonNaNToFloatx80(float128ToCommonNaN(a
, status
),
6213 return floatx80_silence_nan(res
, status
);
6215 return packFloatx80(aSign
, floatx80_infinity_high
,
6216 floatx80_infinity_low
);
6219 if ( ( aSig0
| aSig1
) == 0 ) return packFloatx80( aSign
, 0, 0 );
6220 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
6223 aSig0
|= UINT64_C(0x0001000000000000);
6225 shortShift128Left( aSig0
, aSig1
, 15, &aSig0
, &aSig1
);
6226 return roundAndPackFloatx80(80, aSign
, aExp
, aSig0
, aSig1
, status
);
6230 /*----------------------------------------------------------------------------
6231 | Returns the remainder of the quadruple-precision floating-point value `a'
6232 | with respect to the corresponding value `b'. The operation is performed
6233 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6234 *----------------------------------------------------------------------------*/
6236 float128
float128_rem(float128 a
, float128 b
, float_status
*status
)
6239 int32_t aExp
, bExp
, expDiff
;
6240 uint64_t aSig0
, aSig1
, bSig0
, bSig1
, q
, term0
, term1
, term2
;
6241 uint64_t allZero
, alternateASig0
, alternateASig1
, sigMean1
;
6244 aSig1
= extractFloat128Frac1( a
);
6245 aSig0
= extractFloat128Frac0( a
);
6246 aExp
= extractFloat128Exp( a
);
6247 aSign
= extractFloat128Sign( a
);
6248 bSig1
= extractFloat128Frac1( b
);
6249 bSig0
= extractFloat128Frac0( b
);
6250 bExp
= extractFloat128Exp( b
);
6251 if ( aExp
== 0x7FFF ) {
6252 if ( ( aSig0
| aSig1
)
6253 || ( ( bExp
== 0x7FFF ) && ( bSig0
| bSig1
) ) ) {
6254 return propagateFloat128NaN(a
, b
, status
);
6258 if ( bExp
== 0x7FFF ) {
6259 if (bSig0
| bSig1
) {
6260 return propagateFloat128NaN(a
, b
, status
);
6265 if ( ( bSig0
| bSig1
) == 0 ) {
6267 float_raise(float_flag_invalid
, status
);
6268 return float128_default_nan(status
);
6270 normalizeFloat128Subnormal( bSig0
, bSig1
, &bExp
, &bSig0
, &bSig1
);
6273 if ( ( aSig0
| aSig1
) == 0 ) return a
;
6274 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
6276 expDiff
= aExp
- bExp
;
6277 if ( expDiff
< -1 ) return a
;
6279 aSig0
| UINT64_C(0x0001000000000000),
6281 15 - ( expDiff
< 0 ),
6286 bSig0
| UINT64_C(0x0001000000000000), bSig1
, 15, &bSig0
, &bSig1
);
6287 q
= le128( bSig0
, bSig1
, aSig0
, aSig1
);
6288 if ( q
) sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
6290 while ( 0 < expDiff
) {
6291 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
6292 q
= ( 4 < q
) ? q
- 4 : 0;
6293 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
6294 shortShift192Left( term0
, term1
, term2
, 61, &term1
, &term2
, &allZero
);
6295 shortShift128Left( aSig0
, aSig1
, 61, &aSig0
, &allZero
);
6296 sub128( aSig0
, 0, term1
, term2
, &aSig0
, &aSig1
);
6299 if ( -64 < expDiff
) {
6300 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
6301 q
= ( 4 < q
) ? q
- 4 : 0;
6303 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
6305 if ( expDiff
< 0 ) {
6306 shift128Right( aSig0
, aSig1
, - expDiff
, &aSig0
, &aSig1
);
6309 shortShift128Left( aSig0
, aSig1
, expDiff
, &aSig0
, &aSig1
);
6311 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
6312 sub128( aSig0
, aSig1
, term1
, term2
, &aSig0
, &aSig1
);
6315 shift128Right( aSig0
, aSig1
, 12, &aSig0
, &aSig1
);
6316 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
6319 alternateASig0
= aSig0
;
6320 alternateASig1
= aSig1
;
6322 sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
6323 } while ( 0 <= (int64_t) aSig0
);
6325 aSig0
, aSig1
, alternateASig0
, alternateASig1
, (uint64_t *)&sigMean0
, &sigMean1
);
6326 if ( ( sigMean0
< 0 )
6327 || ( ( ( sigMean0
| sigMean1
) == 0 ) && ( q
& 1 ) ) ) {
6328 aSig0
= alternateASig0
;
6329 aSig1
= alternateASig1
;
6331 zSign
= ( (int64_t) aSig0
< 0 );
6332 if ( zSign
) sub128( 0, 0, aSig0
, aSig1
, &aSig0
, &aSig1
);
6333 return normalizeRoundAndPackFloat128(aSign
^ zSign
, bExp
- 4, aSig0
, aSig1
,
6337 static inline FloatRelation
6338 floatx80_compare_internal(floatx80 a
, floatx80 b
, bool is_quiet
,
6339 float_status
*status
)
6343 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
6344 float_raise(float_flag_invalid
, status
);
6345 return float_relation_unordered
;
6347 if (( ( extractFloatx80Exp( a
) == 0x7fff ) &&
6348 ( extractFloatx80Frac( a
)<<1 ) ) ||
6349 ( ( extractFloatx80Exp( b
) == 0x7fff ) &&
6350 ( extractFloatx80Frac( b
)<<1 ) )) {
6352 floatx80_is_signaling_nan(a
, status
) ||
6353 floatx80_is_signaling_nan(b
, status
)) {
6354 float_raise(float_flag_invalid
, status
);
6356 return float_relation_unordered
;
6358 aSign
= extractFloatx80Sign( a
);
6359 bSign
= extractFloatx80Sign( b
);
6360 if ( aSign
!= bSign
) {
6362 if ( ( ( (uint16_t) ( ( a
.high
| b
.high
) << 1 ) ) == 0) &&
6363 ( ( a
.low
| b
.low
) == 0 ) ) {
6365 return float_relation_equal
;
6367 return 1 - (2 * aSign
);
6370 /* Normalize pseudo-denormals before comparison. */
6371 if ((a
.high
& 0x7fff) == 0 && a
.low
& UINT64_C(0x8000000000000000)) {
6374 if ((b
.high
& 0x7fff) == 0 && b
.low
& UINT64_C(0x8000000000000000)) {
6377 if (a
.low
== b
.low
&& a
.high
== b
.high
) {
6378 return float_relation_equal
;
6380 return 1 - 2 * (aSign
^ ( lt128( a
.high
, a
.low
, b
.high
, b
.low
) ));
6385 FloatRelation
floatx80_compare(floatx80 a
, floatx80 b
, float_status
*status
)
6387 return floatx80_compare_internal(a
, b
, 0, status
);
6390 FloatRelation
floatx80_compare_quiet(floatx80 a
, floatx80 b
,
6391 float_status
*status
)
6393 return floatx80_compare_internal(a
, b
, 1, status
);
6396 floatx80
floatx80_scalbn(floatx80 a
, int n
, float_status
*status
)
6402 if (floatx80_invalid_encoding(a
)) {
6403 float_raise(float_flag_invalid
, status
);
6404 return floatx80_default_nan(status
);
6406 aSig
= extractFloatx80Frac( a
);
6407 aExp
= extractFloatx80Exp( a
);
6408 aSign
= extractFloatx80Sign( a
);
6410 if ( aExp
== 0x7FFF ) {
6412 return propagateFloatx80NaN(a
, a
, status
);
6426 } else if (n
< -0x10000) {
6431 return normalizeRoundAndPackFloatx80(status
->floatx80_rounding_precision
,
6432 aSign
, aExp
, aSig
, 0, status
);
6435 static void __attribute__((constructor
)) softfloat_init(void)
6437 union_float64 ua
, ub
, uc
, ur
;
6439 if (QEMU_NO_HARDFLOAT
) {
6443 * Test that the host's FMA is not obviously broken. For example,
6444 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6445 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6447 ua
.s
= 0x0020000000000001ULL
;
6448 ub
.s
= 0x3ca0000000000000ULL
;
6449 uc
.s
= 0x0020000000000000ULL
;
6450 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
6451 if (ur
.s
!= 0x0020000000000001ULL
) {
6452 force_soft_fma
= true;