4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
85 #include "qemu/osdep.h"
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck
, float32
)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck
, float64
)
141 #undef GEN_INPUT_FLUSH__NOCHECK
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
146 if (likely(!s->flush_inputs_to_zero)) { \
149 soft_t ## _input_flush__nocheck(a, s); \
152 GEN_INPUT_FLUSH1(float32_input_flush1
, float32
)
153 GEN_INPUT_FLUSH1(float64_input_flush1
, float64
)
154 #undef GEN_INPUT_FLUSH1
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
159 if (likely(!s->flush_inputs_to_zero)) { \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
166 GEN_INPUT_FLUSH2(float32_input_flush2
, float32
)
167 GEN_INPUT_FLUSH2(float64_input_flush2
, float64
)
168 #undef GEN_INPUT_FLUSH2
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
173 if (likely(!s->flush_inputs_to_zero)) { \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
181 GEN_INPUT_FLUSH3(float32_input_flush3
, float32
)
182 GEN_INPUT_FLUSH3(float64_input_flush3
, float64
)
183 #undef GEN_INPUT_FLUSH3
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
235 static inline bool can_use_fpu(const float_status
*s
)
237 if (QEMU_NO_HARDFLOAT
) {
240 return likely(s
->float_exception_flags
& float_flag_inexact
&&
241 s
->float_rounding_mode
== float_round_nearest_even
);
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
266 typedef bool (*f32_check_fn
)(union_float32 a
, union_float32 b
);
267 typedef bool (*f64_check_fn
)(union_float64 a
, union_float64 b
);
269 typedef float32 (*soft_f32_op2_fn
)(float32 a
, float32 b
, float_status
*s
);
270 typedef float64 (*soft_f64_op2_fn
)(float64 a
, float64 b
, float_status
*s
);
271 typedef float (*hard_f32_op2_fn
)(float a
, float b
);
272 typedef double (*hard_f64_op2_fn
)(double a
, double b
);
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a
, union_float32 b
)
277 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
282 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
283 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
285 return float32_is_zero_or_normal(a
.s
) &&
286 float32_is_zero_or_normal(b
.s
);
289 static inline bool f64_is_zon2(union_float64 a
, union_float64 b
)
291 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
292 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
293 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
295 return float64_is_zero_or_normal(a
.s
) &&
296 float64_is_zero_or_normal(b
.s
);
299 /* 3-input is-zero-or-normal */
301 bool f32_is_zon3(union_float32 a
, union_float32 b
, union_float32 c
)
303 if (QEMU_HARDFLOAT_3F32_USE_FP
) {
304 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
305 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
306 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
308 return float32_is_zero_or_normal(a
.s
) &&
309 float32_is_zero_or_normal(b
.s
) &&
310 float32_is_zero_or_normal(c
.s
);
314 bool f64_is_zon3(union_float64 a
, union_float64 b
, union_float64 c
)
316 if (QEMU_HARDFLOAT_3F64_USE_FP
) {
317 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
318 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
319 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
321 return float64_is_zero_or_normal(a
.s
) &&
322 float64_is_zero_or_normal(b
.s
) &&
323 float64_is_zero_or_normal(c
.s
);
326 static inline bool f32_is_inf(union_float32 a
)
328 if (QEMU_HARDFLOAT_USE_ISINF
) {
331 return float32_is_infinity(a
.s
);
334 static inline bool f64_is_inf(union_float64 a
)
336 if (QEMU_HARDFLOAT_USE_ISINF
) {
339 return float64_is_infinity(a
.s
);
342 static inline float32
343 float32_gen2(float32 xa
, float32 xb
, float_status
*s
,
344 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
,
345 f32_check_fn pre
, f32_check_fn post
)
347 union_float32 ua
, ub
, ur
;
352 if (unlikely(!can_use_fpu(s
))) {
356 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
357 if (unlikely(!pre(ua
, ub
))) {
361 ur
.h
= hard(ua
.h
, ub
.h
);
362 if (unlikely(f32_is_inf(ur
))) {
363 float_raise(float_flag_overflow
, s
);
364 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
) && post(ua
, ub
)) {
370 return soft(ua
.s
, ub
.s
, s
);
373 static inline float64
374 float64_gen2(float64 xa
, float64 xb
, float_status
*s
,
375 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
,
376 f64_check_fn pre
, f64_check_fn post
)
378 union_float64 ua
, ub
, ur
;
383 if (unlikely(!can_use_fpu(s
))) {
387 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
388 if (unlikely(!pre(ua
, ub
))) {
392 ur
.h
= hard(ua
.h
, ub
.h
);
393 if (unlikely(f64_is_inf(ur
))) {
394 float_raise(float_flag_overflow
, s
);
395 } else if (unlikely(fabs(ur
.h
) <= DBL_MIN
) && post(ua
, ub
)) {
401 return soft(ua
.s
, ub
.s
, s
);
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
408 static inline uint32_t extractFloat32Frac(float32 a
)
410 return float32_val(a
) & 0x007FFFFF;
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
417 static inline int extractFloat32Exp(float32 a
)
419 return (float32_val(a
) >> 23) & 0xFF;
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
426 static inline bool extractFloat32Sign(float32 a
)
428 return float32_val(a
) >> 31;
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
435 static inline uint64_t extractFloat64Frac(float64 a
)
437 return float64_val(a
) & UINT64_C(0x000FFFFFFFFFFFFF);
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
444 static inline int extractFloat64Exp(float64 a
)
446 return (float64_val(a
) >> 52) & 0x7FF;
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
453 static inline bool extractFloat64Sign(float64 a
)
455 return float64_val(a
) >> 63;
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

#define float_cmask(bit)  (1u << (bit))

/* Bit masks over FloatClass, for testing several classes at once. */
enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
485 /* Flags for parts_minmax. */
487 /* Set for minimum; clear for maximum. */
489 /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
491 /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
495 /* Simple helpers for checking if, or what kind of, NaN we have */
496 static inline __attribute__((unused
)) bool is_nan(FloatClass c
)
498 return unlikely(c
>= float_class_qnan
);
501 static inline __attribute__((unused
)) bool is_snan(FloatClass c
)
503 return c
== float_class_snan
;
506 static inline __attribute__((unused
)) bool is_qnan(FloatClass c
)
508 return c
== float_class_qnan
;
512 * Structure holding all of the decomposed parts of a float.
513 * The exponent is unbiased and the fraction is normalized.
515 * The fraction words are stored in big-endian word ordering,
516 * so that truncation from a larger format to a smaller format
517 * can be done simply by ignoring subsequent elements.
525 /* Routines that know the structure may reference the singular name. */
528 * Routines expanded with multiple structures reference "hi" and "lo"
529 * depending on the operation. In FloatParts64, "hi" and "lo" are
530 * both the same word and aliased here.
550 uint64_t frac_hm
; /* high-middle */
551 uint64_t frac_lm
; /* low-middle */
555 /* These apply to the most significant word of each FloatPartsN. */
556 #define DECOMPOSED_BINARY_POINT 63
557 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
559 /* Structure holding all of the relevant parameters for a format.
560 * exp_size: the size of the exponent field
561 * exp_bias: the offset applied to the exponent field
562 * exp_max: the maximum normalised exponent
563 * frac_size: the size of the fraction field
564 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
565 * The following are computed based the size of fraction
566 * round_mask: bits below lsb which must be rounded
567 * The following optional modifiers are available:
568 * arm_althp: handle ARM Alternative Half Precision
/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = E,                                \
    .exp_bias       = ((1 << E) - 1) >> 1,              \
    .exp_max        = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = F,                                \
    .frac_shift     = (-F - 1) & 63,                    \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1
592 static const FloatFmt float16_params
= {
596 static const FloatFmt float16_params_ahp
= {
601 static const FloatFmt bfloat16_params
= {
605 static const FloatFmt float32_params
= {
609 static const FloatFmt float64_params
= {
613 static const FloatFmt float128_params
= {
614 FLOAT_PARAMS(15, 112)
617 #define FLOATX80_PARAMS(R) \
619 .frac_size = R == 64 ? 63 : R, \
621 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
623 static const FloatFmt floatx80_params
[3] = {
624 [floatx80_precision_s
] = { FLOATX80_PARAMS(23) },
625 [floatx80_precision_d
] = { FLOATX80_PARAMS(52) },
626 [floatx80_precision_x
] = { FLOATX80_PARAMS(64) },
629 /* Unpack a float to parts, but do not canonicalize. */
630 static void unpack_raw64(FloatParts64
*r
, const FloatFmt
*fmt
, uint64_t raw
)
632 const int f_size
= fmt
->frac_size
;
633 const int e_size
= fmt
->exp_size
;
635 *r
= (FloatParts64
) {
636 .cls
= float_class_unclassified
,
637 .sign
= extract64(raw
, f_size
+ e_size
, 1),
638 .exp
= extract64(raw
, f_size
, e_size
),
639 .frac
= extract64(raw
, 0, f_size
)
643 static inline void float16_unpack_raw(FloatParts64
*p
, float16 f
)
645 unpack_raw64(p
, &float16_params
, f
);
648 static inline void bfloat16_unpack_raw(FloatParts64
*p
, bfloat16 f
)
650 unpack_raw64(p
, &bfloat16_params
, f
);
653 static inline void float32_unpack_raw(FloatParts64
*p
, float32 f
)
655 unpack_raw64(p
, &float32_params
, f
);
658 static inline void float64_unpack_raw(FloatParts64
*p
, float64 f
)
660 unpack_raw64(p
, &float64_params
, f
);
663 static void floatx80_unpack_raw(FloatParts128
*p
, floatx80 f
)
665 *p
= (FloatParts128
) {
666 .cls
= float_class_unclassified
,
667 .sign
= extract32(f
.high
, 15, 1),
668 .exp
= extract32(f
.high
, 0, 15),
673 static void float128_unpack_raw(FloatParts128
*p
, float128 f
)
675 const int f_size
= float128_params
.frac_size
- 64;
676 const int e_size
= float128_params
.exp_size
;
678 *p
= (FloatParts128
) {
679 .cls
= float_class_unclassified
,
680 .sign
= extract64(f
.high
, f_size
+ e_size
, 1),
681 .exp
= extract64(f
.high
, f_size
, e_size
),
682 .frac_hi
= extract64(f
.high
, 0, f_size
),
687 /* Pack a float from parts, but do not canonicalize. */
688 static uint64_t pack_raw64(const FloatParts64
*p
, const FloatFmt
*fmt
)
690 const int f_size
= fmt
->frac_size
;
691 const int e_size
= fmt
->exp_size
;
694 ret
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
695 ret
= deposit64(ret
, f_size
, e_size
, p
->exp
);
696 ret
= deposit64(ret
, 0, f_size
, p
->frac
);
700 static inline float16
float16_pack_raw(const FloatParts64
*p
)
702 return make_float16(pack_raw64(p
, &float16_params
));
705 static inline bfloat16
bfloat16_pack_raw(const FloatParts64
*p
)
707 return pack_raw64(p
, &bfloat16_params
);
710 static inline float32
float32_pack_raw(const FloatParts64
*p
)
712 return make_float32(pack_raw64(p
, &float32_params
));
715 static inline float64
float64_pack_raw(const FloatParts64
*p
)
717 return make_float64(pack_raw64(p
, &float64_params
));
720 static float128
float128_pack_raw(const FloatParts128
*p
)
722 const int f_size
= float128_params
.frac_size
- 64;
723 const int e_size
= float128_params
.exp_size
;
726 hi
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
727 hi
= deposit64(hi
, f_size
, e_size
, p
->exp
);
728 hi
= deposit64(hi
, 0, f_size
, p
->frac_hi
);
729 return make_float128(hi
, p
->frac_lo
);
732 /*----------------------------------------------------------------------------
733 | Functions and definitions to determine: (1) whether tininess for underflow
734 | is detected before or after rounding by default, (2) what (if anything)
735 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
736 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
737 | are propagated from function inputs to output. These details are target-
739 *----------------------------------------------------------------------------*/
740 #include "softfloat-specialize.c.inc"
742 #define PARTS_GENERIC_64_128(NAME, P) \
743 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
745 #define PARTS_GENERIC_64_128_256(NAME, P) \
746 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
747 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
749 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
750 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
752 static void parts64_return_nan(FloatParts64
*a
, float_status
*s
);
753 static void parts128_return_nan(FloatParts128
*a
, float_status
*s
);
755 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
757 static FloatParts64
*parts64_pick_nan(FloatParts64
*a
, FloatParts64
*b
,
759 static FloatParts128
*parts128_pick_nan(FloatParts128
*a
, FloatParts128
*b
,
762 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
764 static FloatParts64
*parts64_pick_nan_muladd(FloatParts64
*a
, FloatParts64
*b
,
765 FloatParts64
*c
, float_status
*s
,
766 int ab_mask
, int abc_mask
);
767 static FloatParts128
*parts128_pick_nan_muladd(FloatParts128
*a
,
771 int ab_mask
, int abc_mask
);
773 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
774 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
776 static void parts64_canonicalize(FloatParts64
*p
, float_status
*status
,
777 const FloatFmt
*fmt
);
778 static void parts128_canonicalize(FloatParts128
*p
, float_status
*status
,
779 const FloatFmt
*fmt
);
781 #define parts_canonicalize(A, S, F) \
782 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
784 static void parts64_uncanon_normal(FloatParts64
*p
, float_status
*status
,
785 const FloatFmt
*fmt
);
786 static void parts128_uncanon_normal(FloatParts128
*p
, float_status
*status
,
787 const FloatFmt
*fmt
);
789 #define parts_uncanon_normal(A, S, F) \
790 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
792 static void parts64_uncanon(FloatParts64
*p
, float_status
*status
,
793 const FloatFmt
*fmt
);
794 static void parts128_uncanon(FloatParts128
*p
, float_status
*status
,
795 const FloatFmt
*fmt
);
797 #define parts_uncanon(A, S, F) \
798 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
800 static void parts64_add_normal(FloatParts64
*a
, FloatParts64
*b
);
801 static void parts128_add_normal(FloatParts128
*a
, FloatParts128
*b
);
802 static void parts256_add_normal(FloatParts256
*a
, FloatParts256
*b
);
804 #define parts_add_normal(A, B) \
805 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
807 static bool parts64_sub_normal(FloatParts64
*a
, FloatParts64
*b
);
808 static bool parts128_sub_normal(FloatParts128
*a
, FloatParts128
*b
);
809 static bool parts256_sub_normal(FloatParts256
*a
, FloatParts256
*b
);
811 #define parts_sub_normal(A, B) \
812 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
814 static FloatParts64
*parts64_addsub(FloatParts64
*a
, FloatParts64
*b
,
815 float_status
*s
, bool subtract
);
816 static FloatParts128
*parts128_addsub(FloatParts128
*a
, FloatParts128
*b
,
817 float_status
*s
, bool subtract
);
819 #define parts_addsub(A, B, S, Z) \
820 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
822 static FloatParts64
*parts64_mul(FloatParts64
*a
, FloatParts64
*b
,
824 static FloatParts128
*parts128_mul(FloatParts128
*a
, FloatParts128
*b
,
827 #define parts_mul(A, B, S) \
828 PARTS_GENERIC_64_128(mul, A)(A, B, S)
830 static FloatParts64
*parts64_muladd(FloatParts64
*a
, FloatParts64
*b
,
831 FloatParts64
*c
, int flags
,
833 static FloatParts128
*parts128_muladd(FloatParts128
*a
, FloatParts128
*b
,
834 FloatParts128
*c
, int flags
,
837 #define parts_muladd(A, B, C, Z, S) \
838 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
840 static FloatParts64
*parts64_div(FloatParts64
*a
, FloatParts64
*b
,
842 static FloatParts128
*parts128_div(FloatParts128
*a
, FloatParts128
*b
,
845 #define parts_div(A, B, S) \
846 PARTS_GENERIC_64_128(div, A)(A, B, S)
848 static void parts64_sqrt(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
849 static void parts128_sqrt(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
851 #define parts_sqrt(A, S, F) \
852 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
854 static bool parts64_round_to_int_normal(FloatParts64
*a
, FloatRoundMode rm
,
855 int scale
, int frac_size
);
856 static bool parts128_round_to_int_normal(FloatParts128
*a
, FloatRoundMode r
,
857 int scale
, int frac_size
);
859 #define parts_round_to_int_normal(A, R, C, F) \
860 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
862 static void parts64_round_to_int(FloatParts64
*a
, FloatRoundMode rm
,
863 int scale
, float_status
*s
,
864 const FloatFmt
*fmt
);
865 static void parts128_round_to_int(FloatParts128
*a
, FloatRoundMode r
,
866 int scale
, float_status
*s
,
867 const FloatFmt
*fmt
);
869 #define parts_round_to_int(A, R, C, S, F) \
870 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
872 static int64_t parts64_float_to_sint(FloatParts64
*p
, FloatRoundMode rmode
,
873 int scale
, int64_t min
, int64_t max
,
875 static int64_t parts128_float_to_sint(FloatParts128
*p
, FloatRoundMode rmode
,
876 int scale
, int64_t min
, int64_t max
,
879 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
880 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
882 static uint64_t parts64_float_to_uint(FloatParts64
*p
, FloatRoundMode rmode
,
883 int scale
, uint64_t max
,
885 static uint64_t parts128_float_to_uint(FloatParts128
*p
, FloatRoundMode rmode
,
886 int scale
, uint64_t max
,
889 #define parts_float_to_uint(P, R, Z, M, S) \
890 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
892 static void parts64_sint_to_float(FloatParts64
*p
, int64_t a
,
893 int scale
, float_status
*s
);
894 static void parts128_sint_to_float(FloatParts128
*p
, int64_t a
,
895 int scale
, float_status
*s
);
897 #define parts_sint_to_float(P, I, Z, S) \
898 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
900 static void parts64_uint_to_float(FloatParts64
*p
, uint64_t a
,
901 int scale
, float_status
*s
);
902 static void parts128_uint_to_float(FloatParts128
*p
, uint64_t a
,
903 int scale
, float_status
*s
);
905 #define parts_uint_to_float(P, I, Z, S) \
906 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
908 static FloatParts64
*parts64_minmax(FloatParts64
*a
, FloatParts64
*b
,
909 float_status
*s
, int flags
);
910 static FloatParts128
*parts128_minmax(FloatParts128
*a
, FloatParts128
*b
,
911 float_status
*s
, int flags
);
913 #define parts_minmax(A, B, S, F) \
914 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
916 static int parts64_compare(FloatParts64
*a
, FloatParts64
*b
,
917 float_status
*s
, bool q
);
918 static int parts128_compare(FloatParts128
*a
, FloatParts128
*b
,
919 float_status
*s
, bool q
);
921 #define parts_compare(A, B, S, Q) \
922 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
924 static void parts64_scalbn(FloatParts64
*a
, int n
, float_status
*s
);
925 static void parts128_scalbn(FloatParts128
*a
, int n
, float_status
*s
);
927 #define parts_scalbn(A, N, S) \
928 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
931 * Helper functions for softfloat-parts.c.inc, per-size operations.
934 #define FRAC_GENERIC_64_128(NAME, P) \
935 QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
937 #define FRAC_GENERIC_64_128_256(NAME, P) \
938 QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
939 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
941 static bool frac64_add(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
943 return uadd64_overflow(a
->frac
, b
->frac
, &r
->frac
);
946 static bool frac128_add(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
949 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
950 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
954 static bool frac256_add(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
957 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
958 r
->frac_lm
= uadd64_carry(a
->frac_lm
, b
->frac_lm
, &c
);
959 r
->frac_hm
= uadd64_carry(a
->frac_hm
, b
->frac_hm
, &c
);
960 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
964 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
966 static bool frac64_addi(FloatParts64
*r
, FloatParts64
*a
, uint64_t c
)
968 return uadd64_overflow(a
->frac
, c
, &r
->frac
);
971 static bool frac128_addi(FloatParts128
*r
, FloatParts128
*a
, uint64_t c
)
973 c
= uadd64_overflow(a
->frac_lo
, c
, &r
->frac_lo
);
974 return uadd64_overflow(a
->frac_hi
, c
, &r
->frac_hi
);
977 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
979 static void frac64_allones(FloatParts64
*a
)
984 static void frac128_allones(FloatParts128
*a
)
986 a
->frac_hi
= a
->frac_lo
= -1;
989 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
991 static int frac64_cmp(FloatParts64
*a
, FloatParts64
*b
)
993 return a
->frac
== b
->frac
? 0 : a
->frac
< b
->frac
? -1 : 1;
996 static int frac128_cmp(FloatParts128
*a
, FloatParts128
*b
)
998 uint64_t ta
= a
->frac_hi
, tb
= b
->frac_hi
;
1000 ta
= a
->frac_lo
, tb
= b
->frac_lo
;
1005 return ta
< tb
? -1 : 1;
1008 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
1010 static void frac64_clear(FloatParts64
*a
)
1015 static void frac128_clear(FloatParts128
*a
)
1017 a
->frac_hi
= a
->frac_lo
= 0;
1020 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
1022 static bool frac64_div(FloatParts64
*a
, FloatParts64
*b
)
1024 uint64_t n1
, n0
, r
, q
;
1028 * We want a 2*N / N-bit division to produce exactly an N-bit
1029 * result, so that we do not lose any precision and so that we
1030 * do not have to renormalize afterward. If A.frac < B.frac,
1031 * then division would produce an (N-1)-bit result; shift A left
1032 * by one to produce the an N-bit result, and return true to
1033 * decrement the exponent to match.
1035 * The udiv_qrnnd algorithm that we're using requires normalization,
1036 * i.e. the msb of the denominator must be set, which is already true.
1038 ret
= a
->frac
< b
->frac
;
1046 q
= udiv_qrnnd(&r
, n0
, n1
, b
->frac
);
1048 /* Set lsb if there is a remainder, to set inexact. */
1049 a
->frac
= q
| (r
!= 0);
1054 static bool frac128_div(FloatParts128
*a
, FloatParts128
*b
)
1056 uint64_t q0
, q1
, a0
, a1
, b0
, b1
;
1057 uint64_t r0
, r1
, r2
, r3
, t0
, t1
, t2
, t3
;
1060 a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1061 b0
= b
->frac_hi
, b1
= b
->frac_lo
;
1063 ret
= lt128(a0
, a1
, b0
, b1
);
1065 a1
= shr_double(a0
, a1
, 1);
1069 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1070 q0
= estimateDiv128To64(a0
, a1
, b0
);
1073 * Estimate is high because B1 was not included (unless B1 == 0).
1074 * Reduce quotient and increase remainder until remainder is non-negative.
1075 * This loop will execute 0 to 2 times.
1077 mul128By64To192(b0
, b1
, q0
, &t0
, &t1
, &t2
);
1078 sub192(a0
, a1
, 0, t0
, t1
, t2
, &r0
, &r1
, &r2
);
1081 add192(r0
, r1
, r2
, 0, b0
, b1
, &r0
, &r1
, &r2
);
1084 /* Repeat using the remainder, producing a second word of quotient. */
1085 q1
= estimateDiv128To64(r1
, r2
, b0
);
1086 mul128By64To192(b0
, b1
, q1
, &t1
, &t2
, &t3
);
1087 sub192(r1
, r2
, 0, t1
, t2
, t3
, &r1
, &r2
, &r3
);
1090 add192(r1
, r2
, r3
, 0, b0
, b1
, &r1
, &r2
, &r3
);
1093 /* Any remainder indicates inexact; set sticky bit. */
1094 q1
|= (r2
| r3
) != 0;
1101 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1103 static bool frac64_eqz(FloatParts64
*a
)
1105 return a
->frac
== 0;
1108 static bool frac128_eqz(FloatParts128
*a
)
1110 return (a
->frac_hi
| a
->frac_lo
) == 0;
1113 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1115 static void frac64_mulw(FloatParts128
*r
, FloatParts64
*a
, FloatParts64
*b
)
1117 mulu64(&r
->frac_lo
, &r
->frac_hi
, a
->frac
, b
->frac
);
1120 static void frac128_mulw(FloatParts256
*r
, FloatParts128
*a
, FloatParts128
*b
)
1122 mul128To256(a
->frac_hi
, a
->frac_lo
, b
->frac_hi
, b
->frac_lo
,
1123 &r
->frac_hi
, &r
->frac_hm
, &r
->frac_lm
, &r
->frac_lo
);
1126 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1128 static void frac64_neg(FloatParts64
*a
)
1133 static void frac128_neg(FloatParts128
*a
)
1136 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1137 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1140 static void frac256_neg(FloatParts256
*a
)
1143 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1144 a
->frac_lm
= usub64_borrow(0, a
->frac_lm
, &c
);
1145 a
->frac_hm
= usub64_borrow(0, a
->frac_hm
, &c
);
1146 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1149 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1151 static int frac64_normalize(FloatParts64
*a
)
1154 int shift
= clz64(a
->frac
);
1161 static int frac128_normalize(FloatParts128
*a
)
1164 int shl
= clz64(a
->frac_hi
);
1165 a
->frac_hi
= shl_double(a
->frac_hi
, a
->frac_lo
, shl
);
1168 } else if (a
->frac_lo
) {
1169 int shl
= clz64(a
->frac_lo
);
1170 a
->frac_hi
= a
->frac_lo
<< shl
;
1177 static int frac256_normalize(FloatParts256
*a
)
1179 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1180 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1192 a0
= a1
, a1
= a2
, a2
= a3
, a3
= 0;
1195 a0
= a2
, a1
= a3
, a2
= 0, a3
= 0;
1198 a0
= a3
, a1
= 0, a2
= 0, a3
= 0;
1201 a0
= 0, a1
= 0, a2
= 0, a3
= 0;
1211 a0
= shl_double(a0
, a1
, shl
);
1212 a1
= shl_double(a1
, a2
, shl
);
1213 a2
= shl_double(a2
, a3
, shl
);
1224 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1226 static void frac64_shl(FloatParts64
*a
, int c
)
1231 static void frac128_shl(FloatParts128
*a
, int c
)
1233 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1241 a0
= shl_double(a0
, a1
, c
);
1249 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1251 static void frac64_shr(FloatParts64
*a
, int c
)
1256 static void frac128_shr(FloatParts128
*a
, int c
)
1258 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1266 a1
= shr_double(a0
, a1
, c
);
1274 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1276 static void frac64_shrjam(FloatParts64
*a
, int c
)
1278 uint64_t a0
= a
->frac
;
1280 if (likely(c
!= 0)) {
1281 if (likely(c
< 64)) {
1282 a0
= (a0
>> c
) | (shr_double(a0
, 0, c
) != 0);
1290 static void frac128_shrjam(FloatParts128
*a
, int c
)
1292 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1293 uint64_t sticky
= 0;
1295 if (unlikely(c
== 0)) {
1297 } else if (likely(c
< 64)) {
1299 } else if (likely(c
< 128)) {
1313 sticky
|= shr_double(a1
, 0, c
);
1314 a1
= shr_double(a0
, a1
, c
);
1318 a
->frac_lo
= a1
| (sticky
!= 0);
1322 static void frac256_shrjam(FloatParts256
*a
, int c
)
1324 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1325 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1326 uint64_t sticky
= 0;
1328 if (unlikely(c
== 0)) {
1330 } else if (likely(c
< 64)) {
1332 } else if (likely(c
< 256)) {
1333 if (unlikely(c
& 128)) {
1335 a3
= a1
, a2
= a0
, a1
= 0, a0
= 0;
1337 if (unlikely(c
& 64)) {
1339 a3
= a2
, a2
= a1
, a1
= a0
, a0
= 0;
1346 sticky
= a0
| a1
| a2
| a3
;
1347 a0
= a1
= a2
= a3
= 0;
1351 sticky
|= shr_double(a3
, 0, c
);
1352 a3
= shr_double(a2
, a3
, c
);
1353 a2
= shr_double(a1
, a2
, c
);
1354 a1
= shr_double(a0
, a1
, c
);
1358 a
->frac_lo
= a3
| (sticky
!= 0);
1364 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1366 static bool frac64_sub(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
1368 return usub64_overflow(a
->frac
, b
->frac
, &r
->frac
);
1371 static bool frac128_sub(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
1374 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1375 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1379 static bool frac256_sub(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
1382 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1383 r
->frac_lm
= usub64_borrow(a
->frac_lm
, b
->frac_lm
, &c
);
1384 r
->frac_hm
= usub64_borrow(a
->frac_hm
, b
->frac_hm
, &c
);
1385 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1389 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1391 static void frac64_truncjam(FloatParts64
*r
, FloatParts128
*a
)
1393 r
->frac
= a
->frac_hi
| (a
->frac_lo
!= 0);
1396 static void frac128_truncjam(FloatParts128
*r
, FloatParts256
*a
)
1398 r
->frac_hi
= a
->frac_hi
;
1399 r
->frac_lo
= a
->frac_hm
| ((a
->frac_lm
| a
->frac_lo
) != 0);
1402 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1404 static void frac64_widen(FloatParts128
*r
, FloatParts64
*a
)
1406 r
->frac_hi
= a
->frac
;
1410 static void frac128_widen(FloatParts256
*r
, FloatParts128
*a
)
1412 r
->frac_hi
= a
->frac_hi
;
1413 r
->frac_hm
= a
->frac_lo
;
1418 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1421 * Reciprocal sqrt table. 1 bit of exponent, 6-bits of mantessa.
1422 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1423 * and thus MIT licenced.
/*
 * Reciprocal sqrt lookup table: 1 bit of exponent, 6 bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licenced.
 */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
1444 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1445 #define FloatPartsN glue(FloatParts,N)
1446 #define FloatPartsW glue(FloatParts,W)
1451 #include "softfloat-parts-addsub.c.inc"
1452 #include "softfloat-parts.c.inc"
1459 #include "softfloat-parts-addsub.c.inc"
1460 #include "softfloat-parts.c.inc"
1466 #include "softfloat-parts-addsub.c.inc"
1475 * Pack/unpack routines with a specific FloatFmt.
1478 static void float16a_unpack_canonical(FloatParts64
*p
, float16 f
,
1479 float_status
*s
, const FloatFmt
*params
)
1481 float16_unpack_raw(p
, f
);
1482 parts_canonicalize(p
, s
, params
);
1485 static void float16_unpack_canonical(FloatParts64
*p
, float16 f
,
1488 float16a_unpack_canonical(p
, f
, s
, &float16_params
);
1491 static void bfloat16_unpack_canonical(FloatParts64
*p
, bfloat16 f
,
1494 bfloat16_unpack_raw(p
, f
);
1495 parts_canonicalize(p
, s
, &bfloat16_params
);
1498 static float16
float16a_round_pack_canonical(FloatParts64
*p
,
1500 const FloatFmt
*params
)
1502 parts_uncanon(p
, s
, params
);
1503 return float16_pack_raw(p
);
1506 static float16
float16_round_pack_canonical(FloatParts64
*p
,
1509 return float16a_round_pack_canonical(p
, s
, &float16_params
);
1512 static bfloat16
bfloat16_round_pack_canonical(FloatParts64
*p
,
1515 parts_uncanon(p
, s
, &bfloat16_params
);
1516 return bfloat16_pack_raw(p
);
1519 static void float32_unpack_canonical(FloatParts64
*p
, float32 f
,
1522 float32_unpack_raw(p
, f
);
1523 parts_canonicalize(p
, s
, &float32_params
);
1526 static float32
float32_round_pack_canonical(FloatParts64
*p
,
1529 parts_uncanon(p
, s
, &float32_params
);
1530 return float32_pack_raw(p
);
1533 static void float64_unpack_canonical(FloatParts64
*p
, float64 f
,
1536 float64_unpack_raw(p
, f
);
1537 parts_canonicalize(p
, s
, &float64_params
);
1540 static float64
float64_round_pack_canonical(FloatParts64
*p
,
1543 parts_uncanon(p
, s
, &float64_params
);
1544 return float64_pack_raw(p
);
1547 static void float128_unpack_canonical(FloatParts128
*p
, float128 f
,
1550 float128_unpack_raw(p
, f
);
1551 parts_canonicalize(p
, s
, &float128_params
);
1554 static float128
float128_round_pack_canonical(FloatParts128
*p
,
1557 parts_uncanon(p
, s
, &float128_params
);
1558 return float128_pack_raw(p
);
1561 /* Returns false if the encoding is invalid. */
1562 static bool floatx80_unpack_canonical(FloatParts128
*p
, floatx80 f
,
1565 /* Ensure rounding precision is set before beginning. */
1566 switch (s
->floatx80_rounding_precision
) {
1567 case floatx80_precision_x
:
1568 case floatx80_precision_d
:
1569 case floatx80_precision_s
:
1572 g_assert_not_reached();
1575 if (unlikely(floatx80_invalid_encoding(f
))) {
1576 float_raise(float_flag_invalid
, s
);
1580 floatx80_unpack_raw(p
, f
);
1582 if (likely(p
->exp
!= floatx80_params
[floatx80_precision_x
].exp_max
)) {
1583 parts_canonicalize(p
, s
, &floatx80_params
[floatx80_precision_x
]);
1585 /* The explicit integer bit is ignored, after invalid checks. */
1586 p
->frac_hi
&= MAKE_64BIT_MASK(0, 63);
1587 p
->cls
= (p
->frac_hi
== 0 ? float_class_inf
1588 : parts_is_snan_frac(p
->frac_hi
, s
)
1589 ? float_class_snan
: float_class_qnan
);
1594 static floatx80
floatx80_round_pack_canonical(FloatParts128
*p
,
1597 const FloatFmt
*fmt
= &floatx80_params
[s
->floatx80_rounding_precision
];
1602 case float_class_normal
:
1603 if (s
->floatx80_rounding_precision
== floatx80_precision_x
) {
1604 parts_uncanon_normal(p
, s
, fmt
);
1612 frac_truncjam(&p64
, p
);
1613 parts_uncanon_normal(&p64
, s
, fmt
);
1617 if (exp
!= fmt
->exp_max
) {
1620 /* rounded to inf -- fall through to set frac correctly */
1622 case float_class_inf
:
1623 /* x86 and m68k differ in the setting of the integer bit. */
1624 frac
= floatx80_infinity_low
;
1628 case float_class_zero
:
1633 case float_class_snan
:
1634 case float_class_qnan
:
1635 /* NaNs have the integer bit set. */
1636 frac
= p
->frac_hi
| (1ull << 63);
1641 g_assert_not_reached();
1644 return packFloatx80(p
->sign
, exp
, frac
);
1648 * Addition and subtraction
1651 static float16 QEMU_FLATTEN
1652 float16_addsub(float16 a
, float16 b
, float_status
*status
, bool subtract
)
1654 FloatParts64 pa
, pb
, *pr
;
1656 float16_unpack_canonical(&pa
, a
, status
);
1657 float16_unpack_canonical(&pb
, b
, status
);
1658 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1660 return float16_round_pack_canonical(pr
, status
);
1663 float16
float16_add(float16 a
, float16 b
, float_status
*status
)
1665 return float16_addsub(a
, b
, status
, false);
1668 float16
float16_sub(float16 a
, float16 b
, float_status
*status
)
1670 return float16_addsub(a
, b
, status
, true);
1673 static float32 QEMU_SOFTFLOAT_ATTR
1674 soft_f32_addsub(float32 a
, float32 b
, float_status
*status
, bool subtract
)
1676 FloatParts64 pa
, pb
, *pr
;
1678 float32_unpack_canonical(&pa
, a
, status
);
1679 float32_unpack_canonical(&pb
, b
, status
);
1680 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1682 return float32_round_pack_canonical(pr
, status
);
1685 static float32
soft_f32_add(float32 a
, float32 b
, float_status
*status
)
1687 return soft_f32_addsub(a
, b
, status
, false);
1690 static float32
soft_f32_sub(float32 a
, float32 b
, float_status
*status
)
1692 return soft_f32_addsub(a
, b
, status
, true);
1695 static float64 QEMU_SOFTFLOAT_ATTR
1696 soft_f64_addsub(float64 a
, float64 b
, float_status
*status
, bool subtract
)
1698 FloatParts64 pa
, pb
, *pr
;
1700 float64_unpack_canonical(&pa
, a
, status
);
1701 float64_unpack_canonical(&pb
, b
, status
);
1702 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1704 return float64_round_pack_canonical(pr
, status
);
1707 static float64
soft_f64_add(float64 a
, float64 b
, float_status
*status
)
1709 return soft_f64_addsub(a
, b
, status
, false);
1712 static float64
soft_f64_sub(float64 a
, float64 b
, float_status
*status
)
1714 return soft_f64_addsub(a
, b
, status
, true);
1717 static float hard_f32_add(float a
, float b
)
1722 static float hard_f32_sub(float a
, float b
)
1727 static double hard_f64_add(double a
, double b
)
1732 static double hard_f64_sub(double a
, double b
)
1737 static bool f32_addsubmul_post(union_float32 a
, union_float32 b
)
1739 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1740 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1742 return !(float32_is_zero(a
.s
) && float32_is_zero(b
.s
));
1745 static bool f64_addsubmul_post(union_float64 a
, union_float64 b
)
1747 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1748 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1750 return !(float64_is_zero(a
.s
) && float64_is_zero(b
.s
));
1754 static float32
float32_addsub(float32 a
, float32 b
, float_status
*s
,
1755 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
)
1757 return float32_gen2(a
, b
, s
, hard
, soft
,
1758 f32_is_zon2
, f32_addsubmul_post
);
1761 static float64
float64_addsub(float64 a
, float64 b
, float_status
*s
,
1762 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
)
1764 return float64_gen2(a
, b
, s
, hard
, soft
,
1765 f64_is_zon2
, f64_addsubmul_post
);
1768 float32 QEMU_FLATTEN
1769 float32_add(float32 a
, float32 b
, float_status
*s
)
1771 return float32_addsub(a
, b
, s
, hard_f32_add
, soft_f32_add
);
1774 float32 QEMU_FLATTEN
1775 float32_sub(float32 a
, float32 b
, float_status
*s
)
1777 return float32_addsub(a
, b
, s
, hard_f32_sub
, soft_f32_sub
);
1780 float64 QEMU_FLATTEN
1781 float64_add(float64 a
, float64 b
, float_status
*s
)
1783 return float64_addsub(a
, b
, s
, hard_f64_add
, soft_f64_add
);
1786 float64 QEMU_FLATTEN
1787 float64_sub(float64 a
, float64 b
, float_status
*s
)
1789 return float64_addsub(a
, b
, s
, hard_f64_sub
, soft_f64_sub
);
1792 static bfloat16 QEMU_FLATTEN
1793 bfloat16_addsub(bfloat16 a
, bfloat16 b
, float_status
*status
, bool subtract
)
1795 FloatParts64 pa
, pb
, *pr
;
1797 bfloat16_unpack_canonical(&pa
, a
, status
);
1798 bfloat16_unpack_canonical(&pb
, b
, status
);
1799 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1801 return bfloat16_round_pack_canonical(pr
, status
);
1804 bfloat16
bfloat16_add(bfloat16 a
, bfloat16 b
, float_status
*status
)
1806 return bfloat16_addsub(a
, b
, status
, false);
1809 bfloat16
bfloat16_sub(bfloat16 a
, bfloat16 b
, float_status
*status
)
1811 return bfloat16_addsub(a
, b
, status
, true);
1814 static float128 QEMU_FLATTEN
1815 float128_addsub(float128 a
, float128 b
, float_status
*status
, bool subtract
)
1817 FloatParts128 pa
, pb
, *pr
;
1819 float128_unpack_canonical(&pa
, a
, status
);
1820 float128_unpack_canonical(&pb
, b
, status
);
1821 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1823 return float128_round_pack_canonical(pr
, status
);
1826 float128
float128_add(float128 a
, float128 b
, float_status
*status
)
1828 return float128_addsub(a
, b
, status
, false);
1831 float128
float128_sub(float128 a
, float128 b
, float_status
*status
)
1833 return float128_addsub(a
, b
, status
, true);
1836 static floatx80 QEMU_FLATTEN
1837 floatx80_addsub(floatx80 a
, floatx80 b
, float_status
*status
, bool subtract
)
1839 FloatParts128 pa
, pb
, *pr
;
1841 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
1842 !floatx80_unpack_canonical(&pb
, b
, status
)) {
1843 return floatx80_default_nan(status
);
1846 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1847 return floatx80_round_pack_canonical(pr
, status
);
1850 floatx80
floatx80_add(floatx80 a
, floatx80 b
, float_status
*status
)
1852 return floatx80_addsub(a
, b
, status
, false);
1855 floatx80
floatx80_sub(floatx80 a
, floatx80 b
, float_status
*status
)
1857 return floatx80_addsub(a
, b
, status
, true);
1864 float16 QEMU_FLATTEN
float16_mul(float16 a
, float16 b
, float_status
*status
)
1866 FloatParts64 pa
, pb
, *pr
;
1868 float16_unpack_canonical(&pa
, a
, status
);
1869 float16_unpack_canonical(&pb
, b
, status
);
1870 pr
= parts_mul(&pa
, &pb
, status
);
1872 return float16_round_pack_canonical(pr
, status
);
1875 static float32 QEMU_SOFTFLOAT_ATTR
1876 soft_f32_mul(float32 a
, float32 b
, float_status
*status
)
1878 FloatParts64 pa
, pb
, *pr
;
1880 float32_unpack_canonical(&pa
, a
, status
);
1881 float32_unpack_canonical(&pb
, b
, status
);
1882 pr
= parts_mul(&pa
, &pb
, status
);
1884 return float32_round_pack_canonical(pr
, status
);
1887 static float64 QEMU_SOFTFLOAT_ATTR
1888 soft_f64_mul(float64 a
, float64 b
, float_status
*status
)
1890 FloatParts64 pa
, pb
, *pr
;
1892 float64_unpack_canonical(&pa
, a
, status
);
1893 float64_unpack_canonical(&pb
, b
, status
);
1894 pr
= parts_mul(&pa
, &pb
, status
);
1896 return float64_round_pack_canonical(pr
, status
);
1899 static float hard_f32_mul(float a
, float b
)
1904 static double hard_f64_mul(double a
, double b
)
1909 float32 QEMU_FLATTEN
1910 float32_mul(float32 a
, float32 b
, float_status
*s
)
1912 return float32_gen2(a
, b
, s
, hard_f32_mul
, soft_f32_mul
,
1913 f32_is_zon2
, f32_addsubmul_post
);
1916 float64 QEMU_FLATTEN
1917 float64_mul(float64 a
, float64 b
, float_status
*s
)
1919 return float64_gen2(a
, b
, s
, hard_f64_mul
, soft_f64_mul
,
1920 f64_is_zon2
, f64_addsubmul_post
);
1923 bfloat16 QEMU_FLATTEN
1924 bfloat16_mul(bfloat16 a
, bfloat16 b
, float_status
*status
)
1926 FloatParts64 pa
, pb
, *pr
;
1928 bfloat16_unpack_canonical(&pa
, a
, status
);
1929 bfloat16_unpack_canonical(&pb
, b
, status
);
1930 pr
= parts_mul(&pa
, &pb
, status
);
1932 return bfloat16_round_pack_canonical(pr
, status
);
1935 float128 QEMU_FLATTEN
1936 float128_mul(float128 a
, float128 b
, float_status
*status
)
1938 FloatParts128 pa
, pb
, *pr
;
1940 float128_unpack_canonical(&pa
, a
, status
);
1941 float128_unpack_canonical(&pb
, b
, status
);
1942 pr
= parts_mul(&pa
, &pb
, status
);
1944 return float128_round_pack_canonical(pr
, status
);
1947 floatx80 QEMU_FLATTEN
1948 floatx80_mul(floatx80 a
, floatx80 b
, float_status
*status
)
1950 FloatParts128 pa
, pb
, *pr
;
1952 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
1953 !floatx80_unpack_canonical(&pb
, b
, status
)) {
1954 return floatx80_default_nan(status
);
1957 pr
= parts_mul(&pa
, &pb
, status
);
1958 return floatx80_round_pack_canonical(pr
, status
);
1962 * Fused multiply-add
1965 float16 QEMU_FLATTEN
float16_muladd(float16 a
, float16 b
, float16 c
,
1966 int flags
, float_status
*status
)
1968 FloatParts64 pa
, pb
, pc
, *pr
;
1970 float16_unpack_canonical(&pa
, a
, status
);
1971 float16_unpack_canonical(&pb
, b
, status
);
1972 float16_unpack_canonical(&pc
, c
, status
);
1973 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1975 return float16_round_pack_canonical(pr
, status
);
1978 static float32 QEMU_SOFTFLOAT_ATTR
1979 soft_f32_muladd(float32 a
, float32 b
, float32 c
, int flags
,
1980 float_status
*status
)
1982 FloatParts64 pa
, pb
, pc
, *pr
;
1984 float32_unpack_canonical(&pa
, a
, status
);
1985 float32_unpack_canonical(&pb
, b
, status
);
1986 float32_unpack_canonical(&pc
, c
, status
);
1987 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1989 return float32_round_pack_canonical(pr
, status
);
1992 static float64 QEMU_SOFTFLOAT_ATTR
1993 soft_f64_muladd(float64 a
, float64 b
, float64 c
, int flags
,
1994 float_status
*status
)
1996 FloatParts64 pa
, pb
, pc
, *pr
;
1998 float64_unpack_canonical(&pa
, a
, status
);
1999 float64_unpack_canonical(&pb
, b
, status
);
2000 float64_unpack_canonical(&pc
, c
, status
);
2001 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2003 return float64_round_pack_canonical(pr
, status
);
2006 static bool force_soft_fma
;
2008 float32 QEMU_FLATTEN
2009 float32_muladd(float32 xa
, float32 xb
, float32 xc
, int flags
, float_status
*s
)
2011 union_float32 ua
, ub
, uc
, ur
;
2017 if (unlikely(!can_use_fpu(s
))) {
2020 if (unlikely(flags
& float_muladd_halve_result
)) {
2024 float32_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2025 if (unlikely(!f32_is_zon3(ua
, ub
, uc
))) {
2029 if (unlikely(force_soft_fma
)) {
2034 * When (a || b) == 0, there's no need to check for under/over flow,
2035 * since we know the addend is (normal || 0) and the product is 0.
2037 if (float32_is_zero(ua
.s
) || float32_is_zero(ub
.s
)) {
2041 prod_sign
= float32_is_neg(ua
.s
) ^ float32_is_neg(ub
.s
);
2042 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2043 up
.s
= float32_set_sign(float32_zero
, prod_sign
);
2045 if (flags
& float_muladd_negate_c
) {
2050 union_float32 ua_orig
= ua
;
2051 union_float32 uc_orig
= uc
;
2053 if (flags
& float_muladd_negate_product
) {
2056 if (flags
& float_muladd_negate_c
) {
2060 ur
.h
= fmaf(ua
.h
, ub
.h
, uc
.h
);
2062 if (unlikely(f32_is_inf(ur
))) {
2063 float_raise(float_flag_overflow
, s
);
2064 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
)) {
2070 if (flags
& float_muladd_negate_result
) {
2071 return float32_chs(ur
.s
);
2076 return soft_f32_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2079 float64 QEMU_FLATTEN
2080 float64_muladd(float64 xa
, float64 xb
, float64 xc
, int flags
, float_status
*s
)
2082 union_float64 ua
, ub
, uc
, ur
;
2088 if (unlikely(!can_use_fpu(s
))) {
2091 if (unlikely(flags
& float_muladd_halve_result
)) {
2095 float64_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2096 if (unlikely(!f64_is_zon3(ua
, ub
, uc
))) {
2100 if (unlikely(force_soft_fma
)) {
2105 * When (a || b) == 0, there's no need to check for under/over flow,
2106 * since we know the addend is (normal || 0) and the product is 0.
2108 if (float64_is_zero(ua
.s
) || float64_is_zero(ub
.s
)) {
2112 prod_sign
= float64_is_neg(ua
.s
) ^ float64_is_neg(ub
.s
);
2113 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2114 up
.s
= float64_set_sign(float64_zero
, prod_sign
);
2116 if (flags
& float_muladd_negate_c
) {
2121 union_float64 ua_orig
= ua
;
2122 union_float64 uc_orig
= uc
;
2124 if (flags
& float_muladd_negate_product
) {
2127 if (flags
& float_muladd_negate_c
) {
2131 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
2133 if (unlikely(f64_is_inf(ur
))) {
2134 float_raise(float_flag_overflow
, s
);
2135 } else if (unlikely(fabs(ur
.h
) <= FLT_MIN
)) {
2141 if (flags
& float_muladd_negate_result
) {
2142 return float64_chs(ur
.s
);
2147 return soft_f64_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2150 bfloat16 QEMU_FLATTEN
bfloat16_muladd(bfloat16 a
, bfloat16 b
, bfloat16 c
,
2151 int flags
, float_status
*status
)
2153 FloatParts64 pa
, pb
, pc
, *pr
;
2155 bfloat16_unpack_canonical(&pa
, a
, status
);
2156 bfloat16_unpack_canonical(&pb
, b
, status
);
2157 bfloat16_unpack_canonical(&pc
, c
, status
);
2158 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2160 return bfloat16_round_pack_canonical(pr
, status
);
2163 float128 QEMU_FLATTEN
float128_muladd(float128 a
, float128 b
, float128 c
,
2164 int flags
, float_status
*status
)
2166 FloatParts128 pa
, pb
, pc
, *pr
;
2168 float128_unpack_canonical(&pa
, a
, status
);
2169 float128_unpack_canonical(&pb
, b
, status
);
2170 float128_unpack_canonical(&pc
, c
, status
);
2171 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2173 return float128_round_pack_canonical(pr
, status
);
2180 float16
float16_div(float16 a
, float16 b
, float_status
*status
)
2182 FloatParts64 pa
, pb
, *pr
;
2184 float16_unpack_canonical(&pa
, a
, status
);
2185 float16_unpack_canonical(&pb
, b
, status
);
2186 pr
= parts_div(&pa
, &pb
, status
);
2188 return float16_round_pack_canonical(pr
, status
);
2191 static float32 QEMU_SOFTFLOAT_ATTR
2192 soft_f32_div(float32 a
, float32 b
, float_status
*status
)
2194 FloatParts64 pa
, pb
, *pr
;
2196 float32_unpack_canonical(&pa
, a
, status
);
2197 float32_unpack_canonical(&pb
, b
, status
);
2198 pr
= parts_div(&pa
, &pb
, status
);
2200 return float32_round_pack_canonical(pr
, status
);
2203 static float64 QEMU_SOFTFLOAT_ATTR
2204 soft_f64_div(float64 a
, float64 b
, float_status
*status
)
2206 FloatParts64 pa
, pb
, *pr
;
2208 float64_unpack_canonical(&pa
, a
, status
);
2209 float64_unpack_canonical(&pb
, b
, status
);
2210 pr
= parts_div(&pa
, &pb
, status
);
2212 return float64_round_pack_canonical(pr
, status
);
2215 static float hard_f32_div(float a
, float b
)
2220 static double hard_f64_div(double a
, double b
)
2225 static bool f32_div_pre(union_float32 a
, union_float32 b
)
2227 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2228 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2229 fpclassify(b
.h
) == FP_NORMAL
;
2231 return float32_is_zero_or_normal(a
.s
) && float32_is_normal(b
.s
);
2234 static bool f64_div_pre(union_float64 a
, union_float64 b
)
2236 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2237 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2238 fpclassify(b
.h
) == FP_NORMAL
;
2240 return float64_is_zero_or_normal(a
.s
) && float64_is_normal(b
.s
);
2243 static bool f32_div_post(union_float32 a
, union_float32 b
)
2245 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2246 return fpclassify(a
.h
) != FP_ZERO
;
2248 return !float32_is_zero(a
.s
);
2251 static bool f64_div_post(union_float64 a
, union_float64 b
)
2253 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2254 return fpclassify(a
.h
) != FP_ZERO
;
2256 return !float64_is_zero(a
.s
);
2259 float32 QEMU_FLATTEN
2260 float32_div(float32 a
, float32 b
, float_status
*s
)
2262 return float32_gen2(a
, b
, s
, hard_f32_div
, soft_f32_div
,
2263 f32_div_pre
, f32_div_post
);
2266 float64 QEMU_FLATTEN
2267 float64_div(float64 a
, float64 b
, float_status
*s
)
2269 return float64_gen2(a
, b
, s
, hard_f64_div
, soft_f64_div
,
2270 f64_div_pre
, f64_div_post
);
2273 bfloat16 QEMU_FLATTEN
2274 bfloat16_div(bfloat16 a
, bfloat16 b
, float_status
*status
)
2276 FloatParts64 pa
, pb
, *pr
;
2278 bfloat16_unpack_canonical(&pa
, a
, status
);
2279 bfloat16_unpack_canonical(&pb
, b
, status
);
2280 pr
= parts_div(&pa
, &pb
, status
);
2282 return bfloat16_round_pack_canonical(pr
, status
);
2285 float128 QEMU_FLATTEN
2286 float128_div(float128 a
, float128 b
, float_status
*status
)
2288 FloatParts128 pa
, pb
, *pr
;
2290 float128_unpack_canonical(&pa
, a
, status
);
2291 float128_unpack_canonical(&pb
, b
, status
);
2292 pr
= parts_div(&pa
, &pb
, status
);
2294 return float128_round_pack_canonical(pr
, status
);
2297 floatx80
floatx80_div(floatx80 a
, floatx80 b
, float_status
*status
)
2299 FloatParts128 pa
, pb
, *pr
;
2301 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
2302 !floatx80_unpack_canonical(&pb
, b
, status
)) {
2303 return floatx80_default_nan(status
);
2306 pr
= parts_div(&pa
, &pb
, status
);
2307 return floatx80_round_pack_canonical(pr
, status
);
2311 * Float to Float conversions
2313 * Returns the result of converting one float format to another. The
2314 * conversion is performed according to the IEC/IEEE Standard for
2315 * Binary Floating-Point Arithmetic.
2317 * Usually this only needs to take care of raising invalid exceptions
2318 * and handling the conversion on NaNs.
2321 static void parts_float_to_ahp(FloatParts64
*a
, float_status
*s
)
2324 case float_class_qnan
:
2325 case float_class_snan
:
2327 * There is no NaN in the destination format. Raise Invalid
2328 * and return a zero with the sign of the input NaN.
2330 float_raise(float_flag_invalid
, s
);
2331 a
->cls
= float_class_zero
;
2334 case float_class_inf
:
2336 * There is no Inf in the destination format. Raise Invalid
2337 * and return the maximum normal with the correct sign.
2339 float_raise(float_flag_invalid
, s
);
2340 a
->cls
= float_class_normal
;
2341 a
->exp
= float16_params_ahp
.exp_max
;
2342 a
->frac
= MAKE_64BIT_MASK(float16_params_ahp
.frac_shift
,
2343 float16_params_ahp
.frac_size
+ 1);
2346 case float_class_normal
:
2347 case float_class_zero
:
2351 g_assert_not_reached();
2355 static void parts64_float_to_float(FloatParts64
*a
, float_status
*s
)
2357 if (is_nan(a
->cls
)) {
2358 parts_return_nan(a
, s
);
2362 static void parts128_float_to_float(FloatParts128
*a
, float_status
*s
)
2364 if (is_nan(a
->cls
)) {
2365 parts_return_nan(a
, s
);
2369 #define parts_float_to_float(P, S) \
2370 PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2372 static void parts_float_to_float_narrow(FloatParts64
*a
, FloatParts128
*b
,
2379 if (a
->cls
== float_class_normal
) {
2380 frac_truncjam(a
, b
);
2381 } else if (is_nan(a
->cls
)) {
2382 /* Discard the low bits of the NaN. */
2383 a
->frac
= b
->frac_hi
;
2384 parts_return_nan(a
, s
);
2388 static void parts_float_to_float_widen(FloatParts128
*a
, FloatParts64
*b
,
2396 if (is_nan(a
->cls
)) {
2397 parts_return_nan(a
, s
);
2401 float32
float16_to_float32(float16 a
, bool ieee
, float_status
*s
)
2403 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2406 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2407 parts_float_to_float(&p
, s
);
2408 return float32_round_pack_canonical(&p
, s
);
2411 float64
float16_to_float64(float16 a
, bool ieee
, float_status
*s
)
2413 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2416 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2417 parts_float_to_float(&p
, s
);
2418 return float64_round_pack_canonical(&p
, s
);
2421 float16
float32_to_float16(float32 a
, bool ieee
, float_status
*s
)
2424 const FloatFmt
*fmt
;
2426 float32_unpack_canonical(&p
, a
, s
);
2428 parts_float_to_float(&p
, s
);
2429 fmt
= &float16_params
;
2431 parts_float_to_ahp(&p
, s
);
2432 fmt
= &float16_params_ahp
;
2434 return float16a_round_pack_canonical(&p
, s
, fmt
);
2437 static float64 QEMU_SOFTFLOAT_ATTR
2438 soft_float32_to_float64(float32 a
, float_status
*s
)
2442 float32_unpack_canonical(&p
, a
, s
);
2443 parts_float_to_float(&p
, s
);
2444 return float64_round_pack_canonical(&p
, s
);
/*
 * Convert float32 to float64.  Normal inputs take a fast path (a
 * widening conversion can never be inexact); zero is handled inline;
 * every other class (inf, NaN, subnormal) falls back to the generic
 * softfloat path via soft_float32_to_float64().
 *
 * NOTE(review): original lines 2451-2455 -- the body of the normal
 * fast path -- are missing from this extraction; presumably the
 * exact widening itself.  Confirm against upstream before relying
 * on this text.
 */
2447 float64
float32_to_float64(float32 a
, float_status
*s
)
2449 if (likely(float32_is_normal(a
))) {
2450 /* Widening conversion can never produce inexact results. */
2456 } else if (float32_is_zero(a
)) {
2457 return float64_set_sign(float64_zero
, float32_is_neg(a
));
2459 return soft_float32_to_float64(a
, s
);
2463 float16
float64_to_float16(float64 a
, bool ieee
, float_status
*s
)
2466 const FloatFmt
*fmt
;
2468 float64_unpack_canonical(&p
, a
, s
);
2470 parts_float_to_float(&p
, s
);
2471 fmt
= &float16_params
;
2473 parts_float_to_ahp(&p
, s
);
2474 fmt
= &float16_params_ahp
;
2476 return float16a_round_pack_canonical(&p
, s
, fmt
);
2479 float32
float64_to_float32(float64 a
, float_status
*s
)
2483 float64_unpack_canonical(&p
, a
, s
);
2484 parts_float_to_float(&p
, s
);
2485 return float32_round_pack_canonical(&p
, s
);
2488 float32
bfloat16_to_float32(bfloat16 a
, float_status
*s
)
2492 bfloat16_unpack_canonical(&p
, a
, s
);
2493 parts_float_to_float(&p
, s
);
2494 return float32_round_pack_canonical(&p
, s
);
2497 float64
bfloat16_to_float64(bfloat16 a
, float_status
*s
)
2501 bfloat16_unpack_canonical(&p
, a
, s
);
2502 parts_float_to_float(&p
, s
);
2503 return float64_round_pack_canonical(&p
, s
);
2506 bfloat16
float32_to_bfloat16(float32 a
, float_status
*s
)
2510 float32_unpack_canonical(&p
, a
, s
);
2511 parts_float_to_float(&p
, s
);
2512 return bfloat16_round_pack_canonical(&p
, s
);
2515 bfloat16
float64_to_bfloat16(float64 a
, float_status
*s
)
2519 float64_unpack_canonical(&p
, a
, s
);
2520 parts_float_to_float(&p
, s
);
2521 return bfloat16_round_pack_canonical(&p
, s
);
2524 float32
float128_to_float32(float128 a
, float_status
*s
)
2529 float128_unpack_canonical(&p128
, a
, s
);
2530 parts_float_to_float_narrow(&p64
, &p128
, s
);
2531 return float32_round_pack_canonical(&p64
, s
);
2534 float64
float128_to_float64(float128 a
, float_status
*s
)
2539 float128_unpack_canonical(&p128
, a
, s
);
2540 parts_float_to_float_narrow(&p64
, &p128
, s
);
2541 return float64_round_pack_canonical(&p64
, s
);
2544 float128
float32_to_float128(float32 a
, float_status
*s
)
2549 float32_unpack_canonical(&p64
, a
, s
);
2550 parts_float_to_float_widen(&p128
, &p64
, s
);
2551 return float128_round_pack_canonical(&p128
, s
);
2554 float128
float64_to_float128(float64 a
, float_status
*s
)
2559 float64_unpack_canonical(&p64
, a
, s
);
2560 parts_float_to_float_widen(&p128
, &p64
, s
);
2561 return float128_round_pack_canonical(&p128
, s
);
2564 float32
floatx80_to_float32(floatx80 a
, float_status
*s
)
2569 if (floatx80_unpack_canonical(&p128
, a
, s
)) {
2570 parts_float_to_float_narrow(&p64
, &p128
, s
);
2572 parts_default_nan(&p64
, s
);
2574 return float32_round_pack_canonical(&p64
, s
);
2577 float64
floatx80_to_float64(floatx80 a
, float_status
*s
)
2582 if (floatx80_unpack_canonical(&p128
, a
, s
)) {
2583 parts_float_to_float_narrow(&p64
, &p128
, s
);
2585 parts_default_nan(&p64
, s
);
2587 return float64_round_pack_canonical(&p64
, s
);
2590 float128
floatx80_to_float128(floatx80 a
, float_status
*s
)
2594 if (floatx80_unpack_canonical(&p
, a
, s
)) {
2595 parts_float_to_float(&p
, s
);
2597 parts_default_nan(&p
, s
);
2599 return float128_round_pack_canonical(&p
, s
);
2602 floatx80
float32_to_floatx80(float32 a
, float_status
*s
)
2607 float32_unpack_canonical(&p64
, a
, s
);
2608 parts_float_to_float_widen(&p128
, &p64
, s
);
2609 return floatx80_round_pack_canonical(&p128
, s
);
2612 floatx80
float64_to_floatx80(float64 a
, float_status
*s
)
2617 float64_unpack_canonical(&p64
, a
, s
);
2618 parts_float_to_float_widen(&p128
, &p64
, s
);
2619 return floatx80_round_pack_canonical(&p128
, s
);
2622 floatx80
float128_to_floatx80(float128 a
, float_status
*s
)
2626 float128_unpack_canonical(&p
, a
, s
);
2627 parts_float_to_float(&p
, s
);
2628 return floatx80_round_pack_canonical(&p
, s
);
2632 * Round to integral value
2635 float16
float16_round_to_int(float16 a
, float_status
*s
)
2639 float16_unpack_canonical(&p
, a
, s
);
2640 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float16_params
);
2641 return float16_round_pack_canonical(&p
, s
);
2644 float32
float32_round_to_int(float32 a
, float_status
*s
)
2648 float32_unpack_canonical(&p
, a
, s
);
2649 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float32_params
);
2650 return float32_round_pack_canonical(&p
, s
);
2653 float64
float64_round_to_int(float64 a
, float_status
*s
)
2657 float64_unpack_canonical(&p
, a
, s
);
2658 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float64_params
);
2659 return float64_round_pack_canonical(&p
, s
);
2662 bfloat16
bfloat16_round_to_int(bfloat16 a
, float_status
*s
)
2666 bfloat16_unpack_canonical(&p
, a
, s
);
2667 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &bfloat16_params
);
2668 return bfloat16_round_pack_canonical(&p
, s
);
2671 float128
float128_round_to_int(float128 a
, float_status
*s
)
2675 float128_unpack_canonical(&p
, a
, s
);
2676 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float128_params
);
2677 return float128_round_pack_canonical(&p
, s
);
2680 floatx80
floatx80_round_to_int(floatx80 a
, float_status
*status
)
2684 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
2685 return floatx80_default_nan(status
);
2688 parts_round_to_int(&p
, status
->float_rounding_mode
, 0, status
,
2689 &floatx80_params
[status
->floatx80_rounding_precision
]);
2690 return floatx80_round_pack_canonical(&p
, status
);
2694 * Floating-point to signed integer conversions
2697 int8_t float16_to_int8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2702 float16_unpack_canonical(&p
, a
, s
);
2703 return parts_float_to_sint(&p
, rmode
, scale
, INT8_MIN
, INT8_MAX
, s
);
2706 int16_t float16_to_int16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2711 float16_unpack_canonical(&p
, a
, s
);
2712 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2715 int32_t float16_to_int32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2720 float16_unpack_canonical(&p
, a
, s
);
2721 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2724 int64_t float16_to_int64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2729 float16_unpack_canonical(&p
, a
, s
);
2730 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2733 int16_t float32_to_int16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2738 float32_unpack_canonical(&p
, a
, s
);
2739 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2742 int32_t float32_to_int32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2747 float32_unpack_canonical(&p
, a
, s
);
2748 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2751 int64_t float32_to_int64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2756 float32_unpack_canonical(&p
, a
, s
);
2757 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2760 int16_t float64_to_int16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2765 float64_unpack_canonical(&p
, a
, s
);
2766 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2769 int32_t float64_to_int32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2774 float64_unpack_canonical(&p
, a
, s
);
2775 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2778 int64_t float64_to_int64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2783 float64_unpack_canonical(&p
, a
, s
);
2784 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2787 int16_t bfloat16_to_int16_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2792 bfloat16_unpack_canonical(&p
, a
, s
);
2793 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2796 int32_t bfloat16_to_int32_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2801 bfloat16_unpack_canonical(&p
, a
, s
);
2802 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2805 int64_t bfloat16_to_int64_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2810 bfloat16_unpack_canonical(&p
, a
, s
);
2811 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2814 static int32_t float128_to_int32_scalbn(float128 a
, FloatRoundMode rmode
,
2815 int scale
, float_status
*s
)
2819 float128_unpack_canonical(&p
, a
, s
);
2820 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2823 static int64_t float128_to_int64_scalbn(float128 a
, FloatRoundMode rmode
,
2824 int scale
, float_status
*s
)
2828 float128_unpack_canonical(&p
, a
, s
);
2829 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2832 static int32_t floatx80_to_int32_scalbn(floatx80 a
, FloatRoundMode rmode
,
2833 int scale
, float_status
*s
)
2837 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
2838 parts_default_nan(&p
, s
);
2840 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2843 static int64_t floatx80_to_int64_scalbn(floatx80 a
, FloatRoundMode rmode
,
2844 int scale
, float_status
*s
)
2848 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
2849 parts_default_nan(&p
, s
);
2851 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2854 int8_t float16_to_int8(float16 a
, float_status
*s
)
2856 return float16_to_int8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2859 int16_t float16_to_int16(float16 a
, float_status
*s
)
2861 return float16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2864 int32_t float16_to_int32(float16 a
, float_status
*s
)
2866 return float16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2869 int64_t float16_to_int64(float16 a
, float_status
*s
)
2871 return float16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2874 int16_t float32_to_int16(float32 a
, float_status
*s
)
2876 return float32_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2879 int32_t float32_to_int32(float32 a
, float_status
*s
)
2881 return float32_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2884 int64_t float32_to_int64(float32 a
, float_status
*s
)
2886 return float32_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2889 int16_t float64_to_int16(float64 a
, float_status
*s
)
2891 return float64_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2894 int32_t float64_to_int32(float64 a
, float_status
*s
)
2896 return float64_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2899 int64_t float64_to_int64(float64 a
, float_status
*s
)
2901 return float64_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2904 int32_t float128_to_int32(float128 a
, float_status
*s
)
2906 return float128_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2909 int64_t float128_to_int64(float128 a
, float_status
*s
)
2911 return float128_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2914 int32_t floatx80_to_int32(floatx80 a
, float_status
*s
)
2916 return floatx80_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2919 int64_t floatx80_to_int64(floatx80 a
, float_status
*s
)
2921 return floatx80_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2924 int16_t float16_to_int16_round_to_zero(float16 a
, float_status
*s
)
2926 return float16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2929 int32_t float16_to_int32_round_to_zero(float16 a
, float_status
*s
)
2931 return float16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2934 int64_t float16_to_int64_round_to_zero(float16 a
, float_status
*s
)
2936 return float16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2939 int16_t float32_to_int16_round_to_zero(float32 a
, float_status
*s
)
2941 return float32_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2944 int32_t float32_to_int32_round_to_zero(float32 a
, float_status
*s
)
2946 return float32_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2949 int64_t float32_to_int64_round_to_zero(float32 a
, float_status
*s
)
2951 return float32_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2954 int16_t float64_to_int16_round_to_zero(float64 a
, float_status
*s
)
2956 return float64_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2959 int32_t float64_to_int32_round_to_zero(float64 a
, float_status
*s
)
2961 return float64_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2964 int64_t float64_to_int64_round_to_zero(float64 a
, float_status
*s
)
2966 return float64_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2969 int32_t float128_to_int32_round_to_zero(float128 a
, float_status
*s
)
2971 return float128_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2974 int64_t float128_to_int64_round_to_zero(float128 a
, float_status
*s
)
2976 return float128_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2979 int32_t floatx80_to_int32_round_to_zero(floatx80 a
, float_status
*s
)
2981 return floatx80_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2984 int64_t floatx80_to_int64_round_to_zero(floatx80 a
, float_status
*s
)
2986 return floatx80_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2989 int16_t bfloat16_to_int16(bfloat16 a
, float_status
*s
)
2991 return bfloat16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2994 int32_t bfloat16_to_int32(bfloat16 a
, float_status
*s
)
2996 return bfloat16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2999 int64_t bfloat16_to_int64(bfloat16 a
, float_status
*s
)
3001 return bfloat16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3004 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a
, float_status
*s
)
3006 return bfloat16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
3009 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a
, float_status
*s
)
3011 return bfloat16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
3014 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a
, float_status
*s
)
3016 return bfloat16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
3020 * Floating-point to unsigned integer conversions
3023 uint8_t float16_to_uint8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3028 float16_unpack_canonical(&p
, a
, s
);
3029 return parts_float_to_uint(&p
, rmode
, scale
, UINT8_MAX
, s
);
3032 uint16_t float16_to_uint16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3037 float16_unpack_canonical(&p
, a
, s
);
3038 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3041 uint32_t float16_to_uint32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3046 float16_unpack_canonical(&p
, a
, s
);
3047 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3050 uint64_t float16_to_uint64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
3055 float16_unpack_canonical(&p
, a
, s
);
3056 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3059 uint16_t float32_to_uint16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3064 float32_unpack_canonical(&p
, a
, s
);
3065 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3068 uint32_t float32_to_uint32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3073 float32_unpack_canonical(&p
, a
, s
);
3074 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3077 uint64_t float32_to_uint64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
3082 float32_unpack_canonical(&p
, a
, s
);
3083 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3086 uint16_t float64_to_uint16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3091 float64_unpack_canonical(&p
, a
, s
);
3092 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3095 uint32_t float64_to_uint32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3100 float64_unpack_canonical(&p
, a
, s
);
3101 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3104 uint64_t float64_to_uint64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
3109 float64_unpack_canonical(&p
, a
, s
);
3110 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3113 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3114 int scale
, float_status
*s
)
3118 bfloat16_unpack_canonical(&p
, a
, s
);
3119 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
3122 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3123 int scale
, float_status
*s
)
3127 bfloat16_unpack_canonical(&p
, a
, s
);
3128 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3131 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a
, FloatRoundMode rmode
,
3132 int scale
, float_status
*s
)
3136 bfloat16_unpack_canonical(&p
, a
, s
);
3137 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3140 static uint32_t float128_to_uint32_scalbn(float128 a
, FloatRoundMode rmode
,
3141 int scale
, float_status
*s
)
3145 float128_unpack_canonical(&p
, a
, s
);
3146 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3149 static uint64_t float128_to_uint64_scalbn(float128 a
, FloatRoundMode rmode
,
3150 int scale
, float_status
*s
)
3154 float128_unpack_canonical(&p
, a
, s
);
3155 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3158 uint8_t float16_to_uint8(float16 a
, float_status
*s
)
3160 return float16_to_uint8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3163 uint16_t float16_to_uint16(float16 a
, float_status
*s
)
3165 return float16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3168 uint32_t float16_to_uint32(float16 a
, float_status
*s
)
3170 return float16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3173 uint64_t float16_to_uint64(float16 a
, float_status
*s
)
3175 return float16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3178 uint16_t float32_to_uint16(float32 a
, float_status
*s
)
3180 return float32_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3183 uint32_t float32_to_uint32(float32 a
, float_status
*s
)
3185 return float32_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3188 uint64_t float32_to_uint64(float32 a
, float_status
*s
)
3190 return float32_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3193 uint16_t float64_to_uint16(float64 a
, float_status
*s
)
3195 return float64_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3198 uint32_t float64_to_uint32(float64 a
, float_status
*s
)
3200 return float64_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3203 uint64_t float64_to_uint64(float64 a
, float_status
*s
)
3205 return float64_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3208 uint32_t float128_to_uint32(float128 a
, float_status
*s
)
3210 return float128_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3213 uint64_t float128_to_uint64(float128 a
, float_status
*s
)
3215 return float128_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3218 uint16_t float16_to_uint16_round_to_zero(float16 a
, float_status
*s
)
3220 return float16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3223 uint32_t float16_to_uint32_round_to_zero(float16 a
, float_status
*s
)
3225 return float16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3228 uint64_t float16_to_uint64_round_to_zero(float16 a
, float_status
*s
)
3230 return float16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3233 uint16_t float32_to_uint16_round_to_zero(float32 a
, float_status
*s
)
3235 return float32_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3238 uint32_t float32_to_uint32_round_to_zero(float32 a
, float_status
*s
)
3240 return float32_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3243 uint64_t float32_to_uint64_round_to_zero(float32 a
, float_status
*s
)
3245 return float32_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3248 uint16_t float64_to_uint16_round_to_zero(float64 a
, float_status
*s
)
3250 return float64_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3253 uint32_t float64_to_uint32_round_to_zero(float64 a
, float_status
*s
)
3255 return float64_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3258 uint64_t float64_to_uint64_round_to_zero(float64 a
, float_status
*s
)
3260 return float64_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3263 uint32_t float128_to_uint32_round_to_zero(float128 a
, float_status
*s
)
3265 return float128_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3268 uint64_t float128_to_uint64_round_to_zero(float128 a
, float_status
*s
)
3270 return float128_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3273 uint16_t bfloat16_to_uint16(bfloat16 a
, float_status
*s
)
3275 return bfloat16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3278 uint32_t bfloat16_to_uint32(bfloat16 a
, float_status
*s
)
3280 return bfloat16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3283 uint64_t bfloat16_to_uint64(bfloat16 a
, float_status
*s
)
3285 return bfloat16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3288 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a
, float_status
*s
)
3290 return bfloat16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3293 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a
, float_status
*s
)
3295 return bfloat16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3298 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a
, float_status
*s
)
3300 return bfloat16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3304 * Signed integer to floating-point conversions
3307 float16
int64_to_float16_scalbn(int64_t a
, int scale
, float_status
*status
)
3311 parts_sint_to_float(&p
, a
, scale
, status
);
3312 return float16_round_pack_canonical(&p
, status
);
3315 float16
int32_to_float16_scalbn(int32_t a
, int scale
, float_status
*status
)
3317 return int64_to_float16_scalbn(a
, scale
, status
);
3320 float16
int16_to_float16_scalbn(int16_t a
, int scale
, float_status
*status
)
3322 return int64_to_float16_scalbn(a
, scale
, status
);
3325 float16
int64_to_float16(int64_t a
, float_status
*status
)
3327 return int64_to_float16_scalbn(a
, 0, status
);
3330 float16
int32_to_float16(int32_t a
, float_status
*status
)
3332 return int64_to_float16_scalbn(a
, 0, status
);
3335 float16
int16_to_float16(int16_t a
, float_status
*status
)
3337 return int64_to_float16_scalbn(a
, 0, status
);
3340 float16
int8_to_float16(int8_t a
, float_status
*status
)
3342 return int64_to_float16_scalbn(a
, 0, status
);
3345 float32
int64_to_float32_scalbn(int64_t a
, int scale
, float_status
*status
)
3349 parts64_sint_to_float(&p
, a
, scale
, status
);
3350 return float32_round_pack_canonical(&p
, status
);
3353 float32
int32_to_float32_scalbn(int32_t a
, int scale
, float_status
*status
)
3355 return int64_to_float32_scalbn(a
, scale
, status
);
3358 float32
int16_to_float32_scalbn(int16_t a
, int scale
, float_status
*status
)
3360 return int64_to_float32_scalbn(a
, scale
, status
);
3363 float32
int64_to_float32(int64_t a
, float_status
*status
)
3365 return int64_to_float32_scalbn(a
, 0, status
);
3368 float32
int32_to_float32(int32_t a
, float_status
*status
)
3370 return int64_to_float32_scalbn(a
, 0, status
);
3373 float32
int16_to_float32(int16_t a
, float_status
*status
)
3375 return int64_to_float32_scalbn(a
, 0, status
);
3378 float64
int64_to_float64_scalbn(int64_t a
, int scale
, float_status
*status
)
3382 parts_sint_to_float(&p
, a
, scale
, status
);
3383 return float64_round_pack_canonical(&p
, status
);
3386 float64
int32_to_float64_scalbn(int32_t a
, int scale
, float_status
*status
)
3388 return int64_to_float64_scalbn(a
, scale
, status
);
3391 float64
int16_to_float64_scalbn(int16_t a
, int scale
, float_status
*status
)
3393 return int64_to_float64_scalbn(a
, scale
, status
);
3396 float64
int64_to_float64(int64_t a
, float_status
*status
)
3398 return int64_to_float64_scalbn(a
, 0, status
);
3401 float64
int32_to_float64(int32_t a
, float_status
*status
)
3403 return int64_to_float64_scalbn(a
, 0, status
);
3406 float64
int16_to_float64(int16_t a
, float_status
*status
)
3408 return int64_to_float64_scalbn(a
, 0, status
);
3411 bfloat16
int64_to_bfloat16_scalbn(int64_t a
, int scale
, float_status
*status
)
3415 parts_sint_to_float(&p
, a
, scale
, status
);
3416 return bfloat16_round_pack_canonical(&p
, status
);
3419 bfloat16
int32_to_bfloat16_scalbn(int32_t a
, int scale
, float_status
*status
)
3421 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3424 bfloat16
int16_to_bfloat16_scalbn(int16_t a
, int scale
, float_status
*status
)
3426 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3429 bfloat16
int64_to_bfloat16(int64_t a
, float_status
*status
)
3431 return int64_to_bfloat16_scalbn(a
, 0, status
);
3434 bfloat16
int32_to_bfloat16(int32_t a
, float_status
*status
)
3436 return int64_to_bfloat16_scalbn(a
, 0, status
);
3439 bfloat16
int16_to_bfloat16(int16_t a
, float_status
*status
)
3441 return int64_to_bfloat16_scalbn(a
, 0, status
);
3444 float128
int64_to_float128(int64_t a
, float_status
*status
)
3448 parts_sint_to_float(&p
, a
, 0, status
);
3449 return float128_round_pack_canonical(&p
, status
);
3452 float128
int32_to_float128(int32_t a
, float_status
*status
)
3454 return int64_to_float128(a
, status
);
3457 floatx80
int64_to_floatx80(int64_t a
, float_status
*status
)
3461 parts_sint_to_float(&p
, a
, 0, status
);
3462 return floatx80_round_pack_canonical(&p
, status
);
3465 floatx80
int32_to_floatx80(int32_t a
, float_status
*status
)
3467 return int64_to_floatx80(a
, status
);
3471 * Unsigned Integer to floating-point conversions
3474 float16
uint64_to_float16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3478 parts_uint_to_float(&p
, a
, scale
, status
);
3479 return float16_round_pack_canonical(&p
, status
);
3482 float16
uint32_to_float16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3484 return uint64_to_float16_scalbn(a
, scale
, status
);
3487 float16
uint16_to_float16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3489 return uint64_to_float16_scalbn(a
, scale
, status
);
3492 float16
uint64_to_float16(uint64_t a
, float_status
*status
)
3494 return uint64_to_float16_scalbn(a
, 0, status
);
3497 float16
uint32_to_float16(uint32_t a
, float_status
*status
)
3499 return uint64_to_float16_scalbn(a
, 0, status
);
3502 float16
uint16_to_float16(uint16_t a
, float_status
*status
)
3504 return uint64_to_float16_scalbn(a
, 0, status
);
3507 float16
uint8_to_float16(uint8_t a
, float_status
*status
)
3509 return uint64_to_float16_scalbn(a
, 0, status
);
3512 float32
uint64_to_float32_scalbn(uint64_t a
, int scale
, float_status
*status
)
3516 parts_uint_to_float(&p
, a
, scale
, status
);
3517 return float32_round_pack_canonical(&p
, status
);
3520 float32
uint32_to_float32_scalbn(uint32_t a
, int scale
, float_status
*status
)
3522 return uint64_to_float32_scalbn(a
, scale
, status
);
3525 float32
uint16_to_float32_scalbn(uint16_t a
, int scale
, float_status
*status
)
3527 return uint64_to_float32_scalbn(a
, scale
, status
);
3530 float32
uint64_to_float32(uint64_t a
, float_status
*status
)
3532 return uint64_to_float32_scalbn(a
, 0, status
);
3535 float32
uint32_to_float32(uint32_t a
, float_status
*status
)
3537 return uint64_to_float32_scalbn(a
, 0, status
);
3540 float32
uint16_to_float32(uint16_t a
, float_status
*status
)
3542 return uint64_to_float32_scalbn(a
, 0, status
);
3545 float64
uint64_to_float64_scalbn(uint64_t a
, int scale
, float_status
*status
)
3549 parts_uint_to_float(&p
, a
, scale
, status
);
3550 return float64_round_pack_canonical(&p
, status
);
3553 float64
uint32_to_float64_scalbn(uint32_t a
, int scale
, float_status
*status
)
3555 return uint64_to_float64_scalbn(a
, scale
, status
);
3558 float64
uint16_to_float64_scalbn(uint16_t a
, int scale
, float_status
*status
)
3560 return uint64_to_float64_scalbn(a
, scale
, status
);
3563 float64
uint64_to_float64(uint64_t a
, float_status
*status
)
3565 return uint64_to_float64_scalbn(a
, 0, status
);
3568 float64
uint32_to_float64(uint32_t a
, float_status
*status
)
3570 return uint64_to_float64_scalbn(a
, 0, status
);
3573 float64
uint16_to_float64(uint16_t a
, float_status
*status
)
3575 return uint64_to_float64_scalbn(a
, 0, status
);
3578 bfloat16
uint64_to_bfloat16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3582 parts_uint_to_float(&p
, a
, scale
, status
);
3583 return bfloat16_round_pack_canonical(&p
, status
);
3586 bfloat16
uint32_to_bfloat16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3588 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3591 bfloat16
uint16_to_bfloat16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3593 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3596 bfloat16
uint64_to_bfloat16(uint64_t a
, float_status
*status
)
3598 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3601 bfloat16
uint32_to_bfloat16(uint32_t a
, float_status
*status
)
3603 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3606 bfloat16
uint16_to_bfloat16(uint16_t a
, float_status
*status
)
3608 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3611 float128
uint64_to_float128(uint64_t a
, float_status
*status
)
3615 parts_uint_to_float(&p
, a
, 0, status
);
3616 return float128_round_pack_canonical(&p
, status
);
3620 * Minimum and maximum
3623 static float16
float16_minmax(float16 a
, float16 b
, float_status
*s
, int flags
)
3625 FloatParts64 pa
, pb
, *pr
;
3627 float16_unpack_canonical(&pa
, a
, s
);
3628 float16_unpack_canonical(&pb
, b
, s
);
3629 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3631 return float16_round_pack_canonical(pr
, s
);
3634 static bfloat16
bfloat16_minmax(bfloat16 a
, bfloat16 b
,
3635 float_status
*s
, int flags
)
3637 FloatParts64 pa
, pb
, *pr
;
3639 bfloat16_unpack_canonical(&pa
, a
, s
);
3640 bfloat16_unpack_canonical(&pb
, b
, s
);
3641 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3643 return bfloat16_round_pack_canonical(pr
, s
);
3646 static float32
float32_minmax(float32 a
, float32 b
, float_status
*s
, int flags
)
3648 FloatParts64 pa
, pb
, *pr
;
3650 float32_unpack_canonical(&pa
, a
, s
);
3651 float32_unpack_canonical(&pb
, b
, s
);
3652 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3654 return float32_round_pack_canonical(pr
, s
);
3657 static float64
float64_minmax(float64 a
, float64 b
, float_status
*s
, int flags
)
3659 FloatParts64 pa
, pb
, *pr
;
3661 float64_unpack_canonical(&pa
, a
, s
);
3662 float64_unpack_canonical(&pb
, b
, s
);
3663 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3665 return float64_round_pack_canonical(pr
, s
);
3668 static float128
float128_minmax(float128 a
, float128 b
,
3669 float_status
*s
, int flags
)
3671 FloatParts128 pa
, pb
, *pr
;
3673 float128_unpack_canonical(&pa
, a
, s
);
3674 float128_unpack_canonical(&pb
, b
, s
);
3675 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3677 return float128_round_pack_canonical(pr
, s
);
3680 #define MINMAX_1(type, name, flags) \
3681 type type##_##name(type a, type b, float_status *s) \
3682 { return type##_minmax(a, b, s, flags); }
3684 #define MINMAX_2(type) \
3685 MINMAX_1(type, max, 0) \
3686 MINMAX_1(type, maxnum, minmax_isnum) \
3687 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
3688 MINMAX_1(type, min, minmax_ismin) \
3689 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
3690 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3702 * Floating point compare
3705 static FloatRelation QEMU_FLATTEN
3706 float16_do_compare(float16 a
, float16 b
, float_status
*s
, bool is_quiet
)
3708 FloatParts64 pa
, pb
;
3710 float16_unpack_canonical(&pa
, a
, s
);
3711 float16_unpack_canonical(&pb
, b
, s
);
3712 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3715 FloatRelation
float16_compare(float16 a
, float16 b
, float_status
*s
)
3717 return float16_do_compare(a
, b
, s
, false);
3720 FloatRelation
float16_compare_quiet(float16 a
, float16 b
, float_status
*s
)
3722 return float16_do_compare(a
, b
, s
, true);
3725 static FloatRelation QEMU_SOFTFLOAT_ATTR
3726 float32_do_compare(float32 a
, float32 b
, float_status
*s
, bool is_quiet
)
3728 FloatParts64 pa
, pb
;
3730 float32_unpack_canonical(&pa
, a
, s
);
3731 float32_unpack_canonical(&pb
, b
, s
);
3732 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3735 static FloatRelation QEMU_FLATTEN
3736 float32_hs_compare(float32 xa
, float32 xb
, float_status
*s
, bool is_quiet
)
3738 union_float32 ua
, ub
;
3743 if (QEMU_NO_HARDFLOAT
) {
3747 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
3748 if (isgreaterequal(ua
.h
, ub
.h
)) {
3749 if (isgreater(ua
.h
, ub
.h
)) {
3750 return float_relation_greater
;
3752 return float_relation_equal
;
3754 if (likely(isless(ua
.h
, ub
.h
))) {
3755 return float_relation_less
;
3758 * The only condition remaining is unordered.
3759 * Fall through to set flags.
3762 return float32_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
3765 FloatRelation
float32_compare(float32 a
, float32 b
, float_status
*s
)
3767 return float32_hs_compare(a
, b
, s
, false);
3770 FloatRelation
float32_compare_quiet(float32 a
, float32 b
, float_status
*s
)
3772 return float32_hs_compare(a
, b
, s
, true);
3775 static FloatRelation QEMU_SOFTFLOAT_ATTR
3776 float64_do_compare(float64 a
, float64 b
, float_status
*s
, bool is_quiet
)
3778 FloatParts64 pa
, pb
;
3780 float64_unpack_canonical(&pa
, a
, s
);
3781 float64_unpack_canonical(&pb
, b
, s
);
3782 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3785 static FloatRelation QEMU_FLATTEN
3786 float64_hs_compare(float64 xa
, float64 xb
, float_status
*s
, bool is_quiet
)
3788 union_float64 ua
, ub
;
3793 if (QEMU_NO_HARDFLOAT
) {
3797 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
3798 if (isgreaterequal(ua
.h
, ub
.h
)) {
3799 if (isgreater(ua
.h
, ub
.h
)) {
3800 return float_relation_greater
;
3802 return float_relation_equal
;
3804 if (likely(isless(ua
.h
, ub
.h
))) {
3805 return float_relation_less
;
3808 * The only condition remaining is unordered.
3809 * Fall through to set flags.
3812 return float64_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
3815 FloatRelation
float64_compare(float64 a
, float64 b
, float_status
*s
)
3817 return float64_hs_compare(a
, b
, s
, false);
3820 FloatRelation
float64_compare_quiet(float64 a
, float64 b
, float_status
*s
)
3822 return float64_hs_compare(a
, b
, s
, true);
3825 static FloatRelation QEMU_FLATTEN
3826 bfloat16_do_compare(bfloat16 a
, bfloat16 b
, float_status
*s
, bool is_quiet
)
3828 FloatParts64 pa
, pb
;
3830 bfloat16_unpack_canonical(&pa
, a
, s
);
3831 bfloat16_unpack_canonical(&pb
, b
, s
);
3832 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3835 FloatRelation
bfloat16_compare(bfloat16 a
, bfloat16 b
, float_status
*s
)
3837 return bfloat16_do_compare(a
, b
, s
, false);
3840 FloatRelation
bfloat16_compare_quiet(bfloat16 a
, bfloat16 b
, float_status
*s
)
3842 return bfloat16_do_compare(a
, b
, s
, true);
3845 static FloatRelation QEMU_FLATTEN
3846 float128_do_compare(float128 a
, float128 b
, float_status
*s
, bool is_quiet
)
3848 FloatParts128 pa
, pb
;
3850 float128_unpack_canonical(&pa
, a
, s
);
3851 float128_unpack_canonical(&pb
, b
, s
);
3852 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3855 FloatRelation
float128_compare(float128 a
, float128 b
, float_status
*s
)
3857 return float128_do_compare(a
, b
, s
, false);
3860 FloatRelation
float128_compare_quiet(float128 a
, float128 b
, float_status
*s
)
3862 return float128_do_compare(a
, b
, s
, true);
3869 float16
float16_scalbn(float16 a
, int n
, float_status
*status
)
3873 float16_unpack_canonical(&p
, a
, status
);
3874 parts_scalbn(&p
, n
, status
);
3875 return float16_round_pack_canonical(&p
, status
);
3878 float32
float32_scalbn(float32 a
, int n
, float_status
*status
)
3882 float32_unpack_canonical(&p
, a
, status
);
3883 parts_scalbn(&p
, n
, status
);
3884 return float32_round_pack_canonical(&p
, status
);
3887 float64
float64_scalbn(float64 a
, int n
, float_status
*status
)
3891 float64_unpack_canonical(&p
, a
, status
);
3892 parts_scalbn(&p
, n
, status
);
3893 return float64_round_pack_canonical(&p
, status
);
3896 bfloat16
bfloat16_scalbn(bfloat16 a
, int n
, float_status
*status
)
3900 bfloat16_unpack_canonical(&p
, a
, status
);
3901 parts_scalbn(&p
, n
, status
);
3902 return bfloat16_round_pack_canonical(&p
, status
);
3905 float128
float128_scalbn(float128 a
, int n
, float_status
*status
)
3909 float128_unpack_canonical(&p
, a
, status
);
3910 parts_scalbn(&p
, n
, status
);
3911 return float128_round_pack_canonical(&p
, status
);
3914 floatx80
floatx80_scalbn(floatx80 a
, int n
, float_status
*status
)
3918 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
3919 return floatx80_default_nan(status
);
3921 parts_scalbn(&p
, n
, status
);
3922 return floatx80_round_pack_canonical(&p
, status
);
3929 float16 QEMU_FLATTEN
float16_sqrt(float16 a
, float_status
*status
)
3933 float16_unpack_canonical(&p
, a
, status
);
3934 parts_sqrt(&p
, status
, &float16_params
);
3935 return float16_round_pack_canonical(&p
, status
);
3938 static float32 QEMU_SOFTFLOAT_ATTR
3939 soft_f32_sqrt(float32 a
, float_status
*status
)
3943 float32_unpack_canonical(&p
, a
, status
);
3944 parts_sqrt(&p
, status
, &float32_params
);
3945 return float32_round_pack_canonical(&p
, status
);
3948 static float64 QEMU_SOFTFLOAT_ATTR
3949 soft_f64_sqrt(float64 a
, float_status
*status
)
3953 float64_unpack_canonical(&p
, a
, status
);
3954 parts_sqrt(&p
, status
, &float64_params
);
3955 return float64_round_pack_canonical(&p
, status
);
3958 float32 QEMU_FLATTEN
float32_sqrt(float32 xa
, float_status
*s
)
3960 union_float32 ua
, ur
;
3963 if (unlikely(!can_use_fpu(s
))) {
3967 float32_input_flush1(&ua
.s
, s
);
3968 if (QEMU_HARDFLOAT_1F32_USE_FP
) {
3969 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
3970 fpclassify(ua
.h
) == FP_ZERO
) ||
3974 } else if (unlikely(!float32_is_zero_or_normal(ua
.s
) ||
3975 float32_is_neg(ua
.s
))) {
3982 return soft_f32_sqrt(ua
.s
, s
);
3985 float64 QEMU_FLATTEN
float64_sqrt(float64 xa
, float_status
*s
)
3987 union_float64 ua
, ur
;
3990 if (unlikely(!can_use_fpu(s
))) {
3994 float64_input_flush1(&ua
.s
, s
);
3995 if (QEMU_HARDFLOAT_1F64_USE_FP
) {
3996 if (unlikely(!(fpclassify(ua
.h
) == FP_NORMAL
||
3997 fpclassify(ua
.h
) == FP_ZERO
) ||
4001 } else if (unlikely(!float64_is_zero_or_normal(ua
.s
) ||
4002 float64_is_neg(ua
.s
))) {
4009 return soft_f64_sqrt(ua
.s
, s
);
4012 bfloat16 QEMU_FLATTEN
bfloat16_sqrt(bfloat16 a
, float_status
*status
)
4016 bfloat16_unpack_canonical(&p
, a
, status
);
4017 parts_sqrt(&p
, status
, &bfloat16_params
);
4018 return bfloat16_round_pack_canonical(&p
, status
);
4021 float128 QEMU_FLATTEN
float128_sqrt(float128 a
, float_status
*status
)
4025 float128_unpack_canonical(&p
, a
, status
);
4026 parts_sqrt(&p
, status
, &float128_params
);
4027 return float128_round_pack_canonical(&p
, status
);
4030 floatx80
floatx80_sqrt(floatx80 a
, float_status
*s
)
4034 if (!floatx80_unpack_canonical(&p
, a
, s
)) {
4035 return floatx80_default_nan(s
);
4037 parts_sqrt(&p
, s
, &floatx80_params
[s
->floatx80_rounding_precision
]);
4038 return floatx80_round_pack_canonical(&p
, s
);
4041 /*----------------------------------------------------------------------------
4042 | The pattern for a default generated NaN.
4043 *----------------------------------------------------------------------------*/
4045 float16
float16_default_nan(float_status
*status
)
4049 parts_default_nan(&p
, status
);
4050 p
.frac
>>= float16_params
.frac_shift
;
4051 return float16_pack_raw(&p
);
4054 float32
float32_default_nan(float_status
*status
)
4058 parts_default_nan(&p
, status
);
4059 p
.frac
>>= float32_params
.frac_shift
;
4060 return float32_pack_raw(&p
);
4063 float64
float64_default_nan(float_status
*status
)
4067 parts_default_nan(&p
, status
);
4068 p
.frac
>>= float64_params
.frac_shift
;
4069 return float64_pack_raw(&p
);
4072 float128
float128_default_nan(float_status
*status
)
4076 parts_default_nan(&p
, status
);
4077 frac_shr(&p
, float128_params
.frac_shift
);
4078 return float128_pack_raw(&p
);
4081 bfloat16
bfloat16_default_nan(float_status
*status
)
4085 parts_default_nan(&p
, status
);
4086 p
.frac
>>= bfloat16_params
.frac_shift
;
4087 return bfloat16_pack_raw(&p
);
4090 /*----------------------------------------------------------------------------
4091 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4092 *----------------------------------------------------------------------------*/
4094 float16
float16_silence_nan(float16 a
, float_status
*status
)
4098 float16_unpack_raw(&p
, a
);
4099 p
.frac
<<= float16_params
.frac_shift
;
4100 parts_silence_nan(&p
, status
);
4101 p
.frac
>>= float16_params
.frac_shift
;
4102 return float16_pack_raw(&p
);
4105 float32
float32_silence_nan(float32 a
, float_status
*status
)
4109 float32_unpack_raw(&p
, a
);
4110 p
.frac
<<= float32_params
.frac_shift
;
4111 parts_silence_nan(&p
, status
);
4112 p
.frac
>>= float32_params
.frac_shift
;
4113 return float32_pack_raw(&p
);
4116 float64
float64_silence_nan(float64 a
, float_status
*status
)
4120 float64_unpack_raw(&p
, a
);
4121 p
.frac
<<= float64_params
.frac_shift
;
4122 parts_silence_nan(&p
, status
);
4123 p
.frac
>>= float64_params
.frac_shift
;
4124 return float64_pack_raw(&p
);
4127 bfloat16
bfloat16_silence_nan(bfloat16 a
, float_status
*status
)
4131 bfloat16_unpack_raw(&p
, a
);
4132 p
.frac
<<= bfloat16_params
.frac_shift
;
4133 parts_silence_nan(&p
, status
);
4134 p
.frac
>>= bfloat16_params
.frac_shift
;
4135 return bfloat16_pack_raw(&p
);
4138 float128
float128_silence_nan(float128 a
, float_status
*status
)
4142 float128_unpack_raw(&p
, a
);
4143 frac_shl(&p
, float128_params
.frac_shift
);
4144 parts_silence_nan(&p
, status
);
4145 frac_shr(&p
, float128_params
.frac_shift
);
4146 return float128_pack_raw(&p
);
4149 /*----------------------------------------------------------------------------
4150 | If `a' is denormal and we are in flush-to-zero mode then set the
4151 | input-denormal exception and return zero. Otherwise just return the value.
4152 *----------------------------------------------------------------------------*/
4154 static bool parts_squash_denormal(FloatParts64 p
, float_status
*status
)
4156 if (p
.exp
== 0 && p
.frac
!= 0) {
4157 float_raise(float_flag_input_denormal
, status
);
4164 float16
float16_squash_input_denormal(float16 a
, float_status
*status
)
4166 if (status
->flush_inputs_to_zero
) {
4169 float16_unpack_raw(&p
, a
);
4170 if (parts_squash_denormal(p
, status
)) {
4171 return float16_set_sign(float16_zero
, p
.sign
);
4177 float32
float32_squash_input_denormal(float32 a
, float_status
*status
)
4179 if (status
->flush_inputs_to_zero
) {
4182 float32_unpack_raw(&p
, a
);
4183 if (parts_squash_denormal(p
, status
)) {
4184 return float32_set_sign(float32_zero
, p
.sign
);
4190 float64
float64_squash_input_denormal(float64 a
, float_status
*status
)
4192 if (status
->flush_inputs_to_zero
) {
4195 float64_unpack_raw(&p
, a
);
4196 if (parts_squash_denormal(p
, status
)) {
4197 return float64_set_sign(float64_zero
, p
.sign
);
4203 bfloat16
bfloat16_squash_input_denormal(bfloat16 a
, float_status
*status
)
4205 if (status
->flush_inputs_to_zero
) {
4208 bfloat16_unpack_raw(&p
, a
);
4209 if (parts_squash_denormal(p
, status
)) {
4210 return bfloat16_set_sign(bfloat16_zero
, p
.sign
);
4216 /*----------------------------------------------------------------------------
4217 | Normalizes the subnormal single-precision floating-point value represented
4218 | by the denormalized significand `aSig'. The normalized exponent and
4219 | significand are stored at the locations pointed to by `zExpPtr' and
4220 | `zSigPtr', respectively.
4221 *----------------------------------------------------------------------------*/
/*
 * Normalize a subnormal float32 significand: shift the leading 1 into
 * bit 23 and return the matching (negative or zero) exponent.
 * (Return type and local declaration restored.)
 */
static void
normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    int8_t shiftCount;

    /* 8 = 31 - 23: align the most-significant set bit with bit 23. */
    shiftCount = clz32(aSig) - 8;
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4234 /*----------------------------------------------------------------------------
4235 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4236 | and significand `zSig', and returns the proper single-precision floating-
4237 | point value corresponding to the abstract input. Ordinarily, the abstract
4238 | value is simply rounded and packed into the single-precision format, with
4239 | the inexact exception raised if the abstract input cannot be represented
4240 | exactly. However, if the abstract value is too large, the overflow and
4241 | inexact exceptions are raised and an infinity or maximal finite value is
4242 | returned. If the abstract value is too small, the input value is rounded to
4243 | a subnormal number, and the underflow and inexact exceptions are raised if
4244 | the abstract input cannot be represented exactly as a subnormal single-
4245 | precision floating-point number.
4246 | The input significand `zSig' has its binary point between bits 30
4247 | and 29, which is 7 bits to the left of the usual location. This shifted
4248 | significand must be normalized or smaller. If `zSig' is not normalized,
4249 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4250 | and it must not require rounding. In the usual case that `zSig' is
4251 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4252 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4253 | Binary Floating-Point Arithmetic.
4254 *----------------------------------------------------------------------------*/
4256 static float32
roundAndPackFloat32(bool zSign
, int zExp
, uint32_t zSig
,
4257 float_status
*status
)
4259 int8_t roundingMode
;
4260 bool roundNearestEven
;
4261 int8_t roundIncrement
, roundBits
;
4264 roundingMode
= status
->float_rounding_mode
;
4265 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4266 switch (roundingMode
) {
4267 case float_round_nearest_even
:
4268 case float_round_ties_away
:
4269 roundIncrement
= 0x40;
4271 case float_round_to_zero
:
4274 case float_round_up
:
4275 roundIncrement
= zSign
? 0 : 0x7f;
4277 case float_round_down
:
4278 roundIncrement
= zSign
? 0x7f : 0;
4280 case float_round_to_odd
:
4281 roundIncrement
= zSig
& 0x80 ? 0 : 0x7f;
4287 roundBits
= zSig
& 0x7F;
4288 if ( 0xFD <= (uint16_t) zExp
) {
4289 if ( ( 0xFD < zExp
)
4290 || ( ( zExp
== 0xFD )
4291 && ( (int32_t) ( zSig
+ roundIncrement
) < 0 ) )
4293 bool overflow_to_inf
= roundingMode
!= float_round_to_odd
&&
4294 roundIncrement
!= 0;
4295 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4296 return packFloat32(zSign
, 0xFF, -!overflow_to_inf
);
4299 if (status
->flush_to_zero
) {
4300 float_raise(float_flag_output_denormal
, status
);
4301 return packFloat32(zSign
, 0, 0);
4303 isTiny
= status
->tininess_before_rounding
4305 || (zSig
+ roundIncrement
< 0x80000000);
4306 shift32RightJamming( zSig
, - zExp
, &zSig
);
4308 roundBits
= zSig
& 0x7F;
4309 if (isTiny
&& roundBits
) {
4310 float_raise(float_flag_underflow
, status
);
4312 if (roundingMode
== float_round_to_odd
) {
4314 * For round-to-odd case, the roundIncrement depends on
4315 * zSig which just changed.
4317 roundIncrement
= zSig
& 0x80 ? 0 : 0x7f;
4322 float_raise(float_flag_inexact
, status
);
4324 zSig
= ( zSig
+ roundIncrement
)>>7;
4325 if (!(roundBits
^ 0x40) && roundNearestEven
) {
4328 if ( zSig
== 0 ) zExp
= 0;
4329 return packFloat32( zSign
, zExp
, zSig
);
4333 /*----------------------------------------------------------------------------
4334 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4335 | and significand `zSig', and returns the proper single-precision floating-
4336 | point value corresponding to the abstract input. This routine is just like
4337 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4338 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4339 | floating-point exponent.
4340 *----------------------------------------------------------------------------*/
4343 normalizeRoundAndPackFloat32(bool zSign
, int zExp
, uint32_t zSig
,
4344 float_status
*status
)
4348 shiftCount
= clz32(zSig
) - 1;
4349 return roundAndPackFloat32(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
4354 /*----------------------------------------------------------------------------
4355 | Normalizes the subnormal double-precision floating-point value represented
4356 | by the denormalized significand `aSig'. The normalized exponent and
4357 | significand are stored at the locations pointed to by `zExpPtr' and
4358 | `zSigPtr', respectively.
4359 *----------------------------------------------------------------------------*/
/*
 * Normalize a subnormal float64 significand: shift the leading 1 into
 * bit 52 and return the matching exponent.  (Decls restored.)
 */
static void
normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    int8_t shiftCount;

    /* 11 = 63 - 52: align the most-significant set bit with bit 52. */
    shiftCount = clz64(aSig) - 11;
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4372 /*----------------------------------------------------------------------------
4373 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4374 | double-precision floating-point value, returning the result. After being
4375 | shifted into the proper positions, the three fields are simply added
4376 | together to form the result. This means that any integer portion of `zSig'
4377 | will be added into the exponent. Since a properly normalized significand
4378 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4379 | than the desired result exponent whenever `zSig' is a complete, normalized
4381 *----------------------------------------------------------------------------*/
4383 static inline float64
packFloat64(bool zSign
, int zExp
, uint64_t zSig
)
4386 return make_float64(
4387 ( ( (uint64_t) zSign
)<<63 ) + ( ( (uint64_t) zExp
)<<52 ) + zSig
);
4391 /*----------------------------------------------------------------------------
4392 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4393 | and significand `zSig', and returns the proper double-precision floating-
4394 | point value corresponding to the abstract input. Ordinarily, the abstract
4395 | value is simply rounded and packed into the double-precision format, with
4396 | the inexact exception raised if the abstract input cannot be represented
4397 | exactly. However, if the abstract value is too large, the overflow and
4398 | inexact exceptions are raised and an infinity or maximal finite value is
4399 | returned. If the abstract value is too small, the input value is rounded to
4400 | a subnormal number, and the underflow and inexact exceptions are raised if
4401 | the abstract input cannot be represented exactly as a subnormal double-
4402 | precision floating-point number.
4403 | The input significand `zSig' has its binary point between bits 62
4404 | and 61, which is 10 bits to the left of the usual location. This shifted
4405 | significand must be normalized or smaller. If `zSig' is not normalized,
4406 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4407 | and it must not require rounding. In the usual case that `zSig' is
4408 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4409 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4410 | Binary Floating-Point Arithmetic.
4411 *----------------------------------------------------------------------------*/
4413 static float64
roundAndPackFloat64(bool zSign
, int zExp
, uint64_t zSig
,
4414 float_status
*status
)
4416 int8_t roundingMode
;
4417 bool roundNearestEven
;
4418 int roundIncrement
, roundBits
;
4421 roundingMode
= status
->float_rounding_mode
;
4422 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4423 switch (roundingMode
) {
4424 case float_round_nearest_even
:
4425 case float_round_ties_away
:
4426 roundIncrement
= 0x200;
4428 case float_round_to_zero
:
4431 case float_round_up
:
4432 roundIncrement
= zSign
? 0 : 0x3ff;
4434 case float_round_down
:
4435 roundIncrement
= zSign
? 0x3ff : 0;
4437 case float_round_to_odd
:
4438 roundIncrement
= (zSig
& 0x400) ? 0 : 0x3ff;
4443 roundBits
= zSig
& 0x3FF;
4444 if ( 0x7FD <= (uint16_t) zExp
) {
4445 if ( ( 0x7FD < zExp
)
4446 || ( ( zExp
== 0x7FD )
4447 && ( (int64_t) ( zSig
+ roundIncrement
) < 0 ) )
4449 bool overflow_to_inf
= roundingMode
!= float_round_to_odd
&&
4450 roundIncrement
!= 0;
4451 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4452 return packFloat64(zSign
, 0x7FF, -(!overflow_to_inf
));
4455 if (status
->flush_to_zero
) {
4456 float_raise(float_flag_output_denormal
, status
);
4457 return packFloat64(zSign
, 0, 0);
4459 isTiny
= status
->tininess_before_rounding
4461 || (zSig
+ roundIncrement
< UINT64_C(0x8000000000000000));
4462 shift64RightJamming( zSig
, - zExp
, &zSig
);
4464 roundBits
= zSig
& 0x3FF;
4465 if (isTiny
&& roundBits
) {
4466 float_raise(float_flag_underflow
, status
);
4468 if (roundingMode
== float_round_to_odd
) {
4470 * For round-to-odd case, the roundIncrement depends on
4471 * zSig which just changed.
4473 roundIncrement
= (zSig
& 0x400) ? 0 : 0x3ff;
4478 float_raise(float_flag_inexact
, status
);
4480 zSig
= ( zSig
+ roundIncrement
)>>10;
4481 if (!(roundBits
^ 0x200) && roundNearestEven
) {
4484 if ( zSig
== 0 ) zExp
= 0;
4485 return packFloat64( zSign
, zExp
, zSig
);
4489 /*----------------------------------------------------------------------------
4490 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4491 | and significand `zSig', and returns the proper double-precision floating-
4492 | point value corresponding to the abstract input. This routine is just like
4493 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4494 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4495 | floating-point exponent.
4496 *----------------------------------------------------------------------------*/
4499 normalizeRoundAndPackFloat64(bool zSign
, int zExp
, uint64_t zSig
,
4500 float_status
*status
)
4504 shiftCount
= clz64(zSig
) - 1;
4505 return roundAndPackFloat64(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
4510 /*----------------------------------------------------------------------------
4511 | Normalizes the subnormal extended double-precision floating-point value
4512 | represented by the denormalized significand `aSig'. The normalized exponent
4513 | and significand are stored at the locations pointed to by `zExpPtr' and
4514 | `zSigPtr', respectively.
4515 *----------------------------------------------------------------------------*/
/*
 * Normalize a subnormal floatx80 significand: shift the leading 1 into
 * bit 63 and return the matching exponent.  (Missing parameter line and
 * local declaration restored.)
 */
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shiftCount;

    shiftCount = clz64(aSig);
    *zSigPtr = aSig << shiftCount;
    *zExpPtr = 1 - shiftCount;
}
4527 /*----------------------------------------------------------------------------
4528 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4529 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4530 | and returns the proper extended double-precision floating-point value
4531 | corresponding to the abstract input. Ordinarily, the abstract value is
4532 | rounded and packed into the extended double-precision format, with the
4533 | inexact exception raised if the abstract input cannot be represented
4534 | exactly. However, if the abstract value is too large, the overflow and
4535 | inexact exceptions are raised and an infinity or maximal finite value is
4536 | returned. If the abstract value is too small, the input value is rounded to
4537 | a subnormal number, and the underflow and inexact exceptions are raised if
4538 | the abstract input cannot be represented exactly as a subnormal extended
4539 | double-precision floating-point number.
4540 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4541 | the result is rounded to the same number of bits as single or double
4542 | precision, respectively. Otherwise, the result is rounded to the full
4543 | precision of the extended double-precision format.
4544 | The input significand must be normalized or smaller. If the input
4545 | significand is not normalized, `zExp' must be 0; in that case, the result
4546 | returned is a subnormal number, and it must not require rounding. The
4547 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4548 | Floating-Point Arithmetic.
4549 *----------------------------------------------------------------------------*/
4551 floatx80
roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
, bool zSign
,
4552 int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
,
4553 float_status
*status
)
4555 FloatRoundMode roundingMode
;
4556 bool roundNearestEven
, increment
, isTiny
;
4557 int64_t roundIncrement
, roundMask
, roundBits
;
4559 roundingMode
= status
->float_rounding_mode
;
4560 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4561 switch (roundingPrecision
) {
4562 case floatx80_precision_x
:
4564 case floatx80_precision_d
:
4565 roundIncrement
= UINT64_C(0x0000000000000400);
4566 roundMask
= UINT64_C(0x00000000000007FF);
4568 case floatx80_precision_s
:
4569 roundIncrement
= UINT64_C(0x0000008000000000);
4570 roundMask
= UINT64_C(0x000000FFFFFFFFFF);
4573 g_assert_not_reached();
4575 zSig0
|= ( zSig1
!= 0 );
4576 switch (roundingMode
) {
4577 case float_round_nearest_even
:
4578 case float_round_ties_away
:
4580 case float_round_to_zero
:
4583 case float_round_up
:
4584 roundIncrement
= zSign
? 0 : roundMask
;
4586 case float_round_down
:
4587 roundIncrement
= zSign
? roundMask
: 0;
4592 roundBits
= zSig0
& roundMask
;
4593 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4594 if ( ( 0x7FFE < zExp
)
4595 || ( ( zExp
== 0x7FFE ) && ( zSig0
+ roundIncrement
< zSig0
) )
4600 if (status
->flush_to_zero
) {
4601 float_raise(float_flag_output_denormal
, status
);
4602 return packFloatx80(zSign
, 0, 0);
4604 isTiny
= status
->tininess_before_rounding
4606 || (zSig0
<= zSig0
+ roundIncrement
);
4607 shift64RightJamming( zSig0
, 1 - zExp
, &zSig0
);
4609 roundBits
= zSig0
& roundMask
;
4610 if (isTiny
&& roundBits
) {
4611 float_raise(float_flag_underflow
, status
);
4614 float_raise(float_flag_inexact
, status
);
4616 zSig0
+= roundIncrement
;
4617 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4618 roundIncrement
= roundMask
+ 1;
4619 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4620 roundMask
|= roundIncrement
;
4622 zSig0
&= ~ roundMask
;
4623 return packFloatx80( zSign
, zExp
, zSig0
);
4627 float_raise(float_flag_inexact
, status
);
4629 zSig0
+= roundIncrement
;
4630 if ( zSig0
< roundIncrement
) {
4632 zSig0
= UINT64_C(0x8000000000000000);
4634 roundIncrement
= roundMask
+ 1;
4635 if ( roundNearestEven
&& ( roundBits
<<1 == roundIncrement
) ) {
4636 roundMask
|= roundIncrement
;
4638 zSig0
&= ~ roundMask
;
4639 if ( zSig0
== 0 ) zExp
= 0;
4640 return packFloatx80( zSign
, zExp
, zSig0
);
4642 switch (roundingMode
) {
4643 case float_round_nearest_even
:
4644 case float_round_ties_away
:
4645 increment
= ((int64_t)zSig1
< 0);
4647 case float_round_to_zero
:
4650 case float_round_up
:
4651 increment
= !zSign
&& zSig1
;
4653 case float_round_down
:
4654 increment
= zSign
&& zSig1
;
4659 if ( 0x7FFD <= (uint32_t) ( zExp
- 1 ) ) {
4660 if ( ( 0x7FFE < zExp
)
4661 || ( ( zExp
== 0x7FFE )
4662 && ( zSig0
== UINT64_C(0xFFFFFFFFFFFFFFFF) )
4668 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4669 if ( ( roundingMode
== float_round_to_zero
)
4670 || ( zSign
&& ( roundingMode
== float_round_up
) )
4671 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4673 return packFloatx80( zSign
, 0x7FFE, ~ roundMask
);
4675 return packFloatx80(zSign
,
4676 floatx80_infinity_high
,
4677 floatx80_infinity_low
);
4680 isTiny
= status
->tininess_before_rounding
4683 || (zSig0
< UINT64_C(0xFFFFFFFFFFFFFFFF));
4684 shift64ExtraRightJamming( zSig0
, zSig1
, 1 - zExp
, &zSig0
, &zSig1
);
4686 if (isTiny
&& zSig1
) {
4687 float_raise(float_flag_underflow
, status
);
4690 float_raise(float_flag_inexact
, status
);
4692 switch (roundingMode
) {
4693 case float_round_nearest_even
:
4694 case float_round_ties_away
:
4695 increment
= ((int64_t)zSig1
< 0);
4697 case float_round_to_zero
:
4700 case float_round_up
:
4701 increment
= !zSign
&& zSig1
;
4703 case float_round_down
:
4704 increment
= zSign
&& zSig1
;
4711 if (!(zSig1
<< 1) && roundNearestEven
) {
4714 if ( (int64_t) zSig0
< 0 ) zExp
= 1;
4716 return packFloatx80( zSign
, zExp
, zSig0
);
4720 float_raise(float_flag_inexact
, status
);
4726 zSig0
= UINT64_C(0x8000000000000000);
4729 if (!(zSig1
<< 1) && roundNearestEven
) {
4735 if ( zSig0
== 0 ) zExp
= 0;
4737 return packFloatx80( zSign
, zExp
, zSig0
);
4741 /*----------------------------------------------------------------------------
4742 | Takes an abstract floating-point value having sign `zSign', exponent
4743 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4744 | and returns the proper extended double-precision floating-point value
4745 | corresponding to the abstract input. This routine is just like
4746 | `roundAndPackFloatx80' except that the input significand does not have to be
4748 *----------------------------------------------------------------------------*/
4750 floatx80
normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
,
4751 bool zSign
, int32_t zExp
,
4752 uint64_t zSig0
, uint64_t zSig1
,
4753 float_status
*status
)
4762 shiftCount
= clz64(zSig0
);
4763 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
4765 return roundAndPackFloatx80(roundingPrecision
, zSign
, zExp
,
4766 zSig0
, zSig1
, status
);
4770 /*----------------------------------------------------------------------------
4771 | Returns the least-significant 64 fraction bits of the quadruple-precision
4772 | floating-point value `a'.
4773 *----------------------------------------------------------------------------*/
4775 static inline uint64_t extractFloat128Frac1( float128 a
)
4782 /*----------------------------------------------------------------------------
4783 | Returns the most-significant 48 fraction bits of the quadruple-precision
4784 | floating-point value `a'.
4785 *----------------------------------------------------------------------------*/
4787 static inline uint64_t extractFloat128Frac0( float128 a
)
4790 return a
.high
& UINT64_C(0x0000FFFFFFFFFFFF);
4794 /*----------------------------------------------------------------------------
4795 | Returns the exponent bits of the quadruple-precision floating-point value
4797 *----------------------------------------------------------------------------*/
4799 static inline int32_t extractFloat128Exp( float128 a
)
4802 return ( a
.high
>>48 ) & 0x7FFF;
4806 /*----------------------------------------------------------------------------
4807 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4808 *----------------------------------------------------------------------------*/
4810 static inline bool extractFloat128Sign(float128 a
)
4812 return a
.high
>> 63;
4815 /*----------------------------------------------------------------------------
4816 | Normalizes the subnormal quadruple-precision floating-point value
4817 | represented by the denormalized significand formed by the concatenation of
4818 | `aSig0' and `aSig1'. The normalized exponent is stored at the location
4819 | pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4820 | significand are stored at the location pointed to by `zSig0Ptr', and the
4821 | least significant 64 bits of the normalized significand are stored at the
4822 | location pointed to by `zSig1Ptr'.
4823 *----------------------------------------------------------------------------*/
/*
 * Normalize a subnormal float128 significand held in aSig0:aSig1,
 * producing the exponent and a significand whose leading 1 sits at
 * bit 48 of the high word.  (The dropped parameter list, declaration
 * and else-branch statements restored per SoftFloat-2a.)
 */
static void
normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                           int32_t *zExpPtr,
                           uint64_t *zSig0Ptr, uint64_t *zSig1Ptr)
{
    int8_t shiftCount;

    if ( aSig0 == 0 ) {
        /* High word empty: promote the low word, >=64-bit shift. */
        shiftCount = clz64(aSig1) - 15;
        if ( shiftCount < 0 ) {
            *zSig0Ptr = aSig1 >> ( - shiftCount );
            *zSig1Ptr = aSig1 << ( shiftCount & 63 );
        }
        else {
            *zSig0Ptr = aSig1 << shiftCount;
            *zSig1Ptr = 0;
        }
        *zExpPtr = - shiftCount - 63;
    }
    else {
        shiftCount = clz64(aSig0) - 15;
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
        *zExpPtr = 1 - shiftCount;
    }
}
4856 /*----------------------------------------------------------------------------
4857 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4858 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4859 | floating-point value, returning the result. After being shifted into the
4860 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4861 | added together to form the most significant 32 bits of the result. This
4862 | means that any integer portion of `zSig0' will be added into the exponent.
4863 | Since a properly normalized significand will have an integer portion equal
4864 | to 1, the `zExp' input should be 1 less than the desired result exponent
4865 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4867 *----------------------------------------------------------------------------*/
4869 static inline float128
4870 packFloat128(bool zSign
, int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
)
4875 z
.high
= ((uint64_t)zSign
<< 63) + ((uint64_t)zExp
<< 48) + zSig0
;
4879 /*----------------------------------------------------------------------------
4880 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4881 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4882 | and `zSig2', and returns the proper quadruple-precision floating-point value
4883 | corresponding to the abstract input. Ordinarily, the abstract value is
4884 | simply rounded and packed into the quadruple-precision format, with the
4885 | inexact exception raised if the abstract input cannot be represented
4886 | exactly. However, if the abstract value is too large, the overflow and
4887 | inexact exceptions are raised and an infinity or maximal finite value is
4888 | returned. If the abstract value is too small, the input value is rounded to
4889 | a subnormal number, and the underflow and inexact exceptions are raised if
4890 | the abstract input cannot be represented exactly as a subnormal quadruple-
4891 | precision floating-point number.
4892 | The input significand must be normalized or smaller. If the input
4893 | significand is not normalized, `zExp' must be 0; in that case, the result
4894 | returned is a subnormal number, and it must not require rounding. In the
4895 | usual case that the input significand is normalized, `zExp' must be 1 less
4896 | than the ``true'' floating-point exponent. The handling of underflow and
4897 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4898 *----------------------------------------------------------------------------*/
4900 static float128
roundAndPackFloat128(bool zSign
, int32_t zExp
,
4901 uint64_t zSig0
, uint64_t zSig1
,
4902 uint64_t zSig2
, float_status
*status
)
4904 int8_t roundingMode
;
4905 bool roundNearestEven
, increment
, isTiny
;
4907 roundingMode
= status
->float_rounding_mode
;
4908 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4909 switch (roundingMode
) {
4910 case float_round_nearest_even
:
4911 case float_round_ties_away
:
4912 increment
= ((int64_t)zSig2
< 0);
4914 case float_round_to_zero
:
4917 case float_round_up
:
4918 increment
= !zSign
&& zSig2
;
4920 case float_round_down
:
4921 increment
= zSign
&& zSig2
;
4923 case float_round_to_odd
:
4924 increment
= !(zSig1
& 0x1) && zSig2
;
4929 if ( 0x7FFD <= (uint32_t) zExp
) {
4930 if ( ( 0x7FFD < zExp
)
4931 || ( ( zExp
== 0x7FFD )
4933 UINT64_C(0x0001FFFFFFFFFFFF),
4934 UINT64_C(0xFFFFFFFFFFFFFFFF),
4941 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4942 if ( ( roundingMode
== float_round_to_zero
)
4943 || ( zSign
&& ( roundingMode
== float_round_up
) )
4944 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4945 || (roundingMode
== float_round_to_odd
)
4951 UINT64_C(0x0000FFFFFFFFFFFF),
4952 UINT64_C(0xFFFFFFFFFFFFFFFF)
4955 return packFloat128( zSign
, 0x7FFF, 0, 0 );
4958 if (status
->flush_to_zero
) {
4959 float_raise(float_flag_output_denormal
, status
);
4960 return packFloat128(zSign
, 0, 0, 0);
4962 isTiny
= status
->tininess_before_rounding
4965 || lt128(zSig0
, zSig1
,
4966 UINT64_C(0x0001FFFFFFFFFFFF),
4967 UINT64_C(0xFFFFFFFFFFFFFFFF));
4968 shift128ExtraRightJamming(
4969 zSig0
, zSig1
, zSig2
, - zExp
, &zSig0
, &zSig1
, &zSig2
);
4971 if (isTiny
&& zSig2
) {
4972 float_raise(float_flag_underflow
, status
);
4974 switch (roundingMode
) {
4975 case float_round_nearest_even
:
4976 case float_round_ties_away
:
4977 increment
= ((int64_t)zSig2
< 0);
4979 case float_round_to_zero
:
4982 case float_round_up
:
4983 increment
= !zSign
&& zSig2
;
4985 case float_round_down
:
4986 increment
= zSign
&& zSig2
;
4988 case float_round_to_odd
:
4989 increment
= !(zSig1
& 0x1) && zSig2
;
4997 float_raise(float_flag_inexact
, status
);
5000 add128( zSig0
, zSig1
, 0, 1, &zSig0
, &zSig1
);
5001 if ((zSig2
+ zSig2
== 0) && roundNearestEven
) {
5006 if ( ( zSig0
| zSig1
) == 0 ) zExp
= 0;
5008 return packFloat128( zSign
, zExp
, zSig0
, zSig1
);
5012 /*----------------------------------------------------------------------------
5013 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
5014 | and significand formed by the concatenation of `zSig0' and `zSig1', and
5015 | returns the proper quadruple-precision floating-point value corresponding
5016 | to the abstract input. This routine is just like `roundAndPackFloat128'
5017 | except that the input significand has fewer bits and does not have to be
5018 | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
5020 *----------------------------------------------------------------------------*/
5022 static float128
normalizeRoundAndPackFloat128(bool zSign
, int32_t zExp
,
5023 uint64_t zSig0
, uint64_t zSig1
,
5024 float_status
*status
)
5034 shiftCount
= clz64(zSig0
) - 15;
5035 if ( 0 <= shiftCount
) {
5037 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
5040 shift128ExtraRightJamming(
5041 zSig0
, zSig1
, 0, - shiftCount
, &zSig0
, &zSig1
, &zSig2
);
5044 return roundAndPackFloat128(zSign
, zExp
, zSig0
, zSig1
, zSig2
, status
);
5048 /*----------------------------------------------------------------------------
5049 | Returns the remainder of the single-precision floating-point value `a'
5050 | with respect to the corresponding value `b'. The operation is performed
5051 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5052 *----------------------------------------------------------------------------*/
5054 float32
float32_rem(float32 a
, float32 b
, float_status
*status
)
5057 int aExp
, bExp
, expDiff
;
5058 uint32_t aSig
, bSig
;
5060 uint64_t aSig64
, bSig64
, q64
;
5061 uint32_t alternateASig
;
5063 a
= float32_squash_input_denormal(a
, status
);
5064 b
= float32_squash_input_denormal(b
, status
);
5066 aSig
= extractFloat32Frac( a
);
5067 aExp
= extractFloat32Exp( a
);
5068 aSign
= extractFloat32Sign( a
);
5069 bSig
= extractFloat32Frac( b
);
5070 bExp
= extractFloat32Exp( b
);
5071 if ( aExp
== 0xFF ) {
5072 if ( aSig
|| ( ( bExp
== 0xFF ) && bSig
) ) {
5073 return propagateFloat32NaN(a
, b
, status
);
5075 float_raise(float_flag_invalid
, status
);
5076 return float32_default_nan(status
);
5078 if ( bExp
== 0xFF ) {
5080 return propagateFloat32NaN(a
, b
, status
);
5086 float_raise(float_flag_invalid
, status
);
5087 return float32_default_nan(status
);
5089 normalizeFloat32Subnormal( bSig
, &bExp
, &bSig
);
5092 if ( aSig
== 0 ) return a
;
5093 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5095 expDiff
= aExp
- bExp
;
5098 if ( expDiff
< 32 ) {
5101 if ( expDiff
< 0 ) {
5102 if ( expDiff
< -1 ) return a
;
5105 q
= ( bSig
<= aSig
);
5106 if ( q
) aSig
-= bSig
;
5107 if ( 0 < expDiff
) {
5108 q
= ( ( (uint64_t) aSig
)<<32 ) / bSig
;
5111 aSig
= ( ( aSig
>>1 )<<( expDiff
- 1 ) ) - bSig
* q
;
5119 if ( bSig
<= aSig
) aSig
-= bSig
;
5120 aSig64
= ( (uint64_t) aSig
)<<40;
5121 bSig64
= ( (uint64_t) bSig
)<<40;
5123 while ( 0 < expDiff
) {
5124 q64
= estimateDiv128To64( aSig64
, 0, bSig64
);
5125 q64
= ( 2 < q64
) ? q64
- 2 : 0;
5126 aSig64
= - ( ( bSig
* q64
)<<38 );
5130 q64
= estimateDiv128To64( aSig64
, 0, bSig64
);
5131 q64
= ( 2 < q64
) ? q64
- 2 : 0;
5132 q
= q64
>>( 64 - expDiff
);
5134 aSig
= ( ( aSig64
>>33 )<<( expDiff
- 1 ) ) - bSig
* q
;
5137 alternateASig
= aSig
;
5140 } while ( 0 <= (int32_t) aSig
);
5141 sigMean
= aSig
+ alternateASig
;
5142 if ( ( sigMean
< 0 ) || ( ( sigMean
== 0 ) && ( q
& 1 ) ) ) {
5143 aSig
= alternateASig
;
5145 zSign
= ( (int32_t) aSig
< 0 );
5146 if ( zSign
) aSig
= - aSig
;
5147 return normalizeRoundAndPackFloat32(aSign
^ zSign
, bExp
, aSig
, status
);
5152 /*----------------------------------------------------------------------------
5153 | Returns the binary exponential of the single-precision floating-point value
5154 | `a'. The operation is performed according to the IEC/IEEE Standard for
5155 | Binary Floating-Point Arithmetic.
5157 | Uses the following identities:
5159 | 1. -------------------------------------------------------------------------
5163 | 2. -------------------------------------------------------------------------
5166 | e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5168 *----------------------------------------------------------------------------*/
5170 static const float64 float32_exp2_coefficients
[15] =
5172 const_float64( 0x3ff0000000000000ll
), /* 1 */
5173 const_float64( 0x3fe0000000000000ll
), /* 2 */
5174 const_float64( 0x3fc5555555555555ll
), /* 3 */
5175 const_float64( 0x3fa5555555555555ll
), /* 4 */
5176 const_float64( 0x3f81111111111111ll
), /* 5 */
5177 const_float64( 0x3f56c16c16c16c17ll
), /* 6 */
5178 const_float64( 0x3f2a01a01a01a01all
), /* 7 */
5179 const_float64( 0x3efa01a01a01a01all
), /* 8 */
5180 const_float64( 0x3ec71de3a556c734ll
), /* 9 */
5181 const_float64( 0x3e927e4fb7789f5cll
), /* 10 */
5182 const_float64( 0x3e5ae64567f544e4ll
), /* 11 */
5183 const_float64( 0x3e21eed8eff8d898ll
), /* 12 */
5184 const_float64( 0x3de6124613a86d09ll
), /* 13 */
5185 const_float64( 0x3da93974a8c07c9dll
), /* 14 */
5186 const_float64( 0x3d6ae7f3e733b81fll
), /* 15 */
5189 float32
float32_exp2(float32 a
, float_status
*status
)
5196 a
= float32_squash_input_denormal(a
, status
);
5198 aSig
= extractFloat32Frac( a
);
5199 aExp
= extractFloat32Exp( a
);
5200 aSign
= extractFloat32Sign( a
);
5202 if ( aExp
== 0xFF) {
5204 return propagateFloat32NaN(a
, float32_zero
, status
);
5206 return (aSign
) ? float32_zero
: a
;
5209 if (aSig
== 0) return float32_one
;
5212 float_raise(float_flag_inexact
, status
);
5214 /* ******************************* */
5215 /* using float64 for approximation */
5216 /* ******************************* */
5217 x
= float32_to_float64(a
, status
);
5218 x
= float64_mul(x
, float64_ln2
, status
);
5222 for (i
= 0 ; i
< 15 ; i
++) {
5225 f
= float64_mul(xn
, float32_exp2_coefficients
[i
], status
);
5226 r
= float64_add(r
, f
, status
);
5228 xn
= float64_mul(xn
, x
, status
);
5231 return float64_to_float32(r
, status
);
5234 /*----------------------------------------------------------------------------
5235 | Returns the binary log of the single-precision floating-point value `a'.
5236 | The operation is performed according to the IEC/IEEE Standard for Binary
5237 | Floating-Point Arithmetic.
5238 *----------------------------------------------------------------------------*/
5239 float32
float32_log2(float32 a
, float_status
*status
)
5243 uint32_t aSig
, zSig
, i
;
5245 a
= float32_squash_input_denormal(a
, status
);
5246 aSig
= extractFloat32Frac( a
);
5247 aExp
= extractFloat32Exp( a
);
5248 aSign
= extractFloat32Sign( a
);
5251 if ( aSig
== 0 ) return packFloat32( 1, 0xFF, 0 );
5252 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5255 float_raise(float_flag_invalid
, status
);
5256 return float32_default_nan(status
);
5258 if ( aExp
== 0xFF ) {
5260 return propagateFloat32NaN(a
, float32_zero
, status
);
5270 for (i
= 1 << 22; i
> 0; i
>>= 1) {
5271 aSig
= ( (uint64_t)aSig
* aSig
) >> 23;
5272 if ( aSig
& 0x01000000 ) {
5281 return normalizeRoundAndPackFloat32(zSign
, 0x85, zSig
, status
);
5284 /*----------------------------------------------------------------------------
5285 | Returns the remainder of the double-precision floating-point value `a'
5286 | with respect to the corresponding value `b'. The operation is performed
5287 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5288 *----------------------------------------------------------------------------*/
5290 float64
float64_rem(float64 a
, float64 b
, float_status
*status
)
5293 int aExp
, bExp
, expDiff
;
5294 uint64_t aSig
, bSig
;
5295 uint64_t q
, alternateASig
;
5298 a
= float64_squash_input_denormal(a
, status
);
5299 b
= float64_squash_input_denormal(b
, status
);
5300 aSig
= extractFloat64Frac( a
);
5301 aExp
= extractFloat64Exp( a
);
5302 aSign
= extractFloat64Sign( a
);
5303 bSig
= extractFloat64Frac( b
);
5304 bExp
= extractFloat64Exp( b
);
5305 if ( aExp
== 0x7FF ) {
5306 if ( aSig
|| ( ( bExp
== 0x7FF ) && bSig
) ) {
5307 return propagateFloat64NaN(a
, b
, status
);
5309 float_raise(float_flag_invalid
, status
);
5310 return float64_default_nan(status
);
5312 if ( bExp
== 0x7FF ) {
5314 return propagateFloat64NaN(a
, b
, status
);
5320 float_raise(float_flag_invalid
, status
);
5321 return float64_default_nan(status
);
5323 normalizeFloat64Subnormal( bSig
, &bExp
, &bSig
);
5326 if ( aSig
== 0 ) return a
;
5327 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5329 expDiff
= aExp
- bExp
;
5330 aSig
= (aSig
| UINT64_C(0x0010000000000000)) << 11;
5331 bSig
= (bSig
| UINT64_C(0x0010000000000000)) << 11;
5332 if ( expDiff
< 0 ) {
5333 if ( expDiff
< -1 ) return a
;
5336 q
= ( bSig
<= aSig
);
5337 if ( q
) aSig
-= bSig
;
5339 while ( 0 < expDiff
) {
5340 q
= estimateDiv128To64( aSig
, 0, bSig
);
5341 q
= ( 2 < q
) ? q
- 2 : 0;
5342 aSig
= - ( ( bSig
>>2 ) * q
);
5346 if ( 0 < expDiff
) {
5347 q
= estimateDiv128To64( aSig
, 0, bSig
);
5348 q
= ( 2 < q
) ? q
- 2 : 0;
5351 aSig
= ( ( aSig
>>1 )<<( expDiff
- 1 ) ) - bSig
* q
;
5358 alternateASig
= aSig
;
5361 } while ( 0 <= (int64_t) aSig
);
5362 sigMean
= aSig
+ alternateASig
;
5363 if ( ( sigMean
< 0 ) || ( ( sigMean
== 0 ) && ( q
& 1 ) ) ) {
5364 aSig
= alternateASig
;
5366 zSign
= ( (int64_t) aSig
< 0 );
5367 if ( zSign
) aSig
= - aSig
;
5368 return normalizeRoundAndPackFloat64(aSign
^ zSign
, bExp
, aSig
, status
);
5372 /*----------------------------------------------------------------------------
5373 | Returns the binary log of the double-precision floating-point value `a'.
5374 | The operation is performed according to the IEC/IEEE Standard for Binary
5375 | Floating-Point Arithmetic.
5376 *----------------------------------------------------------------------------*/
5377 float64
float64_log2(float64 a
, float_status
*status
)
5381 uint64_t aSig
, aSig0
, aSig1
, zSig
, i
;
5382 a
= float64_squash_input_denormal(a
, status
);
5384 aSig
= extractFloat64Frac( a
);
5385 aExp
= extractFloat64Exp( a
);
5386 aSign
= extractFloat64Sign( a
);
5389 if ( aSig
== 0 ) return packFloat64( 1, 0x7FF, 0 );
5390 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5393 float_raise(float_flag_invalid
, status
);
5394 return float64_default_nan(status
);
5396 if ( aExp
== 0x7FF ) {
5398 return propagateFloat64NaN(a
, float64_zero
, status
);
5404 aSig
|= UINT64_C(0x0010000000000000);
5406 zSig
= (uint64_t)aExp
<< 52;
5407 for (i
= 1LL << 51; i
> 0; i
>>= 1) {
5408 mul64To128( aSig
, aSig
, &aSig0
, &aSig1
);
5409 aSig
= ( aSig0
<< 12 ) | ( aSig1
>> 52 );
5410 if ( aSig
& UINT64_C(0x0020000000000000) ) {
5418 return normalizeRoundAndPackFloat64(zSign
, 0x408, zSig
, status
);
5421 /*----------------------------------------------------------------------------
5422 | Rounds the extended double-precision floating-point value `a'
5423 | to the precision provided by floatx80_rounding_precision and returns the
5424 | result as an extended double-precision floating-point value.
5425 | The operation is performed according to the IEC/IEEE Standard for Binary
5426 | Floating-Point Arithmetic.
5427 *----------------------------------------------------------------------------*/
5429 floatx80
floatx80_round(floatx80 a
, float_status
*status
)
5433 if (!floatx80_unpack_canonical(&p
, a
, status
)) {
5434 return floatx80_default_nan(status
);
5436 return floatx80_round_pack_canonical(&p
, status
);
5439 /*----------------------------------------------------------------------------
5440 | Returns the remainder of the extended double-precision floating-point value
5441 | `a' with respect to the corresponding value `b'. The operation is performed
5442 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
5443 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
5444 | the quotient toward zero instead. '*quotient' is set to the low 64 bits of
5445 | the absolute value of the integer quotient.
5446 *----------------------------------------------------------------------------*/
5448 floatx80
floatx80_modrem(floatx80 a
, floatx80 b
, bool mod
, uint64_t *quotient
,
5449 float_status
*status
)
5452 int32_t aExp
, bExp
, expDiff
, aExpOrig
;
5453 uint64_t aSig0
, aSig1
, bSig
;
5454 uint64_t q
, term0
, term1
, alternateASig0
, alternateASig1
;
5457 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
5458 float_raise(float_flag_invalid
, status
);
5459 return floatx80_default_nan(status
);
5461 aSig0
= extractFloatx80Frac( a
);
5462 aExpOrig
= aExp
= extractFloatx80Exp( a
);
5463 aSign
= extractFloatx80Sign( a
);
5464 bSig
= extractFloatx80Frac( b
);
5465 bExp
= extractFloatx80Exp( b
);
5466 if ( aExp
== 0x7FFF ) {
5467 if ( (uint64_t) ( aSig0
<<1 )
5468 || ( ( bExp
== 0x7FFF ) && (uint64_t) ( bSig
<<1 ) ) ) {
5469 return propagateFloatx80NaN(a
, b
, status
);
5473 if ( bExp
== 0x7FFF ) {
5474 if ((uint64_t)(bSig
<< 1)) {
5475 return propagateFloatx80NaN(a
, b
, status
);
5477 if (aExp
== 0 && aSig0
>> 63) {
5479 * Pseudo-denormal argument must be returned in normalized
5482 return packFloatx80(aSign
, 1, aSig0
);
5489 float_raise(float_flag_invalid
, status
);
5490 return floatx80_default_nan(status
);
5492 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
5495 if ( aSig0
== 0 ) return a
;
5496 normalizeFloatx80Subnormal( aSig0
, &aExp
, &aSig0
);
5499 expDiff
= aExp
- bExp
;
5501 if ( expDiff
< 0 ) {
5502 if ( mod
|| expDiff
< -1 ) {
5503 if (aExp
== 1 && aExpOrig
== 0) {
5505 * Pseudo-denormal argument must be returned in
5508 return packFloatx80(aSign
, aExp
, aSig0
);
5512 shift128Right( aSig0
, 0, 1, &aSig0
, &aSig1
);
5515 *quotient
= q
= ( bSig
<= aSig0
);
5516 if ( q
) aSig0
-= bSig
;
5518 while ( 0 < expDiff
) {
5519 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
5520 q
= ( 2 < q
) ? q
- 2 : 0;
5521 mul64To128( bSig
, q
, &term0
, &term1
);
5522 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
5523 shortShift128Left( aSig0
, aSig1
, 62, &aSig0
, &aSig1
);
5529 if ( 0 < expDiff
) {
5530 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
5531 q
= ( 2 < q
) ? q
- 2 : 0;
5533 mul64To128( bSig
, q
<<( 64 - expDiff
), &term0
, &term1
);
5534 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
5535 shortShift128Left( 0, bSig
, 64 - expDiff
, &term0
, &term1
);
5536 while ( le128( term0
, term1
, aSig0
, aSig1
) ) {
5538 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
5541 *quotient
<<= expDiff
;
5552 sub128( term0
, term1
, aSig0
, aSig1
, &alternateASig0
, &alternateASig1
);
5553 if ( lt128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
5554 || ( eq128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
5557 aSig0
= alternateASig0
;
5558 aSig1
= alternateASig1
;
5564 normalizeRoundAndPackFloatx80(
5565 floatx80_precision_x
, zSign
, bExp
+ expDiff
, aSig0
, aSig1
, status
);
5569 /*----------------------------------------------------------------------------
5570 | Returns the remainder of the extended double-precision floating-point value
5571 | `a' with respect to the corresponding value `b'. The operation is performed
5572 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5573 *----------------------------------------------------------------------------*/
5575 floatx80
floatx80_rem(floatx80 a
, floatx80 b
, float_status
*status
)
5578 return floatx80_modrem(a
, b
, false, "ient
, status
);
5581 /*----------------------------------------------------------------------------
5582 | Returns the remainder of the extended double-precision floating-point value
5583 | `a' with respect to the corresponding value `b', with the quotient truncated
5585 *----------------------------------------------------------------------------*/
5587 floatx80
floatx80_mod(floatx80 a
, floatx80 b
, float_status
*status
)
5590 return floatx80_modrem(a
, b
, true, "ient
, status
);
5593 /*----------------------------------------------------------------------------
5594 | Returns the remainder of the quadruple-precision floating-point value `a'
5595 | with respect to the corresponding value `b'. The operation is performed
5596 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5597 *----------------------------------------------------------------------------*/
5599 float128
float128_rem(float128 a
, float128 b
, float_status
*status
)
5602 int32_t aExp
, bExp
, expDiff
;
5603 uint64_t aSig0
, aSig1
, bSig0
, bSig1
, q
, term0
, term1
, term2
;
5604 uint64_t allZero
, alternateASig0
, alternateASig1
, sigMean1
;
5607 aSig1
= extractFloat128Frac1( a
);
5608 aSig0
= extractFloat128Frac0( a
);
5609 aExp
= extractFloat128Exp( a
);
5610 aSign
= extractFloat128Sign( a
);
5611 bSig1
= extractFloat128Frac1( b
);
5612 bSig0
= extractFloat128Frac0( b
);
5613 bExp
= extractFloat128Exp( b
);
5614 if ( aExp
== 0x7FFF ) {
5615 if ( ( aSig0
| aSig1
)
5616 || ( ( bExp
== 0x7FFF ) && ( bSig0
| bSig1
) ) ) {
5617 return propagateFloat128NaN(a
, b
, status
);
5621 if ( bExp
== 0x7FFF ) {
5622 if (bSig0
| bSig1
) {
5623 return propagateFloat128NaN(a
, b
, status
);
5628 if ( ( bSig0
| bSig1
) == 0 ) {
5630 float_raise(float_flag_invalid
, status
);
5631 return float128_default_nan(status
);
5633 normalizeFloat128Subnormal( bSig0
, bSig1
, &bExp
, &bSig0
, &bSig1
);
5636 if ( ( aSig0
| aSig1
) == 0 ) return a
;
5637 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
5639 expDiff
= aExp
- bExp
;
5640 if ( expDiff
< -1 ) return a
;
5642 aSig0
| UINT64_C(0x0001000000000000),
5644 15 - ( expDiff
< 0 ),
5649 bSig0
| UINT64_C(0x0001000000000000), bSig1
, 15, &bSig0
, &bSig1
);
5650 q
= le128( bSig0
, bSig1
, aSig0
, aSig1
);
5651 if ( q
) sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
5653 while ( 0 < expDiff
) {
5654 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
5655 q
= ( 4 < q
) ? q
- 4 : 0;
5656 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
5657 shortShift192Left( term0
, term1
, term2
, 61, &term1
, &term2
, &allZero
);
5658 shortShift128Left( aSig0
, aSig1
, 61, &aSig0
, &allZero
);
5659 sub128( aSig0
, 0, term1
, term2
, &aSig0
, &aSig1
);
5662 if ( -64 < expDiff
) {
5663 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
5664 q
= ( 4 < q
) ? q
- 4 : 0;
5666 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
5668 if ( expDiff
< 0 ) {
5669 shift128Right( aSig0
, aSig1
, - expDiff
, &aSig0
, &aSig1
);
5672 shortShift128Left( aSig0
, aSig1
, expDiff
, &aSig0
, &aSig1
);
5674 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
5675 sub128( aSig0
, aSig1
, term1
, term2
, &aSig0
, &aSig1
);
5678 shift128Right( aSig0
, aSig1
, 12, &aSig0
, &aSig1
);
5679 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
5682 alternateASig0
= aSig0
;
5683 alternateASig1
= aSig1
;
5685 sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
5686 } while ( 0 <= (int64_t) aSig0
);
5688 aSig0
, aSig1
, alternateASig0
, alternateASig1
, (uint64_t *)&sigMean0
, &sigMean1
);
5689 if ( ( sigMean0
< 0 )
5690 || ( ( ( sigMean0
| sigMean1
) == 0 ) && ( q
& 1 ) ) ) {
5691 aSig0
= alternateASig0
;
5692 aSig1
= alternateASig1
;
5694 zSign
= ( (int64_t) aSig0
< 0 );
5695 if ( zSign
) sub128( 0, 0, aSig0
, aSig1
, &aSig0
, &aSig1
);
5696 return normalizeRoundAndPackFloat128(aSign
^ zSign
, bExp
- 4, aSig0
, aSig1
,
5700 static inline FloatRelation
5701 floatx80_compare_internal(floatx80 a
, floatx80 b
, bool is_quiet
,
5702 float_status
*status
)
5706 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
5707 float_raise(float_flag_invalid
, status
);
5708 return float_relation_unordered
;
5710 if (( ( extractFloatx80Exp( a
) == 0x7fff ) &&
5711 ( extractFloatx80Frac( a
)<<1 ) ) ||
5712 ( ( extractFloatx80Exp( b
) == 0x7fff ) &&
5713 ( extractFloatx80Frac( b
)<<1 ) )) {
5715 floatx80_is_signaling_nan(a
, status
) ||
5716 floatx80_is_signaling_nan(b
, status
)) {
5717 float_raise(float_flag_invalid
, status
);
5719 return float_relation_unordered
;
5721 aSign
= extractFloatx80Sign( a
);
5722 bSign
= extractFloatx80Sign( b
);
5723 if ( aSign
!= bSign
) {
5725 if ( ( ( (uint16_t) ( ( a
.high
| b
.high
) << 1 ) ) == 0) &&
5726 ( ( a
.low
| b
.low
) == 0 ) ) {
5728 return float_relation_equal
;
5730 return 1 - (2 * aSign
);
5733 /* Normalize pseudo-denormals before comparison. */
5734 if ((a
.high
& 0x7fff) == 0 && a
.low
& UINT64_C(0x8000000000000000)) {
5737 if ((b
.high
& 0x7fff) == 0 && b
.low
& UINT64_C(0x8000000000000000)) {
5740 if (a
.low
== b
.low
&& a
.high
== b
.high
) {
5741 return float_relation_equal
;
5743 return 1 - 2 * (aSign
^ ( lt128( a
.high
, a
.low
, b
.high
, b
.low
) ));
5748 FloatRelation
floatx80_compare(floatx80 a
, floatx80 b
, float_status
*status
)
5750 return floatx80_compare_internal(a
, b
, 0, status
);
5753 FloatRelation
floatx80_compare_quiet(floatx80 a
, floatx80 b
,
5754 float_status
*status
)
5756 return floatx80_compare_internal(a
, b
, 1, status
);
5759 static void __attribute__((constructor
)) softfloat_init(void)
5761 union_float64 ua
, ub
, uc
, ur
;
5763 if (QEMU_NO_HARDFLOAT
) {
5767 * Test that the host's FMA is not obviously broken. For example,
5768 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5769 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5771 ua
.s
= 0x0020000000000001ULL
;
5772 ub
.s
= 0x3ca0000000000000ULL
;
5773 uc
.s
= 0x0020000000000000ULL
;
5774 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
5775 if (ur
.s
!= 0x0020000000000001ULL
) {
5776 force_soft_fma
= true;