4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
44 ===============================================================================
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */
/* softfloat (and in particular the code in softfloat-specialize.h) is
 * target-dependent and needs the TARGET_* macros.
 */
85 #include "qemu/osdep.h"
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
90 /* We only need stdlib for abort() */
/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations.  (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, FP instruction semantics are similar but not identical, particularly
 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 * exception flags is not trivial: reading the host's flags register with a
 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is not fast nor pleasant to work with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is < the minimum normal.
 */
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck
, float32
)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck
, float64
)
141 #undef GEN_INPUT_FLUSH__NOCHECK
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
146 if (likely(!s->flush_inputs_to_zero)) { \
149 soft_t ## _input_flush__nocheck(a, s); \
152 GEN_INPUT_FLUSH1(float32_input_flush1
, float32
)
153 GEN_INPUT_FLUSH1(float64_input_flush1
, float64
)
154 #undef GEN_INPUT_FLUSH1
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
159 if (likely(!s->flush_inputs_to_zero)) { \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
166 GEN_INPUT_FLUSH2(float32_input_flush2
, float32
)
167 GEN_INPUT_FLUSH2(float64_input_flush2
, float64
)
168 #undef GEN_INPUT_FLUSH2
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
173 if (likely(!s->flush_inputs_to_zero)) { \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
181 GEN_INPUT_FLUSH3(float32_input_flush3
, float32
)
182 GEN_INPUT_FLUSH3(float64_input_flush3
, float64
)
183 #undef GEN_INPUT_FLUSH3
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF 1
#else
# define QEMU_HARDFLOAT_USE_ISINF 0
#endif
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
235 static inline bool can_use_fpu(const float_status
*s
)
237 if (QEMU_NO_HARDFLOAT
) {
240 return likely(s
->float_exception_flags
& float_flag_inexact
&&
241 s
->float_rounding_mode
== float_round_nearest_even
);
/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */
266 typedef bool (*f32_check_fn
)(union_float32 a
, union_float32 b
);
267 typedef bool (*f64_check_fn
)(union_float64 a
, union_float64 b
);
269 typedef float32 (*soft_f32_op2_fn
)(float32 a
, float32 b
, float_status
*s
);
270 typedef float64 (*soft_f64_op2_fn
)(float64 a
, float64 b
, float_status
*s
);
271 typedef float (*hard_f32_op2_fn
)(float a
, float b
);
272 typedef double (*hard_f64_op2_fn
)(double a
, double b
);
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a
, union_float32 b
)
277 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
282 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
283 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
285 return float32_is_zero_or_normal(a
.s
) &&
286 float32_is_zero_or_normal(b
.s
);
289 static inline bool f64_is_zon2(union_float64 a
, union_float64 b
)
291 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
292 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
293 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
);
295 return float64_is_zero_or_normal(a
.s
) &&
296 float64_is_zero_or_normal(b
.s
);
299 /* 3-input is-zero-or-normal */
301 bool f32_is_zon3(union_float32 a
, union_float32 b
, union_float32 c
)
303 if (QEMU_HARDFLOAT_3F32_USE_FP
) {
304 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
305 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
306 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
308 return float32_is_zero_or_normal(a
.s
) &&
309 float32_is_zero_or_normal(b
.s
) &&
310 float32_is_zero_or_normal(c
.s
);
314 bool f64_is_zon3(union_float64 a
, union_float64 b
, union_float64 c
)
316 if (QEMU_HARDFLOAT_3F64_USE_FP
) {
317 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
318 (fpclassify(b
.h
) == FP_NORMAL
|| fpclassify(b
.h
) == FP_ZERO
) &&
319 (fpclassify(c
.h
) == FP_NORMAL
|| fpclassify(c
.h
) == FP_ZERO
);
321 return float64_is_zero_or_normal(a
.s
) &&
322 float64_is_zero_or_normal(b
.s
) &&
323 float64_is_zero_or_normal(c
.s
);
326 static inline bool f32_is_inf(union_float32 a
)
328 if (QEMU_HARDFLOAT_USE_ISINF
) {
331 return float32_is_infinity(a
.s
);
334 static inline bool f64_is_inf(union_float64 a
)
336 if (QEMU_HARDFLOAT_USE_ISINF
) {
339 return float64_is_infinity(a
.s
);
342 static inline float32
343 float32_gen2(float32 xa
, float32 xb
, float_status
*s
,
344 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
,
345 f32_check_fn pre
, f32_check_fn post
)
347 union_float32 ua
, ub
, ur
;
352 if (unlikely(!can_use_fpu(s
))) {
356 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
357 if (unlikely(!pre(ua
, ub
))) {
361 ur
.h
= hard(ua
.h
, ub
.h
);
362 if (unlikely(f32_is_inf(ur
))) {
363 float_raise(float_flag_overflow
, s
);
364 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
) && post(ua
, ub
)) {
370 return soft(ua
.s
, ub
.s
, s
);
373 static inline float64
374 float64_gen2(float64 xa
, float64 xb
, float_status
*s
,
375 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
,
376 f64_check_fn pre
, f64_check_fn post
)
378 union_float64 ua
, ub
, ur
;
383 if (unlikely(!can_use_fpu(s
))) {
387 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
388 if (unlikely(!pre(ua
, ub
))) {
392 ur
.h
= hard(ua
.h
, ub
.h
);
393 if (unlikely(f64_is_inf(ur
))) {
394 float_raise(float_flag_overflow
, s
);
395 } else if (unlikely(fabs(ur
.h
) <= DBL_MIN
) && post(ua
, ub
)) {
401 return soft(ua
.s
, ub
.s
, s
);
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
408 static inline uint32_t extractFloat32Frac(float32 a
)
410 return float32_val(a
) & 0x007FFFFF;
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
417 static inline int extractFloat32Exp(float32 a
)
419 return (float32_val(a
) >> 23) & 0xFF;
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
426 static inline bool extractFloat32Sign(float32 a
)
428 return float32_val(a
) >> 31;
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
435 static inline uint64_t extractFloat64Frac(float64 a
)
437 return float64_val(a
) & UINT64_C(0x000FFFFFFFFFFFFF);
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
444 static inline int extractFloat64Exp(float64 a
)
446 return (float64_val(a
) >> 52) & 0x7FF;
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
453 static inline bool extractFloat64Sign(float64 a
)
455 return float64_val(a
) >> 63;
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
472 #define float_cmask(bit) (1u << (bit))
475 float_cmask_zero
= float_cmask(float_class_zero
),
476 float_cmask_normal
= float_cmask(float_class_normal
),
477 float_cmask_inf
= float_cmask(float_class_inf
),
478 float_cmask_qnan
= float_cmask(float_class_qnan
),
479 float_cmask_snan
= float_cmask(float_class_snan
),
481 float_cmask_infzero
= float_cmask_zero
| float_cmask_inf
,
482 float_cmask_anynan
= float_cmask_qnan
| float_cmask_snan
,
/* Flags for parts_minmax. */
/* NOTE(review): enum member names/values reconstructed — verify upstream. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
    minmax_ismag = 4,
};
495 /* Simple helpers for checking if, or what kind of, NaN we have */
496 static inline __attribute__((unused
)) bool is_nan(FloatClass c
)
498 return unlikely(c
>= float_class_qnan
);
501 static inline __attribute__((unused
)) bool is_snan(FloatClass c
)
503 return c
== float_class_snan
;
506 static inline __attribute__((unused
)) bool is_qnan(FloatClass c
)
508 return c
== float_class_qnan
;
512 * Structure holding all of the decomposed parts of a float.
513 * The exponent is unbiased and the fraction is normalized.
515 * The fraction words are stored in big-endian word ordering,
516 * so that truncation from a larger format to a smaller format
517 * can be done simply by ignoring subsequent elements.
525 /* Routines that know the structure may reference the singular name. */
528 * Routines expanded with multiple structures reference "hi" and "lo"
529 * depending on the operation. In FloatParts64, "hi" and "lo" are
530 * both the same word and aliased here.
550 uint64_t frac_hm
; /* high-middle */
551 uint64_t frac_lm
; /* low-middle */
555 /* These apply to the most significant word of each FloatPartsN. */
556 #define DECOMPOSED_BINARY_POINT 63
557 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based the size of fraction
 *   round_mask: bits below lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 *
 * NOTE(review): struct body reconstructed from the field list above and the
 * FLOAT_PARAMS initializers below — verify against upstream.
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    uint64_t round_mask;
} FloatFmt;
/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = E,                                \
    .exp_bias       = ((1 << E) - 1) >> 1,              \
    .exp_max        = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = F,                                \
    .frac_shift     = (-F - 1) & 63,                    \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1
592 static const FloatFmt float16_params
= {
596 static const FloatFmt float16_params_ahp
= {
601 static const FloatFmt bfloat16_params
= {
605 static const FloatFmt float32_params
= {
609 static const FloatFmt float64_params
= {
613 static const FloatFmt float128_params
= {
614 FLOAT_PARAMS(15, 112)
617 #define FLOATX80_PARAMS(R) \
619 .frac_size = R == 64 ? 63 : R, \
621 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
623 static const FloatFmt floatx80_params
[3] = {
624 [floatx80_precision_s
] = { FLOATX80_PARAMS(23) },
625 [floatx80_precision_d
] = { FLOATX80_PARAMS(52) },
626 [floatx80_precision_x
] = { FLOATX80_PARAMS(64) },
629 /* Unpack a float to parts, but do not canonicalize. */
630 static void unpack_raw64(FloatParts64
*r
, const FloatFmt
*fmt
, uint64_t raw
)
632 const int f_size
= fmt
->frac_size
;
633 const int e_size
= fmt
->exp_size
;
635 *r
= (FloatParts64
) {
636 .cls
= float_class_unclassified
,
637 .sign
= extract64(raw
, f_size
+ e_size
, 1),
638 .exp
= extract64(raw
, f_size
, e_size
),
639 .frac
= extract64(raw
, 0, f_size
)
643 static inline void float16_unpack_raw(FloatParts64
*p
, float16 f
)
645 unpack_raw64(p
, &float16_params
, f
);
648 static inline void bfloat16_unpack_raw(FloatParts64
*p
, bfloat16 f
)
650 unpack_raw64(p
, &bfloat16_params
, f
);
653 static inline void float32_unpack_raw(FloatParts64
*p
, float32 f
)
655 unpack_raw64(p
, &float32_params
, f
);
658 static inline void float64_unpack_raw(FloatParts64
*p
, float64 f
)
660 unpack_raw64(p
, &float64_params
, f
);
663 static void floatx80_unpack_raw(FloatParts128
*p
, floatx80 f
)
665 *p
= (FloatParts128
) {
666 .cls
= float_class_unclassified
,
667 .sign
= extract32(f
.high
, 15, 1),
668 .exp
= extract32(f
.high
, 0, 15),
673 static void float128_unpack_raw(FloatParts128
*p
, float128 f
)
675 const int f_size
= float128_params
.frac_size
- 64;
676 const int e_size
= float128_params
.exp_size
;
678 *p
= (FloatParts128
) {
679 .cls
= float_class_unclassified
,
680 .sign
= extract64(f
.high
, f_size
+ e_size
, 1),
681 .exp
= extract64(f
.high
, f_size
, e_size
),
682 .frac_hi
= extract64(f
.high
, 0, f_size
),
687 /* Pack a float from parts, but do not canonicalize. */
688 static uint64_t pack_raw64(const FloatParts64
*p
, const FloatFmt
*fmt
)
690 const int f_size
= fmt
->frac_size
;
691 const int e_size
= fmt
->exp_size
;
694 ret
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
695 ret
= deposit64(ret
, f_size
, e_size
, p
->exp
);
696 ret
= deposit64(ret
, 0, f_size
, p
->frac
);
700 static inline float16
float16_pack_raw(const FloatParts64
*p
)
702 return make_float16(pack_raw64(p
, &float16_params
));
705 static inline bfloat16
bfloat16_pack_raw(const FloatParts64
*p
)
707 return pack_raw64(p
, &bfloat16_params
);
710 static inline float32
float32_pack_raw(const FloatParts64
*p
)
712 return make_float32(pack_raw64(p
, &float32_params
));
715 static inline float64
float64_pack_raw(const FloatParts64
*p
)
717 return make_float64(pack_raw64(p
, &float64_params
));
720 static float128
float128_pack_raw(const FloatParts128
*p
)
722 const int f_size
= float128_params
.frac_size
- 64;
723 const int e_size
= float128_params
.exp_size
;
726 hi
= (uint64_t)p
->sign
<< (f_size
+ e_size
);
727 hi
= deposit64(hi
, f_size
, e_size
, p
->exp
);
728 hi
= deposit64(hi
, 0, f_size
, p
->frac_hi
);
729 return make_float128(hi
, p
->frac_lo
);
732 /*----------------------------------------------------------------------------
733 | Functions and definitions to determine: (1) whether tininess for underflow
734 | is detected before or after rounding by default, (2) what (if anything)
735 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
736 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
737 | are propagated from function inputs to output. These details are target-
739 *----------------------------------------------------------------------------*/
740 #include "softfloat-specialize.c.inc"
742 #define PARTS_GENERIC_64_128(NAME, P) \
743 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
745 #define PARTS_GENERIC_64_128_256(NAME, P) \
746 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
747 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
749 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
750 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
752 static void parts64_return_nan(FloatParts64
*a
, float_status
*s
);
753 static void parts128_return_nan(FloatParts128
*a
, float_status
*s
);
755 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
757 static FloatParts64
*parts64_pick_nan(FloatParts64
*a
, FloatParts64
*b
,
759 static FloatParts128
*parts128_pick_nan(FloatParts128
*a
, FloatParts128
*b
,
762 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
764 static FloatParts64
*parts64_pick_nan_muladd(FloatParts64
*a
, FloatParts64
*b
,
765 FloatParts64
*c
, float_status
*s
,
766 int ab_mask
, int abc_mask
);
767 static FloatParts128
*parts128_pick_nan_muladd(FloatParts128
*a
,
771 int ab_mask
, int abc_mask
);
773 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
774 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
776 static void parts64_canonicalize(FloatParts64
*p
, float_status
*status
,
777 const FloatFmt
*fmt
);
778 static void parts128_canonicalize(FloatParts128
*p
, float_status
*status
,
779 const FloatFmt
*fmt
);
781 #define parts_canonicalize(A, S, F) \
782 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
784 static void parts64_uncanon_normal(FloatParts64
*p
, float_status
*status
,
785 const FloatFmt
*fmt
);
786 static void parts128_uncanon_normal(FloatParts128
*p
, float_status
*status
,
787 const FloatFmt
*fmt
);
789 #define parts_uncanon_normal(A, S, F) \
790 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
792 static void parts64_uncanon(FloatParts64
*p
, float_status
*status
,
793 const FloatFmt
*fmt
);
794 static void parts128_uncanon(FloatParts128
*p
, float_status
*status
,
795 const FloatFmt
*fmt
);
797 #define parts_uncanon(A, S, F) \
798 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
800 static void parts64_add_normal(FloatParts64
*a
, FloatParts64
*b
);
801 static void parts128_add_normal(FloatParts128
*a
, FloatParts128
*b
);
802 static void parts256_add_normal(FloatParts256
*a
, FloatParts256
*b
);
804 #define parts_add_normal(A, B) \
805 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
807 static bool parts64_sub_normal(FloatParts64
*a
, FloatParts64
*b
);
808 static bool parts128_sub_normal(FloatParts128
*a
, FloatParts128
*b
);
809 static bool parts256_sub_normal(FloatParts256
*a
, FloatParts256
*b
);
811 #define parts_sub_normal(A, B) \
812 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
814 static FloatParts64
*parts64_addsub(FloatParts64
*a
, FloatParts64
*b
,
815 float_status
*s
, bool subtract
);
816 static FloatParts128
*parts128_addsub(FloatParts128
*a
, FloatParts128
*b
,
817 float_status
*s
, bool subtract
);
819 #define parts_addsub(A, B, S, Z) \
820 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
822 static FloatParts64
*parts64_mul(FloatParts64
*a
, FloatParts64
*b
,
824 static FloatParts128
*parts128_mul(FloatParts128
*a
, FloatParts128
*b
,
827 #define parts_mul(A, B, S) \
828 PARTS_GENERIC_64_128(mul, A)(A, B, S)
830 static FloatParts64
*parts64_muladd(FloatParts64
*a
, FloatParts64
*b
,
831 FloatParts64
*c
, int flags
,
833 static FloatParts128
*parts128_muladd(FloatParts128
*a
, FloatParts128
*b
,
834 FloatParts128
*c
, int flags
,
837 #define parts_muladd(A, B, C, Z, S) \
838 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
840 static FloatParts64
*parts64_div(FloatParts64
*a
, FloatParts64
*b
,
842 static FloatParts128
*parts128_div(FloatParts128
*a
, FloatParts128
*b
,
845 #define parts_div(A, B, S) \
846 PARTS_GENERIC_64_128(div, A)(A, B, S)
848 static void parts64_sqrt(FloatParts64
*a
, float_status
*s
, const FloatFmt
*f
);
849 static void parts128_sqrt(FloatParts128
*a
, float_status
*s
, const FloatFmt
*f
);
851 #define parts_sqrt(A, S, F) \
852 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
854 static bool parts64_round_to_int_normal(FloatParts64
*a
, FloatRoundMode rm
,
855 int scale
, int frac_size
);
856 static bool parts128_round_to_int_normal(FloatParts128
*a
, FloatRoundMode r
,
857 int scale
, int frac_size
);
859 #define parts_round_to_int_normal(A, R, C, F) \
860 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
862 static void parts64_round_to_int(FloatParts64
*a
, FloatRoundMode rm
,
863 int scale
, float_status
*s
,
864 const FloatFmt
*fmt
);
865 static void parts128_round_to_int(FloatParts128
*a
, FloatRoundMode r
,
866 int scale
, float_status
*s
,
867 const FloatFmt
*fmt
);
869 #define parts_round_to_int(A, R, C, S, F) \
870 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
872 static int64_t parts64_float_to_sint(FloatParts64
*p
, FloatRoundMode rmode
,
873 int scale
, int64_t min
, int64_t max
,
875 static int64_t parts128_float_to_sint(FloatParts128
*p
, FloatRoundMode rmode
,
876 int scale
, int64_t min
, int64_t max
,
879 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
880 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
882 static uint64_t parts64_float_to_uint(FloatParts64
*p
, FloatRoundMode rmode
,
883 int scale
, uint64_t max
,
885 static uint64_t parts128_float_to_uint(FloatParts128
*p
, FloatRoundMode rmode
,
886 int scale
, uint64_t max
,
889 #define parts_float_to_uint(P, R, Z, M, S) \
890 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
892 static void parts64_sint_to_float(FloatParts64
*p
, int64_t a
,
893 int scale
, float_status
*s
);
894 static void parts128_sint_to_float(FloatParts128
*p
, int64_t a
,
895 int scale
, float_status
*s
);
897 #define parts_sint_to_float(P, I, Z, S) \
898 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
900 static void parts64_uint_to_float(FloatParts64
*p
, uint64_t a
,
901 int scale
, float_status
*s
);
902 static void parts128_uint_to_float(FloatParts128
*p
, uint64_t a
,
903 int scale
, float_status
*s
);
905 #define parts_uint_to_float(P, I, Z, S) \
906 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
908 static FloatParts64
*parts64_minmax(FloatParts64
*a
, FloatParts64
*b
,
909 float_status
*s
, int flags
);
910 static FloatParts128
*parts128_minmax(FloatParts128
*a
, FloatParts128
*b
,
911 float_status
*s
, int flags
);
913 #define parts_minmax(A, B, S, F) \
914 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
916 static int parts64_compare(FloatParts64
*a
, FloatParts64
*b
,
917 float_status
*s
, bool q
);
918 static int parts128_compare(FloatParts128
*a
, FloatParts128
*b
,
919 float_status
*s
, bool q
);
921 #define parts_compare(A, B, S, Q) \
922 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
924 static void parts64_scalbn(FloatParts64
*a
, int n
, float_status
*s
);
925 static void parts128_scalbn(FloatParts128
*a
, int n
, float_status
*s
);
927 #define parts_scalbn(A, N, S) \
928 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
931 * Helper functions for softfloat-parts.c.inc, per-size operations.
934 #define FRAC_GENERIC_64_128(NAME, P) \
935 QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
937 #define FRAC_GENERIC_64_128_256(NAME, P) \
938 QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
939 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
941 static bool frac64_add(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
943 return uadd64_overflow(a
->frac
, b
->frac
, &r
->frac
);
946 static bool frac128_add(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
949 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
950 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
954 static bool frac256_add(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
957 r
->frac_lo
= uadd64_carry(a
->frac_lo
, b
->frac_lo
, &c
);
958 r
->frac_lm
= uadd64_carry(a
->frac_lm
, b
->frac_lm
, &c
);
959 r
->frac_hm
= uadd64_carry(a
->frac_hm
, b
->frac_hm
, &c
);
960 r
->frac_hi
= uadd64_carry(a
->frac_hi
, b
->frac_hi
, &c
);
964 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
966 static bool frac64_addi(FloatParts64
*r
, FloatParts64
*a
, uint64_t c
)
968 return uadd64_overflow(a
->frac
, c
, &r
->frac
);
971 static bool frac128_addi(FloatParts128
*r
, FloatParts128
*a
, uint64_t c
)
973 c
= uadd64_overflow(a
->frac_lo
, c
, &r
->frac_lo
);
974 return uadd64_overflow(a
->frac_hi
, c
, &r
->frac_hi
);
977 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
979 static void frac64_allones(FloatParts64
*a
)
984 static void frac128_allones(FloatParts128
*a
)
986 a
->frac_hi
= a
->frac_lo
= -1;
989 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
991 static int frac64_cmp(FloatParts64
*a
, FloatParts64
*b
)
993 return a
->frac
== b
->frac
? 0 : a
->frac
< b
->frac
? -1 : 1;
996 static int frac128_cmp(FloatParts128
*a
, FloatParts128
*b
)
998 uint64_t ta
= a
->frac_hi
, tb
= b
->frac_hi
;
1000 ta
= a
->frac_lo
, tb
= b
->frac_lo
;
1005 return ta
< tb
? -1 : 1;
1008 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
1010 static void frac64_clear(FloatParts64
*a
)
1015 static void frac128_clear(FloatParts128
*a
)
1017 a
->frac_hi
= a
->frac_lo
= 0;
1020 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
1022 static bool frac64_div(FloatParts64
*a
, FloatParts64
*b
)
1024 uint64_t n1
, n0
, r
, q
;
1028 * We want a 2*N / N-bit division to produce exactly an N-bit
1029 * result, so that we do not lose any precision and so that we
1030 * do not have to renormalize afterward. If A.frac < B.frac,
1031 * then division would produce an (N-1)-bit result; shift A left
1032 * by one to produce the an N-bit result, and return true to
1033 * decrement the exponent to match.
1035 * The udiv_qrnnd algorithm that we're using requires normalization,
1036 * i.e. the msb of the denominator must be set, which is already true.
1038 ret
= a
->frac
< b
->frac
;
1046 q
= udiv_qrnnd(&r
, n0
, n1
, b
->frac
);
1048 /* Set lsb if there is a remainder, to set inexact. */
1049 a
->frac
= q
| (r
!= 0);
1054 static bool frac128_div(FloatParts128
*a
, FloatParts128
*b
)
1056 uint64_t q0
, q1
, a0
, a1
, b0
, b1
;
1057 uint64_t r0
, r1
, r2
, r3
, t0
, t1
, t2
, t3
;
1060 a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1061 b0
= b
->frac_hi
, b1
= b
->frac_lo
;
1063 ret
= lt128(a0
, a1
, b0
, b1
);
1065 a1
= shr_double(a0
, a1
, 1);
1069 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1070 q0
= estimateDiv128To64(a0
, a1
, b0
);
1073 * Estimate is high because B1 was not included (unless B1 == 0).
1074 * Reduce quotient and increase remainder until remainder is non-negative.
1075 * This loop will execute 0 to 2 times.
1077 mul128By64To192(b0
, b1
, q0
, &t0
, &t1
, &t2
);
1078 sub192(a0
, a1
, 0, t0
, t1
, t2
, &r0
, &r1
, &r2
);
1081 add192(r0
, r1
, r2
, 0, b0
, b1
, &r0
, &r1
, &r2
);
1084 /* Repeat using the remainder, producing a second word of quotient. */
1085 q1
= estimateDiv128To64(r1
, r2
, b0
);
1086 mul128By64To192(b0
, b1
, q1
, &t1
, &t2
, &t3
);
1087 sub192(r1
, r2
, 0, t1
, t2
, t3
, &r1
, &r2
, &r3
);
1090 add192(r1
, r2
, r3
, 0, b0
, b1
, &r1
, &r2
, &r3
);
1093 /* Any remainder indicates inexact; set sticky bit. */
1094 q1
|= (r2
| r3
) != 0;
1101 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1103 static bool frac64_eqz(FloatParts64
*a
)
1105 return a
->frac
== 0;
1108 static bool frac128_eqz(FloatParts128
*a
)
1110 return (a
->frac_hi
| a
->frac_lo
) == 0;
1113 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1115 static void frac64_mulw(FloatParts128
*r
, FloatParts64
*a
, FloatParts64
*b
)
1117 mulu64(&r
->frac_lo
, &r
->frac_hi
, a
->frac
, b
->frac
);
1120 static void frac128_mulw(FloatParts256
*r
, FloatParts128
*a
, FloatParts128
*b
)
1122 mul128To256(a
->frac_hi
, a
->frac_lo
, b
->frac_hi
, b
->frac_lo
,
1123 &r
->frac_hi
, &r
->frac_hm
, &r
->frac_lm
, &r
->frac_lo
);
1126 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1128 static void frac64_neg(FloatParts64
*a
)
1133 static void frac128_neg(FloatParts128
*a
)
1136 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1137 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1140 static void frac256_neg(FloatParts256
*a
)
1143 a
->frac_lo
= usub64_borrow(0, a
->frac_lo
, &c
);
1144 a
->frac_lm
= usub64_borrow(0, a
->frac_lm
, &c
);
1145 a
->frac_hm
= usub64_borrow(0, a
->frac_hm
, &c
);
1146 a
->frac_hi
= usub64_borrow(0, a
->frac_hi
, &c
);
1149 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1151 static int frac64_normalize(FloatParts64
*a
)
1154 int shift
= clz64(a
->frac
);
1161 static int frac128_normalize(FloatParts128
*a
)
1164 int shl
= clz64(a
->frac_hi
);
1165 a
->frac_hi
= shl_double(a
->frac_hi
, a
->frac_lo
, shl
);
1168 } else if (a
->frac_lo
) {
1169 int shl
= clz64(a
->frac_lo
);
1170 a
->frac_hi
= a
->frac_lo
<< shl
;
1177 static int frac256_normalize(FloatParts256
*a
)
1179 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1180 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1192 a0
= a1
, a1
= a2
, a2
= a3
, a3
= 0;
1195 a0
= a2
, a1
= a3
, a2
= 0, a3
= 0;
1198 a0
= a3
, a1
= 0, a2
= 0, a3
= 0;
1201 a0
= 0, a1
= 0, a2
= 0, a3
= 0;
1211 a0
= shl_double(a0
, a1
, shl
);
1212 a1
= shl_double(a1
, a2
, shl
);
1213 a2
= shl_double(a2
, a3
, shl
);
1224 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1226 static void frac64_shl(FloatParts64
*a
, int c
)
1231 static void frac128_shl(FloatParts128
*a
, int c
)
1233 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1241 a0
= shl_double(a0
, a1
, c
);
1249 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1251 static void frac64_shr(FloatParts64
*a
, int c
)
1256 static void frac128_shr(FloatParts128
*a
, int c
)
1258 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1266 a1
= shr_double(a0
, a1
, c
);
1274 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1276 static void frac64_shrjam(FloatParts64
*a
, int c
)
1278 uint64_t a0
= a
->frac
;
1280 if (likely(c
!= 0)) {
1281 if (likely(c
< 64)) {
1282 a0
= (a0
>> c
) | (shr_double(a0
, 0, c
) != 0);
1290 static void frac128_shrjam(FloatParts128
*a
, int c
)
1292 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_lo
;
1293 uint64_t sticky
= 0;
1295 if (unlikely(c
== 0)) {
1297 } else if (likely(c
< 64)) {
1299 } else if (likely(c
< 128)) {
1313 sticky
|= shr_double(a1
, 0, c
);
1314 a1
= shr_double(a0
, a1
, c
);
1318 a
->frac_lo
= a1
| (sticky
!= 0);
1322 static void frac256_shrjam(FloatParts256
*a
, int c
)
1324 uint64_t a0
= a
->frac_hi
, a1
= a
->frac_hm
;
1325 uint64_t a2
= a
->frac_lm
, a3
= a
->frac_lo
;
1326 uint64_t sticky
= 0;
1328 if (unlikely(c
== 0)) {
1330 } else if (likely(c
< 64)) {
1332 } else if (likely(c
< 256)) {
1333 if (unlikely(c
& 128)) {
1335 a3
= a1
, a2
= a0
, a1
= 0, a0
= 0;
1337 if (unlikely(c
& 64)) {
1339 a3
= a2
, a2
= a1
, a1
= a0
, a0
= 0;
1346 sticky
= a0
| a1
| a2
| a3
;
1347 a0
= a1
= a2
= a3
= 0;
1351 sticky
|= shr_double(a3
, 0, c
);
1352 a3
= shr_double(a2
, a3
, c
);
1353 a2
= shr_double(a1
, a2
, c
);
1354 a1
= shr_double(a0
, a1
, c
);
1358 a
->frac_lo
= a3
| (sticky
!= 0);
1364 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1366 static bool frac64_sub(FloatParts64
*r
, FloatParts64
*a
, FloatParts64
*b
)
1368 return usub64_overflow(a
->frac
, b
->frac
, &r
->frac
);
1371 static bool frac128_sub(FloatParts128
*r
, FloatParts128
*a
, FloatParts128
*b
)
1374 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1375 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1379 static bool frac256_sub(FloatParts256
*r
, FloatParts256
*a
, FloatParts256
*b
)
1382 r
->frac_lo
= usub64_borrow(a
->frac_lo
, b
->frac_lo
, &c
);
1383 r
->frac_lm
= usub64_borrow(a
->frac_lm
, b
->frac_lm
, &c
);
1384 r
->frac_hm
= usub64_borrow(a
->frac_hm
, b
->frac_hm
, &c
);
1385 r
->frac_hi
= usub64_borrow(a
->frac_hi
, b
->frac_hi
, &c
);
1389 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1391 static void frac64_truncjam(FloatParts64
*r
, FloatParts128
*a
)
1393 r
->frac
= a
->frac_hi
| (a
->frac_lo
!= 0);
1396 static void frac128_truncjam(FloatParts128
*r
, FloatParts256
*a
)
1398 r
->frac_hi
= a
->frac_hi
;
1399 r
->frac_lo
= a
->frac_hm
| ((a
->frac_lm
| a
->frac_lo
) != 0);
1402 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1404 static void frac64_widen(FloatParts128
*r
, FloatParts64
*a
)
1406 r
->frac_hi
= a
->frac
;
1410 static void frac128_widen(FloatParts256
*r
, FloatParts128
*a
)
1412 r
->frac_hi
= a
->frac_hi
;
1413 r
->frac_hm
= a
->frac_lo
;
1418 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1421 * Reciprocal sqrt table. 1 bit of exponent, 6-bits of mantessa.
1422 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1423 * and thus MIT licenced.
/*
 * Reciprocal sqrt table.  1 bit of exponent, 6-bits of mantessa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licenced.
 */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
1444 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1445 #define FloatPartsN glue(FloatParts,N)
1446 #define FloatPartsW glue(FloatParts,W)
1451 #include "softfloat-parts-addsub.c.inc"
1452 #include "softfloat-parts.c.inc"
1459 #include "softfloat-parts-addsub.c.inc"
1460 #include "softfloat-parts.c.inc"
1466 #include "softfloat-parts-addsub.c.inc"
1475 * Pack/unpack routines with a specific FloatFmt.
1478 static void float16a_unpack_canonical(FloatParts64
*p
, float16 f
,
1479 float_status
*s
, const FloatFmt
*params
)
1481 float16_unpack_raw(p
, f
);
1482 parts_canonicalize(p
, s
, params
);
1485 static void float16_unpack_canonical(FloatParts64
*p
, float16 f
,
1488 float16a_unpack_canonical(p
, f
, s
, &float16_params
);
1491 static void bfloat16_unpack_canonical(FloatParts64
*p
, bfloat16 f
,
1494 bfloat16_unpack_raw(p
, f
);
1495 parts_canonicalize(p
, s
, &bfloat16_params
);
1498 static float16
float16a_round_pack_canonical(FloatParts64
*p
,
1500 const FloatFmt
*params
)
1502 parts_uncanon(p
, s
, params
);
1503 return float16_pack_raw(p
);
1506 static float16
float16_round_pack_canonical(FloatParts64
*p
,
1509 return float16a_round_pack_canonical(p
, s
, &float16_params
);
1512 static bfloat16
bfloat16_round_pack_canonical(FloatParts64
*p
,
1515 parts_uncanon(p
, s
, &bfloat16_params
);
1516 return bfloat16_pack_raw(p
);
1519 static void float32_unpack_canonical(FloatParts64
*p
, float32 f
,
1522 float32_unpack_raw(p
, f
);
1523 parts_canonicalize(p
, s
, &float32_params
);
1526 static float32
float32_round_pack_canonical(FloatParts64
*p
,
1529 parts_uncanon(p
, s
, &float32_params
);
1530 return float32_pack_raw(p
);
1533 static void float64_unpack_canonical(FloatParts64
*p
, float64 f
,
1536 float64_unpack_raw(p
, f
);
1537 parts_canonicalize(p
, s
, &float64_params
);
1540 static float64
float64_round_pack_canonical(FloatParts64
*p
,
1543 parts_uncanon(p
, s
, &float64_params
);
1544 return float64_pack_raw(p
);
1547 static void float128_unpack_canonical(FloatParts128
*p
, float128 f
,
1550 float128_unpack_raw(p
, f
);
1551 parts_canonicalize(p
, s
, &float128_params
);
1554 static float128
float128_round_pack_canonical(FloatParts128
*p
,
1557 parts_uncanon(p
, s
, &float128_params
);
1558 return float128_pack_raw(p
);
1561 /* Returns false if the encoding is invalid. */
1562 static bool floatx80_unpack_canonical(FloatParts128
*p
, floatx80 f
,
1565 /* Ensure rounding precision is set before beginning. */
1566 switch (s
->floatx80_rounding_precision
) {
1567 case floatx80_precision_x
:
1568 case floatx80_precision_d
:
1569 case floatx80_precision_s
:
1572 g_assert_not_reached();
1575 if (unlikely(floatx80_invalid_encoding(f
))) {
1576 float_raise(float_flag_invalid
, s
);
1580 floatx80_unpack_raw(p
, f
);
1582 if (likely(p
->exp
!= floatx80_params
[floatx80_precision_x
].exp_max
)) {
1583 parts_canonicalize(p
, s
, &floatx80_params
[floatx80_precision_x
]);
1585 /* The explicit integer bit is ignored, after invalid checks. */
1586 p
->frac_hi
&= MAKE_64BIT_MASK(0, 63);
1587 p
->cls
= (p
->frac_hi
== 0 ? float_class_inf
1588 : parts_is_snan_frac(p
->frac_hi
, s
)
1589 ? float_class_snan
: float_class_qnan
);
1594 static floatx80
floatx80_round_pack_canonical(FloatParts128
*p
,
1597 const FloatFmt
*fmt
= &floatx80_params
[s
->floatx80_rounding_precision
];
1602 case float_class_normal
:
1603 if (s
->floatx80_rounding_precision
== floatx80_precision_x
) {
1604 parts_uncanon_normal(p
, s
, fmt
);
1612 frac_truncjam(&p64
, p
);
1613 parts_uncanon_normal(&p64
, s
, fmt
);
1617 if (exp
!= fmt
->exp_max
) {
1620 /* rounded to inf -- fall through to set frac correctly */
1622 case float_class_inf
:
1623 /* x86 and m68k differ in the setting of the integer bit. */
1624 frac
= floatx80_infinity_low
;
1628 case float_class_zero
:
1633 case float_class_snan
:
1634 case float_class_qnan
:
1635 /* NaNs have the integer bit set. */
1636 frac
= p
->frac_hi
| (1ull << 63);
1641 g_assert_not_reached();
1644 return packFloatx80(p
->sign
, exp
, frac
);
1648 * Addition and subtraction
1651 static float16 QEMU_FLATTEN
1652 float16_addsub(float16 a
, float16 b
, float_status
*status
, bool subtract
)
1654 FloatParts64 pa
, pb
, *pr
;
1656 float16_unpack_canonical(&pa
, a
, status
);
1657 float16_unpack_canonical(&pb
, b
, status
);
1658 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1660 return float16_round_pack_canonical(pr
, status
);
1663 float16
float16_add(float16 a
, float16 b
, float_status
*status
)
1665 return float16_addsub(a
, b
, status
, false);
1668 float16
float16_sub(float16 a
, float16 b
, float_status
*status
)
1670 return float16_addsub(a
, b
, status
, true);
1673 static float32 QEMU_SOFTFLOAT_ATTR
1674 soft_f32_addsub(float32 a
, float32 b
, float_status
*status
, bool subtract
)
1676 FloatParts64 pa
, pb
, *pr
;
1678 float32_unpack_canonical(&pa
, a
, status
);
1679 float32_unpack_canonical(&pb
, b
, status
);
1680 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1682 return float32_round_pack_canonical(pr
, status
);
1685 static float32
soft_f32_add(float32 a
, float32 b
, float_status
*status
)
1687 return soft_f32_addsub(a
, b
, status
, false);
1690 static float32
soft_f32_sub(float32 a
, float32 b
, float_status
*status
)
1692 return soft_f32_addsub(a
, b
, status
, true);
1695 static float64 QEMU_SOFTFLOAT_ATTR
1696 soft_f64_addsub(float64 a
, float64 b
, float_status
*status
, bool subtract
)
1698 FloatParts64 pa
, pb
, *pr
;
1700 float64_unpack_canonical(&pa
, a
, status
);
1701 float64_unpack_canonical(&pb
, b
, status
);
1702 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1704 return float64_round_pack_canonical(pr
, status
);
1707 static float64
soft_f64_add(float64 a
, float64 b
, float_status
*status
)
1709 return soft_f64_addsub(a
, b
, status
, false);
1712 static float64
soft_f64_sub(float64 a
, float64 b
, float_status
*status
)
1714 return soft_f64_addsub(a
, b
, status
, true);
1717 static float hard_f32_add(float a
, float b
)
1722 static float hard_f32_sub(float a
, float b
)
1727 static double hard_f64_add(double a
, double b
)
1732 static double hard_f64_sub(double a
, double b
)
1737 static bool f32_addsubmul_post(union_float32 a
, union_float32 b
)
1739 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
1740 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1742 return !(float32_is_zero(a
.s
) && float32_is_zero(b
.s
));
1745 static bool f64_addsubmul_post(union_float64 a
, union_float64 b
)
1747 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
1748 return !(fpclassify(a
.h
) == FP_ZERO
&& fpclassify(b
.h
) == FP_ZERO
);
1750 return !(float64_is_zero(a
.s
) && float64_is_zero(b
.s
));
1754 static float32
float32_addsub(float32 a
, float32 b
, float_status
*s
,
1755 hard_f32_op2_fn hard
, soft_f32_op2_fn soft
)
1757 return float32_gen2(a
, b
, s
, hard
, soft
,
1758 f32_is_zon2
, f32_addsubmul_post
);
1761 static float64
float64_addsub(float64 a
, float64 b
, float_status
*s
,
1762 hard_f64_op2_fn hard
, soft_f64_op2_fn soft
)
1764 return float64_gen2(a
, b
, s
, hard
, soft
,
1765 f64_is_zon2
, f64_addsubmul_post
);
1768 float32 QEMU_FLATTEN
1769 float32_add(float32 a
, float32 b
, float_status
*s
)
1771 return float32_addsub(a
, b
, s
, hard_f32_add
, soft_f32_add
);
1774 float32 QEMU_FLATTEN
1775 float32_sub(float32 a
, float32 b
, float_status
*s
)
1777 return float32_addsub(a
, b
, s
, hard_f32_sub
, soft_f32_sub
);
1780 float64 QEMU_FLATTEN
1781 float64_add(float64 a
, float64 b
, float_status
*s
)
1783 return float64_addsub(a
, b
, s
, hard_f64_add
, soft_f64_add
);
1786 float64 QEMU_FLATTEN
1787 float64_sub(float64 a
, float64 b
, float_status
*s
)
1789 return float64_addsub(a
, b
, s
, hard_f64_sub
, soft_f64_sub
);
1792 static bfloat16 QEMU_FLATTEN
1793 bfloat16_addsub(bfloat16 a
, bfloat16 b
, float_status
*status
, bool subtract
)
1795 FloatParts64 pa
, pb
, *pr
;
1797 bfloat16_unpack_canonical(&pa
, a
, status
);
1798 bfloat16_unpack_canonical(&pb
, b
, status
);
1799 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1801 return bfloat16_round_pack_canonical(pr
, status
);
1804 bfloat16
bfloat16_add(bfloat16 a
, bfloat16 b
, float_status
*status
)
1806 return bfloat16_addsub(a
, b
, status
, false);
1809 bfloat16
bfloat16_sub(bfloat16 a
, bfloat16 b
, float_status
*status
)
1811 return bfloat16_addsub(a
, b
, status
, true);
1814 static float128 QEMU_FLATTEN
1815 float128_addsub(float128 a
, float128 b
, float_status
*status
, bool subtract
)
1817 FloatParts128 pa
, pb
, *pr
;
1819 float128_unpack_canonical(&pa
, a
, status
);
1820 float128_unpack_canonical(&pb
, b
, status
);
1821 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1823 return float128_round_pack_canonical(pr
, status
);
1826 float128
float128_add(float128 a
, float128 b
, float_status
*status
)
1828 return float128_addsub(a
, b
, status
, false);
1831 float128
float128_sub(float128 a
, float128 b
, float_status
*status
)
1833 return float128_addsub(a
, b
, status
, true);
1836 static floatx80 QEMU_FLATTEN
1837 floatx80_addsub(floatx80 a
, floatx80 b
, float_status
*status
, bool subtract
)
1839 FloatParts128 pa
, pb
, *pr
;
1841 if (!floatx80_unpack_canonical(&pa
, a
, status
) ||
1842 !floatx80_unpack_canonical(&pb
, b
, status
)) {
1843 return floatx80_default_nan(status
);
1846 pr
= parts_addsub(&pa
, &pb
, status
, subtract
);
1847 return floatx80_round_pack_canonical(pr
, status
);
1850 floatx80
floatx80_add(floatx80 a
, floatx80 b
, float_status
*status
)
1852 return floatx80_addsub(a
, b
, status
, false);
1855 floatx80
floatx80_sub(floatx80 a
, floatx80 b
, float_status
*status
)
1857 return floatx80_addsub(a
, b
, status
, true);
1864 float16 QEMU_FLATTEN
float16_mul(float16 a
, float16 b
, float_status
*status
)
1866 FloatParts64 pa
, pb
, *pr
;
1868 float16_unpack_canonical(&pa
, a
, status
);
1869 float16_unpack_canonical(&pb
, b
, status
);
1870 pr
= parts_mul(&pa
, &pb
, status
);
1872 return float16_round_pack_canonical(pr
, status
);
1875 static float32 QEMU_SOFTFLOAT_ATTR
1876 soft_f32_mul(float32 a
, float32 b
, float_status
*status
)
1878 FloatParts64 pa
, pb
, *pr
;
1880 float32_unpack_canonical(&pa
, a
, status
);
1881 float32_unpack_canonical(&pb
, b
, status
);
1882 pr
= parts_mul(&pa
, &pb
, status
);
1884 return float32_round_pack_canonical(pr
, status
);
1887 static float64 QEMU_SOFTFLOAT_ATTR
1888 soft_f64_mul(float64 a
, float64 b
, float_status
*status
)
1890 FloatParts64 pa
, pb
, *pr
;
1892 float64_unpack_canonical(&pa
, a
, status
);
1893 float64_unpack_canonical(&pb
, b
, status
);
1894 pr
= parts_mul(&pa
, &pb
, status
);
1896 return float64_round_pack_canonical(pr
, status
);
1899 static float hard_f32_mul(float a
, float b
)
1904 static double hard_f64_mul(double a
, double b
)
1909 float32 QEMU_FLATTEN
1910 float32_mul(float32 a
, float32 b
, float_status
*s
)
1912 return float32_gen2(a
, b
, s
, hard_f32_mul
, soft_f32_mul
,
1913 f32_is_zon2
, f32_addsubmul_post
);
1916 float64 QEMU_FLATTEN
1917 float64_mul(float64 a
, float64 b
, float_status
*s
)
1919 return float64_gen2(a
, b
, s
, hard_f64_mul
, soft_f64_mul
,
1920 f64_is_zon2
, f64_addsubmul_post
);
1923 bfloat16 QEMU_FLATTEN
1924 bfloat16_mul(bfloat16 a
, bfloat16 b
, float_status
*status
)
1926 FloatParts64 pa
, pb
, *pr
;
1928 bfloat16_unpack_canonical(&pa
, a
, status
);
1929 bfloat16_unpack_canonical(&pb
, b
, status
);
1930 pr
= parts_mul(&pa
, &pb
, status
);
1932 return bfloat16_round_pack_canonical(pr
, status
);
1935 float128 QEMU_FLATTEN
1936 float128_mul(float128 a
, float128 b
, float_status
*status
)
1938 FloatParts128 pa
, pb
, *pr
;
1940 float128_unpack_canonical(&pa
, a
, status
);
1941 float128_unpack_canonical(&pb
, b
, status
);
1942 pr
= parts_mul(&pa
, &pb
, status
);
1944 return float128_round_pack_canonical(pr
, status
);
1948 * Fused multiply-add
1951 float16 QEMU_FLATTEN
float16_muladd(float16 a
, float16 b
, float16 c
,
1952 int flags
, float_status
*status
)
1954 FloatParts64 pa
, pb
, pc
, *pr
;
1956 float16_unpack_canonical(&pa
, a
, status
);
1957 float16_unpack_canonical(&pb
, b
, status
);
1958 float16_unpack_canonical(&pc
, c
, status
);
1959 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1961 return float16_round_pack_canonical(pr
, status
);
1964 static float32 QEMU_SOFTFLOAT_ATTR
1965 soft_f32_muladd(float32 a
, float32 b
, float32 c
, int flags
,
1966 float_status
*status
)
1968 FloatParts64 pa
, pb
, pc
, *pr
;
1970 float32_unpack_canonical(&pa
, a
, status
);
1971 float32_unpack_canonical(&pb
, b
, status
);
1972 float32_unpack_canonical(&pc
, c
, status
);
1973 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1975 return float32_round_pack_canonical(pr
, status
);
1978 static float64 QEMU_SOFTFLOAT_ATTR
1979 soft_f64_muladd(float64 a
, float64 b
, float64 c
, int flags
,
1980 float_status
*status
)
1982 FloatParts64 pa
, pb
, pc
, *pr
;
1984 float64_unpack_canonical(&pa
, a
, status
);
1985 float64_unpack_canonical(&pb
, b
, status
);
1986 float64_unpack_canonical(&pc
, c
, status
);
1987 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
1989 return float64_round_pack_canonical(pr
, status
);
1992 static bool force_soft_fma
;
1994 float32 QEMU_FLATTEN
1995 float32_muladd(float32 xa
, float32 xb
, float32 xc
, int flags
, float_status
*s
)
1997 union_float32 ua
, ub
, uc
, ur
;
2003 if (unlikely(!can_use_fpu(s
))) {
2006 if (unlikely(flags
& float_muladd_halve_result
)) {
2010 float32_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2011 if (unlikely(!f32_is_zon3(ua
, ub
, uc
))) {
2015 if (unlikely(force_soft_fma
)) {
2020 * When (a || b) == 0, there's no need to check for under/over flow,
2021 * since we know the addend is (normal || 0) and the product is 0.
2023 if (float32_is_zero(ua
.s
) || float32_is_zero(ub
.s
)) {
2027 prod_sign
= float32_is_neg(ua
.s
) ^ float32_is_neg(ub
.s
);
2028 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2029 up
.s
= float32_set_sign(float32_zero
, prod_sign
);
2031 if (flags
& float_muladd_negate_c
) {
2036 union_float32 ua_orig
= ua
;
2037 union_float32 uc_orig
= uc
;
2039 if (flags
& float_muladd_negate_product
) {
2042 if (flags
& float_muladd_negate_c
) {
2046 ur
.h
= fmaf(ua
.h
, ub
.h
, uc
.h
);
2048 if (unlikely(f32_is_inf(ur
))) {
2049 float_raise(float_flag_overflow
, s
);
2050 } else if (unlikely(fabsf(ur
.h
) <= FLT_MIN
)) {
2056 if (flags
& float_muladd_negate_result
) {
2057 return float32_chs(ur
.s
);
2062 return soft_f32_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2065 float64 QEMU_FLATTEN
2066 float64_muladd(float64 xa
, float64 xb
, float64 xc
, int flags
, float_status
*s
)
2068 union_float64 ua
, ub
, uc
, ur
;
2074 if (unlikely(!can_use_fpu(s
))) {
2077 if (unlikely(flags
& float_muladd_halve_result
)) {
2081 float64_input_flush3(&ua
.s
, &ub
.s
, &uc
.s
, s
);
2082 if (unlikely(!f64_is_zon3(ua
, ub
, uc
))) {
2086 if (unlikely(force_soft_fma
)) {
2091 * When (a || b) == 0, there's no need to check for under/over flow,
2092 * since we know the addend is (normal || 0) and the product is 0.
2094 if (float64_is_zero(ua
.s
) || float64_is_zero(ub
.s
)) {
2098 prod_sign
= float64_is_neg(ua
.s
) ^ float64_is_neg(ub
.s
);
2099 prod_sign
^= !!(flags
& float_muladd_negate_product
);
2100 up
.s
= float64_set_sign(float64_zero
, prod_sign
);
2102 if (flags
& float_muladd_negate_c
) {
2107 union_float64 ua_orig
= ua
;
2108 union_float64 uc_orig
= uc
;
2110 if (flags
& float_muladd_negate_product
) {
2113 if (flags
& float_muladd_negate_c
) {
2117 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
2119 if (unlikely(f64_is_inf(ur
))) {
2120 float_raise(float_flag_overflow
, s
);
2121 } else if (unlikely(fabs(ur
.h
) <= FLT_MIN
)) {
2127 if (flags
& float_muladd_negate_result
) {
2128 return float64_chs(ur
.s
);
2133 return soft_f64_muladd(ua
.s
, ub
.s
, uc
.s
, flags
, s
);
2136 bfloat16 QEMU_FLATTEN
bfloat16_muladd(bfloat16 a
, bfloat16 b
, bfloat16 c
,
2137 int flags
, float_status
*status
)
2139 FloatParts64 pa
, pb
, pc
, *pr
;
2141 bfloat16_unpack_canonical(&pa
, a
, status
);
2142 bfloat16_unpack_canonical(&pb
, b
, status
);
2143 bfloat16_unpack_canonical(&pc
, c
, status
);
2144 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2146 return bfloat16_round_pack_canonical(pr
, status
);
2149 float128 QEMU_FLATTEN
float128_muladd(float128 a
, float128 b
, float128 c
,
2150 int flags
, float_status
*status
)
2152 FloatParts128 pa
, pb
, pc
, *pr
;
2154 float128_unpack_canonical(&pa
, a
, status
);
2155 float128_unpack_canonical(&pb
, b
, status
);
2156 float128_unpack_canonical(&pc
, c
, status
);
2157 pr
= parts_muladd(&pa
, &pb
, &pc
, flags
, status
);
2159 return float128_round_pack_canonical(pr
, status
);
2166 float16
float16_div(float16 a
, float16 b
, float_status
*status
)
2168 FloatParts64 pa
, pb
, *pr
;
2170 float16_unpack_canonical(&pa
, a
, status
);
2171 float16_unpack_canonical(&pb
, b
, status
);
2172 pr
= parts_div(&pa
, &pb
, status
);
2174 return float16_round_pack_canonical(pr
, status
);
2177 static float32 QEMU_SOFTFLOAT_ATTR
2178 soft_f32_div(float32 a
, float32 b
, float_status
*status
)
2180 FloatParts64 pa
, pb
, *pr
;
2182 float32_unpack_canonical(&pa
, a
, status
);
2183 float32_unpack_canonical(&pb
, b
, status
);
2184 pr
= parts_div(&pa
, &pb
, status
);
2186 return float32_round_pack_canonical(pr
, status
);
2189 static float64 QEMU_SOFTFLOAT_ATTR
2190 soft_f64_div(float64 a
, float64 b
, float_status
*status
)
2192 FloatParts64 pa
, pb
, *pr
;
2194 float64_unpack_canonical(&pa
, a
, status
);
2195 float64_unpack_canonical(&pb
, b
, status
);
2196 pr
= parts_div(&pa
, &pb
, status
);
2198 return float64_round_pack_canonical(pr
, status
);
2201 static float hard_f32_div(float a
, float b
)
2206 static double hard_f64_div(double a
, double b
)
2211 static bool f32_div_pre(union_float32 a
, union_float32 b
)
2213 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2214 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2215 fpclassify(b
.h
) == FP_NORMAL
;
2217 return float32_is_zero_or_normal(a
.s
) && float32_is_normal(b
.s
);
2220 static bool f64_div_pre(union_float64 a
, union_float64 b
)
2222 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2223 return (fpclassify(a
.h
) == FP_NORMAL
|| fpclassify(a
.h
) == FP_ZERO
) &&
2224 fpclassify(b
.h
) == FP_NORMAL
;
2226 return float64_is_zero_or_normal(a
.s
) && float64_is_normal(b
.s
);
2229 static bool f32_div_post(union_float32 a
, union_float32 b
)
2231 if (QEMU_HARDFLOAT_2F32_USE_FP
) {
2232 return fpclassify(a
.h
) != FP_ZERO
;
2234 return !float32_is_zero(a
.s
);
2237 static bool f64_div_post(union_float64 a
, union_float64 b
)
2239 if (QEMU_HARDFLOAT_2F64_USE_FP
) {
2240 return fpclassify(a
.h
) != FP_ZERO
;
2242 return !float64_is_zero(a
.s
);
2245 float32 QEMU_FLATTEN
2246 float32_div(float32 a
, float32 b
, float_status
*s
)
2248 return float32_gen2(a
, b
, s
, hard_f32_div
, soft_f32_div
,
2249 f32_div_pre
, f32_div_post
);
2252 float64 QEMU_FLATTEN
2253 float64_div(float64 a
, float64 b
, float_status
*s
)
2255 return float64_gen2(a
, b
, s
, hard_f64_div
, soft_f64_div
,
2256 f64_div_pre
, f64_div_post
);
2259 bfloat16 QEMU_FLATTEN
2260 bfloat16_div(bfloat16 a
, bfloat16 b
, float_status
*status
)
2262 FloatParts64 pa
, pb
, *pr
;
2264 bfloat16_unpack_canonical(&pa
, a
, status
);
2265 bfloat16_unpack_canonical(&pb
, b
, status
);
2266 pr
= parts_div(&pa
, &pb
, status
);
2268 return bfloat16_round_pack_canonical(pr
, status
);
2271 float128 QEMU_FLATTEN
2272 float128_div(float128 a
, float128 b
, float_status
*status
)
2274 FloatParts128 pa
, pb
, *pr
;
2276 float128_unpack_canonical(&pa
, a
, status
);
2277 float128_unpack_canonical(&pb
, b
, status
);
2278 pr
= parts_div(&pa
, &pb
, status
);
2280 return float128_round_pack_canonical(pr
, status
);
2284 * Float to Float conversions
2286 * Returns the result of converting one float format to another. The
2287 * conversion is performed according to the IEC/IEEE Standard for
2288 * Binary Floating-Point Arithmetic.
2290 * Usually this only needs to take care of raising invalid exceptions
2291 * and handling the conversion on NaNs.
2294 static void parts_float_to_ahp(FloatParts64
*a
, float_status
*s
)
2297 case float_class_qnan
:
2298 case float_class_snan
:
2300 * There is no NaN in the destination format. Raise Invalid
2301 * and return a zero with the sign of the input NaN.
2303 float_raise(float_flag_invalid
, s
);
2304 a
->cls
= float_class_zero
;
2307 case float_class_inf
:
2309 * There is no Inf in the destination format. Raise Invalid
2310 * and return the maximum normal with the correct sign.
2312 float_raise(float_flag_invalid
, s
);
2313 a
->cls
= float_class_normal
;
2314 a
->exp
= float16_params_ahp
.exp_max
;
2315 a
->frac
= MAKE_64BIT_MASK(float16_params_ahp
.frac_shift
,
2316 float16_params_ahp
.frac_size
+ 1);
2319 case float_class_normal
:
2320 case float_class_zero
:
2324 g_assert_not_reached();
2328 static void parts64_float_to_float(FloatParts64
*a
, float_status
*s
)
2330 if (is_nan(a
->cls
)) {
2331 parts_return_nan(a
, s
);
2335 static void parts128_float_to_float(FloatParts128
*a
, float_status
*s
)
2337 if (is_nan(a
->cls
)) {
2338 parts_return_nan(a
, s
);
2342 #define parts_float_to_float(P, S) \
2343 PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2345 static void parts_float_to_float_narrow(FloatParts64
*a
, FloatParts128
*b
,
2352 if (a
->cls
== float_class_normal
) {
2353 frac_truncjam(a
, b
);
2354 } else if (is_nan(a
->cls
)) {
2355 /* Discard the low bits of the NaN. */
2356 a
->frac
= b
->frac_hi
;
2357 parts_return_nan(a
, s
);
2361 static void parts_float_to_float_widen(FloatParts128
*a
, FloatParts64
*b
,
2369 if (is_nan(a
->cls
)) {
2370 parts_return_nan(a
, s
);
2374 float32
float16_to_float32(float16 a
, bool ieee
, float_status
*s
)
2376 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2379 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2380 parts_float_to_float(&p
, s
);
2381 return float32_round_pack_canonical(&p
, s
);
2384 float64
float16_to_float64(float16 a
, bool ieee
, float_status
*s
)
2386 const FloatFmt
*fmt16
= ieee
? &float16_params
: &float16_params_ahp
;
2389 float16a_unpack_canonical(&p
, a
, s
, fmt16
);
2390 parts_float_to_float(&p
, s
);
2391 return float64_round_pack_canonical(&p
, s
);
2394 float16
float32_to_float16(float32 a
, bool ieee
, float_status
*s
)
2397 const FloatFmt
*fmt
;
2399 float32_unpack_canonical(&p
, a
, s
);
2401 parts_float_to_float(&p
, s
);
2402 fmt
= &float16_params
;
2404 parts_float_to_ahp(&p
, s
);
2405 fmt
= &float16_params_ahp
;
2407 return float16a_round_pack_canonical(&p
, s
, fmt
);
2410 static float64 QEMU_SOFTFLOAT_ATTR
2411 soft_float32_to_float64(float32 a
, float_status
*s
)
2415 float32_unpack_canonical(&p
, a
, s
);
2416 parts_float_to_float(&p
, s
);
2417 return float64_round_pack_canonical(&p
, s
);
2420 float64
float32_to_float64(float32 a
, float_status
*s
)
2422 if (likely(float32_is_normal(a
))) {
2423 /* Widening conversion can never produce inexact results. */
2429 } else if (float32_is_zero(a
)) {
2430 return float64_set_sign(float64_zero
, float32_is_neg(a
));
2432 return soft_float32_to_float64(a
, s
);
2436 float16
float64_to_float16(float64 a
, bool ieee
, float_status
*s
)
2439 const FloatFmt
*fmt
;
2441 float64_unpack_canonical(&p
, a
, s
);
2443 parts_float_to_float(&p
, s
);
2444 fmt
= &float16_params
;
2446 parts_float_to_ahp(&p
, s
);
2447 fmt
= &float16_params_ahp
;
2449 return float16a_round_pack_canonical(&p
, s
, fmt
);
2452 float32
float64_to_float32(float64 a
, float_status
*s
)
2456 float64_unpack_canonical(&p
, a
, s
);
2457 parts_float_to_float(&p
, s
);
2458 return float32_round_pack_canonical(&p
, s
);
2461 float32
bfloat16_to_float32(bfloat16 a
, float_status
*s
)
2465 bfloat16_unpack_canonical(&p
, a
, s
);
2466 parts_float_to_float(&p
, s
);
2467 return float32_round_pack_canonical(&p
, s
);
2470 float64
bfloat16_to_float64(bfloat16 a
, float_status
*s
)
2474 bfloat16_unpack_canonical(&p
, a
, s
);
2475 parts_float_to_float(&p
, s
);
2476 return float64_round_pack_canonical(&p
, s
);
2479 bfloat16
float32_to_bfloat16(float32 a
, float_status
*s
)
2483 float32_unpack_canonical(&p
, a
, s
);
2484 parts_float_to_float(&p
, s
);
2485 return bfloat16_round_pack_canonical(&p
, s
);
2488 bfloat16
float64_to_bfloat16(float64 a
, float_status
*s
)
2492 float64_unpack_canonical(&p
, a
, s
);
2493 parts_float_to_float(&p
, s
);
2494 return bfloat16_round_pack_canonical(&p
, s
);
2497 float32
float128_to_float32(float128 a
, float_status
*s
)
2502 float128_unpack_canonical(&p128
, a
, s
);
2503 parts_float_to_float_narrow(&p64
, &p128
, s
);
2504 return float32_round_pack_canonical(&p64
, s
);
2507 float64
float128_to_float64(float128 a
, float_status
*s
)
2512 float128_unpack_canonical(&p128
, a
, s
);
2513 parts_float_to_float_narrow(&p64
, &p128
, s
);
2514 return float64_round_pack_canonical(&p64
, s
);
2517 float128
float32_to_float128(float32 a
, float_status
*s
)
2522 float32_unpack_canonical(&p64
, a
, s
);
2523 parts_float_to_float_widen(&p128
, &p64
, s
);
2524 return float128_round_pack_canonical(&p128
, s
);
2527 float128
float64_to_float128(float64 a
, float_status
*s
)
2532 float64_unpack_canonical(&p64
, a
, s
);
2533 parts_float_to_float_widen(&p128
, &p64
, s
);
2534 return float128_round_pack_canonical(&p128
, s
);
2538 * Round to integral value
2541 float16
float16_round_to_int(float16 a
, float_status
*s
)
2545 float16_unpack_canonical(&p
, a
, s
);
2546 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float16_params
);
2547 return float16_round_pack_canonical(&p
, s
);
2550 float32
float32_round_to_int(float32 a
, float_status
*s
)
2554 float32_unpack_canonical(&p
, a
, s
);
2555 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float32_params
);
2556 return float32_round_pack_canonical(&p
, s
);
2559 float64
float64_round_to_int(float64 a
, float_status
*s
)
2563 float64_unpack_canonical(&p
, a
, s
);
2564 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float64_params
);
2565 return float64_round_pack_canonical(&p
, s
);
2568 bfloat16
bfloat16_round_to_int(bfloat16 a
, float_status
*s
)
2572 bfloat16_unpack_canonical(&p
, a
, s
);
2573 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &bfloat16_params
);
2574 return bfloat16_round_pack_canonical(&p
, s
);
2577 float128
float128_round_to_int(float128 a
, float_status
*s
)
2581 float128_unpack_canonical(&p
, a
, s
);
2582 parts_round_to_int(&p
, s
->float_rounding_mode
, 0, s
, &float128_params
);
2583 return float128_round_pack_canonical(&p
, s
);
2587 * Floating-point to signed integer conversions
2590 int8_t float16_to_int8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2595 float16_unpack_canonical(&p
, a
, s
);
2596 return parts_float_to_sint(&p
, rmode
, scale
, INT8_MIN
, INT8_MAX
, s
);
2599 int16_t float16_to_int16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2604 float16_unpack_canonical(&p
, a
, s
);
2605 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2608 int32_t float16_to_int32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2613 float16_unpack_canonical(&p
, a
, s
);
2614 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2617 int64_t float16_to_int64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2622 float16_unpack_canonical(&p
, a
, s
);
2623 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2626 int16_t float32_to_int16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2631 float32_unpack_canonical(&p
, a
, s
);
2632 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2635 int32_t float32_to_int32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2640 float32_unpack_canonical(&p
, a
, s
);
2641 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2644 int64_t float32_to_int64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2649 float32_unpack_canonical(&p
, a
, s
);
2650 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2653 int16_t float64_to_int16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2658 float64_unpack_canonical(&p
, a
, s
);
2659 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2662 int32_t float64_to_int32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2667 float64_unpack_canonical(&p
, a
, s
);
2668 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2671 int64_t float64_to_int64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2676 float64_unpack_canonical(&p
, a
, s
);
2677 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2680 int16_t bfloat16_to_int16_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2685 bfloat16_unpack_canonical(&p
, a
, s
);
2686 return parts_float_to_sint(&p
, rmode
, scale
, INT16_MIN
, INT16_MAX
, s
);
2689 int32_t bfloat16_to_int32_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2694 bfloat16_unpack_canonical(&p
, a
, s
);
2695 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2698 int64_t bfloat16_to_int64_scalbn(bfloat16 a
, FloatRoundMode rmode
, int scale
,
2703 bfloat16_unpack_canonical(&p
, a
, s
);
2704 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2707 static int32_t float128_to_int32_scalbn(float128 a
, FloatRoundMode rmode
,
2708 int scale
, float_status
*s
)
2712 float128_unpack_canonical(&p
, a
, s
);
2713 return parts_float_to_sint(&p
, rmode
, scale
, INT32_MIN
, INT32_MAX
, s
);
2716 static int64_t float128_to_int64_scalbn(float128 a
, FloatRoundMode rmode
,
2717 int scale
, float_status
*s
)
2721 float128_unpack_canonical(&p
, a
, s
);
2722 return parts_float_to_sint(&p
, rmode
, scale
, INT64_MIN
, INT64_MAX
, s
);
2725 int8_t float16_to_int8(float16 a
, float_status
*s
)
2727 return float16_to_int8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2730 int16_t float16_to_int16(float16 a
, float_status
*s
)
2732 return float16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2735 int32_t float16_to_int32(float16 a
, float_status
*s
)
2737 return float16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2740 int64_t float16_to_int64(float16 a
, float_status
*s
)
2742 return float16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2745 int16_t float32_to_int16(float32 a
, float_status
*s
)
2747 return float32_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2750 int32_t float32_to_int32(float32 a
, float_status
*s
)
2752 return float32_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2755 int64_t float32_to_int64(float32 a
, float_status
*s
)
2757 return float32_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2760 int16_t float64_to_int16(float64 a
, float_status
*s
)
2762 return float64_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2765 int32_t float64_to_int32(float64 a
, float_status
*s
)
2767 return float64_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2770 int64_t float64_to_int64(float64 a
, float_status
*s
)
2772 return float64_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2775 int32_t float128_to_int32(float128 a
, float_status
*s
)
2777 return float128_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2780 int64_t float128_to_int64(float128 a
, float_status
*s
)
2782 return float128_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2785 int16_t float16_to_int16_round_to_zero(float16 a
, float_status
*s
)
2787 return float16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2790 int32_t float16_to_int32_round_to_zero(float16 a
, float_status
*s
)
2792 return float16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2795 int64_t float16_to_int64_round_to_zero(float16 a
, float_status
*s
)
2797 return float16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2800 int16_t float32_to_int16_round_to_zero(float32 a
, float_status
*s
)
2802 return float32_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2805 int32_t float32_to_int32_round_to_zero(float32 a
, float_status
*s
)
2807 return float32_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2810 int64_t float32_to_int64_round_to_zero(float32 a
, float_status
*s
)
2812 return float32_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2815 int16_t float64_to_int16_round_to_zero(float64 a
, float_status
*s
)
2817 return float64_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2820 int32_t float64_to_int32_round_to_zero(float64 a
, float_status
*s
)
2822 return float64_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2825 int64_t float64_to_int64_round_to_zero(float64 a
, float_status
*s
)
2827 return float64_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2830 int32_t float128_to_int32_round_to_zero(float128 a
, float_status
*s
)
2832 return float128_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2835 int64_t float128_to_int64_round_to_zero(float128 a
, float_status
*s
)
2837 return float128_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2840 int16_t bfloat16_to_int16(bfloat16 a
, float_status
*s
)
2842 return bfloat16_to_int16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2845 int32_t bfloat16_to_int32(bfloat16 a
, float_status
*s
)
2847 return bfloat16_to_int32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2850 int64_t bfloat16_to_int64(bfloat16 a
, float_status
*s
)
2852 return bfloat16_to_int64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
2855 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a
, float_status
*s
)
2857 return bfloat16_to_int16_scalbn(a
, float_round_to_zero
, 0, s
);
2860 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a
, float_status
*s
)
2862 return bfloat16_to_int32_scalbn(a
, float_round_to_zero
, 0, s
);
2865 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a
, float_status
*s
)
2867 return bfloat16_to_int64_scalbn(a
, float_round_to_zero
, 0, s
);
2871 * Floating-point to unsigned integer conversions
2874 uint8_t float16_to_uint8_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2879 float16_unpack_canonical(&p
, a
, s
);
2880 return parts_float_to_uint(&p
, rmode
, scale
, UINT8_MAX
, s
);
2883 uint16_t float16_to_uint16_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2888 float16_unpack_canonical(&p
, a
, s
);
2889 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2892 uint32_t float16_to_uint32_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2897 float16_unpack_canonical(&p
, a
, s
);
2898 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2901 uint64_t float16_to_uint64_scalbn(float16 a
, FloatRoundMode rmode
, int scale
,
2906 float16_unpack_canonical(&p
, a
, s
);
2907 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2910 uint16_t float32_to_uint16_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2915 float32_unpack_canonical(&p
, a
, s
);
2916 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2919 uint32_t float32_to_uint32_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2924 float32_unpack_canonical(&p
, a
, s
);
2925 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2928 uint64_t float32_to_uint64_scalbn(float32 a
, FloatRoundMode rmode
, int scale
,
2933 float32_unpack_canonical(&p
, a
, s
);
2934 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2937 uint16_t float64_to_uint16_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2942 float64_unpack_canonical(&p
, a
, s
);
2943 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2946 uint32_t float64_to_uint32_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2951 float64_unpack_canonical(&p
, a
, s
);
2952 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2955 uint64_t float64_to_uint64_scalbn(float64 a
, FloatRoundMode rmode
, int scale
,
2960 float64_unpack_canonical(&p
, a
, s
);
2961 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2964 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2965 int scale
, float_status
*s
)
2969 bfloat16_unpack_canonical(&p
, a
, s
);
2970 return parts_float_to_uint(&p
, rmode
, scale
, UINT16_MAX
, s
);
2973 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2974 int scale
, float_status
*s
)
2978 bfloat16_unpack_canonical(&p
, a
, s
);
2979 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
2982 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a
, FloatRoundMode rmode
,
2983 int scale
, float_status
*s
)
2987 bfloat16_unpack_canonical(&p
, a
, s
);
2988 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
2991 static uint32_t float128_to_uint32_scalbn(float128 a
, FloatRoundMode rmode
,
2992 int scale
, float_status
*s
)
2996 float128_unpack_canonical(&p
, a
, s
);
2997 return parts_float_to_uint(&p
, rmode
, scale
, UINT32_MAX
, s
);
3000 static uint64_t float128_to_uint64_scalbn(float128 a
, FloatRoundMode rmode
,
3001 int scale
, float_status
*s
)
3005 float128_unpack_canonical(&p
, a
, s
);
3006 return parts_float_to_uint(&p
, rmode
, scale
, UINT64_MAX
, s
);
3009 uint8_t float16_to_uint8(float16 a
, float_status
*s
)
3011 return float16_to_uint8_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3014 uint16_t float16_to_uint16(float16 a
, float_status
*s
)
3016 return float16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3019 uint32_t float16_to_uint32(float16 a
, float_status
*s
)
3021 return float16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3024 uint64_t float16_to_uint64(float16 a
, float_status
*s
)
3026 return float16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3029 uint16_t float32_to_uint16(float32 a
, float_status
*s
)
3031 return float32_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3034 uint32_t float32_to_uint32(float32 a
, float_status
*s
)
3036 return float32_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3039 uint64_t float32_to_uint64(float32 a
, float_status
*s
)
3041 return float32_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3044 uint16_t float64_to_uint16(float64 a
, float_status
*s
)
3046 return float64_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3049 uint32_t float64_to_uint32(float64 a
, float_status
*s
)
3051 return float64_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3054 uint64_t float64_to_uint64(float64 a
, float_status
*s
)
3056 return float64_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3059 uint32_t float128_to_uint32(float128 a
, float_status
*s
)
3061 return float128_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3064 uint64_t float128_to_uint64(float128 a
, float_status
*s
)
3066 return float128_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3069 uint16_t float16_to_uint16_round_to_zero(float16 a
, float_status
*s
)
3071 return float16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3074 uint32_t float16_to_uint32_round_to_zero(float16 a
, float_status
*s
)
3076 return float16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3079 uint64_t float16_to_uint64_round_to_zero(float16 a
, float_status
*s
)
3081 return float16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3084 uint16_t float32_to_uint16_round_to_zero(float32 a
, float_status
*s
)
3086 return float32_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3089 uint32_t float32_to_uint32_round_to_zero(float32 a
, float_status
*s
)
3091 return float32_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3094 uint64_t float32_to_uint64_round_to_zero(float32 a
, float_status
*s
)
3096 return float32_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3099 uint16_t float64_to_uint16_round_to_zero(float64 a
, float_status
*s
)
3101 return float64_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3104 uint32_t float64_to_uint32_round_to_zero(float64 a
, float_status
*s
)
3106 return float64_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3109 uint64_t float64_to_uint64_round_to_zero(float64 a
, float_status
*s
)
3111 return float64_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3114 uint32_t float128_to_uint32_round_to_zero(float128 a
, float_status
*s
)
3116 return float128_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3119 uint64_t float128_to_uint64_round_to_zero(float128 a
, float_status
*s
)
3121 return float128_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3124 uint16_t bfloat16_to_uint16(bfloat16 a
, float_status
*s
)
3126 return bfloat16_to_uint16_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3129 uint32_t bfloat16_to_uint32(bfloat16 a
, float_status
*s
)
3131 return bfloat16_to_uint32_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3134 uint64_t bfloat16_to_uint64(bfloat16 a
, float_status
*s
)
3136 return bfloat16_to_uint64_scalbn(a
, s
->float_rounding_mode
, 0, s
);
3139 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a
, float_status
*s
)
3141 return bfloat16_to_uint16_scalbn(a
, float_round_to_zero
, 0, s
);
3144 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a
, float_status
*s
)
3146 return bfloat16_to_uint32_scalbn(a
, float_round_to_zero
, 0, s
);
3149 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a
, float_status
*s
)
3151 return bfloat16_to_uint64_scalbn(a
, float_round_to_zero
, 0, s
);
3155 * Signed integer to floating-point conversions
3158 float16
int64_to_float16_scalbn(int64_t a
, int scale
, float_status
*status
)
3162 parts_sint_to_float(&p
, a
, scale
, status
);
3163 return float16_round_pack_canonical(&p
, status
);
3166 float16
int32_to_float16_scalbn(int32_t a
, int scale
, float_status
*status
)
3168 return int64_to_float16_scalbn(a
, scale
, status
);
3171 float16
int16_to_float16_scalbn(int16_t a
, int scale
, float_status
*status
)
3173 return int64_to_float16_scalbn(a
, scale
, status
);
3176 float16
int64_to_float16(int64_t a
, float_status
*status
)
3178 return int64_to_float16_scalbn(a
, 0, status
);
3181 float16
int32_to_float16(int32_t a
, float_status
*status
)
3183 return int64_to_float16_scalbn(a
, 0, status
);
3186 float16
int16_to_float16(int16_t a
, float_status
*status
)
3188 return int64_to_float16_scalbn(a
, 0, status
);
3191 float16
int8_to_float16(int8_t a
, float_status
*status
)
3193 return int64_to_float16_scalbn(a
, 0, status
);
3196 float32
int64_to_float32_scalbn(int64_t a
, int scale
, float_status
*status
)
3200 parts64_sint_to_float(&p
, a
, scale
, status
);
3201 return float32_round_pack_canonical(&p
, status
);
3204 float32
int32_to_float32_scalbn(int32_t a
, int scale
, float_status
*status
)
3206 return int64_to_float32_scalbn(a
, scale
, status
);
3209 float32
int16_to_float32_scalbn(int16_t a
, int scale
, float_status
*status
)
3211 return int64_to_float32_scalbn(a
, scale
, status
);
3214 float32
int64_to_float32(int64_t a
, float_status
*status
)
3216 return int64_to_float32_scalbn(a
, 0, status
);
3219 float32
int32_to_float32(int32_t a
, float_status
*status
)
3221 return int64_to_float32_scalbn(a
, 0, status
);
3224 float32
int16_to_float32(int16_t a
, float_status
*status
)
3226 return int64_to_float32_scalbn(a
, 0, status
);
3229 float64
int64_to_float64_scalbn(int64_t a
, int scale
, float_status
*status
)
3233 parts_sint_to_float(&p
, a
, scale
, status
);
3234 return float64_round_pack_canonical(&p
, status
);
3237 float64
int32_to_float64_scalbn(int32_t a
, int scale
, float_status
*status
)
3239 return int64_to_float64_scalbn(a
, scale
, status
);
3242 float64
int16_to_float64_scalbn(int16_t a
, int scale
, float_status
*status
)
3244 return int64_to_float64_scalbn(a
, scale
, status
);
3247 float64
int64_to_float64(int64_t a
, float_status
*status
)
3249 return int64_to_float64_scalbn(a
, 0, status
);
3252 float64
int32_to_float64(int32_t a
, float_status
*status
)
3254 return int64_to_float64_scalbn(a
, 0, status
);
3257 float64
int16_to_float64(int16_t a
, float_status
*status
)
3259 return int64_to_float64_scalbn(a
, 0, status
);
3262 bfloat16
int64_to_bfloat16_scalbn(int64_t a
, int scale
, float_status
*status
)
3266 parts_sint_to_float(&p
, a
, scale
, status
);
3267 return bfloat16_round_pack_canonical(&p
, status
);
3270 bfloat16
int32_to_bfloat16_scalbn(int32_t a
, int scale
, float_status
*status
)
3272 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3275 bfloat16
int16_to_bfloat16_scalbn(int16_t a
, int scale
, float_status
*status
)
3277 return int64_to_bfloat16_scalbn(a
, scale
, status
);
3280 bfloat16
int64_to_bfloat16(int64_t a
, float_status
*status
)
3282 return int64_to_bfloat16_scalbn(a
, 0, status
);
3285 bfloat16
int32_to_bfloat16(int32_t a
, float_status
*status
)
3287 return int64_to_bfloat16_scalbn(a
, 0, status
);
3290 bfloat16
int16_to_bfloat16(int16_t a
, float_status
*status
)
3292 return int64_to_bfloat16_scalbn(a
, 0, status
);
3295 float128
int64_to_float128(int64_t a
, float_status
*status
)
3299 parts_sint_to_float(&p
, a
, 0, status
);
3300 return float128_round_pack_canonical(&p
, status
);
3303 float128
int32_to_float128(int32_t a
, float_status
*status
)
3305 return int64_to_float128(a
, status
);
3309 * Unsigned Integer to floating-point conversions
3312 float16
uint64_to_float16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3316 parts_uint_to_float(&p
, a
, scale
, status
);
3317 return float16_round_pack_canonical(&p
, status
);
3320 float16
uint32_to_float16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3322 return uint64_to_float16_scalbn(a
, scale
, status
);
3325 float16
uint16_to_float16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3327 return uint64_to_float16_scalbn(a
, scale
, status
);
3330 float16
uint64_to_float16(uint64_t a
, float_status
*status
)
3332 return uint64_to_float16_scalbn(a
, 0, status
);
3335 float16
uint32_to_float16(uint32_t a
, float_status
*status
)
3337 return uint64_to_float16_scalbn(a
, 0, status
);
3340 float16
uint16_to_float16(uint16_t a
, float_status
*status
)
3342 return uint64_to_float16_scalbn(a
, 0, status
);
3345 float16
uint8_to_float16(uint8_t a
, float_status
*status
)
3347 return uint64_to_float16_scalbn(a
, 0, status
);
3350 float32
uint64_to_float32_scalbn(uint64_t a
, int scale
, float_status
*status
)
3354 parts_uint_to_float(&p
, a
, scale
, status
);
3355 return float32_round_pack_canonical(&p
, status
);
3358 float32
uint32_to_float32_scalbn(uint32_t a
, int scale
, float_status
*status
)
3360 return uint64_to_float32_scalbn(a
, scale
, status
);
3363 float32
uint16_to_float32_scalbn(uint16_t a
, int scale
, float_status
*status
)
3365 return uint64_to_float32_scalbn(a
, scale
, status
);
3368 float32
uint64_to_float32(uint64_t a
, float_status
*status
)
3370 return uint64_to_float32_scalbn(a
, 0, status
);
3373 float32
uint32_to_float32(uint32_t a
, float_status
*status
)
3375 return uint64_to_float32_scalbn(a
, 0, status
);
3378 float32
uint16_to_float32(uint16_t a
, float_status
*status
)
3380 return uint64_to_float32_scalbn(a
, 0, status
);
3383 float64
uint64_to_float64_scalbn(uint64_t a
, int scale
, float_status
*status
)
3387 parts_uint_to_float(&p
, a
, scale
, status
);
3388 return float64_round_pack_canonical(&p
, status
);
3391 float64
uint32_to_float64_scalbn(uint32_t a
, int scale
, float_status
*status
)
3393 return uint64_to_float64_scalbn(a
, scale
, status
);
3396 float64
uint16_to_float64_scalbn(uint16_t a
, int scale
, float_status
*status
)
3398 return uint64_to_float64_scalbn(a
, scale
, status
);
3401 float64
uint64_to_float64(uint64_t a
, float_status
*status
)
3403 return uint64_to_float64_scalbn(a
, 0, status
);
3406 float64
uint32_to_float64(uint32_t a
, float_status
*status
)
3408 return uint64_to_float64_scalbn(a
, 0, status
);
3411 float64
uint16_to_float64(uint16_t a
, float_status
*status
)
3413 return uint64_to_float64_scalbn(a
, 0, status
);
3416 bfloat16
uint64_to_bfloat16_scalbn(uint64_t a
, int scale
, float_status
*status
)
3420 parts_uint_to_float(&p
, a
, scale
, status
);
3421 return bfloat16_round_pack_canonical(&p
, status
);
3424 bfloat16
uint32_to_bfloat16_scalbn(uint32_t a
, int scale
, float_status
*status
)
3426 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3429 bfloat16
uint16_to_bfloat16_scalbn(uint16_t a
, int scale
, float_status
*status
)
3431 return uint64_to_bfloat16_scalbn(a
, scale
, status
);
3434 bfloat16
uint64_to_bfloat16(uint64_t a
, float_status
*status
)
3436 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3439 bfloat16
uint32_to_bfloat16(uint32_t a
, float_status
*status
)
3441 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3444 bfloat16
uint16_to_bfloat16(uint16_t a
, float_status
*status
)
3446 return uint64_to_bfloat16_scalbn(a
, 0, status
);
3449 float128
uint64_to_float128(uint64_t a
, float_status
*status
)
3453 parts_uint_to_float(&p
, a
, 0, status
);
3454 return float128_round_pack_canonical(&p
, status
);
3458 * Minimum and maximum
3461 static float16
float16_minmax(float16 a
, float16 b
, float_status
*s
, int flags
)
3463 FloatParts64 pa
, pb
, *pr
;
3465 float16_unpack_canonical(&pa
, a
, s
);
3466 float16_unpack_canonical(&pb
, b
, s
);
3467 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3469 return float16_round_pack_canonical(pr
, s
);
3472 static bfloat16
bfloat16_minmax(bfloat16 a
, bfloat16 b
,
3473 float_status
*s
, int flags
)
3475 FloatParts64 pa
, pb
, *pr
;
3477 bfloat16_unpack_canonical(&pa
, a
, s
);
3478 bfloat16_unpack_canonical(&pb
, b
, s
);
3479 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3481 return bfloat16_round_pack_canonical(pr
, s
);
3484 static float32
float32_minmax(float32 a
, float32 b
, float_status
*s
, int flags
)
3486 FloatParts64 pa
, pb
, *pr
;
3488 float32_unpack_canonical(&pa
, a
, s
);
3489 float32_unpack_canonical(&pb
, b
, s
);
3490 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3492 return float32_round_pack_canonical(pr
, s
);
3495 static float64
float64_minmax(float64 a
, float64 b
, float_status
*s
, int flags
)
3497 FloatParts64 pa
, pb
, *pr
;
3499 float64_unpack_canonical(&pa
, a
, s
);
3500 float64_unpack_canonical(&pb
, b
, s
);
3501 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3503 return float64_round_pack_canonical(pr
, s
);
3506 static float128
float128_minmax(float128 a
, float128 b
,
3507 float_status
*s
, int flags
)
3509 FloatParts128 pa
, pb
, *pr
;
3511 float128_unpack_canonical(&pa
, a
, s
);
3512 float128_unpack_canonical(&pb
, b
, s
);
3513 pr
= parts_minmax(&pa
, &pb
, s
, flags
);
3515 return float128_round_pack_canonical(pr
, s
);
/*
 * MINMAX_1 expands to one public min/max entry point for @type;
 * MINMAX_2 instantiates the six standard variants for a type.
 */
#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

#define MINMAX_2(type)                                                  \
    MINMAX_1(type, max, 0)                                              \
    MINMAX_1(type, maxnum, minmax_isnum)                                \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)              \
    MINMAX_1(type, min, minmax_ismin)                                   \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                 \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3540 * Floating point compare
3543 static FloatRelation QEMU_FLATTEN
3544 float16_do_compare(float16 a
, float16 b
, float_status
*s
, bool is_quiet
)
3546 FloatParts64 pa
, pb
;
3548 float16_unpack_canonical(&pa
, a
, s
);
3549 float16_unpack_canonical(&pb
, b
, s
);
3550 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3553 FloatRelation
float16_compare(float16 a
, float16 b
, float_status
*s
)
3555 return float16_do_compare(a
, b
, s
, false);
3558 FloatRelation
float16_compare_quiet(float16 a
, float16 b
, float_status
*s
)
3560 return float16_do_compare(a
, b
, s
, true);
3563 static FloatRelation QEMU_SOFTFLOAT_ATTR
3564 float32_do_compare(float32 a
, float32 b
, float_status
*s
, bool is_quiet
)
3566 FloatParts64 pa
, pb
;
3568 float32_unpack_canonical(&pa
, a
, s
);
3569 float32_unpack_canonical(&pb
, b
, s
);
3570 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3573 static FloatRelation QEMU_FLATTEN
3574 float32_hs_compare(float32 xa
, float32 xb
, float_status
*s
, bool is_quiet
)
3576 union_float32 ua
, ub
;
3581 if (QEMU_NO_HARDFLOAT
) {
3585 float32_input_flush2(&ua
.s
, &ub
.s
, s
);
3586 if (isgreaterequal(ua
.h
, ub
.h
)) {
3587 if (isgreater(ua
.h
, ub
.h
)) {
3588 return float_relation_greater
;
3590 return float_relation_equal
;
3592 if (likely(isless(ua
.h
, ub
.h
))) {
3593 return float_relation_less
;
3596 * The only condition remaining is unordered.
3597 * Fall through to set flags.
3600 return float32_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
3603 FloatRelation
float32_compare(float32 a
, float32 b
, float_status
*s
)
3605 return float32_hs_compare(a
, b
, s
, false);
3608 FloatRelation
float32_compare_quiet(float32 a
, float32 b
, float_status
*s
)
3610 return float32_hs_compare(a
, b
, s
, true);
3613 static FloatRelation QEMU_SOFTFLOAT_ATTR
3614 float64_do_compare(float64 a
, float64 b
, float_status
*s
, bool is_quiet
)
3616 FloatParts64 pa
, pb
;
3618 float64_unpack_canonical(&pa
, a
, s
);
3619 float64_unpack_canonical(&pb
, b
, s
);
3620 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3623 static FloatRelation QEMU_FLATTEN
3624 float64_hs_compare(float64 xa
, float64 xb
, float_status
*s
, bool is_quiet
)
3626 union_float64 ua
, ub
;
3631 if (QEMU_NO_HARDFLOAT
) {
3635 float64_input_flush2(&ua
.s
, &ub
.s
, s
);
3636 if (isgreaterequal(ua
.h
, ub
.h
)) {
3637 if (isgreater(ua
.h
, ub
.h
)) {
3638 return float_relation_greater
;
3640 return float_relation_equal
;
3642 if (likely(isless(ua
.h
, ub
.h
))) {
3643 return float_relation_less
;
3646 * The only condition remaining is unordered.
3647 * Fall through to set flags.
3650 return float64_do_compare(ua
.s
, ub
.s
, s
, is_quiet
);
3653 FloatRelation
float64_compare(float64 a
, float64 b
, float_status
*s
)
3655 return float64_hs_compare(a
, b
, s
, false);
3658 FloatRelation
float64_compare_quiet(float64 a
, float64 b
, float_status
*s
)
3660 return float64_hs_compare(a
, b
, s
, true);
3663 static FloatRelation QEMU_FLATTEN
3664 bfloat16_do_compare(bfloat16 a
, bfloat16 b
, float_status
*s
, bool is_quiet
)
3666 FloatParts64 pa
, pb
;
3668 bfloat16_unpack_canonical(&pa
, a
, s
);
3669 bfloat16_unpack_canonical(&pb
, b
, s
);
3670 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3673 FloatRelation
bfloat16_compare(bfloat16 a
, bfloat16 b
, float_status
*s
)
3675 return bfloat16_do_compare(a
, b
, s
, false);
3678 FloatRelation
bfloat16_compare_quiet(bfloat16 a
, bfloat16 b
, float_status
*s
)
3680 return bfloat16_do_compare(a
, b
, s
, true);
3683 static FloatRelation QEMU_FLATTEN
3684 float128_do_compare(float128 a
, float128 b
, float_status
*s
, bool is_quiet
)
3686 FloatParts128 pa
, pb
;
3688 float128_unpack_canonical(&pa
, a
, s
);
3689 float128_unpack_canonical(&pb
, b
, s
);
3690 return parts_compare(&pa
, &pb
, s
, is_quiet
);
3693 FloatRelation
float128_compare(float128 a
, float128 b
, float_status
*s
)
3695 return float128_do_compare(a
, b
, s
, false);
3698 FloatRelation
float128_compare_quiet(float128 a
, float128 b
, float_status
*s
)
3700 return float128_do_compare(a
, b
, s
, true);
3707 float16
float16_scalbn(float16 a
, int n
, float_status
*status
)
3711 float16_unpack_canonical(&p
, a
, status
);
3712 parts_scalbn(&p
, n
, status
);
3713 return float16_round_pack_canonical(&p
, status
);
3716 float32
float32_scalbn(float32 a
, int n
, float_status
*status
)
3720 float32_unpack_canonical(&p
, a
, status
);
3721 parts_scalbn(&p
, n
, status
);
3722 return float32_round_pack_canonical(&p
, status
);
3725 float64
float64_scalbn(float64 a
, int n
, float_status
*status
)
3729 float64_unpack_canonical(&p
, a
, status
);
3730 parts_scalbn(&p
, n
, status
);
3731 return float64_round_pack_canonical(&p
, status
);
3734 bfloat16
bfloat16_scalbn(bfloat16 a
, int n
, float_status
*status
)
3738 bfloat16_unpack_canonical(&p
, a
, status
);
3739 parts_scalbn(&p
, n
, status
);
3740 return bfloat16_round_pack_canonical(&p
, status
);
3743 float128
float128_scalbn(float128 a
, int n
, float_status
*status
)
3747 float128_unpack_canonical(&p
, a
, status
);
3748 parts_scalbn(&p
, n
, status
);
3749 return float128_round_pack_canonical(&p
, status
);
3756 float16 QEMU_FLATTEN
float16_sqrt(float16 a
, float_status
*status
)
3760 float16_unpack_canonical(&p
, a
, status
);
3761 parts_sqrt(&p
, status
, &float16_params
);
3762 return float16_round_pack_canonical(&p
, status
);
3765 static float32 QEMU_SOFTFLOAT_ATTR
3766 soft_f32_sqrt(float32 a
, float_status
*status
)
3770 float32_unpack_canonical(&p
, a
, status
);
3771 parts_sqrt(&p
, status
, &float32_params
);
3772 return float32_round_pack_canonical(&p
, status
);
3775 static float64 QEMU_SOFTFLOAT_ATTR
3776 soft_f64_sqrt(float64 a
, float_status
*status
)
3780 float64_unpack_canonical(&p
, a
, status
);
3781 parts_sqrt(&p
, status
, &float64_params
);
3782 return float64_round_pack_canonical(&p
, status
);
/*
 * Hard-float fast path for float32 sqrt: use the host sqrtf() when the
 * float_status configuration and the operand allow it, otherwise fall
 * back to the soft-float implementation.
 * NOTE(review): several lines of this block were lost in extraction and
 * have been restored from the canonical structure — verify against the
 * upstream file.
 */
float32 QEMU_FLATTEN
float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        /* Status flags or rounding mode rule out the host FPU. */
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        /* Host-FP classification: only normal/zero, non-negative inputs. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        /* Soft classification path: same constraints, on the raw bits. */
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}
/*
 * Hard-float fast path for float64 sqrt: use the host sqrt() when the
 * float_status configuration and the operand allow it, otherwise fall
 * back to the soft-float implementation.
 * NOTE(review): several lines of this block were lost in extraction and
 * have been restored from the canonical structure — verify against the
 * upstream file.
 */
float64 QEMU_FLATTEN
float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        /* Status flags or rounding mode rule out the host FPU. */
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        /* Host-FP classification: only normal/zero, non-negative inputs. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        /* Soft classification path: same constraints, on the raw bits. */
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
3839 bfloat16 QEMU_FLATTEN
bfloat16_sqrt(bfloat16 a
, float_status
*status
)
3843 bfloat16_unpack_canonical(&p
, a
, status
);
3844 parts_sqrt(&p
, status
, &bfloat16_params
);
3845 return bfloat16_round_pack_canonical(&p
, status
);
3848 float128 QEMU_FLATTEN
float128_sqrt(float128 a
, float_status
*status
)
3852 float128_unpack_canonical(&p
, a
, status
);
3853 parts_sqrt(&p
, status
, &float128_params
);
3854 return float128_round_pack_canonical(&p
, status
);
/*----------------------------------------------------------------------------
| The pattern for a default generated NaN.
*----------------------------------------------------------------------------*/
3861 float16
float16_default_nan(float_status
*status
)
3865 parts_default_nan(&p
, status
);
3866 p
.frac
>>= float16_params
.frac_shift
;
3867 return float16_pack_raw(&p
);
3870 float32
float32_default_nan(float_status
*status
)
3874 parts_default_nan(&p
, status
);
3875 p
.frac
>>= float32_params
.frac_shift
;
3876 return float32_pack_raw(&p
);
3879 float64
float64_default_nan(float_status
*status
)
3883 parts_default_nan(&p
, status
);
3884 p
.frac
>>= float64_params
.frac_shift
;
3885 return float64_pack_raw(&p
);
3888 float128
float128_default_nan(float_status
*status
)
3892 parts_default_nan(&p
, status
);
3893 frac_shr(&p
, float128_params
.frac_shift
);
3894 return float128_pack_raw(&p
);
3897 bfloat16
bfloat16_default_nan(float_status
*status
)
3901 parts_default_nan(&p
, status
);
3902 p
.frac
>>= bfloat16_params
.frac_shift
;
3903 return bfloat16_pack_raw(&p
);
/*----------------------------------------------------------------------------
| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
*----------------------------------------------------------------------------*/
3910 float16
float16_silence_nan(float16 a
, float_status
*status
)
3914 float16_unpack_raw(&p
, a
);
3915 p
.frac
<<= float16_params
.frac_shift
;
3916 parts_silence_nan(&p
, status
);
3917 p
.frac
>>= float16_params
.frac_shift
;
3918 return float16_pack_raw(&p
);
3921 float32
float32_silence_nan(float32 a
, float_status
*status
)
3925 float32_unpack_raw(&p
, a
);
3926 p
.frac
<<= float32_params
.frac_shift
;
3927 parts_silence_nan(&p
, status
);
3928 p
.frac
>>= float32_params
.frac_shift
;
3929 return float32_pack_raw(&p
);
3932 float64
float64_silence_nan(float64 a
, float_status
*status
)
3936 float64_unpack_raw(&p
, a
);
3937 p
.frac
<<= float64_params
.frac_shift
;
3938 parts_silence_nan(&p
, status
);
3939 p
.frac
>>= float64_params
.frac_shift
;
3940 return float64_pack_raw(&p
);
3943 bfloat16
bfloat16_silence_nan(bfloat16 a
, float_status
*status
)
3947 bfloat16_unpack_raw(&p
, a
);
3948 p
.frac
<<= bfloat16_params
.frac_shift
;
3949 parts_silence_nan(&p
, status
);
3950 p
.frac
>>= bfloat16_params
.frac_shift
;
3951 return bfloat16_pack_raw(&p
);
3954 float128
float128_silence_nan(float128 a
, float_status
*status
)
3958 float128_unpack_raw(&p
, a
);
3959 frac_shl(&p
, float128_params
.frac_shift
);
3960 parts_silence_nan(&p
, status
);
3961 frac_shr(&p
, float128_params
.frac_shift
);
3962 return float128_pack_raw(&p
);
/*----------------------------------------------------------------------------
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/
3970 static bool parts_squash_denormal(FloatParts64 p
, float_status
*status
)
3972 if (p
.exp
== 0 && p
.frac
!= 0) {
3973 float_raise(float_flag_input_denormal
, status
);
3980 float16
float16_squash_input_denormal(float16 a
, float_status
*status
)
3982 if (status
->flush_inputs_to_zero
) {
3985 float16_unpack_raw(&p
, a
);
3986 if (parts_squash_denormal(p
, status
)) {
3987 return float16_set_sign(float16_zero
, p
.sign
);
3993 float32
float32_squash_input_denormal(float32 a
, float_status
*status
)
3995 if (status
->flush_inputs_to_zero
) {
3998 float32_unpack_raw(&p
, a
);
3999 if (parts_squash_denormal(p
, status
)) {
4000 return float32_set_sign(float32_zero
, p
.sign
);
4006 float64
float64_squash_input_denormal(float64 a
, float_status
*status
)
4008 if (status
->flush_inputs_to_zero
) {
4011 float64_unpack_raw(&p
, a
);
4012 if (parts_squash_denormal(p
, status
)) {
4013 return float64_set_sign(float64_zero
, p
.sign
);
4019 bfloat16
bfloat16_squash_input_denormal(bfloat16 a
, float_status
*status
)
4021 if (status
->flush_inputs_to_zero
) {
4024 bfloat16_unpack_raw(&p
, a
);
4025 if (parts_squash_denormal(p
, status
)) {
4026 return bfloat16_set_sign(bfloat16_zero
, p
.sign
);
4032 /*----------------------------------------------------------------------------
4033 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
4034 | and 7, and returns the properly rounded 32-bit integer corresponding to the
4035 | input. If `zSign' is 1, the input is negated before being converted to an
4036 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
4037 | is simply rounded to an integer, with the inexact exception raised if the
4038 | input cannot be represented exactly as an integer. However, if the fixed-
4039 | point input is too large, the invalid exception is raised and the largest
4040 | positive or negative integer is returned.
4041 *----------------------------------------------------------------------------*/
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    int32_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* The increment is applied below the 7 fraction (guard) bits;
     * 0x40 is exactly one-half ulp. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Increment only when the result LSB would otherwise be even. */
        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
    }
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    /* Exact tie: clear the LSB to round to even. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        absZ &= ~1;
    }
    z = absZ;
    if ( zSign ) z = - z;
    /* Out of int32 range: raise invalid, return the saturated value. */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise(float_flag_invalid, status);
        return zSign ? INT32_MIN : INT32_MAX;
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    return z;
}
4091 /*----------------------------------------------------------------------------
4092 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4093 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4094 | and returns the properly rounded 64-bit integer corresponding to the input.
4095 | If `zSign' is 1, the input is negated before being converted to an integer.
4096 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4097 | the inexact exception raised if the input cannot be represented exactly as
4098 | an integer. However, if the fixed-point input is too large, the invalid
4099 | exception is raised and the largest positive or negative integer is
4101 *----------------------------------------------------------------------------*/
static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;
    int64_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* absZ1 holds the fraction below the binary point; decide whether
     * to round the integer part (absZ0) up by one. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Top bit of the fraction set means >= one-half. */
        increment = ((int64_t) absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if ( increment ) {
        ++absZ0;
        /* Wrap to zero means the magnitude exceeded 2**64. */
        if ( absZ0 == 0 ) goto overflow;
        /* Exact tie: clear the LSB to round to even. */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }
    z = absZ0;
    if ( zSign ) z = - z;
    /* Sign mismatch after negation means the value left int64 range. */
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
 overflow:
        float_raise(float_flag_invalid, status);
        return zSign ? INT64_MIN : INT64_MAX;
    }
    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return z;
}
4153 /*----------------------------------------------------------------------------
4154 | Normalizes the subnormal single-precision floating-point value represented
4155 | by the denormalized significand `aSig'. The normalized exponent and
4156 | significand are stored at the locations pointed to by `zExpPtr' and
4157 | `zSigPtr', respectively.
4158 *----------------------------------------------------------------------------*/
4161 normalizeFloat32Subnormal(uint32_t aSig
, int *zExpPtr
, uint32_t *zSigPtr
)
4165 shiftCount
= clz32(aSig
) - 8;
4166 *zSigPtr
= aSig
<<shiftCount
;
4167 *zExpPtr
= 1 - shiftCount
;
4171 /*----------------------------------------------------------------------------
4172 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4173 | and significand `zSig', and returns the proper single-precision floating-
4174 | point value corresponding to the abstract input. Ordinarily, the abstract
4175 | value is simply rounded and packed into the single-precision format, with
4176 | the inexact exception raised if the abstract input cannot be represented
4177 | exactly. However, if the abstract value is too large, the overflow and
4178 | inexact exceptions are raised and an infinity or maximal finite value is
4179 | returned. If the abstract value is too small, the input value is rounded to
4180 | a subnormal number, and the underflow and inexact exceptions are raised if
4181 | the abstract input cannot be represented exactly as a subnormal single-
4182 | precision floating-point number.
4183 | The input significand `zSig' has its binary point between bits 30
4184 | and 29, which is 7 bits to the left of the usual location. This shifted
4185 | significand must be normalized or smaller. If `zSig' is not normalized,
4186 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4187 | and it must not require rounding. In the usual case that `zSig' is
4188 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4189 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4190 | Binary Floating-Point Arithmetic.
4191 *----------------------------------------------------------------------------*/
static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Increment below the 7 guard bits; 0x40 is a half-ulp. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
        break;
    }
    roundBits = zSig & 0x7F;
    /* One unsigned compare catches both zExp >= 0xFD and zExp < 0. */
    if ( 0xFD <= (uint16_t) zExp ) {
        if (    ( 0xFD < zExp )
             || (    ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* -!overflow_to_inf gives an all-ones significand, i.e. the
             * largest finite value, when not overflowing to infinity. */
            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat32(zSign, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < 0x80000000);
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>7;
    /* Exact tie: clear the LSB to round to even. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );
}
4270 /*----------------------------------------------------------------------------
4271 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4272 | and significand `zSig', and returns the proper single-precision floating-
4273 | point value corresponding to the abstract input. This routine is just like
4274 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4275 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4276 | floating-point exponent.
4277 *----------------------------------------------------------------------------*/
4280 normalizeRoundAndPackFloat32(bool zSign
, int zExp
, uint32_t zSig
,
4281 float_status
*status
)
4285 shiftCount
= clz32(zSig
) - 1;
4286 return roundAndPackFloat32(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
4291 /*----------------------------------------------------------------------------
4292 | Normalizes the subnormal double-precision floating-point value represented
4293 | by the denormalized significand `aSig'. The normalized exponent and
4294 | significand are stored at the locations pointed to by `zExpPtr' and
4295 | `zSigPtr', respectively.
4296 *----------------------------------------------------------------------------*/
4299 normalizeFloat64Subnormal(uint64_t aSig
, int *zExpPtr
, uint64_t *zSigPtr
)
4303 shiftCount
= clz64(aSig
) - 11;
4304 *zSigPtr
= aSig
<<shiftCount
;
4305 *zExpPtr
= 1 - shiftCount
;
4309 /*----------------------------------------------------------------------------
4310 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4311 | double-precision floating-point value, returning the result. After being
4312 | shifted into the proper positions, the three fields are simply added
4313 | together to form the result. This means that any integer portion of `zSig'
4314 | will be added into the exponent. Since a properly normalized significand
4315 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4316 | than the desired result exponent whenever `zSig' is a complete, normalized
4318 *----------------------------------------------------------------------------*/
4320 static inline float64
packFloat64(bool zSign
, int zExp
, uint64_t zSig
)
4323 return make_float64(
4324 ( ( (uint64_t) zSign
)<<63 ) + ( ( (uint64_t) zExp
)<<52 ) + zSig
);
4328 /*----------------------------------------------------------------------------
4329 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4330 | and significand `zSig', and returns the proper double-precision floating-
4331 | point value corresponding to the abstract input. Ordinarily, the abstract
4332 | value is simply rounded and packed into the double-precision format, with
4333 | the inexact exception raised if the abstract input cannot be represented
4334 | exactly. However, if the abstract value is too large, the overflow and
4335 | inexact exceptions are raised and an infinity or maximal finite value is
4336 | returned. If the abstract value is too small, the input value is rounded to
4337 | a subnormal number, and the underflow and inexact exceptions are raised if
4338 | the abstract input cannot be represented exactly as a subnormal double-
4339 | precision floating-point number.
4340 | The input significand `zSig' has its binary point between bits 62
4341 | and 61, which is 10 bits to the left of the usual location. This shifted
4342 | significand must be normalized or smaller. If `zSig' is not normalized,
4343 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4344 | and it must not require rounding. In the usual case that `zSig' is
4345 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4346 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4347 | Binary Floating-Point Arithmetic.
4348 *----------------------------------------------------------------------------*/
static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Increment below the 10 guard bits; 0x200 is a half-ulp. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x200;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x3ff;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x3ff : 0;
        break;
    case float_round_to_odd:
        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
        break;
    default:
        abort();
    }
    roundBits = zSig & 0x3FF;
    /* One unsigned compare catches both zExp >= 0x7FD and zExp < 0. */
    if ( 0x7FD <= (uint16_t) zExp ) {
        if (    ( 0x7FD < zExp )
             || (    ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* -(!overflow_to_inf) gives an all-ones significand, i.e. the
             * largest finite value, when not overflowing to infinity. */
            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat64(zSign, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>10;
    /* Exact tie: clear the LSB to round to even. */
    if (!(roundBits ^ 0x200) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );
}
4426 /*----------------------------------------------------------------------------
4427 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4428 | and significand `zSig', and returns the proper double-precision floating-
4429 | point value corresponding to the abstract input. This routine is just like
4430 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4431 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4432 | floating-point exponent.
4433 *----------------------------------------------------------------------------*/
4436 normalizeRoundAndPackFloat64(bool zSign
, int zExp
, uint64_t zSig
,
4437 float_status
*status
)
4441 shiftCount
= clz64(zSig
) - 1;
4442 return roundAndPackFloat64(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
,
4447 /*----------------------------------------------------------------------------
4448 | Normalizes the subnormal extended double-precision floating-point value
4449 | represented by the denormalized significand `aSig'. The normalized exponent
4450 | and significand are stored at the locations pointed to by `zExpPtr' and
4451 | `zSigPtr', respectively.
4452 *----------------------------------------------------------------------------*/
4454 void normalizeFloatx80Subnormal(uint64_t aSig
, int32_t *zExpPtr
,
4459 shiftCount
= clz64(aSig
);
4460 *zSigPtr
= aSig
<<shiftCount
;
4461 *zExpPtr
= 1 - shiftCount
;
4464 /*----------------------------------------------------------------------------
4465 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4466 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4467 | and returns the proper extended double-precision floating-point value
4468 | corresponding to the abstract input. Ordinarily, the abstract value is
4469 | rounded and packed into the extended double-precision format, with the
4470 | inexact exception raised if the abstract input cannot be represented
4471 | exactly. However, if the abstract value is too large, the overflow and
4472 | inexact exceptions are raised and an infinity or maximal finite value is
4473 | returned. If the abstract value is too small, the input value is rounded to
4474 | a subnormal number, and the underflow and inexact exceptions are raised if
4475 | the abstract input cannot be represented exactly as a subnormal extended
4476 | double-precision floating-point number.
4477 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4478 | the result is rounded to the same number of bits as single or double
4479 | precision, respectively. Otherwise, the result is rounded to the full
4480 | precision of the extended double-precision format.
4481 | The input significand must be normalized or smaller. If the input
4482 | significand is not normalized, `zExp' must be 0; in that case, the result
4483 | returned is a subnormal number, and it must not require rounding. The
4484 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4485 | Floating-Point Arithmetic.
4486 *----------------------------------------------------------------------------*/
/*
 * NOTE(review): many interior lines of this block were lost in
 * extraction and have been restored from the canonical structure —
 * verify against the upstream file before relying on exact behavior.
 */
floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    FloatRoundMode roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Select the rounding position: full 64-bit precision, or truncated
     * to double/single width within the 64-bit significand. */
    switch (roundingPrecision) {
    case floatx80_precision_x:
        goto precision80;
    case floatx80_precision_d:
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
        break;
    case floatx80_precision_s:
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
        break;
    default:
        g_assert_not_reached();
    }
    /* Fold the low word into the sticky bit of the high word. */
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || (zSig0 <= zSig0 + roundIncrement);
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /* A carry into the integer bit promotes out of subnormal. */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    if ( zSig0 < roundIncrement ) {
        /* Significand carried out completely: renormalize. */
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    /* Ties-to-even: widen the mask so the tie bit is cleared too. */
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /* Full-precision path: round based on the 64-bit low word zSig1. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Directed modes that saturate return the largest finite
             * value instead of infinity. */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* Recompute the increment: zSig1 changed in the shift. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            /* Carry out of the significand: renormalize. */
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            /* Exact tie: clear the LSB to round to even. */
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );
}
4678 /*----------------------------------------------------------------------------
4679 | Takes an abstract floating-point value having sign `zSign', exponent
4680 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4681 | and returns the proper extended double-precision floating-point value
4682 | corresponding to the abstract input. This routine is just like
4683 | `roundAndPackFloatx80' except that the input significand does not have to be
4685 *----------------------------------------------------------------------------*/
4687 floatx80
normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision
,
4688 bool zSign
, int32_t zExp
,
4689 uint64_t zSig0
, uint64_t zSig1
,
4690 float_status
*status
)
4699 shiftCount
= clz64(zSig0
);
4700 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
4702 return roundAndPackFloatx80(roundingPrecision
, zSign
, zExp
,
4703 zSig0
, zSig1
, status
);
4707 /*----------------------------------------------------------------------------
4708 | Returns the least-significant 64 fraction bits of the quadruple-precision
4709 | floating-point value `a'.
4710 *----------------------------------------------------------------------------*/
4712 static inline uint64_t extractFloat128Frac1( float128 a
)
4719 /*----------------------------------------------------------------------------
4720 | Returns the most-significant 48 fraction bits of the quadruple-precision
4721 | floating-point value `a'.
4722 *----------------------------------------------------------------------------*/
4724 static inline uint64_t extractFloat128Frac0( float128 a
)
4727 return a
.high
& UINT64_C(0x0000FFFFFFFFFFFF);
4731 /*----------------------------------------------------------------------------
4732 | Returns the exponent bits of the quadruple-precision floating-point value
4734 *----------------------------------------------------------------------------*/
4736 static inline int32_t extractFloat128Exp( float128 a
)
4739 return ( a
.high
>>48 ) & 0x7FFF;
4743 /*----------------------------------------------------------------------------
4744 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4745 *----------------------------------------------------------------------------*/
4747 static inline bool extractFloat128Sign(float128 a
)
4749 return a
.high
>> 63;
4752 /*----------------------------------------------------------------------------
4753 | Normalizes the subnormal quadruple-precision floating-point value
4754 | represented by the denormalized significand formed by the concatenation of
4755 | `aSig0' and `aSig1'. The normalized exponent is stored at the location
4756 | pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4757 | significand are stored at the location pointed to by `zSig0Ptr', and the
4758 | least significant 64 bits of the normalized significand are stored at the
4759 | location pointed to by `zSig1Ptr'.
4760 *----------------------------------------------------------------------------*/
4763 normalizeFloat128Subnormal(
4774 shiftCount
= clz64(aSig1
) - 15;
4775 if ( shiftCount
< 0 ) {
4776 *zSig0Ptr
= aSig1
>>( - shiftCount
);
4777 *zSig1Ptr
= aSig1
<<( shiftCount
& 63 );
4780 *zSig0Ptr
= aSig1
<<shiftCount
;
4783 *zExpPtr
= - shiftCount
- 63;
4786 shiftCount
= clz64(aSig0
) - 15;
4787 shortShift128Left( aSig0
, aSig1
, shiftCount
, zSig0Ptr
, zSig1Ptr
);
4788 *zExpPtr
= 1 - shiftCount
;
4793 /*----------------------------------------------------------------------------
4794 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4795 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4796 | floating-point value, returning the result. After being shifted into the
4797 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4798 | added together to form the most significant 32 bits of the result. This
4799 | means that any integer portion of `zSig0' will be added into the exponent.
4800 | Since a properly normalized significand will have an integer portion equal
4801 | to 1, the `zExp' input should be 1 less than the desired result exponent
4802 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4804 *----------------------------------------------------------------------------*/
4806 static inline float128
4807 packFloat128(bool zSign
, int32_t zExp
, uint64_t zSig0
, uint64_t zSig1
)
4812 z
.high
= ((uint64_t)zSign
<< 63) + ((uint64_t)zExp
<< 48) + zSig0
;
4816 /*----------------------------------------------------------------------------
4817 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4818 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4819 | and `zSig2', and returns the proper quadruple-precision floating-point value
4820 | corresponding to the abstract input. Ordinarily, the abstract value is
4821 | simply rounded and packed into the quadruple-precision format, with the
4822 | inexact exception raised if the abstract input cannot be represented
4823 | exactly. However, if the abstract value is too large, the overflow and
4824 | inexact exceptions are raised and an infinity or maximal finite value is
4825 | returned. If the abstract value is too small, the input value is rounded to
4826 | a subnormal number, and the underflow and inexact exceptions are raised if
4827 | the abstract input cannot be represented exactly as a subnormal quadruple-
4828 | precision floating-point number.
4829 | The input significand must be normalized or smaller. If the input
4830 | significand is not normalized, `zExp' must be 0; in that case, the result
4831 | returned is a subnormal number, and it must not require rounding. In the
4832 | usual case that the input significand is normalized, `zExp' must be 1 less
4833 | than the ``true'' floating-point exponent. The handling of underflow and
4834 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4835 *----------------------------------------------------------------------------*/
4837 static float128
roundAndPackFloat128(bool zSign
, int32_t zExp
,
4838 uint64_t zSig0
, uint64_t zSig1
,
4839 uint64_t zSig2
, float_status
*status
)
4841 int8_t roundingMode
;
4842 bool roundNearestEven
, increment
, isTiny
;
4844 roundingMode
= status
->float_rounding_mode
;
4845 roundNearestEven
= ( roundingMode
== float_round_nearest_even
);
4846 switch (roundingMode
) {
4847 case float_round_nearest_even
:
4848 case float_round_ties_away
:
4849 increment
= ((int64_t)zSig2
< 0);
4851 case float_round_to_zero
:
4854 case float_round_up
:
4855 increment
= !zSign
&& zSig2
;
4857 case float_round_down
:
4858 increment
= zSign
&& zSig2
;
4860 case float_round_to_odd
:
4861 increment
= !(zSig1
& 0x1) && zSig2
;
4866 if ( 0x7FFD <= (uint32_t) zExp
) {
4867 if ( ( 0x7FFD < zExp
)
4868 || ( ( zExp
== 0x7FFD )
4870 UINT64_C(0x0001FFFFFFFFFFFF),
4871 UINT64_C(0xFFFFFFFFFFFFFFFF),
4878 float_raise(float_flag_overflow
| float_flag_inexact
, status
);
4879 if ( ( roundingMode
== float_round_to_zero
)
4880 || ( zSign
&& ( roundingMode
== float_round_up
) )
4881 || ( ! zSign
&& ( roundingMode
== float_round_down
) )
4882 || (roundingMode
== float_round_to_odd
)
4888 UINT64_C(0x0000FFFFFFFFFFFF),
4889 UINT64_C(0xFFFFFFFFFFFFFFFF)
4892 return packFloat128( zSign
, 0x7FFF, 0, 0 );
4895 if (status
->flush_to_zero
) {
4896 float_raise(float_flag_output_denormal
, status
);
4897 return packFloat128(zSign
, 0, 0, 0);
4899 isTiny
= status
->tininess_before_rounding
4902 || lt128(zSig0
, zSig1
,
4903 UINT64_C(0x0001FFFFFFFFFFFF),
4904 UINT64_C(0xFFFFFFFFFFFFFFFF));
4905 shift128ExtraRightJamming(
4906 zSig0
, zSig1
, zSig2
, - zExp
, &zSig0
, &zSig1
, &zSig2
);
4908 if (isTiny
&& zSig2
) {
4909 float_raise(float_flag_underflow
, status
);
4911 switch (roundingMode
) {
4912 case float_round_nearest_even
:
4913 case float_round_ties_away
:
4914 increment
= ((int64_t)zSig2
< 0);
4916 case float_round_to_zero
:
4919 case float_round_up
:
4920 increment
= !zSign
&& zSig2
;
4922 case float_round_down
:
4923 increment
= zSign
&& zSig2
;
4925 case float_round_to_odd
:
4926 increment
= !(zSig1
& 0x1) && zSig2
;
4934 float_raise(float_flag_inexact
, status
);
4937 add128( zSig0
, zSig1
, 0, 1, &zSig0
, &zSig1
);
4938 if ((zSig2
+ zSig2
== 0) && roundNearestEven
) {
4943 if ( ( zSig0
| zSig1
) == 0 ) zExp
= 0;
4945 return packFloat128( zSign
, zExp
, zSig0
, zSig1
);
4949 /*----------------------------------------------------------------------------
4950 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4951 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4952 | returns the proper quadruple-precision floating-point value corresponding
4953 | to the abstract input. This routine is just like `roundAndPackFloat128'
4954 | except that the input significand has fewer bits and does not have to be
4955 | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4957 *----------------------------------------------------------------------------*/
4959 static float128
normalizeRoundAndPackFloat128(bool zSign
, int32_t zExp
,
4960 uint64_t zSig0
, uint64_t zSig1
,
4961 float_status
*status
)
4971 shiftCount
= clz64(zSig0
) - 15;
4972 if ( 0 <= shiftCount
) {
4974 shortShift128Left( zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
4977 shift128ExtraRightJamming(
4978 zSig0
, zSig1
, 0, - shiftCount
, &zSig0
, &zSig1
, &zSig2
);
4981 return roundAndPackFloat128(zSign
, zExp
, zSig0
, zSig1
, zSig2
, status
);
4986 /*----------------------------------------------------------------------------
4987 | Returns the result of converting the 32-bit two's complement integer `a'
4988 | to the extended double-precision floating-point format. The conversion
4989 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4991 *----------------------------------------------------------------------------*/
4993 floatx80
int32_to_floatx80(int32_t a
, float_status
*status
)
5000 if ( a
== 0 ) return packFloatx80( 0, 0, 0 );
5002 absA
= zSign
? - a
: a
;
5003 shiftCount
= clz32(absA
) + 32;
5005 return packFloatx80( zSign
, 0x403E - shiftCount
, zSig
<<shiftCount
);
5009 /*----------------------------------------------------------------------------
5010 | Returns the result of converting the 64-bit two's complement integer `a'
5011 | to the extended double-precision floating-point format. The conversion
5012 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5014 *----------------------------------------------------------------------------*/
5016 floatx80
int64_to_floatx80(int64_t a
, float_status
*status
)
5022 if ( a
== 0 ) return packFloatx80( 0, 0, 0 );
5024 absA
= zSign
? - a
: a
;
5025 shiftCount
= clz64(absA
);
5026 return packFloatx80( zSign
, 0x403E - shiftCount
, absA
<<shiftCount
);
5030 /*----------------------------------------------------------------------------
5031 | Returns the result of converting the single-precision floating-point value
5032 | `a' to the extended double-precision floating-point format. The conversion
5033 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5035 *----------------------------------------------------------------------------*/
5037 floatx80
float32_to_floatx80(float32 a
, float_status
*status
)
5043 a
= float32_squash_input_denormal(a
, status
);
5044 aSig
= extractFloat32Frac( a
);
5045 aExp
= extractFloat32Exp( a
);
5046 aSign
= extractFloat32Sign( a
);
5047 if ( aExp
== 0xFF ) {
5049 floatx80 res
= commonNaNToFloatx80(float32ToCommonNaN(a
, status
),
5051 return floatx80_silence_nan(res
, status
);
5053 return packFloatx80(aSign
,
5054 floatx80_infinity_high
,
5055 floatx80_infinity_low
);
5058 if ( aSig
== 0 ) return packFloatx80( aSign
, 0, 0 );
5059 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5062 return packFloatx80( aSign
, aExp
+ 0x3F80, ( (uint64_t) aSig
)<<40 );
5066 /*----------------------------------------------------------------------------
5067 | Returns the remainder of the single-precision floating-point value `a'
5068 | with respect to the corresponding value `b'. The operation is performed
5069 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5070 *----------------------------------------------------------------------------*/
5072 float32
float32_rem(float32 a
, float32 b
, float_status
*status
)
5075 int aExp
, bExp
, expDiff
;
5076 uint32_t aSig
, bSig
;
5078 uint64_t aSig64
, bSig64
, q64
;
5079 uint32_t alternateASig
;
5081 a
= float32_squash_input_denormal(a
, status
);
5082 b
= float32_squash_input_denormal(b
, status
);
5084 aSig
= extractFloat32Frac( a
);
5085 aExp
= extractFloat32Exp( a
);
5086 aSign
= extractFloat32Sign( a
);
5087 bSig
= extractFloat32Frac( b
);
5088 bExp
= extractFloat32Exp( b
);
5089 if ( aExp
== 0xFF ) {
5090 if ( aSig
|| ( ( bExp
== 0xFF ) && bSig
) ) {
5091 return propagateFloat32NaN(a
, b
, status
);
5093 float_raise(float_flag_invalid
, status
);
5094 return float32_default_nan(status
);
5096 if ( bExp
== 0xFF ) {
5098 return propagateFloat32NaN(a
, b
, status
);
5104 float_raise(float_flag_invalid
, status
);
5105 return float32_default_nan(status
);
5107 normalizeFloat32Subnormal( bSig
, &bExp
, &bSig
);
5110 if ( aSig
== 0 ) return a
;
5111 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5113 expDiff
= aExp
- bExp
;
5116 if ( expDiff
< 32 ) {
5119 if ( expDiff
< 0 ) {
5120 if ( expDiff
< -1 ) return a
;
5123 q
= ( bSig
<= aSig
);
5124 if ( q
) aSig
-= bSig
;
5125 if ( 0 < expDiff
) {
5126 q
= ( ( (uint64_t) aSig
)<<32 ) / bSig
;
5129 aSig
= ( ( aSig
>>1 )<<( expDiff
- 1 ) ) - bSig
* q
;
5137 if ( bSig
<= aSig
) aSig
-= bSig
;
5138 aSig64
= ( (uint64_t) aSig
)<<40;
5139 bSig64
= ( (uint64_t) bSig
)<<40;
5141 while ( 0 < expDiff
) {
5142 q64
= estimateDiv128To64( aSig64
, 0, bSig64
);
5143 q64
= ( 2 < q64
) ? q64
- 2 : 0;
5144 aSig64
= - ( ( bSig
* q64
)<<38 );
5148 q64
= estimateDiv128To64( aSig64
, 0, bSig64
);
5149 q64
= ( 2 < q64
) ? q64
- 2 : 0;
5150 q
= q64
>>( 64 - expDiff
);
5152 aSig
= ( ( aSig64
>>33 )<<( expDiff
- 1 ) ) - bSig
* q
;
5155 alternateASig
= aSig
;
5158 } while ( 0 <= (int32_t) aSig
);
5159 sigMean
= aSig
+ alternateASig
;
5160 if ( ( sigMean
< 0 ) || ( ( sigMean
== 0 ) && ( q
& 1 ) ) ) {
5161 aSig
= alternateASig
;
5163 zSign
= ( (int32_t) aSig
< 0 );
5164 if ( zSign
) aSig
= - aSig
;
5165 return normalizeRoundAndPackFloat32(aSign
^ zSign
, bExp
, aSig
, status
);
5170 /*----------------------------------------------------------------------------
5171 | Returns the binary exponential of the single-precision floating-point value
5172 | `a'. The operation is performed according to the IEC/IEEE Standard for
5173 | Binary Floating-Point Arithmetic.
5175 | Uses the following identities:
5177 | 1. -------------------------------------------------------------------------
5181 | 2. -------------------------------------------------------------------------
5184 | e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5186 *----------------------------------------------------------------------------*/
5188 static const float64 float32_exp2_coefficients
[15] =
5190 const_float64( 0x3ff0000000000000ll
), /* 1 */
5191 const_float64( 0x3fe0000000000000ll
), /* 2 */
5192 const_float64( 0x3fc5555555555555ll
), /* 3 */
5193 const_float64( 0x3fa5555555555555ll
), /* 4 */
5194 const_float64( 0x3f81111111111111ll
), /* 5 */
5195 const_float64( 0x3f56c16c16c16c17ll
), /* 6 */
5196 const_float64( 0x3f2a01a01a01a01all
), /* 7 */
5197 const_float64( 0x3efa01a01a01a01all
), /* 8 */
5198 const_float64( 0x3ec71de3a556c734ll
), /* 9 */
5199 const_float64( 0x3e927e4fb7789f5cll
), /* 10 */
5200 const_float64( 0x3e5ae64567f544e4ll
), /* 11 */
5201 const_float64( 0x3e21eed8eff8d898ll
), /* 12 */
5202 const_float64( 0x3de6124613a86d09ll
), /* 13 */
5203 const_float64( 0x3da93974a8c07c9dll
), /* 14 */
5204 const_float64( 0x3d6ae7f3e733b81fll
), /* 15 */
5207 float32
float32_exp2(float32 a
, float_status
*status
)
5214 a
= float32_squash_input_denormal(a
, status
);
5216 aSig
= extractFloat32Frac( a
);
5217 aExp
= extractFloat32Exp( a
);
5218 aSign
= extractFloat32Sign( a
);
5220 if ( aExp
== 0xFF) {
5222 return propagateFloat32NaN(a
, float32_zero
, status
);
5224 return (aSign
) ? float32_zero
: a
;
5227 if (aSig
== 0) return float32_one
;
5230 float_raise(float_flag_inexact
, status
);
5232 /* ******************************* */
5233 /* using float64 for approximation */
5234 /* ******************************* */
5235 x
= float32_to_float64(a
, status
);
5236 x
= float64_mul(x
, float64_ln2
, status
);
5240 for (i
= 0 ; i
< 15 ; i
++) {
5243 f
= float64_mul(xn
, float32_exp2_coefficients
[i
], status
);
5244 r
= float64_add(r
, f
, status
);
5246 xn
= float64_mul(xn
, x
, status
);
5249 return float64_to_float32(r
, status
);
5252 /*----------------------------------------------------------------------------
5253 | Returns the binary log of the single-precision floating-point value `a'.
5254 | The operation is performed according to the IEC/IEEE Standard for Binary
5255 | Floating-Point Arithmetic.
5256 *----------------------------------------------------------------------------*/
5257 float32
float32_log2(float32 a
, float_status
*status
)
5261 uint32_t aSig
, zSig
, i
;
5263 a
= float32_squash_input_denormal(a
, status
);
5264 aSig
= extractFloat32Frac( a
);
5265 aExp
= extractFloat32Exp( a
);
5266 aSign
= extractFloat32Sign( a
);
5269 if ( aSig
== 0 ) return packFloat32( 1, 0xFF, 0 );
5270 normalizeFloat32Subnormal( aSig
, &aExp
, &aSig
);
5273 float_raise(float_flag_invalid
, status
);
5274 return float32_default_nan(status
);
5276 if ( aExp
== 0xFF ) {
5278 return propagateFloat32NaN(a
, float32_zero
, status
);
5288 for (i
= 1 << 22; i
> 0; i
>>= 1) {
5289 aSig
= ( (uint64_t)aSig
* aSig
) >> 23;
5290 if ( aSig
& 0x01000000 ) {
5299 return normalizeRoundAndPackFloat32(zSign
, 0x85, zSig
, status
);
5302 /*----------------------------------------------------------------------------
5303 | Returns the result of converting the double-precision floating-point value
5304 | `a' to the extended double-precision floating-point format. The conversion
5305 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5307 *----------------------------------------------------------------------------*/
5309 floatx80
float64_to_floatx80(float64 a
, float_status
*status
)
5315 a
= float64_squash_input_denormal(a
, status
);
5316 aSig
= extractFloat64Frac( a
);
5317 aExp
= extractFloat64Exp( a
);
5318 aSign
= extractFloat64Sign( a
);
5319 if ( aExp
== 0x7FF ) {
5321 floatx80 res
= commonNaNToFloatx80(float64ToCommonNaN(a
, status
),
5323 return floatx80_silence_nan(res
, status
);
5325 return packFloatx80(aSign
,
5326 floatx80_infinity_high
,
5327 floatx80_infinity_low
);
5330 if ( aSig
== 0 ) return packFloatx80( aSign
, 0, 0 );
5331 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5335 aSign
, aExp
+ 0x3C00, (aSig
| UINT64_C(0x0010000000000000)) << 11);
5339 /*----------------------------------------------------------------------------
5340 | Returns the remainder of the double-precision floating-point value `a'
5341 | with respect to the corresponding value `b'. The operation is performed
5342 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5343 *----------------------------------------------------------------------------*/
5345 float64
float64_rem(float64 a
, float64 b
, float_status
*status
)
5348 int aExp
, bExp
, expDiff
;
5349 uint64_t aSig
, bSig
;
5350 uint64_t q
, alternateASig
;
5353 a
= float64_squash_input_denormal(a
, status
);
5354 b
= float64_squash_input_denormal(b
, status
);
5355 aSig
= extractFloat64Frac( a
);
5356 aExp
= extractFloat64Exp( a
);
5357 aSign
= extractFloat64Sign( a
);
5358 bSig
= extractFloat64Frac( b
);
5359 bExp
= extractFloat64Exp( b
);
5360 if ( aExp
== 0x7FF ) {
5361 if ( aSig
|| ( ( bExp
== 0x7FF ) && bSig
) ) {
5362 return propagateFloat64NaN(a
, b
, status
);
5364 float_raise(float_flag_invalid
, status
);
5365 return float64_default_nan(status
);
5367 if ( bExp
== 0x7FF ) {
5369 return propagateFloat64NaN(a
, b
, status
);
5375 float_raise(float_flag_invalid
, status
);
5376 return float64_default_nan(status
);
5378 normalizeFloat64Subnormal( bSig
, &bExp
, &bSig
);
5381 if ( aSig
== 0 ) return a
;
5382 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5384 expDiff
= aExp
- bExp
;
5385 aSig
= (aSig
| UINT64_C(0x0010000000000000)) << 11;
5386 bSig
= (bSig
| UINT64_C(0x0010000000000000)) << 11;
5387 if ( expDiff
< 0 ) {
5388 if ( expDiff
< -1 ) return a
;
5391 q
= ( bSig
<= aSig
);
5392 if ( q
) aSig
-= bSig
;
5394 while ( 0 < expDiff
) {
5395 q
= estimateDiv128To64( aSig
, 0, bSig
);
5396 q
= ( 2 < q
) ? q
- 2 : 0;
5397 aSig
= - ( ( bSig
>>2 ) * q
);
5401 if ( 0 < expDiff
) {
5402 q
= estimateDiv128To64( aSig
, 0, bSig
);
5403 q
= ( 2 < q
) ? q
- 2 : 0;
5406 aSig
= ( ( aSig
>>1 )<<( expDiff
- 1 ) ) - bSig
* q
;
5413 alternateASig
= aSig
;
5416 } while ( 0 <= (int64_t) aSig
);
5417 sigMean
= aSig
+ alternateASig
;
5418 if ( ( sigMean
< 0 ) || ( ( sigMean
== 0 ) && ( q
& 1 ) ) ) {
5419 aSig
= alternateASig
;
5421 zSign
= ( (int64_t) aSig
< 0 );
5422 if ( zSign
) aSig
= - aSig
;
5423 return normalizeRoundAndPackFloat64(aSign
^ zSign
, bExp
, aSig
, status
);
5427 /*----------------------------------------------------------------------------
5428 | Returns the binary log of the double-precision floating-point value `a'.
5429 | The operation is performed according to the IEC/IEEE Standard for Binary
5430 | Floating-Point Arithmetic.
5431 *----------------------------------------------------------------------------*/
5432 float64
float64_log2(float64 a
, float_status
*status
)
5436 uint64_t aSig
, aSig0
, aSig1
, zSig
, i
;
5437 a
= float64_squash_input_denormal(a
, status
);
5439 aSig
= extractFloat64Frac( a
);
5440 aExp
= extractFloat64Exp( a
);
5441 aSign
= extractFloat64Sign( a
);
5444 if ( aSig
== 0 ) return packFloat64( 1, 0x7FF, 0 );
5445 normalizeFloat64Subnormal( aSig
, &aExp
, &aSig
);
5448 float_raise(float_flag_invalid
, status
);
5449 return float64_default_nan(status
);
5451 if ( aExp
== 0x7FF ) {
5453 return propagateFloat64NaN(a
, float64_zero
, status
);
5459 aSig
|= UINT64_C(0x0010000000000000);
5461 zSig
= (uint64_t)aExp
<< 52;
5462 for (i
= 1LL << 51; i
> 0; i
>>= 1) {
5463 mul64To128( aSig
, aSig
, &aSig0
, &aSig1
);
5464 aSig
= ( aSig0
<< 12 ) | ( aSig1
>> 52 );
5465 if ( aSig
& UINT64_C(0x0020000000000000) ) {
5473 return normalizeRoundAndPackFloat64(zSign
, 0x408, zSig
, status
);
5476 /*----------------------------------------------------------------------------
5477 | Returns the result of converting the extended double-precision floating-
5478 | point value `a' to the 32-bit two's complement integer format. The
5479 | conversion is performed according to the IEC/IEEE Standard for Binary
5480 | Floating-Point Arithmetic---which means in particular that the conversion
5481 | is rounded according to the current rounding mode. If `a' is a NaN, the
5482 | largest positive integer is returned. Otherwise, if the conversion
5483 | overflows, the largest integer with the same sign as `a' is returned.
5484 *----------------------------------------------------------------------------*/
5486 int32_t floatx80_to_int32(floatx80 a
, float_status
*status
)
5489 int32_t aExp
, shiftCount
;
5492 if (floatx80_invalid_encoding(a
)) {
5493 float_raise(float_flag_invalid
, status
);
5496 aSig
= extractFloatx80Frac( a
);
5497 aExp
= extractFloatx80Exp( a
);
5498 aSign
= extractFloatx80Sign( a
);
5499 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) aSign
= 0;
5500 shiftCount
= 0x4037 - aExp
;
5501 if ( shiftCount
<= 0 ) shiftCount
= 1;
5502 shift64RightJamming( aSig
, shiftCount
, &aSig
);
5503 return roundAndPackInt32(aSign
, aSig
, status
);
5507 /*----------------------------------------------------------------------------
5508 | Returns the result of converting the extended double-precision floating-
5509 | point value `a' to the 32-bit two's complement integer format. The
5510 | conversion is performed according to the IEC/IEEE Standard for Binary
5511 | Floating-Point Arithmetic, except that the conversion is always rounded
5512 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5513 | Otherwise, if the conversion overflows, the largest integer with the same
5514 | sign as `a' is returned.
5515 *----------------------------------------------------------------------------*/
5517 int32_t floatx80_to_int32_round_to_zero(floatx80 a
, float_status
*status
)
5520 int32_t aExp
, shiftCount
;
5521 uint64_t aSig
, savedASig
;
5524 if (floatx80_invalid_encoding(a
)) {
5525 float_raise(float_flag_invalid
, status
);
5528 aSig
= extractFloatx80Frac( a
);
5529 aExp
= extractFloatx80Exp( a
);
5530 aSign
= extractFloatx80Sign( a
);
5531 if ( 0x401E < aExp
) {
5532 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) aSign
= 0;
5535 else if ( aExp
< 0x3FFF ) {
5537 float_raise(float_flag_inexact
, status
);
5541 shiftCount
= 0x403E - aExp
;
5543 aSig
>>= shiftCount
;
5545 if ( aSign
) z
= - z
;
5546 if ( ( z
< 0 ) ^ aSign
) {
5548 float_raise(float_flag_invalid
, status
);
5549 return aSign
? (int32_t) 0x80000000 : 0x7FFFFFFF;
5551 if ( ( aSig
<<shiftCount
) != savedASig
) {
5552 float_raise(float_flag_inexact
, status
);
5558 /*----------------------------------------------------------------------------
5559 | Returns the result of converting the extended double-precision floating-
5560 | point value `a' to the 64-bit two's complement integer format. The
5561 | conversion is performed according to the IEC/IEEE Standard for Binary
5562 | Floating-Point Arithmetic---which means in particular that the conversion
5563 | is rounded according to the current rounding mode. If `a' is a NaN,
5564 | the largest positive integer is returned. Otherwise, if the conversion
5565 | overflows, the largest integer with the same sign as `a' is returned.
5566 *----------------------------------------------------------------------------*/
5568 int64_t floatx80_to_int64(floatx80 a
, float_status
*status
)
5571 int32_t aExp
, shiftCount
;
5572 uint64_t aSig
, aSigExtra
;
5574 if (floatx80_invalid_encoding(a
)) {
5575 float_raise(float_flag_invalid
, status
);
5578 aSig
= extractFloatx80Frac( a
);
5579 aExp
= extractFloatx80Exp( a
);
5580 aSign
= extractFloatx80Sign( a
);
5581 shiftCount
= 0x403E - aExp
;
5582 if ( shiftCount
<= 0 ) {
5584 float_raise(float_flag_invalid
, status
);
5585 if (!aSign
|| floatx80_is_any_nan(a
)) {
5593 shift64ExtraRightJamming( aSig
, 0, shiftCount
, &aSig
, &aSigExtra
);
5595 return roundAndPackInt64(aSign
, aSig
, aSigExtra
, status
);
5599 /*----------------------------------------------------------------------------
5600 | Returns the result of converting the extended double-precision floating-
5601 | point value `a' to the 64-bit two's complement integer format. The
5602 | conversion is performed according to the IEC/IEEE Standard for Binary
5603 | Floating-Point Arithmetic, except that the conversion is always rounded
5604 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5605 | Otherwise, if the conversion overflows, the largest integer with the same
5606 | sign as `a' is returned.
5607 *----------------------------------------------------------------------------*/
5609 int64_t floatx80_to_int64_round_to_zero(floatx80 a
, float_status
*status
)
5612 int32_t aExp
, shiftCount
;
5616 if (floatx80_invalid_encoding(a
)) {
5617 float_raise(float_flag_invalid
, status
);
5620 aSig
= extractFloatx80Frac( a
);
5621 aExp
= extractFloatx80Exp( a
);
5622 aSign
= extractFloatx80Sign( a
);
5623 shiftCount
= aExp
- 0x403E;
5624 if ( 0 <= shiftCount
) {
5625 aSig
&= UINT64_C(0x7FFFFFFFFFFFFFFF);
5626 if ( ( a
.high
!= 0xC03E ) || aSig
) {
5627 float_raise(float_flag_invalid
, status
);
5628 if ( ! aSign
|| ( ( aExp
== 0x7FFF ) && aSig
) ) {
5634 else if ( aExp
< 0x3FFF ) {
5636 float_raise(float_flag_inexact
, status
);
5640 z
= aSig
>>( - shiftCount
);
5641 if ( (uint64_t) ( aSig
<<( shiftCount
& 63 ) ) ) {
5642 float_raise(float_flag_inexact
, status
);
5644 if ( aSign
) z
= - z
;
5649 /*----------------------------------------------------------------------------
5650 | Returns the result of converting the extended double-precision floating-
5651 | point value `a' to the single-precision floating-point format. The
5652 | conversion is performed according to the IEC/IEEE Standard for Binary
5653 | Floating-Point Arithmetic.
5654 *----------------------------------------------------------------------------*/
5656 float32
floatx80_to_float32(floatx80 a
, float_status
*status
)
5662 if (floatx80_invalid_encoding(a
)) {
5663 float_raise(float_flag_invalid
, status
);
5664 return float32_default_nan(status
);
5666 aSig
= extractFloatx80Frac( a
);
5667 aExp
= extractFloatx80Exp( a
);
5668 aSign
= extractFloatx80Sign( a
);
5669 if ( aExp
== 0x7FFF ) {
5670 if ( (uint64_t) ( aSig
<<1 ) ) {
5671 float32 res
= commonNaNToFloat32(floatx80ToCommonNaN(a
, status
),
5673 return float32_silence_nan(res
, status
);
5675 return packFloat32( aSign
, 0xFF, 0 );
5677 shift64RightJamming( aSig
, 33, &aSig
);
5678 if ( aExp
|| aSig
) aExp
-= 0x3F81;
5679 return roundAndPackFloat32(aSign
, aExp
, aSig
, status
);
5683 /*----------------------------------------------------------------------------
5684 | Returns the result of converting the extended double-precision floating-
5685 | point value `a' to the double-precision floating-point format. The
5686 | conversion is performed according to the IEC/IEEE Standard for Binary
5687 | Floating-Point Arithmetic.
5688 *----------------------------------------------------------------------------*/
5690 float64
floatx80_to_float64(floatx80 a
, float_status
*status
)
5694 uint64_t aSig
, zSig
;
5696 if (floatx80_invalid_encoding(a
)) {
5697 float_raise(float_flag_invalid
, status
);
5698 return float64_default_nan(status
);
5700 aSig
= extractFloatx80Frac( a
);
5701 aExp
= extractFloatx80Exp( a
);
5702 aSign
= extractFloatx80Sign( a
);
5703 if ( aExp
== 0x7FFF ) {
5704 if ( (uint64_t) ( aSig
<<1 ) ) {
5705 float64 res
= commonNaNToFloat64(floatx80ToCommonNaN(a
, status
),
5707 return float64_silence_nan(res
, status
);
5709 return packFloat64( aSign
, 0x7FF, 0 );
5711 shift64RightJamming( aSig
, 1, &zSig
);
5712 if ( aExp
|| aSig
) aExp
-= 0x3C01;
5713 return roundAndPackFloat64(aSign
, aExp
, zSig
, status
);
5717 /*----------------------------------------------------------------------------
5718 | Returns the result of converting the extended double-precision floating-
5719 | point value `a' to the quadruple-precision floating-point format. The
5720 | conversion is performed according to the IEC/IEEE Standard for Binary
5721 | Floating-Point Arithmetic.
5722 *----------------------------------------------------------------------------*/
5724 float128
floatx80_to_float128(floatx80 a
, float_status
*status
)
5728 uint64_t aSig
, zSig0
, zSig1
;
5730 if (floatx80_invalid_encoding(a
)) {
5731 float_raise(float_flag_invalid
, status
);
5732 return float128_default_nan(status
);
5734 aSig
= extractFloatx80Frac( a
);
5735 aExp
= extractFloatx80Exp( a
);
5736 aSign
= extractFloatx80Sign( a
);
5737 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( aSig
<<1 ) ) {
5738 float128 res
= commonNaNToFloat128(floatx80ToCommonNaN(a
, status
),
5740 return float128_silence_nan(res
, status
);
5742 shift128Right( aSig
<<1, 0, 16, &zSig0
, &zSig1
);
5743 return packFloat128( aSign
, aExp
, zSig0
, zSig1
);
5747 /*----------------------------------------------------------------------------
5748 | Rounds the extended double-precision floating-point value `a'
5749 | to the precision provided by floatx80_rounding_precision and returns the
5750 | result as an extended double-precision floating-point value.
5751 | The operation is performed according to the IEC/IEEE Standard for Binary
5752 | Floating-Point Arithmetic.
5753 *----------------------------------------------------------------------------*/
5755 floatx80
floatx80_round(floatx80 a
, float_status
*status
)
5757 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
5758 extractFloatx80Sign(a
),
5759 extractFloatx80Exp(a
),
5760 extractFloatx80Frac(a
), 0, status
);
5763 /*----------------------------------------------------------------------------
5764 | Rounds the extended double-precision floating-point value `a' to an integer,
5765 | and returns the result as an extended quadruple-precision floating-point
5766 | value. The operation is performed according to the IEC/IEEE Standard for
5767 | Binary Floating-Point Arithmetic.
5768 *----------------------------------------------------------------------------*/
5770 floatx80
floatx80_round_to_int(floatx80 a
, float_status
*status
)
5774 uint64_t lastBitMask
, roundBitsMask
;
5777 if (floatx80_invalid_encoding(a
)) {
5778 float_raise(float_flag_invalid
, status
);
5779 return floatx80_default_nan(status
);
5781 aExp
= extractFloatx80Exp( a
);
5782 if ( 0x403E <= aExp
) {
5783 if ( ( aExp
== 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a
)<<1 ) ) {
5784 return propagateFloatx80NaN(a
, a
, status
);
5788 if ( aExp
< 0x3FFF ) {
5790 && ( (uint64_t) ( extractFloatx80Frac( a
) ) == 0 ) ) {
5793 float_raise(float_flag_inexact
, status
);
5794 aSign
= extractFloatx80Sign( a
);
5795 switch (status
->float_rounding_mode
) {
5796 case float_round_nearest_even
:
5797 if ( ( aExp
== 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a
)<<1 )
5800 packFloatx80( aSign
, 0x3FFF, UINT64_C(0x8000000000000000));
5803 case float_round_ties_away
:
5804 if (aExp
== 0x3FFE) {
5805 return packFloatx80(aSign
, 0x3FFF, UINT64_C(0x8000000000000000));
5808 case float_round_down
:
5811 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5812 : packFloatx80( 0, 0, 0 );
5813 case float_round_up
:
5815 aSign
? packFloatx80( 1, 0, 0 )
5816 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5818 case float_round_to_zero
:
5821 g_assert_not_reached();
5823 return packFloatx80( aSign
, 0, 0 );
5826 lastBitMask
<<= 0x403E - aExp
;
5827 roundBitsMask
= lastBitMask
- 1;
5829 switch (status
->float_rounding_mode
) {
5830 case float_round_nearest_even
:
5831 z
.low
+= lastBitMask
>>1;
5832 if ((z
.low
& roundBitsMask
) == 0) {
5833 z
.low
&= ~lastBitMask
;
5836 case float_round_ties_away
:
5837 z
.low
+= lastBitMask
>> 1;
5839 case float_round_to_zero
:
5841 case float_round_up
:
5842 if (!extractFloatx80Sign(z
)) {
5843 z
.low
+= roundBitsMask
;
5846 case float_round_down
:
5847 if (extractFloatx80Sign(z
)) {
5848 z
.low
+= roundBitsMask
;
5854 z
.low
&= ~ roundBitsMask
;
5857 z
.low
= UINT64_C(0x8000000000000000);
5859 if (z
.low
!= a
.low
) {
5860 float_raise(float_flag_inexact
, status
);
5866 /*----------------------------------------------------------------------------
5867 | Returns the result of multiplying the extended double-precision floating-
5868 | point values `a' and `b'. The operation is performed according to the
5869 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5870 *----------------------------------------------------------------------------*/
5872 floatx80
floatx80_mul(floatx80 a
, floatx80 b
, float_status
*status
)
5874 bool aSign
, bSign
, zSign
;
5875 int32_t aExp
, bExp
, zExp
;
5876 uint64_t aSig
, bSig
, zSig0
, zSig1
;
5878 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
5879 float_raise(float_flag_invalid
, status
);
5880 return floatx80_default_nan(status
);
5882 aSig
= extractFloatx80Frac( a
);
5883 aExp
= extractFloatx80Exp( a
);
5884 aSign
= extractFloatx80Sign( a
);
5885 bSig
= extractFloatx80Frac( b
);
5886 bExp
= extractFloatx80Exp( b
);
5887 bSign
= extractFloatx80Sign( b
);
5888 zSign
= aSign
^ bSign
;
5889 if ( aExp
== 0x7FFF ) {
5890 if ( (uint64_t) ( aSig
<<1 )
5891 || ( ( bExp
== 0x7FFF ) && (uint64_t) ( bSig
<<1 ) ) ) {
5892 return propagateFloatx80NaN(a
, b
, status
);
5894 if ( ( bExp
| bSig
) == 0 ) goto invalid
;
5895 return packFloatx80(zSign
, floatx80_infinity_high
,
5896 floatx80_infinity_low
);
5898 if ( bExp
== 0x7FFF ) {
5899 if ((uint64_t)(bSig
<< 1)) {
5900 return propagateFloatx80NaN(a
, b
, status
);
5902 if ( ( aExp
| aSig
) == 0 ) {
5904 float_raise(float_flag_invalid
, status
);
5905 return floatx80_default_nan(status
);
5907 return packFloatx80(zSign
, floatx80_infinity_high
,
5908 floatx80_infinity_low
);
5911 if ( aSig
== 0 ) return packFloatx80( zSign
, 0, 0 );
5912 normalizeFloatx80Subnormal( aSig
, &aExp
, &aSig
);
5915 if ( bSig
== 0 ) return packFloatx80( zSign
, 0, 0 );
5916 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
5918 zExp
= aExp
+ bExp
- 0x3FFE;
5919 mul64To128( aSig
, bSig
, &zSig0
, &zSig1
);
5920 if ( 0 < (int64_t) zSig0
) {
5921 shortShift128Left( zSig0
, zSig1
, 1, &zSig0
, &zSig1
);
5924 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
5925 zSign
, zExp
, zSig0
, zSig1
, status
);
5928 /*----------------------------------------------------------------------------
5929 | Returns the result of dividing the extended double-precision floating-point
5930 | value `a' by the corresponding value `b'. The operation is performed
5931 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5932 *----------------------------------------------------------------------------*/
5934 floatx80
floatx80_div(floatx80 a
, floatx80 b
, float_status
*status
)
5936 bool aSign
, bSign
, zSign
;
5937 int32_t aExp
, bExp
, zExp
;
5938 uint64_t aSig
, bSig
, zSig0
, zSig1
;
5939 uint64_t rem0
, rem1
, rem2
, term0
, term1
, term2
;
5941 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
5942 float_raise(float_flag_invalid
, status
);
5943 return floatx80_default_nan(status
);
5945 aSig
= extractFloatx80Frac( a
);
5946 aExp
= extractFloatx80Exp( a
);
5947 aSign
= extractFloatx80Sign( a
);
5948 bSig
= extractFloatx80Frac( b
);
5949 bExp
= extractFloatx80Exp( b
);
5950 bSign
= extractFloatx80Sign( b
);
5951 zSign
= aSign
^ bSign
;
5952 if ( aExp
== 0x7FFF ) {
5953 if ((uint64_t)(aSig
<< 1)) {
5954 return propagateFloatx80NaN(a
, b
, status
);
5956 if ( bExp
== 0x7FFF ) {
5957 if ((uint64_t)(bSig
<< 1)) {
5958 return propagateFloatx80NaN(a
, b
, status
);
5962 return packFloatx80(zSign
, floatx80_infinity_high
,
5963 floatx80_infinity_low
);
5965 if ( bExp
== 0x7FFF ) {
5966 if ((uint64_t)(bSig
<< 1)) {
5967 return propagateFloatx80NaN(a
, b
, status
);
5969 return packFloatx80( zSign
, 0, 0 );
5973 if ( ( aExp
| aSig
) == 0 ) {
5975 float_raise(float_flag_invalid
, status
);
5976 return floatx80_default_nan(status
);
5978 float_raise(float_flag_divbyzero
, status
);
5979 return packFloatx80(zSign
, floatx80_infinity_high
,
5980 floatx80_infinity_low
);
5982 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
5985 if ( aSig
== 0 ) return packFloatx80( zSign
, 0, 0 );
5986 normalizeFloatx80Subnormal( aSig
, &aExp
, &aSig
);
5988 zExp
= aExp
- bExp
+ 0x3FFE;
5990 if ( bSig
<= aSig
) {
5991 shift128Right( aSig
, 0, 1, &aSig
, &rem1
);
5994 zSig0
= estimateDiv128To64( aSig
, rem1
, bSig
);
5995 mul64To128( bSig
, zSig0
, &term0
, &term1
);
5996 sub128( aSig
, rem1
, term0
, term1
, &rem0
, &rem1
);
5997 while ( (int64_t) rem0
< 0 ) {
5999 add128( rem0
, rem1
, 0, bSig
, &rem0
, &rem1
);
6001 zSig1
= estimateDiv128To64( rem1
, 0, bSig
);
6002 if ( (uint64_t) ( zSig1
<<1 ) <= 8 ) {
6003 mul64To128( bSig
, zSig1
, &term1
, &term2
);
6004 sub128( rem1
, 0, term1
, term2
, &rem1
, &rem2
);
6005 while ( (int64_t) rem1
< 0 ) {
6007 add128( rem1
, rem2
, 0, bSig
, &rem1
, &rem2
);
6009 zSig1
|= ( ( rem1
| rem2
) != 0 );
6011 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
6012 zSign
, zExp
, zSig0
, zSig1
, status
);
6015 /*----------------------------------------------------------------------------
6016 | Returns the remainder of the extended double-precision floating-point value
6017 | `a' with respect to the corresponding value `b'. The operation is performed
6018 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6019 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6020 | the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6021 | the absolute value of the integer quotient.
6022 *----------------------------------------------------------------------------*/
6024 floatx80
floatx80_modrem(floatx80 a
, floatx80 b
, bool mod
, uint64_t *quotient
,
6025 float_status
*status
)
6028 int32_t aExp
, bExp
, expDiff
, aExpOrig
;
6029 uint64_t aSig0
, aSig1
, bSig
;
6030 uint64_t q
, term0
, term1
, alternateASig0
, alternateASig1
;
6033 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
6034 float_raise(float_flag_invalid
, status
);
6035 return floatx80_default_nan(status
);
6037 aSig0
= extractFloatx80Frac( a
);
6038 aExpOrig
= aExp
= extractFloatx80Exp( a
);
6039 aSign
= extractFloatx80Sign( a
);
6040 bSig
= extractFloatx80Frac( b
);
6041 bExp
= extractFloatx80Exp( b
);
6042 if ( aExp
== 0x7FFF ) {
6043 if ( (uint64_t) ( aSig0
<<1 )
6044 || ( ( bExp
== 0x7FFF ) && (uint64_t) ( bSig
<<1 ) ) ) {
6045 return propagateFloatx80NaN(a
, b
, status
);
6049 if ( bExp
== 0x7FFF ) {
6050 if ((uint64_t)(bSig
<< 1)) {
6051 return propagateFloatx80NaN(a
, b
, status
);
6053 if (aExp
== 0 && aSig0
>> 63) {
6055 * Pseudo-denormal argument must be returned in normalized
6058 return packFloatx80(aSign
, 1, aSig0
);
6065 float_raise(float_flag_invalid
, status
);
6066 return floatx80_default_nan(status
);
6068 normalizeFloatx80Subnormal( bSig
, &bExp
, &bSig
);
6071 if ( aSig0
== 0 ) return a
;
6072 normalizeFloatx80Subnormal( aSig0
, &aExp
, &aSig0
);
6075 expDiff
= aExp
- bExp
;
6077 if ( expDiff
< 0 ) {
6078 if ( mod
|| expDiff
< -1 ) {
6079 if (aExp
== 1 && aExpOrig
== 0) {
6081 * Pseudo-denormal argument must be returned in
6084 return packFloatx80(aSign
, aExp
, aSig0
);
6088 shift128Right( aSig0
, 0, 1, &aSig0
, &aSig1
);
6091 *quotient
= q
= ( bSig
<= aSig0
);
6092 if ( q
) aSig0
-= bSig
;
6094 while ( 0 < expDiff
) {
6095 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
6096 q
= ( 2 < q
) ? q
- 2 : 0;
6097 mul64To128( bSig
, q
, &term0
, &term1
);
6098 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6099 shortShift128Left( aSig0
, aSig1
, 62, &aSig0
, &aSig1
);
6105 if ( 0 < expDiff
) {
6106 q
= estimateDiv128To64( aSig0
, aSig1
, bSig
);
6107 q
= ( 2 < q
) ? q
- 2 : 0;
6109 mul64To128( bSig
, q
<<( 64 - expDiff
), &term0
, &term1
);
6110 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6111 shortShift128Left( 0, bSig
, 64 - expDiff
, &term0
, &term1
);
6112 while ( le128( term0
, term1
, aSig0
, aSig1
) ) {
6114 sub128( aSig0
, aSig1
, term0
, term1
, &aSig0
, &aSig1
);
6117 *quotient
<<= expDiff
;
6128 sub128( term0
, term1
, aSig0
, aSig1
, &alternateASig0
, &alternateASig1
);
6129 if ( lt128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
6130 || ( eq128( alternateASig0
, alternateASig1
, aSig0
, aSig1
)
6133 aSig0
= alternateASig0
;
6134 aSig1
= alternateASig1
;
6140 normalizeRoundAndPackFloatx80(
6141 floatx80_precision_x
, zSign
, bExp
+ expDiff
, aSig0
, aSig1
, status
);
6145 /*----------------------------------------------------------------------------
6146 | Returns the remainder of the extended double-precision floating-point value
6147 | `a' with respect to the corresponding value `b'. The operation is performed
6148 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6149 *----------------------------------------------------------------------------*/
6151 floatx80
floatx80_rem(floatx80 a
, floatx80 b
, float_status
*status
)
6154 return floatx80_modrem(a
, b
, false, "ient
, status
);
6157 /*----------------------------------------------------------------------------
6158 | Returns the remainder of the extended double-precision floating-point value
6159 | `a' with respect to the corresponding value `b', with the quotient truncated
6161 *----------------------------------------------------------------------------*/
6163 floatx80
floatx80_mod(floatx80 a
, floatx80 b
, float_status
*status
)
6166 return floatx80_modrem(a
, b
, true, "ient
, status
);
6169 /*----------------------------------------------------------------------------
6170 | Returns the square root of the extended double-precision floating-point
6171 | value `a'. The operation is performed according to the IEC/IEEE Standard
6172 | for Binary Floating-Point Arithmetic.
6173 *----------------------------------------------------------------------------*/
6175 floatx80
floatx80_sqrt(floatx80 a
, float_status
*status
)
6179 uint64_t aSig0
, aSig1
, zSig0
, zSig1
, doubleZSig0
;
6180 uint64_t rem0
, rem1
, rem2
, rem3
, term0
, term1
, term2
, term3
;
6182 if (floatx80_invalid_encoding(a
)) {
6183 float_raise(float_flag_invalid
, status
);
6184 return floatx80_default_nan(status
);
6186 aSig0
= extractFloatx80Frac( a
);
6187 aExp
= extractFloatx80Exp( a
);
6188 aSign
= extractFloatx80Sign( a
);
6189 if ( aExp
== 0x7FFF ) {
6190 if ((uint64_t)(aSig0
<< 1)) {
6191 return propagateFloatx80NaN(a
, a
, status
);
6193 if ( ! aSign
) return a
;
6197 if ( ( aExp
| aSig0
) == 0 ) return a
;
6199 float_raise(float_flag_invalid
, status
);
6200 return floatx80_default_nan(status
);
6203 if ( aSig0
== 0 ) return packFloatx80( 0, 0, 0 );
6204 normalizeFloatx80Subnormal( aSig0
, &aExp
, &aSig0
);
6206 zExp
= ( ( aExp
- 0x3FFF )>>1 ) + 0x3FFF;
6207 zSig0
= estimateSqrt32( aExp
, aSig0
>>32 );
6208 shift128Right( aSig0
, 0, 2 + ( aExp
& 1 ), &aSig0
, &aSig1
);
6209 zSig0
= estimateDiv128To64( aSig0
, aSig1
, zSig0
<<32 ) + ( zSig0
<<30 );
6210 doubleZSig0
= zSig0
<<1;
6211 mul64To128( zSig0
, zSig0
, &term0
, &term1
);
6212 sub128( aSig0
, aSig1
, term0
, term1
, &rem0
, &rem1
);
6213 while ( (int64_t) rem0
< 0 ) {
6216 add128( rem0
, rem1
, zSig0
>>63, doubleZSig0
| 1, &rem0
, &rem1
);
6218 zSig1
= estimateDiv128To64( rem1
, 0, doubleZSig0
);
6219 if ( ( zSig1
& UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
6220 if ( zSig1
== 0 ) zSig1
= 1;
6221 mul64To128( doubleZSig0
, zSig1
, &term1
, &term2
);
6222 sub128( rem1
, 0, term1
, term2
, &rem1
, &rem2
);
6223 mul64To128( zSig1
, zSig1
, &term2
, &term3
);
6224 sub192( rem1
, rem2
, 0, 0, term2
, term3
, &rem1
, &rem2
, &rem3
);
6225 while ( (int64_t) rem1
< 0 ) {
6227 shortShift128Left( 0, zSig1
, 1, &term2
, &term3
);
6229 term2
|= doubleZSig0
;
6230 add192( rem1
, rem2
, rem3
, 0, term2
, term3
, &rem1
, &rem2
, &rem3
);
6232 zSig1
|= ( ( rem1
| rem2
| rem3
) != 0 );
6234 shortShift128Left( 0, zSig1
, 1, &zSig0
, &zSig1
);
6235 zSig0
|= doubleZSig0
;
6236 return roundAndPackFloatx80(status
->floatx80_rounding_precision
,
6237 0, zExp
, zSig0
, zSig1
, status
);
6240 /*----------------------------------------------------------------------------
6241 | Returns the result of converting the quadruple-precision floating-point
6242 | value `a' to the extended double-precision floating-point format. The
6243 | conversion is performed according to the IEC/IEEE Standard for Binary
6244 | Floating-Point Arithmetic.
6245 *----------------------------------------------------------------------------*/
6247 floatx80
float128_to_floatx80(float128 a
, float_status
*status
)
6251 uint64_t aSig0
, aSig1
;
6253 aSig1
= extractFloat128Frac1( a
);
6254 aSig0
= extractFloat128Frac0( a
);
6255 aExp
= extractFloat128Exp( a
);
6256 aSign
= extractFloat128Sign( a
);
6257 if ( aExp
== 0x7FFF ) {
6258 if ( aSig0
| aSig1
) {
6259 floatx80 res
= commonNaNToFloatx80(float128ToCommonNaN(a
, status
),
6261 return floatx80_silence_nan(res
, status
);
6263 return packFloatx80(aSign
, floatx80_infinity_high
,
6264 floatx80_infinity_low
);
6267 if ( ( aSig0
| aSig1
) == 0 ) return packFloatx80( aSign
, 0, 0 );
6268 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
6271 aSig0
|= UINT64_C(0x0001000000000000);
6273 shortShift128Left( aSig0
, aSig1
, 15, &aSig0
, &aSig1
);
6274 return roundAndPackFloatx80(80, aSign
, aExp
, aSig0
, aSig1
, status
);
6278 /*----------------------------------------------------------------------------
6279 | Returns the remainder of the quadruple-precision floating-point value `a'
6280 | with respect to the corresponding value `b'. The operation is performed
6281 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6282 *----------------------------------------------------------------------------*/
6284 float128
float128_rem(float128 a
, float128 b
, float_status
*status
)
6287 int32_t aExp
, bExp
, expDiff
;
6288 uint64_t aSig0
, aSig1
, bSig0
, bSig1
, q
, term0
, term1
, term2
;
6289 uint64_t allZero
, alternateASig0
, alternateASig1
, sigMean1
;
6292 aSig1
= extractFloat128Frac1( a
);
6293 aSig0
= extractFloat128Frac0( a
);
6294 aExp
= extractFloat128Exp( a
);
6295 aSign
= extractFloat128Sign( a
);
6296 bSig1
= extractFloat128Frac1( b
);
6297 bSig0
= extractFloat128Frac0( b
);
6298 bExp
= extractFloat128Exp( b
);
6299 if ( aExp
== 0x7FFF ) {
6300 if ( ( aSig0
| aSig1
)
6301 || ( ( bExp
== 0x7FFF ) && ( bSig0
| bSig1
) ) ) {
6302 return propagateFloat128NaN(a
, b
, status
);
6306 if ( bExp
== 0x7FFF ) {
6307 if (bSig0
| bSig1
) {
6308 return propagateFloat128NaN(a
, b
, status
);
6313 if ( ( bSig0
| bSig1
) == 0 ) {
6315 float_raise(float_flag_invalid
, status
);
6316 return float128_default_nan(status
);
6318 normalizeFloat128Subnormal( bSig0
, bSig1
, &bExp
, &bSig0
, &bSig1
);
6321 if ( ( aSig0
| aSig1
) == 0 ) return a
;
6322 normalizeFloat128Subnormal( aSig0
, aSig1
, &aExp
, &aSig0
, &aSig1
);
6324 expDiff
= aExp
- bExp
;
6325 if ( expDiff
< -1 ) return a
;
6327 aSig0
| UINT64_C(0x0001000000000000),
6329 15 - ( expDiff
< 0 ),
6334 bSig0
| UINT64_C(0x0001000000000000), bSig1
, 15, &bSig0
, &bSig1
);
6335 q
= le128( bSig0
, bSig1
, aSig0
, aSig1
);
6336 if ( q
) sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
6338 while ( 0 < expDiff
) {
6339 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
6340 q
= ( 4 < q
) ? q
- 4 : 0;
6341 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
6342 shortShift192Left( term0
, term1
, term2
, 61, &term1
, &term2
, &allZero
);
6343 shortShift128Left( aSig0
, aSig1
, 61, &aSig0
, &allZero
);
6344 sub128( aSig0
, 0, term1
, term2
, &aSig0
, &aSig1
);
6347 if ( -64 < expDiff
) {
6348 q
= estimateDiv128To64( aSig0
, aSig1
, bSig0
);
6349 q
= ( 4 < q
) ? q
- 4 : 0;
6351 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
6353 if ( expDiff
< 0 ) {
6354 shift128Right( aSig0
, aSig1
, - expDiff
, &aSig0
, &aSig1
);
6357 shortShift128Left( aSig0
, aSig1
, expDiff
, &aSig0
, &aSig1
);
6359 mul128By64To192( bSig0
, bSig1
, q
, &term0
, &term1
, &term2
);
6360 sub128( aSig0
, aSig1
, term1
, term2
, &aSig0
, &aSig1
);
6363 shift128Right( aSig0
, aSig1
, 12, &aSig0
, &aSig1
);
6364 shift128Right( bSig0
, bSig1
, 12, &bSig0
, &bSig1
);
6367 alternateASig0
= aSig0
;
6368 alternateASig1
= aSig1
;
6370 sub128( aSig0
, aSig1
, bSig0
, bSig1
, &aSig0
, &aSig1
);
6371 } while ( 0 <= (int64_t) aSig0
);
6373 aSig0
, aSig1
, alternateASig0
, alternateASig1
, (uint64_t *)&sigMean0
, &sigMean1
);
6374 if ( ( sigMean0
< 0 )
6375 || ( ( ( sigMean0
| sigMean1
) == 0 ) && ( q
& 1 ) ) ) {
6376 aSig0
= alternateASig0
;
6377 aSig1
= alternateASig1
;
6379 zSign
= ( (int64_t) aSig0
< 0 );
6380 if ( zSign
) sub128( 0, 0, aSig0
, aSig1
, &aSig0
, &aSig1
);
6381 return normalizeRoundAndPackFloat128(aSign
^ zSign
, bExp
- 4, aSig0
, aSig1
,
6385 static inline FloatRelation
6386 floatx80_compare_internal(floatx80 a
, floatx80 b
, bool is_quiet
,
6387 float_status
*status
)
6391 if (floatx80_invalid_encoding(a
) || floatx80_invalid_encoding(b
)) {
6392 float_raise(float_flag_invalid
, status
);
6393 return float_relation_unordered
;
6395 if (( ( extractFloatx80Exp( a
) == 0x7fff ) &&
6396 ( extractFloatx80Frac( a
)<<1 ) ) ||
6397 ( ( extractFloatx80Exp( b
) == 0x7fff ) &&
6398 ( extractFloatx80Frac( b
)<<1 ) )) {
6400 floatx80_is_signaling_nan(a
, status
) ||
6401 floatx80_is_signaling_nan(b
, status
)) {
6402 float_raise(float_flag_invalid
, status
);
6404 return float_relation_unordered
;
6406 aSign
= extractFloatx80Sign( a
);
6407 bSign
= extractFloatx80Sign( b
);
6408 if ( aSign
!= bSign
) {
6410 if ( ( ( (uint16_t) ( ( a
.high
| b
.high
) << 1 ) ) == 0) &&
6411 ( ( a
.low
| b
.low
) == 0 ) ) {
6413 return float_relation_equal
;
6415 return 1 - (2 * aSign
);
6418 /* Normalize pseudo-denormals before comparison. */
6419 if ((a
.high
& 0x7fff) == 0 && a
.low
& UINT64_C(0x8000000000000000)) {
6422 if ((b
.high
& 0x7fff) == 0 && b
.low
& UINT64_C(0x8000000000000000)) {
6425 if (a
.low
== b
.low
&& a
.high
== b
.high
) {
6426 return float_relation_equal
;
6428 return 1 - 2 * (aSign
^ ( lt128( a
.high
, a
.low
, b
.high
, b
.low
) ));
6433 FloatRelation
floatx80_compare(floatx80 a
, floatx80 b
, float_status
*status
)
6435 return floatx80_compare_internal(a
, b
, 0, status
);
6438 FloatRelation
floatx80_compare_quiet(floatx80 a
, floatx80 b
,
6439 float_status
*status
)
6441 return floatx80_compare_internal(a
, b
, 1, status
);
6444 floatx80
floatx80_scalbn(floatx80 a
, int n
, float_status
*status
)
6450 if (floatx80_invalid_encoding(a
)) {
6451 float_raise(float_flag_invalid
, status
);
6452 return floatx80_default_nan(status
);
6454 aSig
= extractFloatx80Frac( a
);
6455 aExp
= extractFloatx80Exp( a
);
6456 aSign
= extractFloatx80Sign( a
);
6458 if ( aExp
== 0x7FFF ) {
6460 return propagateFloatx80NaN(a
, a
, status
);
6474 } else if (n
< -0x10000) {
6479 return normalizeRoundAndPackFloatx80(status
->floatx80_rounding_precision
,
6480 aSign
, aExp
, aSig
, 0, status
);
6483 static void __attribute__((constructor
)) softfloat_init(void)
6485 union_float64 ua
, ub
, uc
, ur
;
6487 if (QEMU_NO_HARDFLOAT
) {
6491 * Test that the host's FMA is not obviously broken. For example,
6492 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6493 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6495 ua
.s
= 0x0020000000000001ULL
;
6496 ub
.s
= 0x3ca0000000000000ULL
;
6497 uc
.s
= 0x0020000000000000ULL
;
6498 ur
.h
= fma(ua
.h
, ub
.h
, uc
.h
);
6499 if (ur
.s
!= 0x0020000000000001ULL
) {
6500 force_soft_fma
= true;