]> git.proxmox.com Git - mirror_qemu.git/blob - fpu/softfloat.c
softfloat: Convert floatx80_div to FloatParts
[mirror_qemu.git] / fpu / softfloat.c
1 /*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43
44 ===============================================================================
45 */
46
47 /* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89
90 /* We only need stdlib for abort() */
91
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98
99 /*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
136 } \
137 }
138
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141 #undef GEN_INPUT_FLUSH__NOCHECK
142
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154 #undef GEN_INPUT_FLUSH1
155
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168 #undef GEN_INPUT_FLUSH2
169
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183 #undef GEN_INPUT_FLUSH3
184
185 /*
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
189 */
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
197 #else
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
204 #endif
205
206 /*
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211 */
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF 1
214 #else
215 # define QEMU_HARDFLOAT_USE_ISINF 0
216 #endif
217
218 /*
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
221 * already set.
222 */
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 # warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226 IEEE implementation
227 # endif
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230 #else
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233 #endif
234
235 static inline bool can_use_fpu(const float_status *s)
236 {
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242 }
243
244 /*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256 typedef union {
257 float32 s;
258 float h;
259 } union_float32;
260
261 typedef union {
262 float64 s;
263 double h;
264 } union_float64;
265
266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271 typedef float (*hard_f32_op2_fn)(float a, float b);
272 typedef double (*hard_f64_op2_fn)(double a, double b);
273
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287 }
288
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297 }
298
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311 }
312
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324 }
325
326 static inline bool f32_is_inf(union_float32 a)
327 {
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332 }
333
334 static inline bool f64_is_inf(union_float64 a)
335 {
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340 }
341
342 static inline float32
343 float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
345 f32_check_fn pre, f32_check_fn post)
346 {
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
363 float_raise(float_flag_overflow, s);
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371 }
372
373 static inline float64
374 float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
376 f64_check_fn pre, f64_check_fn post)
377 {
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
394 float_raise(float_flag_overflow, s);
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402 }
403
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
407
408 static inline uint32_t extractFloat32Frac(float32 a)
409 {
410 return float32_val(a) & 0x007FFFFF;
411 }
412
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
416
417 static inline int extractFloat32Exp(float32 a)
418 {
419 return (float32_val(a) >> 23) & 0xFF;
420 }
421
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
425
426 static inline bool extractFloat32Sign(float32 a)
427 {
428 return float32_val(a) >> 31;
429 }
430
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
434
435 static inline uint64_t extractFloat64Frac(float64 a)
436 {
437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
438 }
439
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
443
444 static inline int extractFloat64Exp(float64 a)
445 {
446 return (float64_val(a) >> 52) & 0x7FF;
447 }
448
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
452
453 static inline bool extractFloat64Sign(float64 a)
454 {
455 return float64_val(a) >> 63;
456 }
457
458 /*
459 * Classify a floating point number. Everything above float_class_qnan
460 * is a NaN so cls >= float_class_qnan is any NaN.
461 */
462
463 typedef enum __attribute__ ((__packed__)) {
464 float_class_unclassified,
465 float_class_zero,
466 float_class_normal,
467 float_class_inf,
468 float_class_qnan, /* all NaNs from here */
469 float_class_snan,
470 } FloatClass;
471
472 #define float_cmask(bit) (1u << (bit))
473
474 enum {
475 float_cmask_zero = float_cmask(float_class_zero),
476 float_cmask_normal = float_cmask(float_class_normal),
477 float_cmask_inf = float_cmask(float_class_inf),
478 float_cmask_qnan = float_cmask(float_class_qnan),
479 float_cmask_snan = float_cmask(float_class_snan),
480
481 float_cmask_infzero = float_cmask_zero | float_cmask_inf,
482 float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
483 };
484
485 /* Flags for parts_minmax. */
486 enum {
487 /* Set for minimum; clear for maximum. */
488 minmax_ismin = 1,
489 /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
490 minmax_isnum = 2,
491 /* Set for the IEEE 754-2008 minNumMag() and minNumMag() operations. */
492 minmax_ismag = 4,
493 };
494
495 /* Simple helpers for checking if, or what kind of, NaN we have */
496 static inline __attribute__((unused)) bool is_nan(FloatClass c)
497 {
498 return unlikely(c >= float_class_qnan);
499 }
500
501 static inline __attribute__((unused)) bool is_snan(FloatClass c)
502 {
503 return c == float_class_snan;
504 }
505
506 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
507 {
508 return c == float_class_qnan;
509 }
510
511 /*
512 * Structure holding all of the decomposed parts of a float.
513 * The exponent is unbiased and the fraction is normalized.
514 *
515 * The fraction words are stored in big-endian word ordering,
516 * so that truncation from a larger format to a smaller format
517 * can be done simply by ignoring subsequent elements.
518 */
519
520 typedef struct {
521 FloatClass cls;
522 bool sign;
523 int32_t exp;
524 union {
525 /* Routines that know the structure may reference the singular name. */
526 uint64_t frac;
527 /*
528 * Routines expanded with multiple structures reference "hi" and "lo"
529 * depending on the operation. In FloatParts64, "hi" and "lo" are
530 * both the same word and aliased here.
531 */
532 uint64_t frac_hi;
533 uint64_t frac_lo;
534 };
535 } FloatParts64;
536
537 typedef struct {
538 FloatClass cls;
539 bool sign;
540 int32_t exp;
541 uint64_t frac_hi;
542 uint64_t frac_lo;
543 } FloatParts128;
544
545 typedef struct {
546 FloatClass cls;
547 bool sign;
548 int32_t exp;
549 uint64_t frac_hi;
550 uint64_t frac_hm; /* high-middle */
551 uint64_t frac_lm; /* low-middle */
552 uint64_t frac_lo;
553 } FloatParts256;
554
555 /* These apply to the most significant word of each FloatPartsN. */
556 #define DECOMPOSED_BINARY_POINT 63
557 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
558
559 /* Structure holding all of the relevant parameters for a format.
560 * exp_size: the size of the exponent field
561 * exp_bias: the offset applied to the exponent field
562 * exp_max: the maximum normalised exponent
563 * frac_size: the size of the fraction field
564 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
565 * The following are computed based the size of fraction
566 * round_mask: bits below lsb which must be rounded
567 * The following optional modifiers are available:
568 * arm_althp: handle ARM Alternative Half Precision
569 */
570 typedef struct {
571 int exp_size;
572 int exp_bias;
573 int exp_max;
574 int frac_size;
575 int frac_shift;
576 bool arm_althp;
577 uint64_t round_mask;
578 } FloatFmt;
579
580 /* Expand fields based on the size of exponent and fraction */
581 #define FLOAT_PARAMS_(E) \
582 .exp_size = E, \
583 .exp_bias = ((1 << E) - 1) >> 1, \
584 .exp_max = (1 << E) - 1
585
586 #define FLOAT_PARAMS(E, F) \
587 FLOAT_PARAMS_(E), \
588 .frac_size = F, \
589 .frac_shift = (-F - 1) & 63, \
590 .round_mask = (1ull << ((-F - 1) & 63)) - 1
591
592 static const FloatFmt float16_params = {
593 FLOAT_PARAMS(5, 10)
594 };
595
596 static const FloatFmt float16_params_ahp = {
597 FLOAT_PARAMS(5, 10),
598 .arm_althp = true
599 };
600
601 static const FloatFmt bfloat16_params = {
602 FLOAT_PARAMS(8, 7)
603 };
604
605 static const FloatFmt float32_params = {
606 FLOAT_PARAMS(8, 23)
607 };
608
609 static const FloatFmt float64_params = {
610 FLOAT_PARAMS(11, 52)
611 };
612
613 static const FloatFmt float128_params = {
614 FLOAT_PARAMS(15, 112)
615 };
616
617 #define FLOATX80_PARAMS(R) \
618 FLOAT_PARAMS_(15), \
619 .frac_size = R == 64 ? 63 : R, \
620 .frac_shift = 0, \
621 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
622
623 static const FloatFmt floatx80_params[3] = {
624 [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
625 [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
626 [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
627 };
628
629 /* Unpack a float to parts, but do not canonicalize. */
630 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
631 {
632 const int f_size = fmt->frac_size;
633 const int e_size = fmt->exp_size;
634
635 *r = (FloatParts64) {
636 .cls = float_class_unclassified,
637 .sign = extract64(raw, f_size + e_size, 1),
638 .exp = extract64(raw, f_size, e_size),
639 .frac = extract64(raw, 0, f_size)
640 };
641 }
642
643 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
644 {
645 unpack_raw64(p, &float16_params, f);
646 }
647
648 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
649 {
650 unpack_raw64(p, &bfloat16_params, f);
651 }
652
653 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
654 {
655 unpack_raw64(p, &float32_params, f);
656 }
657
658 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
659 {
660 unpack_raw64(p, &float64_params, f);
661 }
662
663 static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
664 {
665 *p = (FloatParts128) {
666 .cls = float_class_unclassified,
667 .sign = extract32(f.high, 15, 1),
668 .exp = extract32(f.high, 0, 15),
669 .frac_hi = f.low
670 };
671 }
672
673 static void float128_unpack_raw(FloatParts128 *p, float128 f)
674 {
675 const int f_size = float128_params.frac_size - 64;
676 const int e_size = float128_params.exp_size;
677
678 *p = (FloatParts128) {
679 .cls = float_class_unclassified,
680 .sign = extract64(f.high, f_size + e_size, 1),
681 .exp = extract64(f.high, f_size, e_size),
682 .frac_hi = extract64(f.high, 0, f_size),
683 .frac_lo = f.low,
684 };
685 }
686
687 /* Pack a float from parts, but do not canonicalize. */
688 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
689 {
690 const int f_size = fmt->frac_size;
691 const int e_size = fmt->exp_size;
692 uint64_t ret;
693
694 ret = (uint64_t)p->sign << (f_size + e_size);
695 ret = deposit64(ret, f_size, e_size, p->exp);
696 ret = deposit64(ret, 0, f_size, p->frac);
697 return ret;
698 }
699
700 static inline float16 float16_pack_raw(const FloatParts64 *p)
701 {
702 return make_float16(pack_raw64(p, &float16_params));
703 }
704
705 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
706 {
707 return pack_raw64(p, &bfloat16_params);
708 }
709
710 static inline float32 float32_pack_raw(const FloatParts64 *p)
711 {
712 return make_float32(pack_raw64(p, &float32_params));
713 }
714
715 static inline float64 float64_pack_raw(const FloatParts64 *p)
716 {
717 return make_float64(pack_raw64(p, &float64_params));
718 }
719
720 static float128 float128_pack_raw(const FloatParts128 *p)
721 {
722 const int f_size = float128_params.frac_size - 64;
723 const int e_size = float128_params.exp_size;
724 uint64_t hi;
725
726 hi = (uint64_t)p->sign << (f_size + e_size);
727 hi = deposit64(hi, f_size, e_size, p->exp);
728 hi = deposit64(hi, 0, f_size, p->frac_hi);
729 return make_float128(hi, p->frac_lo);
730 }
731
732 /*----------------------------------------------------------------------------
733 | Functions and definitions to determine: (1) whether tininess for underflow
734 | is detected before or after rounding by default, (2) what (if anything)
735 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
736 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
737 | are propagated from function inputs to output. These details are target-
738 | specific.
739 *----------------------------------------------------------------------------*/
740 #include "softfloat-specialize.c.inc"
741
742 #define PARTS_GENERIC_64_128(NAME, P) \
743 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
744
745 #define PARTS_GENERIC_64_128_256(NAME, P) \
746 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
747 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
748
749 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
750 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
751
752 static void parts64_return_nan(FloatParts64 *a, float_status *s);
753 static void parts128_return_nan(FloatParts128 *a, float_status *s);
754
755 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
756
757 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
758 float_status *s);
759 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
760 float_status *s);
761
762 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
763
764 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
765 FloatParts64 *c, float_status *s,
766 int ab_mask, int abc_mask);
767 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
768 FloatParts128 *b,
769 FloatParts128 *c,
770 float_status *s,
771 int ab_mask, int abc_mask);
772
773 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
774 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
775
776 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
777 const FloatFmt *fmt);
778 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
779 const FloatFmt *fmt);
780
781 #define parts_canonicalize(A, S, F) \
782 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
783
784 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
785 const FloatFmt *fmt);
786 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
787 const FloatFmt *fmt);
788
789 #define parts_uncanon_normal(A, S, F) \
790 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
791
792 static void parts64_uncanon(FloatParts64 *p, float_status *status,
793 const FloatFmt *fmt);
794 static void parts128_uncanon(FloatParts128 *p, float_status *status,
795 const FloatFmt *fmt);
796
797 #define parts_uncanon(A, S, F) \
798 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
799
800 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
801 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
802 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
803
804 #define parts_add_normal(A, B) \
805 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
806
807 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
808 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
809 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
810
811 #define parts_sub_normal(A, B) \
812 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
813
814 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
815 float_status *s, bool subtract);
816 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
817 float_status *s, bool subtract);
818
819 #define parts_addsub(A, B, S, Z) \
820 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
821
822 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
823 float_status *s);
824 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
825 float_status *s);
826
827 #define parts_mul(A, B, S) \
828 PARTS_GENERIC_64_128(mul, A)(A, B, S)
829
830 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
831 FloatParts64 *c, int flags,
832 float_status *s);
833 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
834 FloatParts128 *c, int flags,
835 float_status *s);
836
837 #define parts_muladd(A, B, C, Z, S) \
838 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
839
840 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
841 float_status *s);
842 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
843 float_status *s);
844
845 #define parts_div(A, B, S) \
846 PARTS_GENERIC_64_128(div, A)(A, B, S)
847
848 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
849 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
850
851 #define parts_sqrt(A, S, F) \
852 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
853
854 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
855 int scale, int frac_size);
856 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
857 int scale, int frac_size);
858
859 #define parts_round_to_int_normal(A, R, C, F) \
860 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
861
862 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
863 int scale, float_status *s,
864 const FloatFmt *fmt);
865 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
866 int scale, float_status *s,
867 const FloatFmt *fmt);
868
869 #define parts_round_to_int(A, R, C, S, F) \
870 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
871
872 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
873 int scale, int64_t min, int64_t max,
874 float_status *s);
875 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
876 int scale, int64_t min, int64_t max,
877 float_status *s);
878
879 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
880 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
881
882 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
883 int scale, uint64_t max,
884 float_status *s);
885 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
886 int scale, uint64_t max,
887 float_status *s);
888
889 #define parts_float_to_uint(P, R, Z, M, S) \
890 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
891
892 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
893 int scale, float_status *s);
894 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
895 int scale, float_status *s);
896
897 #define parts_sint_to_float(P, I, Z, S) \
898 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
899
900 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
901 int scale, float_status *s);
902 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
903 int scale, float_status *s);
904
905 #define parts_uint_to_float(P, I, Z, S) \
906 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
907
908 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
909 float_status *s, int flags);
910 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
911 float_status *s, int flags);
912
913 #define parts_minmax(A, B, S, F) \
914 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
915
916 static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
917 float_status *s, bool q);
918 static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
919 float_status *s, bool q);
920
921 #define parts_compare(A, B, S, Q) \
922 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
923
924 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
925 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
926
927 #define parts_scalbn(A, N, S) \
928 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
929
930 /*
931 * Helper functions for softfloat-parts.c.inc, per-size operations.
932 */
933
934 #define FRAC_GENERIC_64_128(NAME, P) \
935 QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
936
937 #define FRAC_GENERIC_64_128_256(NAME, P) \
938 QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
939 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
940
941 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
942 {
943 return uadd64_overflow(a->frac, b->frac, &r->frac);
944 }
945
946 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
947 {
948 bool c = 0;
949 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
950 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
951 return c;
952 }
953
954 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
955 {
956 bool c = 0;
957 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
958 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
959 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
960 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
961 return c;
962 }
963
964 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
965
966 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
967 {
968 return uadd64_overflow(a->frac, c, &r->frac);
969 }
970
971 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
972 {
973 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
974 return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
975 }
976
977 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
978
979 static void frac64_allones(FloatParts64 *a)
980 {
981 a->frac = -1;
982 }
983
984 static void frac128_allones(FloatParts128 *a)
985 {
986 a->frac_hi = a->frac_lo = -1;
987 }
988
989 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
990
991 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
992 {
993 return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
994 }
995
996 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
997 {
998 uint64_t ta = a->frac_hi, tb = b->frac_hi;
999 if (ta == tb) {
1000 ta = a->frac_lo, tb = b->frac_lo;
1001 if (ta == tb) {
1002 return 0;
1003 }
1004 }
1005 return ta < tb ? -1 : 1;
1006 }
1007
1008 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
1009
1010 static void frac64_clear(FloatParts64 *a)
1011 {
1012 a->frac = 0;
1013 }
1014
1015 static void frac128_clear(FloatParts128 *a)
1016 {
1017 a->frac_hi = a->frac_lo = 0;
1018 }
1019
1020 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
1021
1022 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
1023 {
1024 uint64_t n1, n0, r, q;
1025 bool ret;
1026
1027 /*
1028 * We want a 2*N / N-bit division to produce exactly an N-bit
1029 * result, so that we do not lose any precision and so that we
1030 * do not have to renormalize afterward. If A.frac < B.frac,
1031 * then division would produce an (N-1)-bit result; shift A left
1032 * by one to produce the an N-bit result, and return true to
1033 * decrement the exponent to match.
1034 *
1035 * The udiv_qrnnd algorithm that we're using requires normalization,
1036 * i.e. the msb of the denominator must be set, which is already true.
1037 */
1038 ret = a->frac < b->frac;
1039 if (ret) {
1040 n0 = a->frac;
1041 n1 = 0;
1042 } else {
1043 n0 = a->frac >> 1;
1044 n1 = a->frac << 63;
1045 }
1046 q = udiv_qrnnd(&r, n0, n1, b->frac);
1047
1048 /* Set lsb if there is a remainder, to set inexact. */
1049 a->frac = q | (r != 0);
1050
1051 return ret;
1052 }
1053
1054 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1055 {
1056 uint64_t q0, q1, a0, a1, b0, b1;
1057 uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1058 bool ret = false;
1059
1060 a0 = a->frac_hi, a1 = a->frac_lo;
1061 b0 = b->frac_hi, b1 = b->frac_lo;
1062
1063 ret = lt128(a0, a1, b0, b1);
1064 if (!ret) {
1065 a1 = shr_double(a0, a1, 1);
1066 a0 = a0 >> 1;
1067 }
1068
1069 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1070 q0 = estimateDiv128To64(a0, a1, b0);
1071
1072 /*
1073 * Estimate is high because B1 was not included (unless B1 == 0).
1074 * Reduce quotient and increase remainder until remainder is non-negative.
1075 * This loop will execute 0 to 2 times.
1076 */
1077 mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1078 sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1079 while (r0 != 0) {
1080 q0--;
1081 add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1082 }
1083
1084 /* Repeat using the remainder, producing a second word of quotient. */
1085 q1 = estimateDiv128To64(r1, r2, b0);
1086 mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1087 sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1088 while (r1 != 0) {
1089 q1--;
1090 add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1091 }
1092
1093 /* Any remainder indicates inexact; set sticky bit. */
1094 q1 |= (r2 | r3) != 0;
1095
1096 a->frac_hi = q0;
1097 a->frac_lo = q1;
1098 return ret;
1099 }
1100
1101 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1102
1103 static bool frac64_eqz(FloatParts64 *a)
1104 {
1105 return a->frac == 0;
1106 }
1107
1108 static bool frac128_eqz(FloatParts128 *a)
1109 {
1110 return (a->frac_hi | a->frac_lo) == 0;
1111 }
1112
1113 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1114
1115 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1116 {
1117 mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1118 }
1119
1120 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1121 {
1122 mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1123 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1124 }
1125
1126 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1127
1128 static void frac64_neg(FloatParts64 *a)
1129 {
1130 a->frac = -a->frac;
1131 }
1132
1133 static void frac128_neg(FloatParts128 *a)
1134 {
1135 bool c = 0;
1136 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1137 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1138 }
1139
1140 static void frac256_neg(FloatParts256 *a)
1141 {
1142 bool c = 0;
1143 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1144 a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1145 a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1146 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1147 }
1148
1149 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1150
1151 static int frac64_normalize(FloatParts64 *a)
1152 {
1153 if (a->frac) {
1154 int shift = clz64(a->frac);
1155 a->frac <<= shift;
1156 return shift;
1157 }
1158 return 64;
1159 }
1160
1161 static int frac128_normalize(FloatParts128 *a)
1162 {
1163 if (a->frac_hi) {
1164 int shl = clz64(a->frac_hi);
1165 a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1166 a->frac_lo <<= shl;
1167 return shl;
1168 } else if (a->frac_lo) {
1169 int shl = clz64(a->frac_lo);
1170 a->frac_hi = a->frac_lo << shl;
1171 a->frac_lo = 0;
1172 return shl + 64;
1173 }
1174 return 128;
1175 }
1176
1177 static int frac256_normalize(FloatParts256 *a)
1178 {
1179 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1180 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1181 int ret, shl;
1182
1183 if (likely(a0)) {
1184 shl = clz64(a0);
1185 if (shl == 0) {
1186 return 0;
1187 }
1188 ret = shl;
1189 } else {
1190 if (a1) {
1191 ret = 64;
1192 a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1193 } else if (a2) {
1194 ret = 128;
1195 a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1196 } else if (a3) {
1197 ret = 192;
1198 a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1199 } else {
1200 ret = 256;
1201 a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1202 goto done;
1203 }
1204 shl = clz64(a0);
1205 if (shl == 0) {
1206 goto done;
1207 }
1208 ret += shl;
1209 }
1210
1211 a0 = shl_double(a0, a1, shl);
1212 a1 = shl_double(a1, a2, shl);
1213 a2 = shl_double(a2, a3, shl);
1214 a3 <<= shl;
1215
1216 done:
1217 a->frac_hi = a0;
1218 a->frac_hm = a1;
1219 a->frac_lm = a2;
1220 a->frac_lo = a3;
1221 return ret;
1222 }
1223
1224 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1225
1226 static void frac64_shl(FloatParts64 *a, int c)
1227 {
1228 a->frac <<= c;
1229 }
1230
1231 static void frac128_shl(FloatParts128 *a, int c)
1232 {
1233 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1234
1235 if (c & 64) {
1236 a0 = a1, a1 = 0;
1237 }
1238
1239 c &= 63;
1240 if (c) {
1241 a0 = shl_double(a0, a1, c);
1242 a1 = a1 << c;
1243 }
1244
1245 a->frac_hi = a0;
1246 a->frac_lo = a1;
1247 }
1248
1249 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1250
1251 static void frac64_shr(FloatParts64 *a, int c)
1252 {
1253 a->frac >>= c;
1254 }
1255
1256 static void frac128_shr(FloatParts128 *a, int c)
1257 {
1258 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1259
1260 if (c & 64) {
1261 a1 = a0, a0 = 0;
1262 }
1263
1264 c &= 63;
1265 if (c) {
1266 a1 = shr_double(a0, a1, c);
1267 a0 = a0 >> c;
1268 }
1269
1270 a->frac_hi = a0;
1271 a->frac_lo = a1;
1272 }
1273
1274 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1275
1276 static void frac64_shrjam(FloatParts64 *a, int c)
1277 {
1278 uint64_t a0 = a->frac;
1279
1280 if (likely(c != 0)) {
1281 if (likely(c < 64)) {
1282 a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1283 } else {
1284 a0 = a0 != 0;
1285 }
1286 a->frac = a0;
1287 }
1288 }
1289
1290 static void frac128_shrjam(FloatParts128 *a, int c)
1291 {
1292 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1293 uint64_t sticky = 0;
1294
1295 if (unlikely(c == 0)) {
1296 return;
1297 } else if (likely(c < 64)) {
1298 /* nothing */
1299 } else if (likely(c < 128)) {
1300 sticky = a1;
1301 a1 = a0;
1302 a0 = 0;
1303 c &= 63;
1304 if (c == 0) {
1305 goto done;
1306 }
1307 } else {
1308 sticky = a0 | a1;
1309 a0 = a1 = 0;
1310 goto done;
1311 }
1312
1313 sticky |= shr_double(a1, 0, c);
1314 a1 = shr_double(a0, a1, c);
1315 a0 = a0 >> c;
1316
1317 done:
1318 a->frac_lo = a1 | (sticky != 0);
1319 a->frac_hi = a0;
1320 }
1321
1322 static void frac256_shrjam(FloatParts256 *a, int c)
1323 {
1324 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1325 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1326 uint64_t sticky = 0;
1327
1328 if (unlikely(c == 0)) {
1329 return;
1330 } else if (likely(c < 64)) {
1331 /* nothing */
1332 } else if (likely(c < 256)) {
1333 if (unlikely(c & 128)) {
1334 sticky |= a2 | a3;
1335 a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1336 }
1337 if (unlikely(c & 64)) {
1338 sticky |= a3;
1339 a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1340 }
1341 c &= 63;
1342 if (c == 0) {
1343 goto done;
1344 }
1345 } else {
1346 sticky = a0 | a1 | a2 | a3;
1347 a0 = a1 = a2 = a3 = 0;
1348 goto done;
1349 }
1350
1351 sticky |= shr_double(a3, 0, c);
1352 a3 = shr_double(a2, a3, c);
1353 a2 = shr_double(a1, a2, c);
1354 a1 = shr_double(a0, a1, c);
1355 a0 = a0 >> c;
1356
1357 done:
1358 a->frac_lo = a3 | (sticky != 0);
1359 a->frac_lm = a2;
1360 a->frac_hm = a1;
1361 a->frac_hi = a0;
1362 }
1363
1364 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1365
1366 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1367 {
1368 return usub64_overflow(a->frac, b->frac, &r->frac);
1369 }
1370
1371 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1372 {
1373 bool c = 0;
1374 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1375 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1376 return c;
1377 }
1378
1379 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1380 {
1381 bool c = 0;
1382 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1383 r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1384 r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1385 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1386 return c;
1387 }
1388
1389 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1390
1391 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1392 {
1393 r->frac = a->frac_hi | (a->frac_lo != 0);
1394 }
1395
1396 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1397 {
1398 r->frac_hi = a->frac_hi;
1399 r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1400 }
1401
1402 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1403
1404 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1405 {
1406 r->frac_hi = a->frac;
1407 r->frac_lo = 0;
1408 }
1409
1410 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1411 {
1412 r->frac_hi = a->frac_hi;
1413 r->frac_hm = a->frac_lo;
1414 r->frac_lm = 0;
1415 r->frac_lo = 0;
1416 }
1417
1418 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1419
1420 /*
1421 * Reciprocal sqrt table. 1 bit of exponent, 6-bits of mantessa.
1422 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1423 * and thus MIT licenced.
1424 */
1425 static const uint16_t rsqrt_tab[128] = {
1426 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1427 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1428 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1429 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1430 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1431 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1432 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1433 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1434 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1435 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1436 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1437 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1438 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1439 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1440 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1441 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1442 };
1443
1444 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1445 #define FloatPartsN glue(FloatParts,N)
1446 #define FloatPartsW glue(FloatParts,W)
1447
1448 #define N 64
1449 #define W 128
1450
1451 #include "softfloat-parts-addsub.c.inc"
1452 #include "softfloat-parts.c.inc"
1453
1454 #undef N
1455 #undef W
1456 #define N 128
1457 #define W 256
1458
1459 #include "softfloat-parts-addsub.c.inc"
1460 #include "softfloat-parts.c.inc"
1461
1462 #undef N
1463 #undef W
1464 #define N 256
1465
1466 #include "softfloat-parts-addsub.c.inc"
1467
1468 #undef N
1469 #undef W
1470 #undef partsN
1471 #undef FloatPartsN
1472 #undef FloatPartsW
1473
1474 /*
1475 * Pack/unpack routines with a specific FloatFmt.
1476 */
1477
1478 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1479 float_status *s, const FloatFmt *params)
1480 {
1481 float16_unpack_raw(p, f);
1482 parts_canonicalize(p, s, params);
1483 }
1484
1485 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1486 float_status *s)
1487 {
1488 float16a_unpack_canonical(p, f, s, &float16_params);
1489 }
1490
1491 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1492 float_status *s)
1493 {
1494 bfloat16_unpack_raw(p, f);
1495 parts_canonicalize(p, s, &bfloat16_params);
1496 }
1497
1498 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1499 float_status *s,
1500 const FloatFmt *params)
1501 {
1502 parts_uncanon(p, s, params);
1503 return float16_pack_raw(p);
1504 }
1505
1506 static float16 float16_round_pack_canonical(FloatParts64 *p,
1507 float_status *s)
1508 {
1509 return float16a_round_pack_canonical(p, s, &float16_params);
1510 }
1511
1512 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1513 float_status *s)
1514 {
1515 parts_uncanon(p, s, &bfloat16_params);
1516 return bfloat16_pack_raw(p);
1517 }
1518
1519 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1520 float_status *s)
1521 {
1522 float32_unpack_raw(p, f);
1523 parts_canonicalize(p, s, &float32_params);
1524 }
1525
1526 static float32 float32_round_pack_canonical(FloatParts64 *p,
1527 float_status *s)
1528 {
1529 parts_uncanon(p, s, &float32_params);
1530 return float32_pack_raw(p);
1531 }
1532
1533 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1534 float_status *s)
1535 {
1536 float64_unpack_raw(p, f);
1537 parts_canonicalize(p, s, &float64_params);
1538 }
1539
1540 static float64 float64_round_pack_canonical(FloatParts64 *p,
1541 float_status *s)
1542 {
1543 parts_uncanon(p, s, &float64_params);
1544 return float64_pack_raw(p);
1545 }
1546
1547 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1548 float_status *s)
1549 {
1550 float128_unpack_raw(p, f);
1551 parts_canonicalize(p, s, &float128_params);
1552 }
1553
1554 static float128 float128_round_pack_canonical(FloatParts128 *p,
1555 float_status *s)
1556 {
1557 parts_uncanon(p, s, &float128_params);
1558 return float128_pack_raw(p);
1559 }
1560
1561 /* Returns false if the encoding is invalid. */
1562 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1563 float_status *s)
1564 {
1565 /* Ensure rounding precision is set before beginning. */
1566 switch (s->floatx80_rounding_precision) {
1567 case floatx80_precision_x:
1568 case floatx80_precision_d:
1569 case floatx80_precision_s:
1570 break;
1571 default:
1572 g_assert_not_reached();
1573 }
1574
1575 if (unlikely(floatx80_invalid_encoding(f))) {
1576 float_raise(float_flag_invalid, s);
1577 return false;
1578 }
1579
1580 floatx80_unpack_raw(p, f);
1581
1582 if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1583 parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1584 } else {
1585 /* The explicit integer bit is ignored, after invalid checks. */
1586 p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1587 p->cls = (p->frac_hi == 0 ? float_class_inf
1588 : parts_is_snan_frac(p->frac_hi, s)
1589 ? float_class_snan : float_class_qnan);
1590 }
1591 return true;
1592 }
1593
1594 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1595 float_status *s)
1596 {
1597 const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1598 uint64_t frac;
1599 int exp;
1600
1601 switch (p->cls) {
1602 case float_class_normal:
1603 if (s->floatx80_rounding_precision == floatx80_precision_x) {
1604 parts_uncanon_normal(p, s, fmt);
1605 frac = p->frac_hi;
1606 exp = p->exp;
1607 } else {
1608 FloatParts64 p64;
1609
1610 p64.sign = p->sign;
1611 p64.exp = p->exp;
1612 frac_truncjam(&p64, p);
1613 parts_uncanon_normal(&p64, s, fmt);
1614 frac = p64.frac;
1615 exp = p64.exp;
1616 }
1617 if (exp != fmt->exp_max) {
1618 break;
1619 }
1620 /* rounded to inf -- fall through to set frac correctly */
1621
1622 case float_class_inf:
1623 /* x86 and m68k differ in the setting of the integer bit. */
1624 frac = floatx80_infinity_low;
1625 exp = fmt->exp_max;
1626 break;
1627
1628 case float_class_zero:
1629 frac = 0;
1630 exp = 0;
1631 break;
1632
1633 case float_class_snan:
1634 case float_class_qnan:
1635 /* NaNs have the integer bit set. */
1636 frac = p->frac_hi | (1ull << 63);
1637 exp = fmt->exp_max;
1638 break;
1639
1640 default:
1641 g_assert_not_reached();
1642 }
1643
1644 return packFloatx80(p->sign, exp, frac);
1645 }
1646
1647 /*
1648 * Addition and subtraction
1649 */
1650
1651 static float16 QEMU_FLATTEN
1652 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1653 {
1654 FloatParts64 pa, pb, *pr;
1655
1656 float16_unpack_canonical(&pa, a, status);
1657 float16_unpack_canonical(&pb, b, status);
1658 pr = parts_addsub(&pa, &pb, status, subtract);
1659
1660 return float16_round_pack_canonical(pr, status);
1661 }
1662
1663 float16 float16_add(float16 a, float16 b, float_status *status)
1664 {
1665 return float16_addsub(a, b, status, false);
1666 }
1667
1668 float16 float16_sub(float16 a, float16 b, float_status *status)
1669 {
1670 return float16_addsub(a, b, status, true);
1671 }
1672
1673 static float32 QEMU_SOFTFLOAT_ATTR
1674 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1675 {
1676 FloatParts64 pa, pb, *pr;
1677
1678 float32_unpack_canonical(&pa, a, status);
1679 float32_unpack_canonical(&pb, b, status);
1680 pr = parts_addsub(&pa, &pb, status, subtract);
1681
1682 return float32_round_pack_canonical(pr, status);
1683 }
1684
1685 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1686 {
1687 return soft_f32_addsub(a, b, status, false);
1688 }
1689
1690 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1691 {
1692 return soft_f32_addsub(a, b, status, true);
1693 }
1694
1695 static float64 QEMU_SOFTFLOAT_ATTR
1696 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1697 {
1698 FloatParts64 pa, pb, *pr;
1699
1700 float64_unpack_canonical(&pa, a, status);
1701 float64_unpack_canonical(&pb, b, status);
1702 pr = parts_addsub(&pa, &pb, status, subtract);
1703
1704 return float64_round_pack_canonical(pr, status);
1705 }
1706
1707 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1708 {
1709 return soft_f64_addsub(a, b, status, false);
1710 }
1711
1712 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1713 {
1714 return soft_f64_addsub(a, b, status, true);
1715 }
1716
1717 static float hard_f32_add(float a, float b)
1718 {
1719 return a + b;
1720 }
1721
1722 static float hard_f32_sub(float a, float b)
1723 {
1724 return a - b;
1725 }
1726
1727 static double hard_f64_add(double a, double b)
1728 {
1729 return a + b;
1730 }
1731
1732 static double hard_f64_sub(double a, double b)
1733 {
1734 return a - b;
1735 }
1736
1737 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1738 {
1739 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1740 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1741 }
1742 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1743 }
1744
1745 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1746 {
1747 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1748 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1749 } else {
1750 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1751 }
1752 }
1753
1754 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1755 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1756 {
1757 return float32_gen2(a, b, s, hard, soft,
1758 f32_is_zon2, f32_addsubmul_post);
1759 }
1760
1761 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1762 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1763 {
1764 return float64_gen2(a, b, s, hard, soft,
1765 f64_is_zon2, f64_addsubmul_post);
1766 }
1767
1768 float32 QEMU_FLATTEN
1769 float32_add(float32 a, float32 b, float_status *s)
1770 {
1771 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1772 }
1773
1774 float32 QEMU_FLATTEN
1775 float32_sub(float32 a, float32 b, float_status *s)
1776 {
1777 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1778 }
1779
1780 float64 QEMU_FLATTEN
1781 float64_add(float64 a, float64 b, float_status *s)
1782 {
1783 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1784 }
1785
1786 float64 QEMU_FLATTEN
1787 float64_sub(float64 a, float64 b, float_status *s)
1788 {
1789 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1790 }
1791
1792 static bfloat16 QEMU_FLATTEN
1793 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1794 {
1795 FloatParts64 pa, pb, *pr;
1796
1797 bfloat16_unpack_canonical(&pa, a, status);
1798 bfloat16_unpack_canonical(&pb, b, status);
1799 pr = parts_addsub(&pa, &pb, status, subtract);
1800
1801 return bfloat16_round_pack_canonical(pr, status);
1802 }
1803
1804 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1805 {
1806 return bfloat16_addsub(a, b, status, false);
1807 }
1808
1809 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1810 {
1811 return bfloat16_addsub(a, b, status, true);
1812 }
1813
1814 static float128 QEMU_FLATTEN
1815 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1816 {
1817 FloatParts128 pa, pb, *pr;
1818
1819 float128_unpack_canonical(&pa, a, status);
1820 float128_unpack_canonical(&pb, b, status);
1821 pr = parts_addsub(&pa, &pb, status, subtract);
1822
1823 return float128_round_pack_canonical(pr, status);
1824 }
1825
1826 float128 float128_add(float128 a, float128 b, float_status *status)
1827 {
1828 return float128_addsub(a, b, status, false);
1829 }
1830
1831 float128 float128_sub(float128 a, float128 b, float_status *status)
1832 {
1833 return float128_addsub(a, b, status, true);
1834 }
1835
1836 static floatx80 QEMU_FLATTEN
1837 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
1838 {
1839 FloatParts128 pa, pb, *pr;
1840
1841 if (!floatx80_unpack_canonical(&pa, a, status) ||
1842 !floatx80_unpack_canonical(&pb, b, status)) {
1843 return floatx80_default_nan(status);
1844 }
1845
1846 pr = parts_addsub(&pa, &pb, status, subtract);
1847 return floatx80_round_pack_canonical(pr, status);
1848 }
1849
1850 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
1851 {
1852 return floatx80_addsub(a, b, status, false);
1853 }
1854
1855 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
1856 {
1857 return floatx80_addsub(a, b, status, true);
1858 }
1859
1860 /*
1861 * Multiplication
1862 */
1863
1864 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1865 {
1866 FloatParts64 pa, pb, *pr;
1867
1868 float16_unpack_canonical(&pa, a, status);
1869 float16_unpack_canonical(&pb, b, status);
1870 pr = parts_mul(&pa, &pb, status);
1871
1872 return float16_round_pack_canonical(pr, status);
1873 }
1874
1875 static float32 QEMU_SOFTFLOAT_ATTR
1876 soft_f32_mul(float32 a, float32 b, float_status *status)
1877 {
1878 FloatParts64 pa, pb, *pr;
1879
1880 float32_unpack_canonical(&pa, a, status);
1881 float32_unpack_canonical(&pb, b, status);
1882 pr = parts_mul(&pa, &pb, status);
1883
1884 return float32_round_pack_canonical(pr, status);
1885 }
1886
1887 static float64 QEMU_SOFTFLOAT_ATTR
1888 soft_f64_mul(float64 a, float64 b, float_status *status)
1889 {
1890 FloatParts64 pa, pb, *pr;
1891
1892 float64_unpack_canonical(&pa, a, status);
1893 float64_unpack_canonical(&pb, b, status);
1894 pr = parts_mul(&pa, &pb, status);
1895
1896 return float64_round_pack_canonical(pr, status);
1897 }
1898
1899 static float hard_f32_mul(float a, float b)
1900 {
1901 return a * b;
1902 }
1903
1904 static double hard_f64_mul(double a, double b)
1905 {
1906 return a * b;
1907 }
1908
1909 float32 QEMU_FLATTEN
1910 float32_mul(float32 a, float32 b, float_status *s)
1911 {
1912 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
1913 f32_is_zon2, f32_addsubmul_post);
1914 }
1915
1916 float64 QEMU_FLATTEN
1917 float64_mul(float64 a, float64 b, float_status *s)
1918 {
1919 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
1920 f64_is_zon2, f64_addsubmul_post);
1921 }
1922
1923 bfloat16 QEMU_FLATTEN
1924 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1925 {
1926 FloatParts64 pa, pb, *pr;
1927
1928 bfloat16_unpack_canonical(&pa, a, status);
1929 bfloat16_unpack_canonical(&pb, b, status);
1930 pr = parts_mul(&pa, &pb, status);
1931
1932 return bfloat16_round_pack_canonical(pr, status);
1933 }
1934
1935 float128 QEMU_FLATTEN
1936 float128_mul(float128 a, float128 b, float_status *status)
1937 {
1938 FloatParts128 pa, pb, *pr;
1939
1940 float128_unpack_canonical(&pa, a, status);
1941 float128_unpack_canonical(&pb, b, status);
1942 pr = parts_mul(&pa, &pb, status);
1943
1944 return float128_round_pack_canonical(pr, status);
1945 }
1946
1947 floatx80 QEMU_FLATTEN
1948 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
1949 {
1950 FloatParts128 pa, pb, *pr;
1951
1952 if (!floatx80_unpack_canonical(&pa, a, status) ||
1953 !floatx80_unpack_canonical(&pb, b, status)) {
1954 return floatx80_default_nan(status);
1955 }
1956
1957 pr = parts_mul(&pa, &pb, status);
1958 return floatx80_round_pack_canonical(pr, status);
1959 }
1960
1961 /*
1962 * Fused multiply-add
1963 */
1964
1965 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1966 int flags, float_status *status)
1967 {
1968 FloatParts64 pa, pb, pc, *pr;
1969
1970 float16_unpack_canonical(&pa, a, status);
1971 float16_unpack_canonical(&pb, b, status);
1972 float16_unpack_canonical(&pc, c, status);
1973 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1974
1975 return float16_round_pack_canonical(pr, status);
1976 }
1977
1978 static float32 QEMU_SOFTFLOAT_ATTR
1979 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1980 float_status *status)
1981 {
1982 FloatParts64 pa, pb, pc, *pr;
1983
1984 float32_unpack_canonical(&pa, a, status);
1985 float32_unpack_canonical(&pb, b, status);
1986 float32_unpack_canonical(&pc, c, status);
1987 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1988
1989 return float32_round_pack_canonical(pr, status);
1990 }
1991
1992 static float64 QEMU_SOFTFLOAT_ATTR
1993 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1994 float_status *status)
1995 {
1996 FloatParts64 pa, pb, pc, *pr;
1997
1998 float64_unpack_canonical(&pa, a, status);
1999 float64_unpack_canonical(&pb, b, status);
2000 float64_unpack_canonical(&pc, c, status);
2001 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2002
2003 return float64_round_pack_canonical(pr, status);
2004 }
2005
2006 static bool force_soft_fma;
2007
2008 float32 QEMU_FLATTEN
2009 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2010 {
2011 union_float32 ua, ub, uc, ur;
2012
2013 ua.s = xa;
2014 ub.s = xb;
2015 uc.s = xc;
2016
2017 if (unlikely(!can_use_fpu(s))) {
2018 goto soft;
2019 }
2020 if (unlikely(flags & float_muladd_halve_result)) {
2021 goto soft;
2022 }
2023
2024 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2025 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2026 goto soft;
2027 }
2028
2029 if (unlikely(force_soft_fma)) {
2030 goto soft;
2031 }
2032
2033 /*
2034 * When (a || b) == 0, there's no need to check for under/over flow,
2035 * since we know the addend is (normal || 0) and the product is 0.
2036 */
2037 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2038 union_float32 up;
2039 bool prod_sign;
2040
2041 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2042 prod_sign ^= !!(flags & float_muladd_negate_product);
2043 up.s = float32_set_sign(float32_zero, prod_sign);
2044
2045 if (flags & float_muladd_negate_c) {
2046 uc.h = -uc.h;
2047 }
2048 ur.h = up.h + uc.h;
2049 } else {
2050 union_float32 ua_orig = ua;
2051 union_float32 uc_orig = uc;
2052
2053 if (flags & float_muladd_negate_product) {
2054 ua.h = -ua.h;
2055 }
2056 if (flags & float_muladd_negate_c) {
2057 uc.h = -uc.h;
2058 }
2059
2060 ur.h = fmaf(ua.h, ub.h, uc.h);
2061
2062 if (unlikely(f32_is_inf(ur))) {
2063 float_raise(float_flag_overflow, s);
2064 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2065 ua = ua_orig;
2066 uc = uc_orig;
2067 goto soft;
2068 }
2069 }
2070 if (flags & float_muladd_negate_result) {
2071 return float32_chs(ur.s);
2072 }
2073 return ur.s;
2074
2075 soft:
2076 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2077 }
2078
2079 float64 QEMU_FLATTEN
2080 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2081 {
2082 union_float64 ua, ub, uc, ur;
2083
2084 ua.s = xa;
2085 ub.s = xb;
2086 uc.s = xc;
2087
2088 if (unlikely(!can_use_fpu(s))) {
2089 goto soft;
2090 }
2091 if (unlikely(flags & float_muladd_halve_result)) {
2092 goto soft;
2093 }
2094
2095 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2096 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2097 goto soft;
2098 }
2099
2100 if (unlikely(force_soft_fma)) {
2101 goto soft;
2102 }
2103
2104 /*
2105 * When (a || b) == 0, there's no need to check for under/over flow,
2106 * since we know the addend is (normal || 0) and the product is 0.
2107 */
2108 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2109 union_float64 up;
2110 bool prod_sign;
2111
2112 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2113 prod_sign ^= !!(flags & float_muladd_negate_product);
2114 up.s = float64_set_sign(float64_zero, prod_sign);
2115
2116 if (flags & float_muladd_negate_c) {
2117 uc.h = -uc.h;
2118 }
2119 ur.h = up.h + uc.h;
2120 } else {
2121 union_float64 ua_orig = ua;
2122 union_float64 uc_orig = uc;
2123
2124 if (flags & float_muladd_negate_product) {
2125 ua.h = -ua.h;
2126 }
2127 if (flags & float_muladd_negate_c) {
2128 uc.h = -uc.h;
2129 }
2130
2131 ur.h = fma(ua.h, ub.h, uc.h);
2132
2133 if (unlikely(f64_is_inf(ur))) {
2134 float_raise(float_flag_overflow, s);
2135 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2136 ua = ua_orig;
2137 uc = uc_orig;
2138 goto soft;
2139 }
2140 }
2141 if (flags & float_muladd_negate_result) {
2142 return float64_chs(ur.s);
2143 }
2144 return ur.s;
2145
2146 soft:
2147 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2148 }
2149
2150 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2151 int flags, float_status *status)
2152 {
2153 FloatParts64 pa, pb, pc, *pr;
2154
2155 bfloat16_unpack_canonical(&pa, a, status);
2156 bfloat16_unpack_canonical(&pb, b, status);
2157 bfloat16_unpack_canonical(&pc, c, status);
2158 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2159
2160 return bfloat16_round_pack_canonical(pr, status);
2161 }
2162
2163 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2164 int flags, float_status *status)
2165 {
2166 FloatParts128 pa, pb, pc, *pr;
2167
2168 float128_unpack_canonical(&pa, a, status);
2169 float128_unpack_canonical(&pb, b, status);
2170 float128_unpack_canonical(&pc, c, status);
2171 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2172
2173 return float128_round_pack_canonical(pr, status);
2174 }
2175
2176 /*
2177 * Division
2178 */
2179
2180 float16 float16_div(float16 a, float16 b, float_status *status)
2181 {
2182 FloatParts64 pa, pb, *pr;
2183
2184 float16_unpack_canonical(&pa, a, status);
2185 float16_unpack_canonical(&pb, b, status);
2186 pr = parts_div(&pa, &pb, status);
2187
2188 return float16_round_pack_canonical(pr, status);
2189 }
2190
2191 static float32 QEMU_SOFTFLOAT_ATTR
2192 soft_f32_div(float32 a, float32 b, float_status *status)
2193 {
2194 FloatParts64 pa, pb, *pr;
2195
2196 float32_unpack_canonical(&pa, a, status);
2197 float32_unpack_canonical(&pb, b, status);
2198 pr = parts_div(&pa, &pb, status);
2199
2200 return float32_round_pack_canonical(pr, status);
2201 }
2202
2203 static float64 QEMU_SOFTFLOAT_ATTR
2204 soft_f64_div(float64 a, float64 b, float_status *status)
2205 {
2206 FloatParts64 pa, pb, *pr;
2207
2208 float64_unpack_canonical(&pa, a, status);
2209 float64_unpack_canonical(&pb, b, status);
2210 pr = parts_div(&pa, &pb, status);
2211
2212 return float64_round_pack_canonical(pr, status);
2213 }
2214
2215 static float hard_f32_div(float a, float b)
2216 {
2217 return a / b;
2218 }
2219
2220 static double hard_f64_div(double a, double b)
2221 {
2222 return a / b;
2223 }
2224
2225 static bool f32_div_pre(union_float32 a, union_float32 b)
2226 {
2227 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2228 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2229 fpclassify(b.h) == FP_NORMAL;
2230 }
2231 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2232 }
2233
2234 static bool f64_div_pre(union_float64 a, union_float64 b)
2235 {
2236 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2237 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2238 fpclassify(b.h) == FP_NORMAL;
2239 }
2240 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2241 }
2242
2243 static bool f32_div_post(union_float32 a, union_float32 b)
2244 {
2245 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2246 return fpclassify(a.h) != FP_ZERO;
2247 }
2248 return !float32_is_zero(a.s);
2249 }
2250
2251 static bool f64_div_post(union_float64 a, union_float64 b)
2252 {
2253 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2254 return fpclassify(a.h) != FP_ZERO;
2255 }
2256 return !float64_is_zero(a.s);
2257 }
2258
2259 float32 QEMU_FLATTEN
2260 float32_div(float32 a, float32 b, float_status *s)
2261 {
2262 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2263 f32_div_pre, f32_div_post);
2264 }
2265
2266 float64 QEMU_FLATTEN
2267 float64_div(float64 a, float64 b, float_status *s)
2268 {
2269 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2270 f64_div_pre, f64_div_post);
2271 }
2272
2273 bfloat16 QEMU_FLATTEN
2274 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2275 {
2276 FloatParts64 pa, pb, *pr;
2277
2278 bfloat16_unpack_canonical(&pa, a, status);
2279 bfloat16_unpack_canonical(&pb, b, status);
2280 pr = parts_div(&pa, &pb, status);
2281
2282 return bfloat16_round_pack_canonical(pr, status);
2283 }
2284
2285 float128 QEMU_FLATTEN
2286 float128_div(float128 a, float128 b, float_status *status)
2287 {
2288 FloatParts128 pa, pb, *pr;
2289
2290 float128_unpack_canonical(&pa, a, status);
2291 float128_unpack_canonical(&pb, b, status);
2292 pr = parts_div(&pa, &pb, status);
2293
2294 return float128_round_pack_canonical(pr, status);
2295 }
2296
2297 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2298 {
2299 FloatParts128 pa, pb, *pr;
2300
2301 if (!floatx80_unpack_canonical(&pa, a, status) ||
2302 !floatx80_unpack_canonical(&pb, b, status)) {
2303 return floatx80_default_nan(status);
2304 }
2305
2306 pr = parts_div(&pa, &pb, status);
2307 return floatx80_round_pack_canonical(pr, status);
2308 }
2309
2310 /*
2311 * Float to Float conversions
2312 *
2313 * Returns the result of converting one float format to another. The
2314 * conversion is performed according to the IEC/IEEE Standard for
2315 * Binary Floating-Point Arithmetic.
2316 *
2317 * Usually this only needs to take care of raising invalid exceptions
2318 * and handling the conversion on NaNs.
2319 */
2320
2321 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2322 {
2323 switch (a->cls) {
2324 case float_class_qnan:
2325 case float_class_snan:
2326 /*
2327 * There is no NaN in the destination format. Raise Invalid
2328 * and return a zero with the sign of the input NaN.
2329 */
2330 float_raise(float_flag_invalid, s);
2331 a->cls = float_class_zero;
2332 break;
2333
2334 case float_class_inf:
2335 /*
2336 * There is no Inf in the destination format. Raise Invalid
2337 * and return the maximum normal with the correct sign.
2338 */
2339 float_raise(float_flag_invalid, s);
2340 a->cls = float_class_normal;
2341 a->exp = float16_params_ahp.exp_max;
2342 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2343 float16_params_ahp.frac_size + 1);
2344 break;
2345
2346 case float_class_normal:
2347 case float_class_zero:
2348 break;
2349
2350 default:
2351 g_assert_not_reached();
2352 }
2353 }
2354
2355 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2356 {
2357 if (is_nan(a->cls)) {
2358 parts_return_nan(a, s);
2359 }
2360 }
2361
2362 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2363 {
2364 if (is_nan(a->cls)) {
2365 parts_return_nan(a, s);
2366 }
2367 }
2368
2369 #define parts_float_to_float(P, S) \
2370 PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2371
2372 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2373 float_status *s)
2374 {
2375 a->cls = b->cls;
2376 a->sign = b->sign;
2377 a->exp = b->exp;
2378
2379 if (a->cls == float_class_normal) {
2380 frac_truncjam(a, b);
2381 } else if (is_nan(a->cls)) {
2382 /* Discard the low bits of the NaN. */
2383 a->frac = b->frac_hi;
2384 parts_return_nan(a, s);
2385 }
2386 }
2387
2388 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2389 float_status *s)
2390 {
2391 a->cls = b->cls;
2392 a->sign = b->sign;
2393 a->exp = b->exp;
2394 frac_widen(a, b);
2395
2396 if (is_nan(a->cls)) {
2397 parts_return_nan(a, s);
2398 }
2399 }
2400
2401 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2402 {
2403 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2404 FloatParts64 p;
2405
2406 float16a_unpack_canonical(&p, a, s, fmt16);
2407 parts_float_to_float(&p, s);
2408 return float32_round_pack_canonical(&p, s);
2409 }
2410
2411 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2412 {
2413 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2414 FloatParts64 p;
2415
2416 float16a_unpack_canonical(&p, a, s, fmt16);
2417 parts_float_to_float(&p, s);
2418 return float64_round_pack_canonical(&p, s);
2419 }
2420
2421 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2422 {
2423 FloatParts64 p;
2424 const FloatFmt *fmt;
2425
2426 float32_unpack_canonical(&p, a, s);
2427 if (ieee) {
2428 parts_float_to_float(&p, s);
2429 fmt = &float16_params;
2430 } else {
2431 parts_float_to_ahp(&p, s);
2432 fmt = &float16_params_ahp;
2433 }
2434 return float16a_round_pack_canonical(&p, s, fmt);
2435 }
2436
2437 static float64 QEMU_SOFTFLOAT_ATTR
2438 soft_float32_to_float64(float32 a, float_status *s)
2439 {
2440 FloatParts64 p;
2441
2442 float32_unpack_canonical(&p, a, s);
2443 parts_float_to_float(&p, s);
2444 return float64_round_pack_canonical(&p, s);
2445 }
2446
2447 float64 float32_to_float64(float32 a, float_status *s)
2448 {
2449 if (likely(float32_is_normal(a))) {
2450 /* Widening conversion can never produce inexact results. */
2451 union_float32 uf;
2452 union_float64 ud;
2453 uf.s = a;
2454 ud.h = uf.h;
2455 return ud.s;
2456 } else if (float32_is_zero(a)) {
2457 return float64_set_sign(float64_zero, float32_is_neg(a));
2458 } else {
2459 return soft_float32_to_float64(a, s);
2460 }
2461 }
2462
2463 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2464 {
2465 FloatParts64 p;
2466 const FloatFmt *fmt;
2467
2468 float64_unpack_canonical(&p, a, s);
2469 if (ieee) {
2470 parts_float_to_float(&p, s);
2471 fmt = &float16_params;
2472 } else {
2473 parts_float_to_ahp(&p, s);
2474 fmt = &float16_params_ahp;
2475 }
2476 return float16a_round_pack_canonical(&p, s, fmt);
2477 }
2478
2479 float32 float64_to_float32(float64 a, float_status *s)
2480 {
2481 FloatParts64 p;
2482
2483 float64_unpack_canonical(&p, a, s);
2484 parts_float_to_float(&p, s);
2485 return float32_round_pack_canonical(&p, s);
2486 }
2487
2488 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2489 {
2490 FloatParts64 p;
2491
2492 bfloat16_unpack_canonical(&p, a, s);
2493 parts_float_to_float(&p, s);
2494 return float32_round_pack_canonical(&p, s);
2495 }
2496
2497 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2498 {
2499 FloatParts64 p;
2500
2501 bfloat16_unpack_canonical(&p, a, s);
2502 parts_float_to_float(&p, s);
2503 return float64_round_pack_canonical(&p, s);
2504 }
2505
2506 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2507 {
2508 FloatParts64 p;
2509
2510 float32_unpack_canonical(&p, a, s);
2511 parts_float_to_float(&p, s);
2512 return bfloat16_round_pack_canonical(&p, s);
2513 }
2514
2515 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2516 {
2517 FloatParts64 p;
2518
2519 float64_unpack_canonical(&p, a, s);
2520 parts_float_to_float(&p, s);
2521 return bfloat16_round_pack_canonical(&p, s);
2522 }
2523
2524 float32 float128_to_float32(float128 a, float_status *s)
2525 {
2526 FloatParts64 p64;
2527 FloatParts128 p128;
2528
2529 float128_unpack_canonical(&p128, a, s);
2530 parts_float_to_float_narrow(&p64, &p128, s);
2531 return float32_round_pack_canonical(&p64, s);
2532 }
2533
2534 float64 float128_to_float64(float128 a, float_status *s)
2535 {
2536 FloatParts64 p64;
2537 FloatParts128 p128;
2538
2539 float128_unpack_canonical(&p128, a, s);
2540 parts_float_to_float_narrow(&p64, &p128, s);
2541 return float64_round_pack_canonical(&p64, s);
2542 }
2543
2544 float128 float32_to_float128(float32 a, float_status *s)
2545 {
2546 FloatParts64 p64;
2547 FloatParts128 p128;
2548
2549 float32_unpack_canonical(&p64, a, s);
2550 parts_float_to_float_widen(&p128, &p64, s);
2551 return float128_round_pack_canonical(&p128, s);
2552 }
2553
2554 float128 float64_to_float128(float64 a, float_status *s)
2555 {
2556 FloatParts64 p64;
2557 FloatParts128 p128;
2558
2559 float64_unpack_canonical(&p64, a, s);
2560 parts_float_to_float_widen(&p128, &p64, s);
2561 return float128_round_pack_canonical(&p128, s);
2562 }
2563
2564 /*
2565 * Round to integral value
2566 */
2567
2568 float16 float16_round_to_int(float16 a, float_status *s)
2569 {
2570 FloatParts64 p;
2571
2572 float16_unpack_canonical(&p, a, s);
2573 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2574 return float16_round_pack_canonical(&p, s);
2575 }
2576
2577 float32 float32_round_to_int(float32 a, float_status *s)
2578 {
2579 FloatParts64 p;
2580
2581 float32_unpack_canonical(&p, a, s);
2582 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2583 return float32_round_pack_canonical(&p, s);
2584 }
2585
2586 float64 float64_round_to_int(float64 a, float_status *s)
2587 {
2588 FloatParts64 p;
2589
2590 float64_unpack_canonical(&p, a, s);
2591 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2592 return float64_round_pack_canonical(&p, s);
2593 }
2594
2595 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2596 {
2597 FloatParts64 p;
2598
2599 bfloat16_unpack_canonical(&p, a, s);
2600 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2601 return bfloat16_round_pack_canonical(&p, s);
2602 }
2603
2604 float128 float128_round_to_int(float128 a, float_status *s)
2605 {
2606 FloatParts128 p;
2607
2608 float128_unpack_canonical(&p, a, s);
2609 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2610 return float128_round_pack_canonical(&p, s);
2611 }
2612
2613 /*
2614 * Floating-point to signed integer conversions
2615 */
2616
2617 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2618 float_status *s)
2619 {
2620 FloatParts64 p;
2621
2622 float16_unpack_canonical(&p, a, s);
2623 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
2624 }
2625
2626 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2627 float_status *s)
2628 {
2629 FloatParts64 p;
2630
2631 float16_unpack_canonical(&p, a, s);
2632 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2633 }
2634
2635 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2636 float_status *s)
2637 {
2638 FloatParts64 p;
2639
2640 float16_unpack_canonical(&p, a, s);
2641 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2642 }
2643
2644 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2645 float_status *s)
2646 {
2647 FloatParts64 p;
2648
2649 float16_unpack_canonical(&p, a, s);
2650 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2651 }
2652
2653 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2654 float_status *s)
2655 {
2656 FloatParts64 p;
2657
2658 float32_unpack_canonical(&p, a, s);
2659 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2660 }
2661
2662 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2663 float_status *s)
2664 {
2665 FloatParts64 p;
2666
2667 float32_unpack_canonical(&p, a, s);
2668 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2669 }
2670
2671 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2672 float_status *s)
2673 {
2674 FloatParts64 p;
2675
2676 float32_unpack_canonical(&p, a, s);
2677 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2678 }
2679
2680 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2681 float_status *s)
2682 {
2683 FloatParts64 p;
2684
2685 float64_unpack_canonical(&p, a, s);
2686 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2687 }
2688
2689 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2690 float_status *s)
2691 {
2692 FloatParts64 p;
2693
2694 float64_unpack_canonical(&p, a, s);
2695 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2696 }
2697
2698 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2699 float_status *s)
2700 {
2701 FloatParts64 p;
2702
2703 float64_unpack_canonical(&p, a, s);
2704 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2705 }
2706
2707 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2708 float_status *s)
2709 {
2710 FloatParts64 p;
2711
2712 bfloat16_unpack_canonical(&p, a, s);
2713 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2714 }
2715
2716 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2717 float_status *s)
2718 {
2719 FloatParts64 p;
2720
2721 bfloat16_unpack_canonical(&p, a, s);
2722 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2723 }
2724
2725 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2726 float_status *s)
2727 {
2728 FloatParts64 p;
2729
2730 bfloat16_unpack_canonical(&p, a, s);
2731 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2732 }
2733
2734 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
2735 int scale, float_status *s)
2736 {
2737 FloatParts128 p;
2738
2739 float128_unpack_canonical(&p, a, s);
2740 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2741 }
2742
2743 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
2744 int scale, float_status *s)
2745 {
2746 FloatParts128 p;
2747
2748 float128_unpack_canonical(&p, a, s);
2749 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2750 }
2751
2752 int8_t float16_to_int8(float16 a, float_status *s)
2753 {
2754 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2755 }
2756
2757 int16_t float16_to_int16(float16 a, float_status *s)
2758 {
2759 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2760 }
2761
2762 int32_t float16_to_int32(float16 a, float_status *s)
2763 {
2764 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2765 }
2766
2767 int64_t float16_to_int64(float16 a, float_status *s)
2768 {
2769 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2770 }
2771
2772 int16_t float32_to_int16(float32 a, float_status *s)
2773 {
2774 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2775 }
2776
2777 int32_t float32_to_int32(float32 a, float_status *s)
2778 {
2779 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2780 }
2781
2782 int64_t float32_to_int64(float32 a, float_status *s)
2783 {
2784 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2785 }
2786
2787 int16_t float64_to_int16(float64 a, float_status *s)
2788 {
2789 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2790 }
2791
2792 int32_t float64_to_int32(float64 a, float_status *s)
2793 {
2794 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2795 }
2796
2797 int64_t float64_to_int64(float64 a, float_status *s)
2798 {
2799 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2800 }
2801
2802 int32_t float128_to_int32(float128 a, float_status *s)
2803 {
2804 return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2805 }
2806
2807 int64_t float128_to_int64(float128 a, float_status *s)
2808 {
2809 return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2810 }
2811
2812 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2813 {
2814 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2815 }
2816
2817 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2818 {
2819 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2820 }
2821
2822 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2823 {
2824 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2825 }
2826
2827 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2828 {
2829 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2830 }
2831
2832 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2833 {
2834 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2835 }
2836
2837 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2838 {
2839 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2840 }
2841
2842 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2843 {
2844 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2845 }
2846
2847 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2848 {
2849 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2850 }
2851
2852 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2853 {
2854 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2855 }
2856
2857 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
2858 {
2859 return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
2860 }
2861
2862 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
2863 {
2864 return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
2865 }
2866
2867 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2868 {
2869 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2870 }
2871
2872 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2873 {
2874 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2875 }
2876
2877 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2878 {
2879 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2880 }
2881
2882 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2883 {
2884 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2885 }
2886
2887 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2888 {
2889 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2890 }
2891
2892 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2893 {
2894 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2895 }
2896
2897 /*
2898 * Floating-point to unsigned integer conversions
2899 */
2900
2901 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2902 float_status *s)
2903 {
2904 FloatParts64 p;
2905
2906 float16_unpack_canonical(&p, a, s);
2907 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
2908 }
2909
2910 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2911 float_status *s)
2912 {
2913 FloatParts64 p;
2914
2915 float16_unpack_canonical(&p, a, s);
2916 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2917 }
2918
2919 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2920 float_status *s)
2921 {
2922 FloatParts64 p;
2923
2924 float16_unpack_canonical(&p, a, s);
2925 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2926 }
2927
2928 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2929 float_status *s)
2930 {
2931 FloatParts64 p;
2932
2933 float16_unpack_canonical(&p, a, s);
2934 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2935 }
2936
2937 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2938 float_status *s)
2939 {
2940 FloatParts64 p;
2941
2942 float32_unpack_canonical(&p, a, s);
2943 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2944 }
2945
2946 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2947 float_status *s)
2948 {
2949 FloatParts64 p;
2950
2951 float32_unpack_canonical(&p, a, s);
2952 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2953 }
2954
2955 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2956 float_status *s)
2957 {
2958 FloatParts64 p;
2959
2960 float32_unpack_canonical(&p, a, s);
2961 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2962 }
2963
2964 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2965 float_status *s)
2966 {
2967 FloatParts64 p;
2968
2969 float64_unpack_canonical(&p, a, s);
2970 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2971 }
2972
2973 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2974 float_status *s)
2975 {
2976 FloatParts64 p;
2977
2978 float64_unpack_canonical(&p, a, s);
2979 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2980 }
2981
2982 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2983 float_status *s)
2984 {
2985 FloatParts64 p;
2986
2987 float64_unpack_canonical(&p, a, s);
2988 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2989 }
2990
2991 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2992 int scale, float_status *s)
2993 {
2994 FloatParts64 p;
2995
2996 bfloat16_unpack_canonical(&p, a, s);
2997 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2998 }
2999
3000 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3001 int scale, float_status *s)
3002 {
3003 FloatParts64 p;
3004
3005 bfloat16_unpack_canonical(&p, a, s);
3006 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3007 }
3008
3009 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3010 int scale, float_status *s)
3011 {
3012 FloatParts64 p;
3013
3014 bfloat16_unpack_canonical(&p, a, s);
3015 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3016 }
3017
3018 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3019 int scale, float_status *s)
3020 {
3021 FloatParts128 p;
3022
3023 float128_unpack_canonical(&p, a, s);
3024 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3025 }
3026
3027 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3028 int scale, float_status *s)
3029 {
3030 FloatParts128 p;
3031
3032 float128_unpack_canonical(&p, a, s);
3033 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3034 }
3035
3036 uint8_t float16_to_uint8(float16 a, float_status *s)
3037 {
3038 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3039 }
3040
3041 uint16_t float16_to_uint16(float16 a, float_status *s)
3042 {
3043 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3044 }
3045
3046 uint32_t float16_to_uint32(float16 a, float_status *s)
3047 {
3048 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3049 }
3050
3051 uint64_t float16_to_uint64(float16 a, float_status *s)
3052 {
3053 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3054 }
3055
3056 uint16_t float32_to_uint16(float32 a, float_status *s)
3057 {
3058 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3059 }
3060
3061 uint32_t float32_to_uint32(float32 a, float_status *s)
3062 {
3063 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3064 }
3065
3066 uint64_t float32_to_uint64(float32 a, float_status *s)
3067 {
3068 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3069 }
3070
3071 uint16_t float64_to_uint16(float64 a, float_status *s)
3072 {
3073 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3074 }
3075
3076 uint32_t float64_to_uint32(float64 a, float_status *s)
3077 {
3078 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3079 }
3080
3081 uint64_t float64_to_uint64(float64 a, float_status *s)
3082 {
3083 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3084 }
3085
3086 uint32_t float128_to_uint32(float128 a, float_status *s)
3087 {
3088 return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3089 }
3090
3091 uint64_t float128_to_uint64(float128 a, float_status *s)
3092 {
3093 return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3094 }
3095
3096 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3097 {
3098 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3099 }
3100
3101 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3102 {
3103 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3104 }
3105
3106 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3107 {
3108 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3109 }
3110
3111 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3112 {
3113 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3114 }
3115
3116 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3117 {
3118 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3119 }
3120
3121 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3122 {
3123 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3124 }
3125
3126 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3127 {
3128 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3129 }
3130
3131 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3132 {
3133 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3134 }
3135
3136 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3137 {
3138 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3139 }
3140
3141 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3142 {
3143 return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3144 }
3145
3146 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3147 {
3148 return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3149 }
3150
3151 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3152 {
3153 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3154 }
3155
3156 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3157 {
3158 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3159 }
3160
3161 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3162 {
3163 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3164 }
3165
3166 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3167 {
3168 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3169 }
3170
3171 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3172 {
3173 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3174 }
3175
3176 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3177 {
3178 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3179 }
3180
3181 /*
3182 * Signed integer to floating-point conversions
3183 */
3184
3185 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3186 {
3187 FloatParts64 p;
3188
3189 parts_sint_to_float(&p, a, scale, status);
3190 return float16_round_pack_canonical(&p, status);
3191 }
3192
3193 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3194 {
3195 return int64_to_float16_scalbn(a, scale, status);
3196 }
3197
3198 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3199 {
3200 return int64_to_float16_scalbn(a, scale, status);
3201 }
3202
3203 float16 int64_to_float16(int64_t a, float_status *status)
3204 {
3205 return int64_to_float16_scalbn(a, 0, status);
3206 }
3207
3208 float16 int32_to_float16(int32_t a, float_status *status)
3209 {
3210 return int64_to_float16_scalbn(a, 0, status);
3211 }
3212
3213 float16 int16_to_float16(int16_t a, float_status *status)
3214 {
3215 return int64_to_float16_scalbn(a, 0, status);
3216 }
3217
3218 float16 int8_to_float16(int8_t a, float_status *status)
3219 {
3220 return int64_to_float16_scalbn(a, 0, status);
3221 }
3222
3223 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3224 {
3225 FloatParts64 p;
3226
3227 parts64_sint_to_float(&p, a, scale, status);
3228 return float32_round_pack_canonical(&p, status);
3229 }
3230
3231 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3232 {
3233 return int64_to_float32_scalbn(a, scale, status);
3234 }
3235
3236 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3237 {
3238 return int64_to_float32_scalbn(a, scale, status);
3239 }
3240
3241 float32 int64_to_float32(int64_t a, float_status *status)
3242 {
3243 return int64_to_float32_scalbn(a, 0, status);
3244 }
3245
3246 float32 int32_to_float32(int32_t a, float_status *status)
3247 {
3248 return int64_to_float32_scalbn(a, 0, status);
3249 }
3250
3251 float32 int16_to_float32(int16_t a, float_status *status)
3252 {
3253 return int64_to_float32_scalbn(a, 0, status);
3254 }
3255
3256 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3257 {
3258 FloatParts64 p;
3259
3260 parts_sint_to_float(&p, a, scale, status);
3261 return float64_round_pack_canonical(&p, status);
3262 }
3263
3264 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3265 {
3266 return int64_to_float64_scalbn(a, scale, status);
3267 }
3268
3269 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3270 {
3271 return int64_to_float64_scalbn(a, scale, status);
3272 }
3273
3274 float64 int64_to_float64(int64_t a, float_status *status)
3275 {
3276 return int64_to_float64_scalbn(a, 0, status);
3277 }
3278
3279 float64 int32_to_float64(int32_t a, float_status *status)
3280 {
3281 return int64_to_float64_scalbn(a, 0, status);
3282 }
3283
3284 float64 int16_to_float64(int16_t a, float_status *status)
3285 {
3286 return int64_to_float64_scalbn(a, 0, status);
3287 }
3288
3289 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3290 {
3291 FloatParts64 p;
3292
3293 parts_sint_to_float(&p, a, scale, status);
3294 return bfloat16_round_pack_canonical(&p, status);
3295 }
3296
3297 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3298 {
3299 return int64_to_bfloat16_scalbn(a, scale, status);
3300 }
3301
3302 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3303 {
3304 return int64_to_bfloat16_scalbn(a, scale, status);
3305 }
3306
3307 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3308 {
3309 return int64_to_bfloat16_scalbn(a, 0, status);
3310 }
3311
3312 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3313 {
3314 return int64_to_bfloat16_scalbn(a, 0, status);
3315 }
3316
3317 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3318 {
3319 return int64_to_bfloat16_scalbn(a, 0, status);
3320 }
3321
3322 float128 int64_to_float128(int64_t a, float_status *status)
3323 {
3324 FloatParts128 p;
3325
3326 parts_sint_to_float(&p, a, 0, status);
3327 return float128_round_pack_canonical(&p, status);
3328 }
3329
3330 float128 int32_to_float128(int32_t a, float_status *status)
3331 {
3332 return int64_to_float128(a, status);
3333 }
3334
3335 /*
3336 * Unsigned Integer to floating-point conversions
3337 */
3338
3339 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3340 {
3341 FloatParts64 p;
3342
3343 parts_uint_to_float(&p, a, scale, status);
3344 return float16_round_pack_canonical(&p, status);
3345 }
3346
3347 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3348 {
3349 return uint64_to_float16_scalbn(a, scale, status);
3350 }
3351
3352 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3353 {
3354 return uint64_to_float16_scalbn(a, scale, status);
3355 }
3356
3357 float16 uint64_to_float16(uint64_t a, float_status *status)
3358 {
3359 return uint64_to_float16_scalbn(a, 0, status);
3360 }
3361
3362 float16 uint32_to_float16(uint32_t a, float_status *status)
3363 {
3364 return uint64_to_float16_scalbn(a, 0, status);
3365 }
3366
3367 float16 uint16_to_float16(uint16_t a, float_status *status)
3368 {
3369 return uint64_to_float16_scalbn(a, 0, status);
3370 }
3371
3372 float16 uint8_to_float16(uint8_t a, float_status *status)
3373 {
3374 return uint64_to_float16_scalbn(a, 0, status);
3375 }
3376
3377 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3378 {
3379 FloatParts64 p;
3380
3381 parts_uint_to_float(&p, a, scale, status);
3382 return float32_round_pack_canonical(&p, status);
3383 }
3384
3385 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3386 {
3387 return uint64_to_float32_scalbn(a, scale, status);
3388 }
3389
3390 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3391 {
3392 return uint64_to_float32_scalbn(a, scale, status);
3393 }
3394
3395 float32 uint64_to_float32(uint64_t a, float_status *status)
3396 {
3397 return uint64_to_float32_scalbn(a, 0, status);
3398 }
3399
3400 float32 uint32_to_float32(uint32_t a, float_status *status)
3401 {
3402 return uint64_to_float32_scalbn(a, 0, status);
3403 }
3404
3405 float32 uint16_to_float32(uint16_t a, float_status *status)
3406 {
3407 return uint64_to_float32_scalbn(a, 0, status);
3408 }
3409
3410 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3411 {
3412 FloatParts64 p;
3413
3414 parts_uint_to_float(&p, a, scale, status);
3415 return float64_round_pack_canonical(&p, status);
3416 }
3417
3418 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3419 {
3420 return uint64_to_float64_scalbn(a, scale, status);
3421 }
3422
3423 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3424 {
3425 return uint64_to_float64_scalbn(a, scale, status);
3426 }
3427
3428 float64 uint64_to_float64(uint64_t a, float_status *status)
3429 {
3430 return uint64_to_float64_scalbn(a, 0, status);
3431 }
3432
3433 float64 uint32_to_float64(uint32_t a, float_status *status)
3434 {
3435 return uint64_to_float64_scalbn(a, 0, status);
3436 }
3437
3438 float64 uint16_to_float64(uint16_t a, float_status *status)
3439 {
3440 return uint64_to_float64_scalbn(a, 0, status);
3441 }
3442
3443 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3444 {
3445 FloatParts64 p;
3446
3447 parts_uint_to_float(&p, a, scale, status);
3448 return bfloat16_round_pack_canonical(&p, status);
3449 }
3450
3451 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3452 {
3453 return uint64_to_bfloat16_scalbn(a, scale, status);
3454 }
3455
3456 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3457 {
3458 return uint64_to_bfloat16_scalbn(a, scale, status);
3459 }
3460
3461 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3462 {
3463 return uint64_to_bfloat16_scalbn(a, 0, status);
3464 }
3465
3466 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3467 {
3468 return uint64_to_bfloat16_scalbn(a, 0, status);
3469 }
3470
3471 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3472 {
3473 return uint64_to_bfloat16_scalbn(a, 0, status);
3474 }
3475
3476 float128 uint64_to_float128(uint64_t a, float_status *status)
3477 {
3478 FloatParts128 p;
3479
3480 parts_uint_to_float(&p, a, 0, status);
3481 return float128_round_pack_canonical(&p, status);
3482 }
3483
3484 /*
3485 * Minimum and maximum
3486 */
3487
3488 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3489 {
3490 FloatParts64 pa, pb, *pr;
3491
3492 float16_unpack_canonical(&pa, a, s);
3493 float16_unpack_canonical(&pb, b, s);
3494 pr = parts_minmax(&pa, &pb, s, flags);
3495
3496 return float16_round_pack_canonical(pr, s);
3497 }
3498
3499 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3500 float_status *s, int flags)
3501 {
3502 FloatParts64 pa, pb, *pr;
3503
3504 bfloat16_unpack_canonical(&pa, a, s);
3505 bfloat16_unpack_canonical(&pb, b, s);
3506 pr = parts_minmax(&pa, &pb, s, flags);
3507
3508 return bfloat16_round_pack_canonical(pr, s);
3509 }
3510
3511 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
3512 {
3513 FloatParts64 pa, pb, *pr;
3514
3515 float32_unpack_canonical(&pa, a, s);
3516 float32_unpack_canonical(&pb, b, s);
3517 pr = parts_minmax(&pa, &pb, s, flags);
3518
3519 return float32_round_pack_canonical(pr, s);
3520 }
3521
3522 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
3523 {
3524 FloatParts64 pa, pb, *pr;
3525
3526 float64_unpack_canonical(&pa, a, s);
3527 float64_unpack_canonical(&pb, b, s);
3528 pr = parts_minmax(&pa, &pb, s, flags);
3529
3530 return float64_round_pack_canonical(pr, s);
3531 }
3532
3533 static float128 float128_minmax(float128 a, float128 b,
3534 float_status *s, int flags)
3535 {
3536 FloatParts128 pa, pb, *pr;
3537
3538 float128_unpack_canonical(&pa, a, s);
3539 float128_unpack_canonical(&pb, b, s);
3540 pr = parts_minmax(&pa, &pb, s, flags);
3541
3542 return float128_round_pack_canonical(pr, s);
3543 }
3544
3545 #define MINMAX_1(type, name, flags) \
3546 type type##_##name(type a, type b, float_status *s) \
3547 { return type##_minmax(a, b, s, flags); }
3548
3549 #define MINMAX_2(type) \
3550 MINMAX_1(type, max, 0) \
3551 MINMAX_1(type, maxnum, minmax_isnum) \
3552 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
3553 MINMAX_1(type, min, minmax_ismin) \
3554 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
3555 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3556
3557 MINMAX_2(float16)
3558 MINMAX_2(bfloat16)
3559 MINMAX_2(float32)
3560 MINMAX_2(float64)
3561 MINMAX_2(float128)
3562
3563 #undef MINMAX_1
3564 #undef MINMAX_2
3565
3566 /*
3567 * Floating point compare
3568 */
3569
3570 static FloatRelation QEMU_FLATTEN
3571 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
3572 {
3573 FloatParts64 pa, pb;
3574
3575 float16_unpack_canonical(&pa, a, s);
3576 float16_unpack_canonical(&pb, b, s);
3577 return parts_compare(&pa, &pb, s, is_quiet);
3578 }
3579
3580 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
3581 {
3582 return float16_do_compare(a, b, s, false);
3583 }
3584
3585 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
3586 {
3587 return float16_do_compare(a, b, s, true);
3588 }
3589
3590 static FloatRelation QEMU_SOFTFLOAT_ATTR
3591 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
3592 {
3593 FloatParts64 pa, pb;
3594
3595 float32_unpack_canonical(&pa, a, s);
3596 float32_unpack_canonical(&pb, b, s);
3597 return parts_compare(&pa, &pb, s, is_quiet);
3598 }
3599
3600 static FloatRelation QEMU_FLATTEN
3601 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
3602 {
3603 union_float32 ua, ub;
3604
3605 ua.s = xa;
3606 ub.s = xb;
3607
3608 if (QEMU_NO_HARDFLOAT) {
3609 goto soft;
3610 }
3611
3612 float32_input_flush2(&ua.s, &ub.s, s);
3613 if (isgreaterequal(ua.h, ub.h)) {
3614 if (isgreater(ua.h, ub.h)) {
3615 return float_relation_greater;
3616 }
3617 return float_relation_equal;
3618 }
3619 if (likely(isless(ua.h, ub.h))) {
3620 return float_relation_less;
3621 }
3622 /*
3623 * The only condition remaining is unordered.
3624 * Fall through to set flags.
3625 */
3626 soft:
3627 return float32_do_compare(ua.s, ub.s, s, is_quiet);
3628 }
3629
3630 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
3631 {
3632 return float32_hs_compare(a, b, s, false);
3633 }
3634
3635 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
3636 {
3637 return float32_hs_compare(a, b, s, true);
3638 }
3639
3640 static FloatRelation QEMU_SOFTFLOAT_ATTR
3641 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
3642 {
3643 FloatParts64 pa, pb;
3644
3645 float64_unpack_canonical(&pa, a, s);
3646 float64_unpack_canonical(&pb, b, s);
3647 return parts_compare(&pa, &pb, s, is_quiet);
3648 }
3649
3650 static FloatRelation QEMU_FLATTEN
3651 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
3652 {
3653 union_float64 ua, ub;
3654
3655 ua.s = xa;
3656 ub.s = xb;
3657
3658 if (QEMU_NO_HARDFLOAT) {
3659 goto soft;
3660 }
3661
3662 float64_input_flush2(&ua.s, &ub.s, s);
3663 if (isgreaterequal(ua.h, ub.h)) {
3664 if (isgreater(ua.h, ub.h)) {
3665 return float_relation_greater;
3666 }
3667 return float_relation_equal;
3668 }
3669 if (likely(isless(ua.h, ub.h))) {
3670 return float_relation_less;
3671 }
3672 /*
3673 * The only condition remaining is unordered.
3674 * Fall through to set flags.
3675 */
3676 soft:
3677 return float64_do_compare(ua.s, ub.s, s, is_quiet);
3678 }
3679
3680 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
3681 {
3682 return float64_hs_compare(a, b, s, false);
3683 }
3684
3685 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
3686 {
3687 return float64_hs_compare(a, b, s, true);
3688 }
3689
3690 static FloatRelation QEMU_FLATTEN
3691 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
3692 {
3693 FloatParts64 pa, pb;
3694
3695 bfloat16_unpack_canonical(&pa, a, s);
3696 bfloat16_unpack_canonical(&pb, b, s);
3697 return parts_compare(&pa, &pb, s, is_quiet);
3698 }
3699
3700 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3701 {
3702 return bfloat16_do_compare(a, b, s, false);
3703 }
3704
3705 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3706 {
3707 return bfloat16_do_compare(a, b, s, true);
3708 }
3709
3710 static FloatRelation QEMU_FLATTEN
3711 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
3712 {
3713 FloatParts128 pa, pb;
3714
3715 float128_unpack_canonical(&pa, a, s);
3716 float128_unpack_canonical(&pb, b, s);
3717 return parts_compare(&pa, &pb, s, is_quiet);
3718 }
3719
3720 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
3721 {
3722 return float128_do_compare(a, b, s, false);
3723 }
3724
3725 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
3726 {
3727 return float128_do_compare(a, b, s, true);
3728 }
3729
3730 /*
3731 * Scale by 2**N
3732 */
3733
3734 float16 float16_scalbn(float16 a, int n, float_status *status)
3735 {
3736 FloatParts64 p;
3737
3738 float16_unpack_canonical(&p, a, status);
3739 parts_scalbn(&p, n, status);
3740 return float16_round_pack_canonical(&p, status);
3741 }
3742
3743 float32 float32_scalbn(float32 a, int n, float_status *status)
3744 {
3745 FloatParts64 p;
3746
3747 float32_unpack_canonical(&p, a, status);
3748 parts_scalbn(&p, n, status);
3749 return float32_round_pack_canonical(&p, status);
3750 }
3751
3752 float64 float64_scalbn(float64 a, int n, float_status *status)
3753 {
3754 FloatParts64 p;
3755
3756 float64_unpack_canonical(&p, a, status);
3757 parts_scalbn(&p, n, status);
3758 return float64_round_pack_canonical(&p, status);
3759 }
3760
3761 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3762 {
3763 FloatParts64 p;
3764
3765 bfloat16_unpack_canonical(&p, a, status);
3766 parts_scalbn(&p, n, status);
3767 return bfloat16_round_pack_canonical(&p, status);
3768 }
3769
3770 float128 float128_scalbn(float128 a, int n, float_status *status)
3771 {
3772 FloatParts128 p;
3773
3774 float128_unpack_canonical(&p, a, status);
3775 parts_scalbn(&p, n, status);
3776 return float128_round_pack_canonical(&p, status);
3777 }
3778
3779 /*
3780 * Square Root
3781 */
3782
3783 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
3784 {
3785 FloatParts64 p;
3786
3787 float16_unpack_canonical(&p, a, status);
3788 parts_sqrt(&p, status, &float16_params);
3789 return float16_round_pack_canonical(&p, status);
3790 }
3791
3792 static float32 QEMU_SOFTFLOAT_ATTR
3793 soft_f32_sqrt(float32 a, float_status *status)
3794 {
3795 FloatParts64 p;
3796
3797 float32_unpack_canonical(&p, a, status);
3798 parts_sqrt(&p, status, &float32_params);
3799 return float32_round_pack_canonical(&p, status);
3800 }
3801
3802 static float64 QEMU_SOFTFLOAT_ATTR
3803 soft_f64_sqrt(float64 a, float_status *status)
3804 {
3805 FloatParts64 p;
3806
3807 float64_unpack_canonical(&p, a, status);
3808 parts_sqrt(&p, status, &float64_params);
3809 return float64_round_pack_canonical(&p, status);
3810 }
3811
3812 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3813 {
3814 union_float32 ua, ur;
3815
3816 ua.s = xa;
3817 if (unlikely(!can_use_fpu(s))) {
3818 goto soft;
3819 }
3820
3821 float32_input_flush1(&ua.s, s);
3822 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3823 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3824 fpclassify(ua.h) == FP_ZERO) ||
3825 signbit(ua.h))) {
3826 goto soft;
3827 }
3828 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3829 float32_is_neg(ua.s))) {
3830 goto soft;
3831 }
3832 ur.h = sqrtf(ua.h);
3833 return ur.s;
3834
3835 soft:
3836 return soft_f32_sqrt(ua.s, s);
3837 }
3838
3839 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3840 {
3841 union_float64 ua, ur;
3842
3843 ua.s = xa;
3844 if (unlikely(!can_use_fpu(s))) {
3845 goto soft;
3846 }
3847
3848 float64_input_flush1(&ua.s, s);
3849 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3850 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3851 fpclassify(ua.h) == FP_ZERO) ||
3852 signbit(ua.h))) {
3853 goto soft;
3854 }
3855 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3856 float64_is_neg(ua.s))) {
3857 goto soft;
3858 }
3859 ur.h = sqrt(ua.h);
3860 return ur.s;
3861
3862 soft:
3863 return soft_f64_sqrt(ua.s, s);
3864 }
3865
3866 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3867 {
3868 FloatParts64 p;
3869
3870 bfloat16_unpack_canonical(&p, a, status);
3871 parts_sqrt(&p, status, &bfloat16_params);
3872 return bfloat16_round_pack_canonical(&p, status);
3873 }
3874
3875 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
3876 {
3877 FloatParts128 p;
3878
3879 float128_unpack_canonical(&p, a, status);
3880 parts_sqrt(&p, status, &float128_params);
3881 return float128_round_pack_canonical(&p, status);
3882 }
3883
3884 /*----------------------------------------------------------------------------
3885 | The pattern for a default generated NaN.
3886 *----------------------------------------------------------------------------*/
3887
3888 float16 float16_default_nan(float_status *status)
3889 {
3890 FloatParts64 p;
3891
3892 parts_default_nan(&p, status);
3893 p.frac >>= float16_params.frac_shift;
3894 return float16_pack_raw(&p);
3895 }
3896
3897 float32 float32_default_nan(float_status *status)
3898 {
3899 FloatParts64 p;
3900
3901 parts_default_nan(&p, status);
3902 p.frac >>= float32_params.frac_shift;
3903 return float32_pack_raw(&p);
3904 }
3905
3906 float64 float64_default_nan(float_status *status)
3907 {
3908 FloatParts64 p;
3909
3910 parts_default_nan(&p, status);
3911 p.frac >>= float64_params.frac_shift;
3912 return float64_pack_raw(&p);
3913 }
3914
3915 float128 float128_default_nan(float_status *status)
3916 {
3917 FloatParts128 p;
3918
3919 parts_default_nan(&p, status);
3920 frac_shr(&p, float128_params.frac_shift);
3921 return float128_pack_raw(&p);
3922 }
3923
3924 bfloat16 bfloat16_default_nan(float_status *status)
3925 {
3926 FloatParts64 p;
3927
3928 parts_default_nan(&p, status);
3929 p.frac >>= bfloat16_params.frac_shift;
3930 return bfloat16_pack_raw(&p);
3931 }
3932
3933 /*----------------------------------------------------------------------------
3934 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3935 *----------------------------------------------------------------------------*/
3936
3937 float16 float16_silence_nan(float16 a, float_status *status)
3938 {
3939 FloatParts64 p;
3940
3941 float16_unpack_raw(&p, a);
3942 p.frac <<= float16_params.frac_shift;
3943 parts_silence_nan(&p, status);
3944 p.frac >>= float16_params.frac_shift;
3945 return float16_pack_raw(&p);
3946 }
3947
3948 float32 float32_silence_nan(float32 a, float_status *status)
3949 {
3950 FloatParts64 p;
3951
3952 float32_unpack_raw(&p, a);
3953 p.frac <<= float32_params.frac_shift;
3954 parts_silence_nan(&p, status);
3955 p.frac >>= float32_params.frac_shift;
3956 return float32_pack_raw(&p);
3957 }
3958
3959 float64 float64_silence_nan(float64 a, float_status *status)
3960 {
3961 FloatParts64 p;
3962
3963 float64_unpack_raw(&p, a);
3964 p.frac <<= float64_params.frac_shift;
3965 parts_silence_nan(&p, status);
3966 p.frac >>= float64_params.frac_shift;
3967 return float64_pack_raw(&p);
3968 }
3969
3970 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3971 {
3972 FloatParts64 p;
3973
3974 bfloat16_unpack_raw(&p, a);
3975 p.frac <<= bfloat16_params.frac_shift;
3976 parts_silence_nan(&p, status);
3977 p.frac >>= bfloat16_params.frac_shift;
3978 return bfloat16_pack_raw(&p);
3979 }
3980
3981 float128 float128_silence_nan(float128 a, float_status *status)
3982 {
3983 FloatParts128 p;
3984
3985 float128_unpack_raw(&p, a);
3986 frac_shl(&p, float128_params.frac_shift);
3987 parts_silence_nan(&p, status);
3988 frac_shr(&p, float128_params.frac_shift);
3989 return float128_pack_raw(&p);
3990 }
3991
3992 /*----------------------------------------------------------------------------
3993 | If `a' is denormal and we are in flush-to-zero mode then set the
3994 | input-denormal exception and return zero. Otherwise just return the value.
3995 *----------------------------------------------------------------------------*/
3996
3997 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
3998 {
3999 if (p.exp == 0 && p.frac != 0) {
4000 float_raise(float_flag_input_denormal, status);
4001 return true;
4002 }
4003
4004 return false;
4005 }
4006
4007 float16 float16_squash_input_denormal(float16 a, float_status *status)
4008 {
4009 if (status->flush_inputs_to_zero) {
4010 FloatParts64 p;
4011
4012 float16_unpack_raw(&p, a);
4013 if (parts_squash_denormal(p, status)) {
4014 return float16_set_sign(float16_zero, p.sign);
4015 }
4016 }
4017 return a;
4018 }
4019
4020 float32 float32_squash_input_denormal(float32 a, float_status *status)
4021 {
4022 if (status->flush_inputs_to_zero) {
4023 FloatParts64 p;
4024
4025 float32_unpack_raw(&p, a);
4026 if (parts_squash_denormal(p, status)) {
4027 return float32_set_sign(float32_zero, p.sign);
4028 }
4029 }
4030 return a;
4031 }
4032
4033 float64 float64_squash_input_denormal(float64 a, float_status *status)
4034 {
4035 if (status->flush_inputs_to_zero) {
4036 FloatParts64 p;
4037
4038 float64_unpack_raw(&p, a);
4039 if (parts_squash_denormal(p, status)) {
4040 return float64_set_sign(float64_zero, p.sign);
4041 }
4042 }
4043 return a;
4044 }
4045
4046 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4047 {
4048 if (status->flush_inputs_to_zero) {
4049 FloatParts64 p;
4050
4051 bfloat16_unpack_raw(&p, a);
4052 if (parts_squash_denormal(p, status)) {
4053 return bfloat16_set_sign(bfloat16_zero, p.sign);
4054 }
4055 }
4056 return a;
4057 }
4058
4059 /*----------------------------------------------------------------------------
4060 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
4061 | and 7, and returns the properly rounded 32-bit integer corresponding to the
4062 | input. If `zSign' is 1, the input is negated before being converted to an
4063 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
4064 | is simply rounded to an integer, with the inexact exception raised if the
4065 | input cannot be represented exactly as an integer. However, if the fixed-
4066 | point input is too large, the invalid exception is raised and the largest
4067 | positive or negative integer is returned.
4068 *----------------------------------------------------------------------------*/
4069
4070 static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
4071 float_status *status)
4072 {
4073 int8_t roundingMode;
4074 bool roundNearestEven;
4075 int8_t roundIncrement, roundBits;
4076 int32_t z;
4077
4078 roundingMode = status->float_rounding_mode;
4079 roundNearestEven = ( roundingMode == float_round_nearest_even );
4080 switch (roundingMode) {
4081 case float_round_nearest_even:
4082 case float_round_ties_away:
4083 roundIncrement = 0x40;
4084 break;
4085 case float_round_to_zero:
4086 roundIncrement = 0;
4087 break;
4088 case float_round_up:
4089 roundIncrement = zSign ? 0 : 0x7f;
4090 break;
4091 case float_round_down:
4092 roundIncrement = zSign ? 0x7f : 0;
4093 break;
4094 case float_round_to_odd:
4095 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
4096 break;
4097 default:
4098 abort();
4099 }
4100 roundBits = absZ & 0x7F;
4101 absZ = ( absZ + roundIncrement )>>7;
4102 if (!(roundBits ^ 0x40) && roundNearestEven) {
4103 absZ &= ~1;
4104 }
4105 z = absZ;
4106 if ( zSign ) z = - z;
4107 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
4108 float_raise(float_flag_invalid, status);
4109 return zSign ? INT32_MIN : INT32_MAX;
4110 }
4111 if (roundBits) {
4112 float_raise(float_flag_inexact, status);
4113 }
4114 return z;
4115
4116 }
4117
4118 /*----------------------------------------------------------------------------
4119 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4120 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4121 | and returns the properly rounded 64-bit integer corresponding to the input.
4122 | If `zSign' is 1, the input is negated before being converted to an integer.
4123 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4124 | the inexact exception raised if the input cannot be represented exactly as
4125 | an integer. However, if the fixed-point input is too large, the invalid
4126 | exception is raised and the largest positive or negative integer is
4127 | returned.
4128 *----------------------------------------------------------------------------*/
4129
4130 static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
4131 float_status *status)
4132 {
4133 int8_t roundingMode;
4134 bool roundNearestEven, increment;
4135 int64_t z;
4136
4137 roundingMode = status->float_rounding_mode;
4138 roundNearestEven = ( roundingMode == float_round_nearest_even );
4139 switch (roundingMode) {
4140 case float_round_nearest_even:
4141 case float_round_ties_away:
4142 increment = ((int64_t) absZ1 < 0);
4143 break;
4144 case float_round_to_zero:
4145 increment = 0;
4146 break;
4147 case float_round_up:
4148 increment = !zSign && absZ1;
4149 break;
4150 case float_round_down:
4151 increment = zSign && absZ1;
4152 break;
4153 case float_round_to_odd:
4154 increment = !(absZ0 & 1) && absZ1;
4155 break;
4156 default:
4157 abort();
4158 }
4159 if ( increment ) {
4160 ++absZ0;
4161 if ( absZ0 == 0 ) goto overflow;
4162 if (!(absZ1 << 1) && roundNearestEven) {
4163 absZ0 &= ~1;
4164 }
4165 }
4166 z = absZ0;
4167 if ( zSign ) z = - z;
4168 if ( z && ( ( z < 0 ) ^ zSign ) ) {
4169 overflow:
4170 float_raise(float_flag_invalid, status);
4171 return zSign ? INT64_MIN : INT64_MAX;
4172 }
4173 if (absZ1) {
4174 float_raise(float_flag_inexact, status);
4175 }
4176 return z;
4177
4178 }
4179
4180 /*----------------------------------------------------------------------------
4181 | Normalizes the subnormal single-precision floating-point value represented
4182 | by the denormalized significand `aSig'. The normalized exponent and
4183 | significand are stored at the locations pointed to by `zExpPtr' and
4184 | `zSigPtr', respectively.
4185 *----------------------------------------------------------------------------*/
4186
4187 static void
4188 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
4189 {
4190 int8_t shiftCount;
4191
4192 shiftCount = clz32(aSig) - 8;
4193 *zSigPtr = aSig<<shiftCount;
4194 *zExpPtr = 1 - shiftCount;
4195
4196 }
4197
4198 /*----------------------------------------------------------------------------
4199 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4200 | and significand `zSig', and returns the proper single-precision floating-
4201 | point value corresponding to the abstract input. Ordinarily, the abstract
4202 | value is simply rounded and packed into the single-precision format, with
4203 | the inexact exception raised if the abstract input cannot be represented
4204 | exactly. However, if the abstract value is too large, the overflow and
4205 | inexact exceptions are raised and an infinity or maximal finite value is
4206 | returned. If the abstract value is too small, the input value is rounded to
4207 | a subnormal number, and the underflow and inexact exceptions are raised if
4208 | the abstract input cannot be represented exactly as a subnormal single-
4209 | precision floating-point number.
4210 | The input significand `zSig' has its binary point between bits 30
4211 | and 29, which is 7 bits to the left of the usual location. This shifted
4212 | significand must be normalized or smaller. If `zSig' is not normalized,
4213 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4214 | and it must not require rounding. In the usual case that `zSig' is
4215 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4216 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4217 | Binary Floating-Point Arithmetic.
4218 *----------------------------------------------------------------------------*/
4219
4220 static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4221 float_status *status)
4222 {
4223 int8_t roundingMode;
4224 bool roundNearestEven;
4225 int8_t roundIncrement, roundBits;
4226 bool isTiny;
4227
4228 roundingMode = status->float_rounding_mode;
4229 roundNearestEven = ( roundingMode == float_round_nearest_even );
4230 switch (roundingMode) {
4231 case float_round_nearest_even:
4232 case float_round_ties_away:
4233 roundIncrement = 0x40;
4234 break;
4235 case float_round_to_zero:
4236 roundIncrement = 0;
4237 break;
4238 case float_round_up:
4239 roundIncrement = zSign ? 0 : 0x7f;
4240 break;
4241 case float_round_down:
4242 roundIncrement = zSign ? 0x7f : 0;
4243 break;
4244 case float_round_to_odd:
4245 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4246 break;
4247 default:
4248 abort();
4249 break;
4250 }
4251 roundBits = zSig & 0x7F;
4252 if ( 0xFD <= (uint16_t) zExp ) {
4253 if ( ( 0xFD < zExp )
4254 || ( ( zExp == 0xFD )
4255 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
4256 ) {
4257 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4258 roundIncrement != 0;
4259 float_raise(float_flag_overflow | float_flag_inexact, status);
4260 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
4261 }
4262 if ( zExp < 0 ) {
4263 if (status->flush_to_zero) {
4264 float_raise(float_flag_output_denormal, status);
4265 return packFloat32(zSign, 0, 0);
4266 }
4267 isTiny = status->tininess_before_rounding
4268 || (zExp < -1)
4269 || (zSig + roundIncrement < 0x80000000);
4270 shift32RightJamming( zSig, - zExp, &zSig );
4271 zExp = 0;
4272 roundBits = zSig & 0x7F;
4273 if (isTiny && roundBits) {
4274 float_raise(float_flag_underflow, status);
4275 }
4276 if (roundingMode == float_round_to_odd) {
4277 /*
4278 * For round-to-odd case, the roundIncrement depends on
4279 * zSig which just changed.
4280 */
4281 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4282 }
4283 }
4284 }
4285 if (roundBits) {
4286 float_raise(float_flag_inexact, status);
4287 }
4288 zSig = ( zSig + roundIncrement )>>7;
4289 if (!(roundBits ^ 0x40) && roundNearestEven) {
4290 zSig &= ~1;
4291 }
4292 if ( zSig == 0 ) zExp = 0;
4293 return packFloat32( zSign, zExp, zSig );
4294
4295 }
4296
4297 /*----------------------------------------------------------------------------
4298 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4299 | and significand `zSig', and returns the proper single-precision floating-
4300 | point value corresponding to the abstract input. This routine is just like
4301 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4302 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4303 | floating-point exponent.
4304 *----------------------------------------------------------------------------*/
4305
4306 static float32
4307 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4308 float_status *status)
4309 {
4310 int8_t shiftCount;
4311
4312 shiftCount = clz32(zSig) - 1;
4313 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4314 status);
4315
4316 }
4317
4318 /*----------------------------------------------------------------------------
4319 | Normalizes the subnormal double-precision floating-point value represented
4320 | by the denormalized significand `aSig'. The normalized exponent and
4321 | significand are stored at the locations pointed to by `zExpPtr' and
4322 | `zSigPtr', respectively.
4323 *----------------------------------------------------------------------------*/
4324
4325 static void
4326 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
4327 {
4328 int8_t shiftCount;
4329
4330 shiftCount = clz64(aSig) - 11;
4331 *zSigPtr = aSig<<shiftCount;
4332 *zExpPtr = 1 - shiftCount;
4333
4334 }
4335
4336 /*----------------------------------------------------------------------------
4337 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4338 | double-precision floating-point value, returning the result. After being
4339 | shifted into the proper positions, the three fields are simply added
4340 | together to form the result. This means that any integer portion of `zSig'
4341 | will be added into the exponent. Since a properly normalized significand
4342 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4343 | than the desired result exponent whenever `zSig' is a complete, normalized
4344 | significand.
4345 *----------------------------------------------------------------------------*/
4346
4347 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4348 {
4349
4350 return make_float64(
4351 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4352
4353 }
4354
4355 /*----------------------------------------------------------------------------
4356 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4357 | and significand `zSig', and returns the proper double-precision floating-
4358 | point value corresponding to the abstract input. Ordinarily, the abstract
4359 | value is simply rounded and packed into the double-precision format, with
4360 | the inexact exception raised if the abstract input cannot be represented
4361 | exactly. However, if the abstract value is too large, the overflow and
4362 | inexact exceptions are raised and an infinity or maximal finite value is
4363 | returned. If the abstract value is too small, the input value is rounded to
4364 | a subnormal number, and the underflow and inexact exceptions are raised if
4365 | the abstract input cannot be represented exactly as a subnormal double-
4366 | precision floating-point number.
4367 | The input significand `zSig' has its binary point between bits 62
4368 | and 61, which is 10 bits to the left of the usual location. This shifted
4369 | significand must be normalized or smaller. If `zSig' is not normalized,
4370 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4371 | and it must not require rounding. In the usual case that `zSig' is
4372 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4373 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4374 | Binary Floating-Point Arithmetic.
4375 *----------------------------------------------------------------------------*/
4376
4377 static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4378 float_status *status)
4379 {
4380 int8_t roundingMode;
4381 bool roundNearestEven;
4382 int roundIncrement, roundBits;
4383 bool isTiny;
4384
4385 roundingMode = status->float_rounding_mode;
4386 roundNearestEven = ( roundingMode == float_round_nearest_even );
4387 switch (roundingMode) {
4388 case float_round_nearest_even:
4389 case float_round_ties_away:
4390 roundIncrement = 0x200;
4391 break;
4392 case float_round_to_zero:
4393 roundIncrement = 0;
4394 break;
4395 case float_round_up:
4396 roundIncrement = zSign ? 0 : 0x3ff;
4397 break;
4398 case float_round_down:
4399 roundIncrement = zSign ? 0x3ff : 0;
4400 break;
4401 case float_round_to_odd:
4402 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4403 break;
4404 default:
4405 abort();
4406 }
4407 roundBits = zSig & 0x3FF;
4408 if ( 0x7FD <= (uint16_t) zExp ) {
4409 if ( ( 0x7FD < zExp )
4410 || ( ( zExp == 0x7FD )
4411 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
4412 ) {
4413 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4414 roundIncrement != 0;
4415 float_raise(float_flag_overflow | float_flag_inexact, status);
4416 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
4417 }
4418 if ( zExp < 0 ) {
4419 if (status->flush_to_zero) {
4420 float_raise(float_flag_output_denormal, status);
4421 return packFloat64(zSign, 0, 0);
4422 }
4423 isTiny = status->tininess_before_rounding
4424 || (zExp < -1)
4425 || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
4426 shift64RightJamming( zSig, - zExp, &zSig );
4427 zExp = 0;
4428 roundBits = zSig & 0x3FF;
4429 if (isTiny && roundBits) {
4430 float_raise(float_flag_underflow, status);
4431 }
4432 if (roundingMode == float_round_to_odd) {
4433 /*
4434 * For round-to-odd case, the roundIncrement depends on
4435 * zSig which just changed.
4436 */
4437 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4438 }
4439 }
4440 }
4441 if (roundBits) {
4442 float_raise(float_flag_inexact, status);
4443 }
4444 zSig = ( zSig + roundIncrement )>>10;
4445 if (!(roundBits ^ 0x200) && roundNearestEven) {
4446 zSig &= ~1;
4447 }
4448 if ( zSig == 0 ) zExp = 0;
4449 return packFloat64( zSign, zExp, zSig );
4450
4451 }
4452
4453 /*----------------------------------------------------------------------------
4454 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4455 | and significand `zSig', and returns the proper double-precision floating-
4456 | point value corresponding to the abstract input. This routine is just like
4457 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4458 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4459 | floating-point exponent.
4460 *----------------------------------------------------------------------------*/
4461
4462 static float64
4463 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4464 float_status *status)
4465 {
4466 int8_t shiftCount;
4467
4468 shiftCount = clz64(zSig) - 1;
4469 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4470 status);
4471
4472 }
4473
4474 /*----------------------------------------------------------------------------
4475 | Normalizes the subnormal extended double-precision floating-point value
4476 | represented by the denormalized significand `aSig'. The normalized exponent
4477 | and significand are stored at the locations pointed to by `zExpPtr' and
4478 | `zSigPtr', respectively.
4479 *----------------------------------------------------------------------------*/
4480
4481 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4482 uint64_t *zSigPtr)
4483 {
4484 int8_t shiftCount;
4485
4486 shiftCount = clz64(aSig);
4487 *zSigPtr = aSig<<shiftCount;
4488 *zExpPtr = 1 - shiftCount;
4489 }
4490
4491 /*----------------------------------------------------------------------------
4492 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4493 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4494 | and returns the proper extended double-precision floating-point value
4495 | corresponding to the abstract input. Ordinarily, the abstract value is
4496 | rounded and packed into the extended double-precision format, with the
4497 | inexact exception raised if the abstract input cannot be represented
4498 | exactly. However, if the abstract value is too large, the overflow and
4499 | inexact exceptions are raised and an infinity or maximal finite value is
4500 | returned. If the abstract value is too small, the input value is rounded to
4501 | a subnormal number, and the underflow and inexact exceptions are raised if
4502 | the abstract input cannot be represented exactly as a subnormal extended
4503 | double-precision floating-point number.
4504 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4505 | the result is rounded to the same number of bits as single or double
4506 | precision, respectively. Otherwise, the result is rounded to the full
4507 | precision of the extended double-precision format.
4508 | The input significand must be normalized or smaller. If the input
4509 | significand is not normalized, `zExp' must be 0; in that case, the result
4510 | returned is a subnormal number, and it must not require rounding. The
4511 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4512 | Floating-Point Arithmetic.
4513 *----------------------------------------------------------------------------*/
4514
4515 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4516 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4517 float_status *status)
4518 {
4519 FloatRoundMode roundingMode;
4520 bool roundNearestEven, increment, isTiny;
4521 int64_t roundIncrement, roundMask, roundBits;
4522
4523 roundingMode = status->float_rounding_mode;
4524 roundNearestEven = ( roundingMode == float_round_nearest_even );
4525 switch (roundingPrecision) {
4526 case floatx80_precision_x:
4527 goto precision80;
4528 case floatx80_precision_d:
4529 roundIncrement = UINT64_C(0x0000000000000400);
4530 roundMask = UINT64_C(0x00000000000007FF);
4531 break;
4532 case floatx80_precision_s:
4533 roundIncrement = UINT64_C(0x0000008000000000);
4534 roundMask = UINT64_C(0x000000FFFFFFFFFF);
4535 break;
4536 default:
4537 g_assert_not_reached();
4538 }
4539 zSig0 |= ( zSig1 != 0 );
4540 switch (roundingMode) {
4541 case float_round_nearest_even:
4542 case float_round_ties_away:
4543 break;
4544 case float_round_to_zero:
4545 roundIncrement = 0;
4546 break;
4547 case float_round_up:
4548 roundIncrement = zSign ? 0 : roundMask;
4549 break;
4550 case float_round_down:
4551 roundIncrement = zSign ? roundMask : 0;
4552 break;
4553 default:
4554 abort();
4555 }
4556 roundBits = zSig0 & roundMask;
4557 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4558 if ( ( 0x7FFE < zExp )
4559 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4560 ) {
4561 goto overflow;
4562 }
4563 if ( zExp <= 0 ) {
4564 if (status->flush_to_zero) {
4565 float_raise(float_flag_output_denormal, status);
4566 return packFloatx80(zSign, 0, 0);
4567 }
4568 isTiny = status->tininess_before_rounding
4569 || (zExp < 0 )
4570 || (zSig0 <= zSig0 + roundIncrement);
4571 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4572 zExp = 0;
4573 roundBits = zSig0 & roundMask;
4574 if (isTiny && roundBits) {
4575 float_raise(float_flag_underflow, status);
4576 }
4577 if (roundBits) {
4578 float_raise(float_flag_inexact, status);
4579 }
4580 zSig0 += roundIncrement;
4581 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4582 roundIncrement = roundMask + 1;
4583 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4584 roundMask |= roundIncrement;
4585 }
4586 zSig0 &= ~ roundMask;
4587 return packFloatx80( zSign, zExp, zSig0 );
4588 }
4589 }
4590 if (roundBits) {
4591 float_raise(float_flag_inexact, status);
4592 }
4593 zSig0 += roundIncrement;
4594 if ( zSig0 < roundIncrement ) {
4595 ++zExp;
4596 zSig0 = UINT64_C(0x8000000000000000);
4597 }
4598 roundIncrement = roundMask + 1;
4599 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4600 roundMask |= roundIncrement;
4601 }
4602 zSig0 &= ~ roundMask;
4603 if ( zSig0 == 0 ) zExp = 0;
4604 return packFloatx80( zSign, zExp, zSig0 );
4605 precision80:
4606 switch (roundingMode) {
4607 case float_round_nearest_even:
4608 case float_round_ties_away:
4609 increment = ((int64_t)zSig1 < 0);
4610 break;
4611 case float_round_to_zero:
4612 increment = 0;
4613 break;
4614 case float_round_up:
4615 increment = !zSign && zSig1;
4616 break;
4617 case float_round_down:
4618 increment = zSign && zSig1;
4619 break;
4620 default:
4621 abort();
4622 }
4623 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4624 if ( ( 0x7FFE < zExp )
4625 || ( ( zExp == 0x7FFE )
4626 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4627 && increment
4628 )
4629 ) {
4630 roundMask = 0;
4631 overflow:
4632 float_raise(float_flag_overflow | float_flag_inexact, status);
4633 if ( ( roundingMode == float_round_to_zero )
4634 || ( zSign && ( roundingMode == float_round_up ) )
4635 || ( ! zSign && ( roundingMode == float_round_down ) )
4636 ) {
4637 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4638 }
4639 return packFloatx80(zSign,
4640 floatx80_infinity_high,
4641 floatx80_infinity_low);
4642 }
4643 if ( zExp <= 0 ) {
4644 isTiny = status->tininess_before_rounding
4645 || (zExp < 0)
4646 || !increment
4647 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4648 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4649 zExp = 0;
4650 if (isTiny && zSig1) {
4651 float_raise(float_flag_underflow, status);
4652 }
4653 if (zSig1) {
4654 float_raise(float_flag_inexact, status);
4655 }
4656 switch (roundingMode) {
4657 case float_round_nearest_even:
4658 case float_round_ties_away:
4659 increment = ((int64_t)zSig1 < 0);
4660 break;
4661 case float_round_to_zero:
4662 increment = 0;
4663 break;
4664 case float_round_up:
4665 increment = !zSign && zSig1;
4666 break;
4667 case float_round_down:
4668 increment = zSign && zSig1;
4669 break;
4670 default:
4671 abort();
4672 }
4673 if ( increment ) {
4674 ++zSig0;
4675 if (!(zSig1 << 1) && roundNearestEven) {
4676 zSig0 &= ~1;
4677 }
4678 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4679 }
4680 return packFloatx80( zSign, zExp, zSig0 );
4681 }
4682 }
4683 if (zSig1) {
4684 float_raise(float_flag_inexact, status);
4685 }
4686 if ( increment ) {
4687 ++zSig0;
4688 if ( zSig0 == 0 ) {
4689 ++zExp;
4690 zSig0 = UINT64_C(0x8000000000000000);
4691 }
4692 else {
4693 if (!(zSig1 << 1) && roundNearestEven) {
4694 zSig0 &= ~1;
4695 }
4696 }
4697 }
4698 else {
4699 if ( zSig0 == 0 ) zExp = 0;
4700 }
4701 return packFloatx80( zSign, zExp, zSig0 );
4702
4703 }
4704
4705 /*----------------------------------------------------------------------------
4706 | Takes an abstract floating-point value having sign `zSign', exponent
4707 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4708 | and returns the proper extended double-precision floating-point value
4709 | corresponding to the abstract input. This routine is just like
4710 | `roundAndPackFloatx80' except that the input significand does not have to be
4711 | normalized.
4712 *----------------------------------------------------------------------------*/
4713
4714 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
4715 bool zSign, int32_t zExp,
4716 uint64_t zSig0, uint64_t zSig1,
4717 float_status *status)
4718 {
4719 int8_t shiftCount;
4720
4721 if ( zSig0 == 0 ) {
4722 zSig0 = zSig1;
4723 zSig1 = 0;
4724 zExp -= 64;
4725 }
4726 shiftCount = clz64(zSig0);
4727 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4728 zExp -= shiftCount;
4729 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4730 zSig0, zSig1, status);
4731
4732 }
4733
4734 /*----------------------------------------------------------------------------
4735 | Returns the least-significant 64 fraction bits of the quadruple-precision
4736 | floating-point value `a'.
4737 *----------------------------------------------------------------------------*/
4738
4739 static inline uint64_t extractFloat128Frac1( float128 a )
4740 {
4741
4742 return a.low;
4743
4744 }
4745
4746 /*----------------------------------------------------------------------------
4747 | Returns the most-significant 48 fraction bits of the quadruple-precision
4748 | floating-point value `a'.
4749 *----------------------------------------------------------------------------*/
4750
4751 static inline uint64_t extractFloat128Frac0( float128 a )
4752 {
4753
4754 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4755
4756 }
4757
4758 /*----------------------------------------------------------------------------
4759 | Returns the exponent bits of the quadruple-precision floating-point value
4760 | `a'.
4761 *----------------------------------------------------------------------------*/
4762
4763 static inline int32_t extractFloat128Exp( float128 a )
4764 {
4765
4766 return ( a.high>>48 ) & 0x7FFF;
4767
4768 }
4769
4770 /*----------------------------------------------------------------------------
4771 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4772 *----------------------------------------------------------------------------*/
4773
4774 static inline bool extractFloat128Sign(float128 a)
4775 {
4776 return a.high >> 63;
4777 }
4778
4779 /*----------------------------------------------------------------------------
4780 | Normalizes the subnormal quadruple-precision floating-point value
4781 | represented by the denormalized significand formed by the concatenation of
4782 | `aSig0' and `aSig1'. The normalized exponent is stored at the location
4783 | pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4784 | significand are stored at the location pointed to by `zSig0Ptr', and the
4785 | least significant 64 bits of the normalized significand are stored at the
4786 | location pointed to by `zSig1Ptr'.
4787 *----------------------------------------------------------------------------*/
4788
4789 static void
4790 normalizeFloat128Subnormal(
4791 uint64_t aSig0,
4792 uint64_t aSig1,
4793 int32_t *zExpPtr,
4794 uint64_t *zSig0Ptr,
4795 uint64_t *zSig1Ptr
4796 )
4797 {
4798 int8_t shiftCount;
4799
4800 if ( aSig0 == 0 ) {
4801 shiftCount = clz64(aSig1) - 15;
4802 if ( shiftCount < 0 ) {
4803 *zSig0Ptr = aSig1>>( - shiftCount );
4804 *zSig1Ptr = aSig1<<( shiftCount & 63 );
4805 }
4806 else {
4807 *zSig0Ptr = aSig1<<shiftCount;
4808 *zSig1Ptr = 0;
4809 }
4810 *zExpPtr = - shiftCount - 63;
4811 }
4812 else {
4813 shiftCount = clz64(aSig0) - 15;
4814 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
4815 *zExpPtr = 1 - shiftCount;
4816 }
4817
4818 }
4819
4820 /*----------------------------------------------------------------------------
4821 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4822 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4823 | floating-point value, returning the result. After being shifted into the
4824 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4825 | added together to form the most significant 32 bits of the result. This
4826 | means that any integer portion of `zSig0' will be added into the exponent.
4827 | Since a properly normalized significand will have an integer portion equal
4828 | to 1, the `zExp' input should be 1 less than the desired result exponent
4829 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4830 | significand.
4831 *----------------------------------------------------------------------------*/
4832
4833 static inline float128
4834 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4835 {
4836 float128 z;
4837
4838 z.low = zSig1;
4839 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4840 return z;
4841 }
4842
4843 /*----------------------------------------------------------------------------
4844 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4845 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4846 | and `zSig2', and returns the proper quadruple-precision floating-point value
4847 | corresponding to the abstract input. Ordinarily, the abstract value is
4848 | simply rounded and packed into the quadruple-precision format, with the
4849 | inexact exception raised if the abstract input cannot be represented
4850 | exactly. However, if the abstract value is too large, the overflow and
4851 | inexact exceptions are raised and an infinity or maximal finite value is
4852 | returned. If the abstract value is too small, the input value is rounded to
4853 | a subnormal number, and the underflow and inexact exceptions are raised if
4854 | the abstract input cannot be represented exactly as a subnormal quadruple-
4855 | precision floating-point number.
4856 | The input significand must be normalized or smaller. If the input
4857 | significand is not normalized, `zExp' must be 0; in that case, the result
4858 | returned is a subnormal number, and it must not require rounding. In the
4859 | usual case that the input significand is normalized, `zExp' must be 1 less
4860 | than the ``true'' floating-point exponent. The handling of underflow and
4861 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4862 *----------------------------------------------------------------------------*/
4863
4864 static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
4865 uint64_t zSig0, uint64_t zSig1,
4866 uint64_t zSig2, float_status *status)
4867 {
4868 int8_t roundingMode;
4869 bool roundNearestEven, increment, isTiny;
4870
4871 roundingMode = status->float_rounding_mode;
4872 roundNearestEven = ( roundingMode == float_round_nearest_even );
4873 switch (roundingMode) {
4874 case float_round_nearest_even:
4875 case float_round_ties_away:
4876 increment = ((int64_t)zSig2 < 0);
4877 break;
4878 case float_round_to_zero:
4879 increment = 0;
4880 break;
4881 case float_round_up:
4882 increment = !zSign && zSig2;
4883 break;
4884 case float_round_down:
4885 increment = zSign && zSig2;
4886 break;
4887 case float_round_to_odd:
4888 increment = !(zSig1 & 0x1) && zSig2;
4889 break;
4890 default:
4891 abort();
4892 }
4893 if ( 0x7FFD <= (uint32_t) zExp ) {
4894 if ( ( 0x7FFD < zExp )
4895 || ( ( zExp == 0x7FFD )
4896 && eq128(
4897 UINT64_C(0x0001FFFFFFFFFFFF),
4898 UINT64_C(0xFFFFFFFFFFFFFFFF),
4899 zSig0,
4900 zSig1
4901 )
4902 && increment
4903 )
4904 ) {
4905 float_raise(float_flag_overflow | float_flag_inexact, status);
4906 if ( ( roundingMode == float_round_to_zero )
4907 || ( zSign && ( roundingMode == float_round_up ) )
4908 || ( ! zSign && ( roundingMode == float_round_down ) )
4909 || (roundingMode == float_round_to_odd)
4910 ) {
4911 return
4912 packFloat128(
4913 zSign,
4914 0x7FFE,
4915 UINT64_C(0x0000FFFFFFFFFFFF),
4916 UINT64_C(0xFFFFFFFFFFFFFFFF)
4917 );
4918 }
4919 return packFloat128( zSign, 0x7FFF, 0, 0 );
4920 }
4921 if ( zExp < 0 ) {
4922 if (status->flush_to_zero) {
4923 float_raise(float_flag_output_denormal, status);
4924 return packFloat128(zSign, 0, 0, 0);
4925 }
4926 isTiny = status->tininess_before_rounding
4927 || (zExp < -1)
4928 || !increment
4929 || lt128(zSig0, zSig1,
4930 UINT64_C(0x0001FFFFFFFFFFFF),
4931 UINT64_C(0xFFFFFFFFFFFFFFFF));
4932 shift128ExtraRightJamming(
4933 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4934 zExp = 0;
4935 if (isTiny && zSig2) {
4936 float_raise(float_flag_underflow, status);
4937 }
4938 switch (roundingMode) {
4939 case float_round_nearest_even:
4940 case float_round_ties_away:
4941 increment = ((int64_t)zSig2 < 0);
4942 break;
4943 case float_round_to_zero:
4944 increment = 0;
4945 break;
4946 case float_round_up:
4947 increment = !zSign && zSig2;
4948 break;
4949 case float_round_down:
4950 increment = zSign && zSig2;
4951 break;
4952 case float_round_to_odd:
4953 increment = !(zSig1 & 0x1) && zSig2;
4954 break;
4955 default:
4956 abort();
4957 }
4958 }
4959 }
4960 if (zSig2) {
4961 float_raise(float_flag_inexact, status);
4962 }
4963 if ( increment ) {
4964 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
4965 if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4966 zSig1 &= ~1;
4967 }
4968 }
4969 else {
4970 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4971 }
4972 return packFloat128( zSign, zExp, zSig0, zSig1 );
4973
4974 }
4975
4976 /*----------------------------------------------------------------------------
4977 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4978 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4979 | returns the proper quadruple-precision floating-point value corresponding
4980 | to the abstract input. This routine is just like `roundAndPackFloat128'
4981 | except that the input significand has fewer bits and does not have to be
4982 | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4983 | point exponent.
4984 *----------------------------------------------------------------------------*/
4985
4986 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
4987 uint64_t zSig0, uint64_t zSig1,
4988 float_status *status)
4989 {
4990 int8_t shiftCount;
4991 uint64_t zSig2;
4992
4993 if ( zSig0 == 0 ) {
4994 zSig0 = zSig1;
4995 zSig1 = 0;
4996 zExp -= 64;
4997 }
4998 shiftCount = clz64(zSig0) - 15;
4999 if ( 0 <= shiftCount ) {
5000 zSig2 = 0;
5001 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5002 }
5003 else {
5004 shift128ExtraRightJamming(
5005 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
5006 }
5007 zExp -= shiftCount;
5008 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
5009
5010 }
5011
5012
5013 /*----------------------------------------------------------------------------
5014 | Returns the result of converting the 32-bit two's complement integer `a'
5015 | to the extended double-precision floating-point format. The conversion
5016 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5017 | Arithmetic.
5018 *----------------------------------------------------------------------------*/
5019
5020 floatx80 int32_to_floatx80(int32_t a, float_status *status)
5021 {
5022 bool zSign;
5023 uint32_t absA;
5024 int8_t shiftCount;
5025 uint64_t zSig;
5026
5027 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5028 zSign = ( a < 0 );
5029 absA = zSign ? - a : a;
5030 shiftCount = clz32(absA) + 32;
5031 zSig = absA;
5032 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
5033
5034 }
5035
5036 /*----------------------------------------------------------------------------
5037 | Returns the result of converting the 64-bit two's complement integer `a'
5038 | to the extended double-precision floating-point format. The conversion
5039 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5040 | Arithmetic.
5041 *----------------------------------------------------------------------------*/
5042
5043 floatx80 int64_to_floatx80(int64_t a, float_status *status)
5044 {
5045 bool zSign;
5046 uint64_t absA;
5047 int8_t shiftCount;
5048
5049 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5050 zSign = ( a < 0 );
5051 absA = zSign ? - a : a;
5052 shiftCount = clz64(absA);
5053 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
5054
5055 }
5056
5057 /*----------------------------------------------------------------------------
5058 | Returns the result of converting the single-precision floating-point value
5059 | `a' to the extended double-precision floating-point format. The conversion
5060 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5061 | Arithmetic.
5062 *----------------------------------------------------------------------------*/
5063
5064 floatx80 float32_to_floatx80(float32 a, float_status *status)
5065 {
5066 bool aSign;
5067 int aExp;
5068 uint32_t aSig;
5069
5070 a = float32_squash_input_denormal(a, status);
5071 aSig = extractFloat32Frac( a );
5072 aExp = extractFloat32Exp( a );
5073 aSign = extractFloat32Sign( a );
5074 if ( aExp == 0xFF ) {
5075 if (aSig) {
5076 floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
5077 status);
5078 return floatx80_silence_nan(res, status);
5079 }
5080 return packFloatx80(aSign,
5081 floatx80_infinity_high,
5082 floatx80_infinity_low);
5083 }
5084 if ( aExp == 0 ) {
5085 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5086 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5087 }
5088 aSig |= 0x00800000;
5089 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
5090
5091 }
5092
5093 /*----------------------------------------------------------------------------
5094 | Returns the remainder of the single-precision floating-point value `a'
5095 | with respect to the corresponding value `b'. The operation is performed
5096 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5097 *----------------------------------------------------------------------------*/
5098
5099 float32 float32_rem(float32 a, float32 b, float_status *status)
5100 {
5101 bool aSign, zSign;
5102 int aExp, bExp, expDiff;
5103 uint32_t aSig, bSig;
5104 uint32_t q;
5105 uint64_t aSig64, bSig64, q64;
5106 uint32_t alternateASig;
5107 int32_t sigMean;
5108 a = float32_squash_input_denormal(a, status);
5109 b = float32_squash_input_denormal(b, status);
5110
5111 aSig = extractFloat32Frac( a );
5112 aExp = extractFloat32Exp( a );
5113 aSign = extractFloat32Sign( a );
5114 bSig = extractFloat32Frac( b );
5115 bExp = extractFloat32Exp( b );
5116 if ( aExp == 0xFF ) {
5117 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
5118 return propagateFloat32NaN(a, b, status);
5119 }
5120 float_raise(float_flag_invalid, status);
5121 return float32_default_nan(status);
5122 }
5123 if ( bExp == 0xFF ) {
5124 if (bSig) {
5125 return propagateFloat32NaN(a, b, status);
5126 }
5127 return a;
5128 }
5129 if ( bExp == 0 ) {
5130 if ( bSig == 0 ) {
5131 float_raise(float_flag_invalid, status);
5132 return float32_default_nan(status);
5133 }
5134 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
5135 }
5136 if ( aExp == 0 ) {
5137 if ( aSig == 0 ) return a;
5138 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5139 }
5140 expDiff = aExp - bExp;
5141 aSig |= 0x00800000;
5142 bSig |= 0x00800000;
5143 if ( expDiff < 32 ) {
5144 aSig <<= 8;
5145 bSig <<= 8;
5146 if ( expDiff < 0 ) {
5147 if ( expDiff < -1 ) return a;
5148 aSig >>= 1;
5149 }
5150 q = ( bSig <= aSig );
5151 if ( q ) aSig -= bSig;
5152 if ( 0 < expDiff ) {
5153 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
5154 q >>= 32 - expDiff;
5155 bSig >>= 2;
5156 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5157 }
5158 else {
5159 aSig >>= 2;
5160 bSig >>= 2;
5161 }
5162 }
5163 else {
5164 if ( bSig <= aSig ) aSig -= bSig;
5165 aSig64 = ( (uint64_t) aSig )<<40;
5166 bSig64 = ( (uint64_t) bSig )<<40;
5167 expDiff -= 64;
5168 while ( 0 < expDiff ) {
5169 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5170 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5171 aSig64 = - ( ( bSig * q64 )<<38 );
5172 expDiff -= 62;
5173 }
5174 expDiff += 64;
5175 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5176 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5177 q = q64>>( 64 - expDiff );
5178 bSig <<= 6;
5179 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5180 }
5181 do {
5182 alternateASig = aSig;
5183 ++q;
5184 aSig -= bSig;
5185 } while ( 0 <= (int32_t) aSig );
5186 sigMean = aSig + alternateASig;
5187 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5188 aSig = alternateASig;
5189 }
5190 zSign = ( (int32_t) aSig < 0 );
5191 if ( zSign ) aSig = - aSig;
5192 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
5193 }
5194
5195
5196
5197 /*----------------------------------------------------------------------------
5198 | Returns the binary exponential of the single-precision floating-point value
5199 | `a'. The operation is performed according to the IEC/IEEE Standard for
5200 | Binary Floating-Point Arithmetic.
5201 |
5202 | Uses the following identities:
5203 |
5204 | 1. -------------------------------------------------------------------------
5205 | x x*ln(2)
5206 | 2 = e
5207 |
5208 | 2. -------------------------------------------------------------------------
5209 | 2 3 4 5 n
5210 | x x x x x x x
5211 | e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5212 | 1! 2! 3! 4! 5! n!
5213 *----------------------------------------------------------------------------*/
5214
5215 static const float64 float32_exp2_coefficients[15] =
5216 {
5217 const_float64( 0x3ff0000000000000ll ), /* 1 */
5218 const_float64( 0x3fe0000000000000ll ), /* 2 */
5219 const_float64( 0x3fc5555555555555ll ), /* 3 */
5220 const_float64( 0x3fa5555555555555ll ), /* 4 */
5221 const_float64( 0x3f81111111111111ll ), /* 5 */
5222 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5223 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5224 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5225 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5226 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5227 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5228 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5229 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5230 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5231 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5232 };
5233
5234 float32 float32_exp2(float32 a, float_status *status)
5235 {
5236 bool aSign;
5237 int aExp;
5238 uint32_t aSig;
5239 float64 r, x, xn;
5240 int i;
5241 a = float32_squash_input_denormal(a, status);
5242
5243 aSig = extractFloat32Frac( a );
5244 aExp = extractFloat32Exp( a );
5245 aSign = extractFloat32Sign( a );
5246
5247 if ( aExp == 0xFF) {
5248 if (aSig) {
5249 return propagateFloat32NaN(a, float32_zero, status);
5250 }
5251 return (aSign) ? float32_zero : a;
5252 }
5253 if (aExp == 0) {
5254 if (aSig == 0) return float32_one;
5255 }
5256
5257 float_raise(float_flag_inexact, status);
5258
5259 /* ******************************* */
5260 /* using float64 for approximation */
5261 /* ******************************* */
5262 x = float32_to_float64(a, status);
5263 x = float64_mul(x, float64_ln2, status);
5264
5265 xn = x;
5266 r = float64_one;
5267 for (i = 0 ; i < 15 ; i++) {
5268 float64 f;
5269
5270 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5271 r = float64_add(r, f, status);
5272
5273 xn = float64_mul(xn, x, status);
5274 }
5275
5276 return float64_to_float32(r, status);
5277 }
5278
5279 /*----------------------------------------------------------------------------
5280 | Returns the binary log of the single-precision floating-point value `a'.
5281 | The operation is performed according to the IEC/IEEE Standard for Binary
5282 | Floating-Point Arithmetic.
5283 *----------------------------------------------------------------------------*/
5284 float32 float32_log2(float32 a, float_status *status)
5285 {
5286 bool aSign, zSign;
5287 int aExp;
5288 uint32_t aSig, zSig, i;
5289
5290 a = float32_squash_input_denormal(a, status);
5291 aSig = extractFloat32Frac( a );
5292 aExp = extractFloat32Exp( a );
5293 aSign = extractFloat32Sign( a );
5294
5295 if ( aExp == 0 ) {
5296 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5297 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5298 }
5299 if ( aSign ) {
5300 float_raise(float_flag_invalid, status);
5301 return float32_default_nan(status);
5302 }
5303 if ( aExp == 0xFF ) {
5304 if (aSig) {
5305 return propagateFloat32NaN(a, float32_zero, status);
5306 }
5307 return a;
5308 }
5309
5310 aExp -= 0x7F;
5311 aSig |= 0x00800000;
5312 zSign = aExp < 0;
5313 zSig = aExp << 23;
5314
5315 for (i = 1 << 22; i > 0; i >>= 1) {
5316 aSig = ( (uint64_t)aSig * aSig ) >> 23;
5317 if ( aSig & 0x01000000 ) {
5318 aSig >>= 1;
5319 zSig |= i;
5320 }
5321 }
5322
5323 if ( zSign )
5324 zSig = -zSig;
5325
5326 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
5327 }
5328
5329 /*----------------------------------------------------------------------------
5330 | Returns the result of converting the double-precision floating-point value
5331 | `a' to the extended double-precision floating-point format. The conversion
5332 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5333 | Arithmetic.
5334 *----------------------------------------------------------------------------*/
5335
5336 floatx80 float64_to_floatx80(float64 a, float_status *status)
5337 {
5338 bool aSign;
5339 int aExp;
5340 uint64_t aSig;
5341
5342 a = float64_squash_input_denormal(a, status);
5343 aSig = extractFloat64Frac( a );
5344 aExp = extractFloat64Exp( a );
5345 aSign = extractFloat64Sign( a );
5346 if ( aExp == 0x7FF ) {
5347 if (aSig) {
5348 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5349 status);
5350 return floatx80_silence_nan(res, status);
5351 }
5352 return packFloatx80(aSign,
5353 floatx80_infinity_high,
5354 floatx80_infinity_low);
5355 }
5356 if ( aExp == 0 ) {
5357 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5358 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5359 }
5360 return
5361 packFloatx80(
5362 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5363
5364 }
5365
5366 /*----------------------------------------------------------------------------
5367 | Returns the remainder of the double-precision floating-point value `a'
5368 | with respect to the corresponding value `b'. The operation is performed
5369 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5370 *----------------------------------------------------------------------------*/
5371
5372 float64 float64_rem(float64 a, float64 b, float_status *status)
5373 {
5374 bool aSign, zSign;
5375 int aExp, bExp, expDiff;
5376 uint64_t aSig, bSig;
5377 uint64_t q, alternateASig;
5378 int64_t sigMean;
5379
5380 a = float64_squash_input_denormal(a, status);
5381 b = float64_squash_input_denormal(b, status);
5382 aSig = extractFloat64Frac( a );
5383 aExp = extractFloat64Exp( a );
5384 aSign = extractFloat64Sign( a );
5385 bSig = extractFloat64Frac( b );
5386 bExp = extractFloat64Exp( b );
5387 if ( aExp == 0x7FF ) {
5388 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
5389 return propagateFloat64NaN(a, b, status);
5390 }
5391 float_raise(float_flag_invalid, status);
5392 return float64_default_nan(status);
5393 }
5394 if ( bExp == 0x7FF ) {
5395 if (bSig) {
5396 return propagateFloat64NaN(a, b, status);
5397 }
5398 return a;
5399 }
5400 if ( bExp == 0 ) {
5401 if ( bSig == 0 ) {
5402 float_raise(float_flag_invalid, status);
5403 return float64_default_nan(status);
5404 }
5405 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5406 }
5407 if ( aExp == 0 ) {
5408 if ( aSig == 0 ) return a;
5409 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5410 }
5411 expDiff = aExp - bExp;
5412 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5413 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
5414 if ( expDiff < 0 ) {
5415 if ( expDiff < -1 ) return a;
5416 aSig >>= 1;
5417 }
5418 q = ( bSig <= aSig );
5419 if ( q ) aSig -= bSig;
5420 expDiff -= 64;
5421 while ( 0 < expDiff ) {
5422 q = estimateDiv128To64( aSig, 0, bSig );
5423 q = ( 2 < q ) ? q - 2 : 0;
5424 aSig = - ( ( bSig>>2 ) * q );
5425 expDiff -= 62;
5426 }
5427 expDiff += 64;
5428 if ( 0 < expDiff ) {
5429 q = estimateDiv128To64( aSig, 0, bSig );
5430 q = ( 2 < q ) ? q - 2 : 0;
5431 q >>= 64 - expDiff;
5432 bSig >>= 2;
5433 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5434 }
5435 else {
5436 aSig >>= 2;
5437 bSig >>= 2;
5438 }
5439 do {
5440 alternateASig = aSig;
5441 ++q;
5442 aSig -= bSig;
5443 } while ( 0 <= (int64_t) aSig );
5444 sigMean = aSig + alternateASig;
5445 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5446 aSig = alternateASig;
5447 }
5448 zSign = ( (int64_t) aSig < 0 );
5449 if ( zSign ) aSig = - aSig;
5450 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
5451
5452 }
5453
5454 /*----------------------------------------------------------------------------
5455 | Returns the binary log of the double-precision floating-point value `a'.
5456 | The operation is performed according to the IEC/IEEE Standard for Binary
5457 | Floating-Point Arithmetic.
5458 *----------------------------------------------------------------------------*/
5459 float64 float64_log2(float64 a, float_status *status)
5460 {
5461 bool aSign, zSign;
5462 int aExp;
5463 uint64_t aSig, aSig0, aSig1, zSig, i;
5464 a = float64_squash_input_denormal(a, status);
5465
5466 aSig = extractFloat64Frac( a );
5467 aExp = extractFloat64Exp( a );
5468 aSign = extractFloat64Sign( a );
5469
5470 if ( aExp == 0 ) {
5471 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5472 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5473 }
5474 if ( aSign ) {
5475 float_raise(float_flag_invalid, status);
5476 return float64_default_nan(status);
5477 }
5478 if ( aExp == 0x7FF ) {
5479 if (aSig) {
5480 return propagateFloat64NaN(a, float64_zero, status);
5481 }
5482 return a;
5483 }
5484
5485 aExp -= 0x3FF;
5486 aSig |= UINT64_C(0x0010000000000000);
5487 zSign = aExp < 0;
5488 zSig = (uint64_t)aExp << 52;
5489 for (i = 1LL << 51; i > 0; i >>= 1) {
5490 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5491 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
5492 if ( aSig & UINT64_C(0x0020000000000000) ) {
5493 aSig >>= 1;
5494 zSig |= i;
5495 }
5496 }
5497
5498 if ( zSign )
5499 zSig = -zSig;
5500 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
5501 }
5502
5503 /*----------------------------------------------------------------------------
5504 | Returns the result of converting the extended double-precision floating-
5505 | point value `a' to the 32-bit two's complement integer format. The
5506 | conversion is performed according to the IEC/IEEE Standard for Binary
5507 | Floating-Point Arithmetic---which means in particular that the conversion
5508 | is rounded according to the current rounding mode. If `a' is a NaN, the
5509 | largest positive integer is returned. Otherwise, if the conversion
5510 | overflows, the largest integer with the same sign as `a' is returned.
5511 *----------------------------------------------------------------------------*/
5512
5513 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5514 {
5515 bool aSign;
5516 int32_t aExp, shiftCount;
5517 uint64_t aSig;
5518
5519 if (floatx80_invalid_encoding(a)) {
5520 float_raise(float_flag_invalid, status);
5521 return 1 << 31;
5522 }
5523 aSig = extractFloatx80Frac( a );
5524 aExp = extractFloatx80Exp( a );
5525 aSign = extractFloatx80Sign( a );
5526 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5527 shiftCount = 0x4037 - aExp;
5528 if ( shiftCount <= 0 ) shiftCount = 1;
5529 shift64RightJamming( aSig, shiftCount, &aSig );
5530 return roundAndPackInt32(aSign, aSig, status);
5531
5532 }
5533
5534 /*----------------------------------------------------------------------------
5535 | Returns the result of converting the extended double-precision floating-
5536 | point value `a' to the 32-bit two's complement integer format. The
5537 | conversion is performed according to the IEC/IEEE Standard for Binary
5538 | Floating-Point Arithmetic, except that the conversion is always rounded
5539 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5540 | Otherwise, if the conversion overflows, the largest integer with the same
5541 | sign as `a' is returned.
5542 *----------------------------------------------------------------------------*/
5543
5544 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5545 {
5546 bool aSign;
5547 int32_t aExp, shiftCount;
5548 uint64_t aSig, savedASig;
5549 int32_t z;
5550
5551 if (floatx80_invalid_encoding(a)) {
5552 float_raise(float_flag_invalid, status);
5553 return 1 << 31;
5554 }
5555 aSig = extractFloatx80Frac( a );
5556 aExp = extractFloatx80Exp( a );
5557 aSign = extractFloatx80Sign( a );
5558 if ( 0x401E < aExp ) {
5559 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5560 goto invalid;
5561 }
5562 else if ( aExp < 0x3FFF ) {
5563 if (aExp || aSig) {
5564 float_raise(float_flag_inexact, status);
5565 }
5566 return 0;
5567 }
5568 shiftCount = 0x403E - aExp;
5569 savedASig = aSig;
5570 aSig >>= shiftCount;
5571 z = aSig;
5572 if ( aSign ) z = - z;
5573 if ( ( z < 0 ) ^ aSign ) {
5574 invalid:
5575 float_raise(float_flag_invalid, status);
5576 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5577 }
5578 if ( ( aSig<<shiftCount ) != savedASig ) {
5579 float_raise(float_flag_inexact, status);
5580 }
5581 return z;
5582
5583 }
5584
5585 /*----------------------------------------------------------------------------
5586 | Returns the result of converting the extended double-precision floating-
5587 | point value `a' to the 64-bit two's complement integer format. The
5588 | conversion is performed according to the IEC/IEEE Standard for Binary
5589 | Floating-Point Arithmetic---which means in particular that the conversion
5590 | is rounded according to the current rounding mode. If `a' is a NaN,
5591 | the largest positive integer is returned. Otherwise, if the conversion
5592 | overflows, the largest integer with the same sign as `a' is returned.
5593 *----------------------------------------------------------------------------*/
5594
5595 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5596 {
5597 bool aSign;
5598 int32_t aExp, shiftCount;
5599 uint64_t aSig, aSigExtra;
5600
5601 if (floatx80_invalid_encoding(a)) {
5602 float_raise(float_flag_invalid, status);
5603 return 1ULL << 63;
5604 }
5605 aSig = extractFloatx80Frac( a );
5606 aExp = extractFloatx80Exp( a );
5607 aSign = extractFloatx80Sign( a );
5608 shiftCount = 0x403E - aExp;
5609 if ( shiftCount <= 0 ) {
5610 if ( shiftCount ) {
5611 float_raise(float_flag_invalid, status);
5612 if (!aSign || floatx80_is_any_nan(a)) {
5613 return INT64_MAX;
5614 }
5615 return INT64_MIN;
5616 }
5617 aSigExtra = 0;
5618 }
5619 else {
5620 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5621 }
5622 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5623
5624 }
5625
5626 /*----------------------------------------------------------------------------
5627 | Returns the result of converting the extended double-precision floating-
5628 | point value `a' to the 64-bit two's complement integer format. The
5629 | conversion is performed according to the IEC/IEEE Standard for Binary
5630 | Floating-Point Arithmetic, except that the conversion is always rounded
5631 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5632 | Otherwise, if the conversion overflows, the largest integer with the same
5633 | sign as `a' is returned.
5634 *----------------------------------------------------------------------------*/
5635
5636 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5637 {
5638 bool aSign;
5639 int32_t aExp, shiftCount;
5640 uint64_t aSig;
5641 int64_t z;
5642
5643 if (floatx80_invalid_encoding(a)) {
5644 float_raise(float_flag_invalid, status);
5645 return 1ULL << 63;
5646 }
5647 aSig = extractFloatx80Frac( a );
5648 aExp = extractFloatx80Exp( a );
5649 aSign = extractFloatx80Sign( a );
5650 shiftCount = aExp - 0x403E;
5651 if ( 0 <= shiftCount ) {
5652 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5653 if ( ( a.high != 0xC03E ) || aSig ) {
5654 float_raise(float_flag_invalid, status);
5655 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5656 return INT64_MAX;
5657 }
5658 }
5659 return INT64_MIN;
5660 }
5661 else if ( aExp < 0x3FFF ) {
5662 if (aExp | aSig) {
5663 float_raise(float_flag_inexact, status);
5664 }
5665 return 0;
5666 }
5667 z = aSig>>( - shiftCount );
5668 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5669 float_raise(float_flag_inexact, status);
5670 }
5671 if ( aSign ) z = - z;
5672 return z;
5673
5674 }
5675
5676 /*----------------------------------------------------------------------------
5677 | Returns the result of converting the extended double-precision floating-
5678 | point value `a' to the single-precision floating-point format. The
5679 | conversion is performed according to the IEC/IEEE Standard for Binary
5680 | Floating-Point Arithmetic.
5681 *----------------------------------------------------------------------------*/
5682
5683 float32 floatx80_to_float32(floatx80 a, float_status *status)
5684 {
5685 bool aSign;
5686 int32_t aExp;
5687 uint64_t aSig;
5688
5689 if (floatx80_invalid_encoding(a)) {
5690 float_raise(float_flag_invalid, status);
5691 return float32_default_nan(status);
5692 }
5693 aSig = extractFloatx80Frac( a );
5694 aExp = extractFloatx80Exp( a );
5695 aSign = extractFloatx80Sign( a );
5696 if ( aExp == 0x7FFF ) {
5697 if ( (uint64_t) ( aSig<<1 ) ) {
5698 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5699 status);
5700 return float32_silence_nan(res, status);
5701 }
5702 return packFloat32( aSign, 0xFF, 0 );
5703 }
5704 shift64RightJamming( aSig, 33, &aSig );
5705 if ( aExp || aSig ) aExp -= 0x3F81;
5706 return roundAndPackFloat32(aSign, aExp, aSig, status);
5707
5708 }
5709
5710 /*----------------------------------------------------------------------------
5711 | Returns the result of converting the extended double-precision floating-
5712 | point value `a' to the double-precision floating-point format. The
5713 | conversion is performed according to the IEC/IEEE Standard for Binary
5714 | Floating-Point Arithmetic.
5715 *----------------------------------------------------------------------------*/
5716
5717 float64 floatx80_to_float64(floatx80 a, float_status *status)
5718 {
5719 bool aSign;
5720 int32_t aExp;
5721 uint64_t aSig, zSig;
5722
5723 if (floatx80_invalid_encoding(a)) {
5724 float_raise(float_flag_invalid, status);
5725 return float64_default_nan(status);
5726 }
5727 aSig = extractFloatx80Frac( a );
5728 aExp = extractFloatx80Exp( a );
5729 aSign = extractFloatx80Sign( a );
5730 if ( aExp == 0x7FFF ) {
5731 if ( (uint64_t) ( aSig<<1 ) ) {
5732 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5733 status);
5734 return float64_silence_nan(res, status);
5735 }
5736 return packFloat64( aSign, 0x7FF, 0 );
5737 }
5738 shift64RightJamming( aSig, 1, &zSig );
5739 if ( aExp || aSig ) aExp -= 0x3C01;
5740 return roundAndPackFloat64(aSign, aExp, zSig, status);
5741
5742 }
5743
5744 /*----------------------------------------------------------------------------
5745 | Returns the result of converting the extended double-precision floating-
5746 | point value `a' to the quadruple-precision floating-point format. The
5747 | conversion is performed according to the IEC/IEEE Standard for Binary
5748 | Floating-Point Arithmetic.
5749 *----------------------------------------------------------------------------*/
5750
5751 float128 floatx80_to_float128(floatx80 a, float_status *status)
5752 {
5753 bool aSign;
5754 int aExp;
5755 uint64_t aSig, zSig0, zSig1;
5756
5757 if (floatx80_invalid_encoding(a)) {
5758 float_raise(float_flag_invalid, status);
5759 return float128_default_nan(status);
5760 }
5761 aSig = extractFloatx80Frac( a );
5762 aExp = extractFloatx80Exp( a );
5763 aSign = extractFloatx80Sign( a );
5764 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5765 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5766 status);
5767 return float128_silence_nan(res, status);
5768 }
5769 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5770 return packFloat128( aSign, aExp, zSig0, zSig1 );
5771
5772 }
5773
5774 /*----------------------------------------------------------------------------
5775 | Rounds the extended double-precision floating-point value `a'
5776 | to the precision provided by floatx80_rounding_precision and returns the
5777 | result as an extended double-precision floating-point value.
5778 | The operation is performed according to the IEC/IEEE Standard for Binary
5779 | Floating-Point Arithmetic.
5780 *----------------------------------------------------------------------------*/
5781
5782 floatx80 floatx80_round(floatx80 a, float_status *status)
5783 {
5784 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5785 extractFloatx80Sign(a),
5786 extractFloatx80Exp(a),
5787 extractFloatx80Frac(a), 0, status);
5788 }
5789
5790 /*----------------------------------------------------------------------------
5791 | Rounds the extended double-precision floating-point value `a' to an integer,
5792 | and returns the result as an extended quadruple-precision floating-point
5793 | value. The operation is performed according to the IEC/IEEE Standard for
5794 | Binary Floating-Point Arithmetic.
5795 *----------------------------------------------------------------------------*/
5796
5797 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5798 {
5799 bool aSign;
5800 int32_t aExp;
5801 uint64_t lastBitMask, roundBitsMask;
5802 floatx80 z;
5803
5804 if (floatx80_invalid_encoding(a)) {
5805 float_raise(float_flag_invalid, status);
5806 return floatx80_default_nan(status);
5807 }
5808 aExp = extractFloatx80Exp( a );
5809 if ( 0x403E <= aExp ) {
5810 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5811 return propagateFloatx80NaN(a, a, status);
5812 }
5813 return a;
5814 }
5815 if ( aExp < 0x3FFF ) {
5816 if ( ( aExp == 0 )
5817 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5818 return a;
5819 }
5820 float_raise(float_flag_inexact, status);
5821 aSign = extractFloatx80Sign( a );
5822 switch (status->float_rounding_mode) {
5823 case float_round_nearest_even:
5824 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5825 ) {
5826 return
5827 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5828 }
5829 break;
5830 case float_round_ties_away:
5831 if (aExp == 0x3FFE) {
5832 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5833 }
5834 break;
5835 case float_round_down:
5836 return
5837 aSign ?
5838 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5839 : packFloatx80( 0, 0, 0 );
5840 case float_round_up:
5841 return
5842 aSign ? packFloatx80( 1, 0, 0 )
5843 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5844
5845 case float_round_to_zero:
5846 break;
5847 default:
5848 g_assert_not_reached();
5849 }
5850 return packFloatx80( aSign, 0, 0 );
5851 }
5852 lastBitMask = 1;
5853 lastBitMask <<= 0x403E - aExp;
5854 roundBitsMask = lastBitMask - 1;
5855 z = a;
5856 switch (status->float_rounding_mode) {
5857 case float_round_nearest_even:
5858 z.low += lastBitMask>>1;
5859 if ((z.low & roundBitsMask) == 0) {
5860 z.low &= ~lastBitMask;
5861 }
5862 break;
5863 case float_round_ties_away:
5864 z.low += lastBitMask >> 1;
5865 break;
5866 case float_round_to_zero:
5867 break;
5868 case float_round_up:
5869 if (!extractFloatx80Sign(z)) {
5870 z.low += roundBitsMask;
5871 }
5872 break;
5873 case float_round_down:
5874 if (extractFloatx80Sign(z)) {
5875 z.low += roundBitsMask;
5876 }
5877 break;
5878 default:
5879 abort();
5880 }
5881 z.low &= ~ roundBitsMask;
5882 if ( z.low == 0 ) {
5883 ++z.high;
5884 z.low = UINT64_C(0x8000000000000000);
5885 }
5886 if (z.low != a.low) {
5887 float_raise(float_flag_inexact, status);
5888 }
5889 return z;
5890
5891 }
5892
5893 /*----------------------------------------------------------------------------
5894 | Returns the remainder of the extended double-precision floating-point value
5895 | `a' with respect to the corresponding value `b'. The operation is performed
5896 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
5897 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
5898 | the quotient toward zero instead. '*quotient' is set to the low 64 bits of
5899 | the absolute value of the integer quotient.
5900 *----------------------------------------------------------------------------*/
5901
5902 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
5903 float_status *status)
5904 {
5905 bool aSign, zSign;
5906 int32_t aExp, bExp, expDiff, aExpOrig;
5907 uint64_t aSig0, aSig1, bSig;
5908 uint64_t q, term0, term1, alternateASig0, alternateASig1;
5909
5910 *quotient = 0;
5911 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5912 float_raise(float_flag_invalid, status);
5913 return floatx80_default_nan(status);
5914 }
5915 aSig0 = extractFloatx80Frac( a );
5916 aExpOrig = aExp = extractFloatx80Exp( a );
5917 aSign = extractFloatx80Sign( a );
5918 bSig = extractFloatx80Frac( b );
5919 bExp = extractFloatx80Exp( b );
5920 if ( aExp == 0x7FFF ) {
5921 if ( (uint64_t) ( aSig0<<1 )
5922 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
5923 return propagateFloatx80NaN(a, b, status);
5924 }
5925 goto invalid;
5926 }
5927 if ( bExp == 0x7FFF ) {
5928 if ((uint64_t)(bSig << 1)) {
5929 return propagateFloatx80NaN(a, b, status);
5930 }
5931 if (aExp == 0 && aSig0 >> 63) {
5932 /*
5933 * Pseudo-denormal argument must be returned in normalized
5934 * form.
5935 */
5936 return packFloatx80(aSign, 1, aSig0);
5937 }
5938 return a;
5939 }
5940 if ( bExp == 0 ) {
5941 if ( bSig == 0 ) {
5942 invalid:
5943 float_raise(float_flag_invalid, status);
5944 return floatx80_default_nan(status);
5945 }
5946 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5947 }
5948 if ( aExp == 0 ) {
5949 if ( aSig0 == 0 ) return a;
5950 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5951 }
5952 zSign = aSign;
5953 expDiff = aExp - bExp;
5954 aSig1 = 0;
5955 if ( expDiff < 0 ) {
5956 if ( mod || expDiff < -1 ) {
5957 if (aExp == 1 && aExpOrig == 0) {
5958 /*
5959 * Pseudo-denormal argument must be returned in
5960 * normalized form.
5961 */
5962 return packFloatx80(aSign, aExp, aSig0);
5963 }
5964 return a;
5965 }
5966 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5967 expDiff = 0;
5968 }
5969 *quotient = q = ( bSig <= aSig0 );
5970 if ( q ) aSig0 -= bSig;
5971 expDiff -= 64;
5972 while ( 0 < expDiff ) {
5973 q = estimateDiv128To64( aSig0, aSig1, bSig );
5974 q = ( 2 < q ) ? q - 2 : 0;
5975 mul64To128( bSig, q, &term0, &term1 );
5976 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5977 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5978 expDiff -= 62;
5979 *quotient <<= 62;
5980 *quotient += q;
5981 }
5982 expDiff += 64;
5983 if ( 0 < expDiff ) {
5984 q = estimateDiv128To64( aSig0, aSig1, bSig );
5985 q = ( 2 < q ) ? q - 2 : 0;
5986 q >>= 64 - expDiff;
5987 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5988 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5989 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5990 while ( le128( term0, term1, aSig0, aSig1 ) ) {
5991 ++q;
5992 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5993 }
5994 if (expDiff < 64) {
5995 *quotient <<= expDiff;
5996 } else {
5997 *quotient = 0;
5998 }
5999 *quotient += q;
6000 }
6001 else {
6002 term1 = 0;
6003 term0 = bSig;
6004 }
6005 if (!mod) {
6006 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6007 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6008 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6009 && ( q & 1 ) )
6010 ) {
6011 aSig0 = alternateASig0;
6012 aSig1 = alternateASig1;
6013 zSign = ! zSign;
6014 ++*quotient;
6015 }
6016 }
6017 return
6018 normalizeRoundAndPackFloatx80(
6019 floatx80_precision_x, zSign, bExp + expDiff, aSig0, aSig1, status);
6020
6021 }
6022
6023 /*----------------------------------------------------------------------------
6024 | Returns the remainder of the extended double-precision floating-point value
6025 | `a' with respect to the corresponding value `b'. The operation is performed
6026 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6027 *----------------------------------------------------------------------------*/
6028
6029 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6030 {
6031 uint64_t quotient;
6032 return floatx80_modrem(a, b, false, &quotient, status);
6033 }
6034
6035 /*----------------------------------------------------------------------------
6036 | Returns the remainder of the extended double-precision floating-point value
6037 | `a' with respect to the corresponding value `b', with the quotient truncated
6038 | toward zero.
6039 *----------------------------------------------------------------------------*/
6040
6041 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6042 {
6043 uint64_t quotient;
6044 return floatx80_modrem(a, b, true, &quotient, status);
6045 }
6046
6047 /*----------------------------------------------------------------------------
6048 | Returns the square root of the extended double-precision floating-point
6049 | value `a'. The operation is performed according to the IEC/IEEE Standard
6050 | for Binary Floating-Point Arithmetic.
6051 *----------------------------------------------------------------------------*/
6052
6053 floatx80 floatx80_sqrt(floatx80 a, float_status *status)
6054 {
6055 bool aSign;
6056 int32_t aExp, zExp;
6057 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6058 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6059
6060 if (floatx80_invalid_encoding(a)) {
6061 float_raise(float_flag_invalid, status);
6062 return floatx80_default_nan(status);
6063 }
6064 aSig0 = extractFloatx80Frac( a );
6065 aExp = extractFloatx80Exp( a );
6066 aSign = extractFloatx80Sign( a );
6067 if ( aExp == 0x7FFF ) {
6068 if ((uint64_t)(aSig0 << 1)) {
6069 return propagateFloatx80NaN(a, a, status);
6070 }
6071 if ( ! aSign ) return a;
6072 goto invalid;
6073 }
6074 if ( aSign ) {
6075 if ( ( aExp | aSig0 ) == 0 ) return a;
6076 invalid:
6077 float_raise(float_flag_invalid, status);
6078 return floatx80_default_nan(status);
6079 }
6080 if ( aExp == 0 ) {
6081 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6082 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6083 }
6084 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6085 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6086 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6087 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6088 doubleZSig0 = zSig0<<1;
6089 mul64To128( zSig0, zSig0, &term0, &term1 );
6090 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6091 while ( (int64_t) rem0 < 0 ) {
6092 --zSig0;
6093 doubleZSig0 -= 2;
6094 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6095 }
6096 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6097 if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
6098 if ( zSig1 == 0 ) zSig1 = 1;
6099 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6100 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6101 mul64To128( zSig1, zSig1, &term2, &term3 );
6102 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
6103 while ( (int64_t) rem1 < 0 ) {
6104 --zSig1;
6105 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6106 term3 |= 1;
6107 term2 |= doubleZSig0;
6108 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6109 }
6110 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6111 }
6112 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6113 zSig0 |= doubleZSig0;
6114 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6115 0, zExp, zSig0, zSig1, status);
6116 }
6117
6118 /*----------------------------------------------------------------------------
6119 | Returns the result of converting the quadruple-precision floating-point
6120 | value `a' to the extended double-precision floating-point format. The
6121 | conversion is performed according to the IEC/IEEE Standard for Binary
6122 | Floating-Point Arithmetic.
6123 *----------------------------------------------------------------------------*/
6124
6125 floatx80 float128_to_floatx80(float128 a, float_status *status)
6126 {
6127 bool aSign;
6128 int32_t aExp;
6129 uint64_t aSig0, aSig1;
6130
6131 aSig1 = extractFloat128Frac1( a );
6132 aSig0 = extractFloat128Frac0( a );
6133 aExp = extractFloat128Exp( a );
6134 aSign = extractFloat128Sign( a );
6135 if ( aExp == 0x7FFF ) {
6136 if ( aSig0 | aSig1 ) {
6137 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6138 status);
6139 return floatx80_silence_nan(res, status);
6140 }
6141 return packFloatx80(aSign, floatx80_infinity_high,
6142 floatx80_infinity_low);
6143 }
6144 if ( aExp == 0 ) {
6145 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6146 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6147 }
6148 else {
6149 aSig0 |= UINT64_C(0x0001000000000000);
6150 }
6151 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6152 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6153
6154 }
6155
6156 /*----------------------------------------------------------------------------
6157 | Returns the remainder of the quadruple-precision floating-point value `a'
6158 | with respect to the corresponding value `b'. The operation is performed
6159 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6160 *----------------------------------------------------------------------------*/
6161
6162 float128 float128_rem(float128 a, float128 b, float_status *status)
6163 {
6164 bool aSign, zSign;
6165 int32_t aExp, bExp, expDiff;
6166 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6167 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6168 int64_t sigMean0;
6169
6170 aSig1 = extractFloat128Frac1( a );
6171 aSig0 = extractFloat128Frac0( a );
6172 aExp = extractFloat128Exp( a );
6173 aSign = extractFloat128Sign( a );
6174 bSig1 = extractFloat128Frac1( b );
6175 bSig0 = extractFloat128Frac0( b );
6176 bExp = extractFloat128Exp( b );
6177 if ( aExp == 0x7FFF ) {
6178 if ( ( aSig0 | aSig1 )
6179 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6180 return propagateFloat128NaN(a, b, status);
6181 }
6182 goto invalid;
6183 }
6184 if ( bExp == 0x7FFF ) {
6185 if (bSig0 | bSig1) {
6186 return propagateFloat128NaN(a, b, status);
6187 }
6188 return a;
6189 }
6190 if ( bExp == 0 ) {
6191 if ( ( bSig0 | bSig1 ) == 0 ) {
6192 invalid:
6193 float_raise(float_flag_invalid, status);
6194 return float128_default_nan(status);
6195 }
6196 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6197 }
6198 if ( aExp == 0 ) {
6199 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6200 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6201 }
6202 expDiff = aExp - bExp;
6203 if ( expDiff < -1 ) return a;
6204 shortShift128Left(
6205 aSig0 | UINT64_C(0x0001000000000000),
6206 aSig1,
6207 15 - ( expDiff < 0 ),
6208 &aSig0,
6209 &aSig1
6210 );
6211 shortShift128Left(
6212 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
6213 q = le128( bSig0, bSig1, aSig0, aSig1 );
6214 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6215 expDiff -= 64;
6216 while ( 0 < expDiff ) {
6217 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6218 q = ( 4 < q ) ? q - 4 : 0;
6219 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6220 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6221 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6222 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6223 expDiff -= 61;
6224 }
6225 if ( -64 < expDiff ) {
6226 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6227 q = ( 4 < q ) ? q - 4 : 0;
6228 q >>= - expDiff;
6229 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6230 expDiff += 52;
6231 if ( expDiff < 0 ) {
6232 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6233 }
6234 else {
6235 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6236 }
6237 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6238 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6239 }
6240 else {
6241 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6242 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6243 }
6244 do {
6245 alternateASig0 = aSig0;
6246 alternateASig1 = aSig1;
6247 ++q;
6248 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6249 } while ( 0 <= (int64_t) aSig0 );
6250 add128(
6251 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
6252 if ( ( sigMean0 < 0 )
6253 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6254 aSig0 = alternateASig0;
6255 aSig1 = alternateASig1;
6256 }
6257 zSign = ( (int64_t) aSig0 < 0 );
6258 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
6259 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6260 status);
6261 }
6262
6263 static inline FloatRelation
6264 floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
6265 float_status *status)
6266 {
6267 bool aSign, bSign;
6268
6269 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6270 float_raise(float_flag_invalid, status);
6271 return float_relation_unordered;
6272 }
6273 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6274 ( extractFloatx80Frac( a )<<1 ) ) ||
6275 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6276 ( extractFloatx80Frac( b )<<1 ) )) {
6277 if (!is_quiet ||
6278 floatx80_is_signaling_nan(a, status) ||
6279 floatx80_is_signaling_nan(b, status)) {
6280 float_raise(float_flag_invalid, status);
6281 }
6282 return float_relation_unordered;
6283 }
6284 aSign = extractFloatx80Sign( a );
6285 bSign = extractFloatx80Sign( b );
6286 if ( aSign != bSign ) {
6287
6288 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6289 ( ( a.low | b.low ) == 0 ) ) {
6290 /* zero case */
6291 return float_relation_equal;
6292 } else {
6293 return 1 - (2 * aSign);
6294 }
6295 } else {
6296 /* Normalize pseudo-denormals before comparison. */
6297 if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
6298 ++a.high;
6299 }
6300 if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
6301 ++b.high;
6302 }
6303 if (a.low == b.low && a.high == b.high) {
6304 return float_relation_equal;
6305 } else {
6306 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6307 }
6308 }
6309 }
6310
6311 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
6312 {
6313 return floatx80_compare_internal(a, b, 0, status);
6314 }
6315
6316 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6317 float_status *status)
6318 {
6319 return floatx80_compare_internal(a, b, 1, status);
6320 }
6321
6322 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
6323 {
6324 bool aSign;
6325 int32_t aExp;
6326 uint64_t aSig;
6327
6328 if (floatx80_invalid_encoding(a)) {
6329 float_raise(float_flag_invalid, status);
6330 return floatx80_default_nan(status);
6331 }
6332 aSig = extractFloatx80Frac( a );
6333 aExp = extractFloatx80Exp( a );
6334 aSign = extractFloatx80Sign( a );
6335
6336 if ( aExp == 0x7FFF ) {
6337 if ( aSig<<1 ) {
6338 return propagateFloatx80NaN(a, a, status);
6339 }
6340 return a;
6341 }
6342
6343 if (aExp == 0) {
6344 if (aSig == 0) {
6345 return a;
6346 }
6347 aExp++;
6348 }
6349
6350 if (n > 0x10000) {
6351 n = 0x10000;
6352 } else if (n < -0x10000) {
6353 n = -0x10000;
6354 }
6355
6356 aExp += n;
6357 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6358 aSign, aExp, aSig, 0, status);
6359 }
6360
6361 static void __attribute__((constructor)) softfloat_init(void)
6362 {
6363 union_float64 ua, ub, uc, ur;
6364
6365 if (QEMU_NO_HARDFLOAT) {
6366 return;
6367 }
6368 /*
6369 * Test that the host's FMA is not obviously broken. For example,
6370 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6371 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6372 */
6373 ua.s = 0x0020000000000001ULL;
6374 ub.s = 0x3ca0000000000000ULL;
6375 uc.s = 0x0020000000000000ULL;
6376 ur.h = fma(ua.h, ub.h, uc.h);
6377 if (ur.s != 0x0020000000000001ULL) {
6378 force_soft_fma = true;
6379 }
6380 }