]> git.proxmox.com Git - mirror_qemu.git/blob - fpu/softfloat.c
softfloat: Convert floatx80_mul to FloatParts
[mirror_qemu.git] / fpu / softfloat.c
1 /*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43
44 ===============================================================================
45 */
46
47 /* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89
90 /* We only need stdlib for abort() */
91
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98
99 /*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
/*
 * Generate a helper that unconditionally flushes a denormal input operand
 * to a sign-preserving zero and raises float_flag_input_denormal.
 * The "__nocheck" suffix: the caller must already have tested
 * s->flush_inputs_to_zero (see GEN_INPUT_FLUSH1/2/3 below).
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK
142
/*
 * Generate a one-operand input-flush helper: a fast no-op unless the
 * float_status requests flushing input denormals to zero.
 */
#define GEN_INPUT_FLUSH1(name, soft_t)                                  \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1
155
/*
 * As GEN_INPUT_FLUSH1, but flushing two operands with a single check of
 * s->flush_inputs_to_zero.
 */
#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2
169
/*
 * As GEN_INPUT_FLUSH1, but flushing three operands with a single check of
 * s->flush_inputs_to_zero.
 */
#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
184
185 /*
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
189 */
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
197 #else
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
204 #endif
205
206 /*
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211 */
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF 1
214 #else
215 # define QEMU_HARDFLOAT_USE_ISINF 0
216 #endif
217
218 /*
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
221 * already set.
222 */
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 # warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226 IEEE implementation
227 # endif
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230 #else
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233 #endif
234
235 static inline bool can_use_fpu(const float_status *s)
236 {
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242 }
243
244 /*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256 typedef union {
257 float32 s;
258 float h;
259 } union_float32;
260
261 typedef union {
262 float64 s;
263 double h;
264 } union_float64;
265
266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271 typedef float (*hard_f32_op2_fn)(float a, float b);
272 typedef double (*hard_f64_op2_fn)(double a, double b);
273
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287 }
288
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297 }
298
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311 }
312
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324 }
325
326 static inline bool f32_is_inf(union_float32 a)
327 {
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332 }
333
334 static inline bool f64_is_inf(union_float64 a)
335 {
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340 }
341
342 static inline float32
343 float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
345 f32_check_fn pre, f32_check_fn post)
346 {
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
363 float_raise(float_flag_overflow, s);
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371 }
372
373 static inline float64
374 float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
376 f64_check_fn pre, f64_check_fn post)
377 {
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
394 float_raise(float_flag_overflow, s);
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402 }
403
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
407
408 static inline uint32_t extractFloat32Frac(float32 a)
409 {
410 return float32_val(a) & 0x007FFFFF;
411 }
412
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
416
417 static inline int extractFloat32Exp(float32 a)
418 {
419 return (float32_val(a) >> 23) & 0xFF;
420 }
421
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
425
426 static inline bool extractFloat32Sign(float32 a)
427 {
428 return float32_val(a) >> 31;
429 }
430
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
434
435 static inline uint64_t extractFloat64Frac(float64 a)
436 {
437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
438 }
439
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
443
444 static inline int extractFloat64Exp(float64 a)
445 {
446 return (float64_val(a) >> 52) & 0x7FF;
447 }
448
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
452
453 static inline bool extractFloat64Sign(float64 a)
454 {
455 return float64_val(a) >> 63;
456 }
457
458 /*
459 * Classify a floating point number. Everything above float_class_qnan
460 * is a NaN so cls >= float_class_qnan is any NaN.
461 */
462
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

/* Turn a FloatClass value into a single bit for use in a class mask. */
#define float_cmask(bit) (1u << (bit))

enum {
    float_cmask_zero = float_cmask(float_class_zero),
    float_cmask_normal = float_cmask(float_class_normal),
    float_cmask_inf = float_cmask(float_class_inf),
    float_cmask_qnan = float_cmask(float_class_qnan),
    float_cmask_snan = float_cmask(float_class_snan),

    /* Composite masks for common multi-class tests. */
    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
};
484
/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
};
494
495 /* Simple helpers for checking if, or what kind of, NaN we have */
496 static inline __attribute__((unused)) bool is_nan(FloatClass c)
497 {
498 return unlikely(c >= float_class_qnan);
499 }
500
501 static inline __attribute__((unused)) bool is_snan(FloatClass c)
502 {
503 return c == float_class_snan;
504 }
505
506 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
507 {
508 return c == float_class_qnan;
509 }
510
511 /*
512 * Structure holding all of the decomposed parts of a float.
513 * The exponent is unbiased and the fraction is normalized.
514 *
515 * The fraction words are stored in big-endian word ordering,
516 * so that truncation from a larger format to a smaller format
517 * can be done simply by ignoring subsequent elements.
518 */
519
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;     /* unbiased exponent */
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation. In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

/* 128-bit fraction, stored most-significant word first. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

/* 256-bit fraction, stored most-significant word first. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;
554
555 /* These apply to the most significant word of each FloatPartsN. */
556 #define DECOMPOSED_BINARY_POINT 63
557 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
558
559 /* Structure holding all of the relevant parameters for a format.
560 * exp_size: the size of the exponent field
561 * exp_bias: the offset applied to the exponent field
562 * exp_max: the maximum normalised exponent
563 * frac_size: the size of the fraction field
564 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
565 * The following are computed based the size of fraction
566 * round_mask: bits below lsb which must be rounded
567 * The following optional modifiers are available:
568 * arm_althp: handle ARM Alternative Half Precision
569 */
typedef struct {
    int exp_size;        /* size of the exponent field, in bits */
    int exp_bias;        /* offset applied to the exponent field */
    int exp_max;         /* maximum normalised exponent */
    int frac_size;       /* size of the fraction field, in bits */
    int frac_shift;      /* shift to align frac with DECOMPOSED_BINARY_POINT */
    bool arm_althp;      /* handle ARM Alternative Half Precision */
    uint64_t round_mask; /* bits below the lsb which must be rounded */
} FloatFmt;
579
/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E) \
    .exp_size = E, \
    .exp_bias = ((1 << E) - 1) >> 1, \
    .exp_max = (1 << E) - 1

/*
 * frac_shift normalises the fraction with DECOMPOSED_BINARY_POINT;
 * round_mask covers the bits below the lsb which must be rounded away
 * when repacking.
 */
#define FLOAT_PARAMS(E, F) \
    FLOAT_PARAMS_(E), \
    .frac_size = F, \
    .frac_shift = (-F - 1) & 63, \
    .round_mask = (1ull << ((-F - 1) & 63)) - 1
591
/* IEEE binary16: 5 exponent bits, 10 fraction bits. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* Same layout, with ARM Alternative Half Precision handling enabled. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* bfloat16: 8 exponent bits (as float32), 7 fraction bits. */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

/*
 * floatx80, parameterized by rounding precision R (23, 52 or 64 bits).
 * frac_shift is 0: the significand is kept unshifted.
 * NOTE(review): frac_size is 63 rather than 64 for the full-precision
 * case, which looks like it accounts for the explicit integer bit of
 * the x87 format -- confirm against the floatx80 pack/unpack helpers.
 */
#define FLOATX80_PARAMS(R) \
    FLOAT_PARAMS_(15), \
    .frac_size = R == 64 ? 63 : R, \
    .frac_shift = 0, \
    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1

static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
};
628
629 /* Unpack a float to parts, but do not canonicalize. */
630 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
631 {
632 const int f_size = fmt->frac_size;
633 const int e_size = fmt->exp_size;
634
635 *r = (FloatParts64) {
636 .cls = float_class_unclassified,
637 .sign = extract64(raw, f_size + e_size, 1),
638 .exp = extract64(raw, f_size, e_size),
639 .frac = extract64(raw, 0, f_size)
640 };
641 }
642
/* Per-format raw unpack helpers, binding each format's parameter table. */
static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}
662
663 static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
664 {
665 *p = (FloatParts128) {
666 .cls = float_class_unclassified,
667 .sign = extract32(f.high, 15, 1),
668 .exp = extract32(f.high, 0, 15),
669 .frac_hi = f.low
670 };
671 }
672
673 static void float128_unpack_raw(FloatParts128 *p, float128 f)
674 {
675 const int f_size = float128_params.frac_size - 64;
676 const int e_size = float128_params.exp_size;
677
678 *p = (FloatParts128) {
679 .cls = float_class_unclassified,
680 .sign = extract64(f.high, f_size + e_size, 1),
681 .exp = extract64(f.high, f_size, e_size),
682 .frac_hi = extract64(f.high, 0, f_size),
683 .frac_lo = f.low,
684 };
685 }
686
687 /* Pack a float from parts, but do not canonicalize. */
688 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
689 {
690 const int f_size = fmt->frac_size;
691 const int e_size = fmt->exp_size;
692 uint64_t ret;
693
694 ret = (uint64_t)p->sign << (f_size + e_size);
695 ret = deposit64(ret, f_size, e_size, p->exp);
696 ret = deposit64(ret, 0, f_size, p->frac);
697 return ret;
698 }
699
/* Per-format raw pack helpers, binding each format's parameter table. */
static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}
719
720 static float128 float128_pack_raw(const FloatParts128 *p)
721 {
722 const int f_size = float128_params.frac_size - 64;
723 const int e_size = float128_params.exp_size;
724 uint64_t hi;
725
726 hi = (uint64_t)p->sign << (f_size + e_size);
727 hi = deposit64(hi, f_size, e_size, p->exp);
728 hi = deposit64(hi, 0, f_size, p->frac_hi);
729 return make_float128(hi, p->frac_lo);
730 }
731
732 /*----------------------------------------------------------------------------
733 | Functions and definitions to determine: (1) whether tininess for underflow
734 | is detected before or after rounding by default, (2) what (if anything)
735 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
736 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
737 | are propagated from function inputs to output. These details are target-
738 | specific.
739 *----------------------------------------------------------------------------*/
740 #include "softfloat-specialize.c.inc"
741
/*
 * Dispatch macros: select the parts64/parts128(/parts256) implementation
 * of NAME at compile time, based on the pointer type of P, via
 * QEMU_GENERIC.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
748
749 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
750 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
751
752 static void parts64_return_nan(FloatParts64 *a, float_status *s);
753 static void parts128_return_nan(FloatParts128 *a, float_status *s);
754
755 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
756
757 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
758 float_status *s);
759 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
760 float_status *s);
761
762 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
763
764 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
765 FloatParts64 *c, float_status *s,
766 int ab_mask, int abc_mask);
767 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
768 FloatParts128 *b,
769 FloatParts128 *c,
770 float_status *s,
771 int ab_mask, int abc_mask);
772
773 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
774 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
775
776 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
777 const FloatFmt *fmt);
778 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
779 const FloatFmt *fmt);
780
781 #define parts_canonicalize(A, S, F) \
782 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
783
784 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
785 const FloatFmt *fmt);
786 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
787 const FloatFmt *fmt);
788
789 #define parts_uncanon_normal(A, S, F) \
790 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
791
792 static void parts64_uncanon(FloatParts64 *p, float_status *status,
793 const FloatFmt *fmt);
794 static void parts128_uncanon(FloatParts128 *p, float_status *status,
795 const FloatFmt *fmt);
796
797 #define parts_uncanon(A, S, F) \
798 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
799
800 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
801 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
802 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
803
804 #define parts_add_normal(A, B) \
805 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
806
807 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
808 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
809 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
810
811 #define parts_sub_normal(A, B) \
812 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
813
814 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
815 float_status *s, bool subtract);
816 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
817 float_status *s, bool subtract);
818
819 #define parts_addsub(A, B, S, Z) \
820 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
821
822 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
823 float_status *s);
824 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
825 float_status *s);
826
827 #define parts_mul(A, B, S) \
828 PARTS_GENERIC_64_128(mul, A)(A, B, S)
829
830 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
831 FloatParts64 *c, int flags,
832 float_status *s);
833 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
834 FloatParts128 *c, int flags,
835 float_status *s);
836
837 #define parts_muladd(A, B, C, Z, S) \
838 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
839
840 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
841 float_status *s);
842 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
843 float_status *s);
844
845 #define parts_div(A, B, S) \
846 PARTS_GENERIC_64_128(div, A)(A, B, S)
847
848 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
849 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
850
851 #define parts_sqrt(A, S, F) \
852 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
853
/*
 * Forward declarations for the FloatParts{64,128} operations implemented
 * in softfloat-parts.c.inc; each parts_* macro dispatches to the 64- or
 * 128-bit variant based on the type of its first argument, via
 * PARTS_GENERIC_64_128.
 */

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                      int scale, int64_t min, int64_t max,
                                      float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
                           float_status *s, bool q);
static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
                            float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
929
/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

/* Dispatch a frac_* helper on its operand type: 64- vs 128-bit fraction. */
#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

/* As above, additionally handling 256-bit (double-width 128) fractions. */
#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
940
941 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
942 {
943 return uadd64_overflow(a->frac, b->frac, &r->frac);
944 }
945
946 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
947 {
948 bool c = 0;
949 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
950 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
951 return c;
952 }
953
954 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
955 {
956 bool c = 0;
957 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
958 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
959 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
960 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
961 return c;
962 }
963
964 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
965
966 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
967 {
968 return uadd64_overflow(a->frac, c, &r->frac);
969 }
970
971 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
972 {
973 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
974 return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
975 }
976
977 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
978
979 static void frac64_allones(FloatParts64 *a)
980 {
981 a->frac = -1;
982 }
983
984 static void frac128_allones(FloatParts128 *a)
985 {
986 a->frac_hi = a->frac_lo = -1;
987 }
988
989 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
990
991 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
992 {
993 return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
994 }
995
996 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
997 {
998 uint64_t ta = a->frac_hi, tb = b->frac_hi;
999 if (ta == tb) {
1000 ta = a->frac_lo, tb = b->frac_lo;
1001 if (ta == tb) {
1002 return 0;
1003 }
1004 }
1005 return ta < tb ? -1 : 1;
1006 }
1007
1008 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
1009
1010 static void frac64_clear(FloatParts64 *a)
1011 {
1012 a->frac = 0;
1013 }
1014
1015 static void frac128_clear(FloatParts128 *a)
1016 {
1017 a->frac_hi = a->frac_lo = 0;
1018 }
1019
1020 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
1021
/*
 * A.frac /= B.frac, producing a full 64-bit quotient whose lsb doubles
 * as a sticky (inexact) bit.  Returns true when A.frac < B.frac, in
 * which case the caller must decrement the exponent to match the extra
 * scaling of the quotient.  Both fractions must be normalized (msb set).
 */
static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        /* Dividend is A scaled by 2**64: (A : 0). */
        n0 = a->frac;
        n1 = 0;
    } else {
        /* Dividend is A scaled by 2**63: (A >> 1 : A << 63). */
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}
1053
/*
 * A.frac /= B.frac, producing a 128-bit quotient whose lsb doubles as
 * a sticky (inexact) bit.  Returns true when A < B, in which case the
 * caller must decrement the exponent (cf. frac64_div).  Both fractions
 * must be normalized (msb set).
 */
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    /* When A >= B, pre-shift A right one bit so the quotient fits. */
    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1102
1103 static bool frac64_eqz(FloatParts64 *a)
1104 {
1105 return a->frac == 0;
1106 }
1107
1108 static bool frac128_eqz(FloatParts128 *a)
1109 {
1110 return (a->frac_hi | a->frac_lo) == 0;
1111 }
1112
1113 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1114
/* R = A * B, widening: the product of two N-bit fractions is 2N bits. */
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1127
1128 static void frac64_neg(FloatParts64 *a)
1129 {
1130 a->frac = -a->frac;
1131 }
1132
1133 static void frac128_neg(FloatParts128 *a)
1134 {
1135 bool c = 0;
1136 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1137 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1138 }
1139
1140 static void frac256_neg(FloatParts256 *a)
1141 {
1142 bool c = 0;
1143 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1144 a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1145 a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1146 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1147 }
1148
1149 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1150
1151 static int frac64_normalize(FloatParts64 *a)
1152 {
1153 if (a->frac) {
1154 int shift = clz64(a->frac);
1155 a->frac <<= shift;
1156 return shift;
1157 }
1158 return 64;
1159 }
1160
1161 static int frac128_normalize(FloatParts128 *a)
1162 {
1163 if (a->frac_hi) {
1164 int shl = clz64(a->frac_hi);
1165 a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1166 a->frac_lo <<= shl;
1167 return shl;
1168 } else if (a->frac_lo) {
1169 int shl = clz64(a->frac_lo);
1170 a->frac_hi = a->frac_lo << shl;
1171 a->frac_lo = 0;
1172 return shl + 64;
1173 }
1174 return 128;
1175 }
1176
/*
 * Shift the 256-bit fraction left until the msb is set; return the
 * shift count (256 when the fraction was entirely zero).
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        /* Move the highest non-zero word up to a0, counting in 64s. */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    /* Sub-word shift across all four words. */
    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1225
1226 static void frac64_shl(FloatParts64 *a, int c)
1227 {
1228 a->frac <<= c;
1229 }
1230
1231 static void frac128_shl(FloatParts128 *a, int c)
1232 {
1233 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1234
1235 if (c & 64) {
1236 a0 = a1, a1 = 0;
1237 }
1238
1239 c &= 63;
1240 if (c) {
1241 a0 = shl_double(a0, a1, c);
1242 a1 = a1 << c;
1243 }
1244
1245 a->frac_hi = a0;
1246 a->frac_lo = a1;
1247 }
1248
1249 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1250
1251 static void frac64_shr(FloatParts64 *a, int c)
1252 {
1253 a->frac >>= c;
1254 }
1255
1256 static void frac128_shr(FloatParts128 *a, int c)
1257 {
1258 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1259
1260 if (c & 64) {
1261 a1 = a0, a0 = 0;
1262 }
1263
1264 c &= 63;
1265 if (c) {
1266 a1 = shr_double(a0, a1, c);
1267 a0 = a0 >> c;
1268 }
1269
1270 a->frac_hi = a0;
1271 a->frac_lo = a1;
1272 }
1273
1274 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1275
1276 static void frac64_shrjam(FloatParts64 *a, int c)
1277 {
1278 uint64_t a0 = a->frac;
1279
1280 if (likely(c != 0)) {
1281 if (likely(c < 64)) {
1282 a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1283 } else {
1284 a0 = a0 != 0;
1285 }
1286 a->frac = a0;
1287 }
1288 }
1289
/*
 * Right-shift the 128-bit fraction by C bits, ORing all shifted-out
 * bits into the lsb as a sticky bit.
 */
static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        /* Whole-word move: the old low word is entirely sticky. */
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Shifted out entirely: everything is sticky. */
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    /* Sub-word shift, collecting the bits that fall off a1. */
    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}
1321
/*
 * Right-shift the 256-bit fraction by C bits, ORing all shifted-out
 * bits into the lsb as a sticky bit.
 */
static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        /* Whole-word moves, 128 then 64; displaced words are sticky. */
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Shifted out entirely: everything is sticky. */
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    /* Sub-word shift across all four words, collecting sticky bits. */
    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1365
1366 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1367 {
1368 return usub64_overflow(a->frac, b->frac, &r->frac);
1369 }
1370
1371 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1372 {
1373 bool c = 0;
1374 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1375 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1376 return c;
1377 }
1378
1379 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1380 {
1381 bool c = 0;
1382 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1383 r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1384 r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1385 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1386 return c;
1387 }
1388
1389 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1390
/*
 * Narrow a double-width fraction to single width, folding all discarded
 * low bits into the result's lsb as a sticky bit.
 */
static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1403
1404 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1405 {
1406 r->frac_hi = a->frac;
1407 r->frac_lo = 0;
1408 }
1409
1410 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1411 {
1412 r->frac_hi = a->frac_hi;
1413 r->frac_hm = a->frac_lo;
1414 r->frac_lm = 0;
1415 r->frac_lo = 0;
1416 }
1417
1418 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1419
/*
 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licenced.
 */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
1443
/*
 * Instantiate the width-generic parts code.  N is the fraction width
 * being compiled and W the double-width used for products; partsN(),
 * FloatPartsN and FloatPartsW expand accordingly inside the templates.
 */
#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN glue(FloatParts,N)
#define FloatPartsW glue(FloatParts,W)

/* 64-bit fraction, with 128-bit double-width. */
#define N 64
#define W 128

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

/* 128-bit fraction, with 256-bit double-width. */
#undef N
#undef W
#define N 128
#define W 256

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

/* 256-bit fraction: add/sub only, so no double-width W is defined. */
#undef N
#undef W
#define N 256

#include "softfloat-parts-addsub.c.inc"

#undef N
#undef W
#undef partsN
#undef FloatPartsN
#undef FloatPartsW
1473
1474 /*
1475 * Pack/unpack routines with a specific FloatFmt.
1476 */
1477
/* Unpack a float16 into canonical parts, using an explicit format. */
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}

/* Unpack a float16 into canonical parts, using the standard format. */
static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}

/* Unpack a bfloat16 into canonical parts. */
static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}
1497
/* Round canonical parts per PARAMS and repack as float16. */
static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}

/* Round canonical parts and repack as a standard float16. */
static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

/* Round canonical parts and repack as bfloat16. */
static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}
1518
/* Unpack a float32 into canonical parts. */
static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}

/* Round canonical parts and repack as float32. */
static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}

/* Unpack a float64 into canonical parts. */
static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}

/* Round canonical parts and repack as float64. */
static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}
1546
/* Unpack a float128 into canonical parts. */
static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}

/* Round canonical parts and repack as float128. */
static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
1560
/*
 * Unpack an 80-bit extended float into canonical parts.
 * Returns false if the encoding is invalid; in that case the invalid
 * exception has already been raised and the caller should return the
 * default NaN.
 */
static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
                                      float_status *s)
{
    /* Ensure rounding precision is set before beginning. */
    switch (s->floatx80_rounding_precision) {
    case floatx80_precision_x:
    case floatx80_precision_d:
    case floatx80_precision_s:
        break;
    default:
        g_assert_not_reached();
    }

    if (unlikely(floatx80_invalid_encoding(f))) {
        float_raise(float_flag_invalid, s);
        return false;
    }

    floatx80_unpack_raw(p, f);

    if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
        parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
    } else {
        /* The explicit integer bit is ignored, after invalid checks. */
        p->frac_hi &= MAKE_64BIT_MASK(0, 63);
        p->cls = (p->frac_hi == 0 ? float_class_inf
                  : parts_is_snan_frac(p->frac_hi, s)
                  ? float_class_snan : float_class_qnan);
    }
    return true;
}
1593
/*
 * Round canonical parts at the current floatx80 rounding precision
 * and repack as an 80-bit extended float.
 */
static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
    uint64_t frac;
    int exp;

    switch (p->cls) {
    case float_class_normal:
        if (s->floatx80_rounding_precision == floatx80_precision_x) {
            parts_uncanon_normal(p, s, fmt);
            frac = p->frac_hi;
            exp = p->exp;
        } else {
            /*
             * Rounding at reduced (float/double) precision: first
             * narrow the 128-bit fraction to 64 bits with a sticky lsb.
             */
            FloatParts64 p64;

            p64.sign = p->sign;
            p64.exp = p->exp;
            frac_truncjam(&p64, p);
            parts_uncanon_normal(&p64, s, fmt);
            frac = p64.frac;
            exp = p64.exp;
        }
        if (exp != fmt->exp_max) {
            break;
        }
        /* rounded to inf -- fall through to set frac correctly */

    case float_class_inf:
        /* x86 and m68k differ in the setting of the integer bit. */
        frac = floatx80_infinity_low;
        exp = fmt->exp_max;
        break;

    case float_class_zero:
        frac = 0;
        exp = 0;
        break;

    case float_class_snan:
    case float_class_qnan:
        /* NaNs have the integer bit set. */
        frac = p->frac_hi | (1ull << 63);
        exp = fmt->exp_max;
        break;

    default:
        g_assert_not_reached();
    }

    return packFloatx80(p->sign, exp, frac);
}
1646
1647 /*
1648 * Addition and subtraction
1649 */
1650
1651 static float16 QEMU_FLATTEN
1652 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1653 {
1654 FloatParts64 pa, pb, *pr;
1655
1656 float16_unpack_canonical(&pa, a, status);
1657 float16_unpack_canonical(&pb, b, status);
1658 pr = parts_addsub(&pa, &pb, status, subtract);
1659
1660 return float16_round_pack_canonical(pr, status);
1661 }
1662
1663 float16 float16_add(float16 a, float16 b, float_status *status)
1664 {
1665 return float16_addsub(a, b, status, false);
1666 }
1667
1668 float16 float16_sub(float16 a, float16 b, float_status *status)
1669 {
1670 return float16_addsub(a, b, status, true);
1671 }
1672
1673 static float32 QEMU_SOFTFLOAT_ATTR
1674 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1675 {
1676 FloatParts64 pa, pb, *pr;
1677
1678 float32_unpack_canonical(&pa, a, status);
1679 float32_unpack_canonical(&pb, b, status);
1680 pr = parts_addsub(&pa, &pb, status, subtract);
1681
1682 return float32_round_pack_canonical(pr, status);
1683 }
1684
1685 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1686 {
1687 return soft_f32_addsub(a, b, status, false);
1688 }
1689
1690 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1691 {
1692 return soft_f32_addsub(a, b, status, true);
1693 }
1694
1695 static float64 QEMU_SOFTFLOAT_ATTR
1696 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1697 {
1698 FloatParts64 pa, pb, *pr;
1699
1700 float64_unpack_canonical(&pa, a, status);
1701 float64_unpack_canonical(&pb, b, status);
1702 pr = parts_addsub(&pa, &pb, status, subtract);
1703
1704 return float64_round_pack_canonical(pr, status);
1705 }
1706
1707 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1708 {
1709 return soft_f64_addsub(a, b, status, false);
1710 }
1711
1712 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1713 {
1714 return soft_f64_addsub(a, b, status, true);
1715 }
1716
/* Native-arithmetic helpers used by the hardfloat fast paths. */
static float hard_f32_add(float a, float b)
{
    return a + b;
}

static float hard_f32_sub(float a, float b)
{
    return a - b;
}

static double hard_f64_add(double a, double b)
{
    return a + b;
}

static double hard_f64_sub(double a, double b)
{
    return a - b;
}
1736
1737 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1738 {
1739 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1740 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1741 }
1742 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1743 }
1744
1745 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1746 {
1747 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1748 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1749 } else {
1750 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1751 }
1752 }
1753
/* Dispatch add/sub between the hardfloat and softfloat implementations. */
static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}

static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}
1767
/* Public f32/f64 add and sub entry points. */
float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}

float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}

float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}

float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
1791
1792 static bfloat16 QEMU_FLATTEN
1793 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1794 {
1795 FloatParts64 pa, pb, *pr;
1796
1797 bfloat16_unpack_canonical(&pa, a, status);
1798 bfloat16_unpack_canonical(&pb, b, status);
1799 pr = parts_addsub(&pa, &pb, status, subtract);
1800
1801 return bfloat16_round_pack_canonical(pr, status);
1802 }
1803
1804 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1805 {
1806 return bfloat16_addsub(a, b, status, false);
1807 }
1808
1809 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1810 {
1811 return bfloat16_addsub(a, b, status, true);
1812 }
1813
1814 static float128 QEMU_FLATTEN
1815 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1816 {
1817 FloatParts128 pa, pb, *pr;
1818
1819 float128_unpack_canonical(&pa, a, status);
1820 float128_unpack_canonical(&pb, b, status);
1821 pr = parts_addsub(&pa, &pb, status, subtract);
1822
1823 return float128_round_pack_canonical(pr, status);
1824 }
1825
1826 float128 float128_add(float128 a, float128 b, float_status *status)
1827 {
1828 return float128_addsub(a, b, status, false);
1829 }
1830
1831 float128 float128_sub(float128 a, float128 b, float_status *status)
1832 {
1833 return float128_addsub(a, b, status, true);
1834 }
1835
1836 static floatx80 QEMU_FLATTEN
1837 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
1838 {
1839 FloatParts128 pa, pb, *pr;
1840
1841 if (!floatx80_unpack_canonical(&pa, a, status) ||
1842 !floatx80_unpack_canonical(&pb, b, status)) {
1843 return floatx80_default_nan(status);
1844 }
1845
1846 pr = parts_addsub(&pa, &pb, status, subtract);
1847 return floatx80_round_pack_canonical(pr, status);
1848 }
1849
1850 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
1851 {
1852 return floatx80_addsub(a, b, status, false);
1853 }
1854
1855 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
1856 {
1857 return floatx80_addsub(a, b, status, true);
1858 }
1859
1860 /*
1861 * Multiplication
1862 */
1863
1864 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1865 {
1866 FloatParts64 pa, pb, *pr;
1867
1868 float16_unpack_canonical(&pa, a, status);
1869 float16_unpack_canonical(&pb, b, status);
1870 pr = parts_mul(&pa, &pb, status);
1871
1872 return float16_round_pack_canonical(pr, status);
1873 }
1874
1875 static float32 QEMU_SOFTFLOAT_ATTR
1876 soft_f32_mul(float32 a, float32 b, float_status *status)
1877 {
1878 FloatParts64 pa, pb, *pr;
1879
1880 float32_unpack_canonical(&pa, a, status);
1881 float32_unpack_canonical(&pb, b, status);
1882 pr = parts_mul(&pa, &pb, status);
1883
1884 return float32_round_pack_canonical(pr, status);
1885 }
1886
1887 static float64 QEMU_SOFTFLOAT_ATTR
1888 soft_f64_mul(float64 a, float64 b, float_status *status)
1889 {
1890 FloatParts64 pa, pb, *pr;
1891
1892 float64_unpack_canonical(&pa, a, status);
1893 float64_unpack_canonical(&pb, b, status);
1894 pr = parts_mul(&pa, &pb, status);
1895
1896 return float64_round_pack_canonical(pr, status);
1897 }
1898
/* Native-arithmetic multiply helpers for the hardfloat fast paths. */
static float hard_f32_mul(float a, float b)
{
    return a * b;
}

static double hard_f64_mul(double a, double b)
{
    return a * b;
}
1908
/* Public f32/f64 multiply: dispatch hardfloat vs softfloat via gen2. */
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}

float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}
1922
1923 bfloat16 QEMU_FLATTEN
1924 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1925 {
1926 FloatParts64 pa, pb, *pr;
1927
1928 bfloat16_unpack_canonical(&pa, a, status);
1929 bfloat16_unpack_canonical(&pb, b, status);
1930 pr = parts_mul(&pa, &pb, status);
1931
1932 return bfloat16_round_pack_canonical(pr, status);
1933 }
1934
1935 float128 QEMU_FLATTEN
1936 float128_mul(float128 a, float128 b, float_status *status)
1937 {
1938 FloatParts128 pa, pb, *pr;
1939
1940 float128_unpack_canonical(&pa, a, status);
1941 float128_unpack_canonical(&pb, b, status);
1942 pr = parts_mul(&pa, &pb, status);
1943
1944 return float128_round_pack_canonical(pr, status);
1945 }
1946
1947 floatx80 QEMU_FLATTEN
1948 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
1949 {
1950 FloatParts128 pa, pb, *pr;
1951
1952 if (!floatx80_unpack_canonical(&pa, a, status) ||
1953 !floatx80_unpack_canonical(&pb, b, status)) {
1954 return floatx80_default_nan(status);
1955 }
1956
1957 pr = parts_mul(&pa, &pb, status);
1958 return floatx80_round_pack_canonical(pr, status);
1959 }
1960
1961 /*
1962 * Fused multiply-add
1963 */
1964
1965 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1966 int flags, float_status *status)
1967 {
1968 FloatParts64 pa, pb, pc, *pr;
1969
1970 float16_unpack_canonical(&pa, a, status);
1971 float16_unpack_canonical(&pb, b, status);
1972 float16_unpack_canonical(&pc, c, status);
1973 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1974
1975 return float16_round_pack_canonical(pr, status);
1976 }
1977
1978 static float32 QEMU_SOFTFLOAT_ATTR
1979 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1980 float_status *status)
1981 {
1982 FloatParts64 pa, pb, pc, *pr;
1983
1984 float32_unpack_canonical(&pa, a, status);
1985 float32_unpack_canonical(&pb, b, status);
1986 float32_unpack_canonical(&pc, c, status);
1987 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1988
1989 return float32_round_pack_canonical(pr, status);
1990 }
1991
1992 static float64 QEMU_SOFTFLOAT_ATTR
1993 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1994 float_status *status)
1995 {
1996 FloatParts64 pa, pb, pc, *pr;
1997
1998 float64_unpack_canonical(&pa, a, status);
1999 float64_unpack_canonical(&pb, b, status);
2000 float64_unpack_canonical(&pc, c, status);
2001 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2002
2003 return float64_round_pack_canonical(pr, status);
2004 }
2005
/* Set by tests to force the softfloat fallback for fused multiply-add. */
static bool force_soft_fma;

/*
 * f32 fused multiply-add: try the host FPU's fmaf() fast path and fall
 * back to softfloat when inputs or flags require it.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* Halving the result is not expressible with a plain fmaf(). */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    /* Inputs must all be zero-or-normal for the fast path. */
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* Compute the sign the (zero) product would carry. */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /*
             * Tiny result: redo with the original inputs in softfloat,
             * presumably for exact underflow flag behavior -- confirm.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
2078
2079 float64 QEMU_FLATTEN
2080 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2081 {
2082 union_float64 ua, ub, uc, ur;
2083
2084 ua.s = xa;
2085 ub.s = xb;
2086 uc.s = xc;
2087
2088 if (unlikely(!can_use_fpu(s))) {
2089 goto soft;
2090 }
2091 if (unlikely(flags & float_muladd_halve_result)) {
2092 goto soft;
2093 }
2094
2095 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2096 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2097 goto soft;
2098 }
2099
2100 if (unlikely(force_soft_fma)) {
2101 goto soft;
2102 }
2103
2104 /*
2105 * When (a || b) == 0, there's no need to check for under/over flow,
2106 * since we know the addend is (normal || 0) and the product is 0.
2107 */
2108 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2109 union_float64 up;
2110 bool prod_sign;
2111
2112 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2113 prod_sign ^= !!(flags & float_muladd_negate_product);
2114 up.s = float64_set_sign(float64_zero, prod_sign);
2115
2116 if (flags & float_muladd_negate_c) {
2117 uc.h = -uc.h;
2118 }
2119 ur.h = up.h + uc.h;
2120 } else {
2121 union_float64 ua_orig = ua;
2122 union_float64 uc_orig = uc;
2123
2124 if (flags & float_muladd_negate_product) {
2125 ua.h = -ua.h;
2126 }
2127 if (flags & float_muladd_negate_c) {
2128 uc.h = -uc.h;
2129 }
2130
2131 ur.h = fma(ua.h, ub.h, uc.h);
2132
2133 if (unlikely(f64_is_inf(ur))) {
2134 float_raise(float_flag_overflow, s);
2135 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2136 ua = ua_orig;
2137 uc = uc_orig;
2138 goto soft;
2139 }
2140 }
2141 if (flags & float_muladd_negate_result) {
2142 return float64_chs(ur.s);
2143 }
2144 return ur.s;
2145
2146 soft:
2147 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2148 }
2149
2150 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2151 int flags, float_status *status)
2152 {
2153 FloatParts64 pa, pb, pc, *pr;
2154
2155 bfloat16_unpack_canonical(&pa, a, status);
2156 bfloat16_unpack_canonical(&pb, b, status);
2157 bfloat16_unpack_canonical(&pc, c, status);
2158 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2159
2160 return bfloat16_round_pack_canonical(pr, status);
2161 }
2162
2163 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2164 int flags, float_status *status)
2165 {
2166 FloatParts128 pa, pb, pc, *pr;
2167
2168 float128_unpack_canonical(&pa, a, status);
2169 float128_unpack_canonical(&pb, b, status);
2170 float128_unpack_canonical(&pc, c, status);
2171 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2172
2173 return float128_round_pack_canonical(pr, status);
2174 }
2175
2176 /*
2177 * Division
2178 */
2179
2180 float16 float16_div(float16 a, float16 b, float_status *status)
2181 {
2182 FloatParts64 pa, pb, *pr;
2183
2184 float16_unpack_canonical(&pa, a, status);
2185 float16_unpack_canonical(&pb, b, status);
2186 pr = parts_div(&pa, &pb, status);
2187
2188 return float16_round_pack_canonical(pr, status);
2189 }
2190
2191 static float32 QEMU_SOFTFLOAT_ATTR
2192 soft_f32_div(float32 a, float32 b, float_status *status)
2193 {
2194 FloatParts64 pa, pb, *pr;
2195
2196 float32_unpack_canonical(&pa, a, status);
2197 float32_unpack_canonical(&pb, b, status);
2198 pr = parts_div(&pa, &pb, status);
2199
2200 return float32_round_pack_canonical(pr, status);
2201 }
2202
2203 static float64 QEMU_SOFTFLOAT_ATTR
2204 soft_f64_div(float64 a, float64 b, float_status *status)
2205 {
2206 FloatParts64 pa, pb, *pr;
2207
2208 float64_unpack_canonical(&pa, a, status);
2209 float64_unpack_canonical(&pb, b, status);
2210 pr = parts_div(&pa, &pb, status);
2211
2212 return float64_round_pack_canonical(pr, status);
2213 }
2214
/* Host-FPU float32 division, used by the hardfloat fast path below. */
static float hard_f32_div(float a, float b)
{
    return a / b;
}

/* Host-FPU float64 division, used by the hardfloat fast path below. */
static double hard_f64_div(double a, double b)
{
    return a / b;
}
2224
2225 static bool f32_div_pre(union_float32 a, union_float32 b)
2226 {
2227 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2228 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2229 fpclassify(b.h) == FP_NORMAL;
2230 }
2231 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2232 }
2233
2234 static bool f64_div_pre(union_float64 a, union_float64 b)
2235 {
2236 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2237 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2238 fpclassify(b.h) == FP_NORMAL;
2239 }
2240 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2241 }
2242
2243 static bool f32_div_post(union_float32 a, union_float32 b)
2244 {
2245 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2246 return fpclassify(a.h) != FP_ZERO;
2247 }
2248 return !float32_is_zero(a.s);
2249 }
2250
2251 static bool f64_div_post(union_float64 a, union_float64 b)
2252 {
2253 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2254 return fpclassify(a.h) != FP_ZERO;
2255 }
2256 return !float64_is_zero(a.s);
2257 }
2258
/*
 * float32 division: dispatch through the generic hardfloat wrapper.
 * Uses hard_f32_div on the host FPU when f32_div_pre accepts the
 * operands and f32_div_post accepts the result; otherwise soft_f32_div.
 */
float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}

/* float64 division: same scheme as float32_div, with the f64 helpers. */
float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}
2272
2273 bfloat16 QEMU_FLATTEN
2274 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2275 {
2276 FloatParts64 pa, pb, *pr;
2277
2278 bfloat16_unpack_canonical(&pa, a, status);
2279 bfloat16_unpack_canonical(&pb, b, status);
2280 pr = parts_div(&pa, &pb, status);
2281
2282 return bfloat16_round_pack_canonical(pr, status);
2283 }
2284
2285 float128 QEMU_FLATTEN
2286 float128_div(float128 a, float128 b, float_status *status)
2287 {
2288 FloatParts128 pa, pb, *pr;
2289
2290 float128_unpack_canonical(&pa, a, status);
2291 float128_unpack_canonical(&pb, b, status);
2292 pr = parts_div(&pa, &pb, status);
2293
2294 return float128_round_pack_canonical(pr, status);
2295 }
2296
2297 /*
2298 * Float to Float conversions
2299 *
2300 * Returns the result of converting one float format to another. The
2301 * conversion is performed according to the IEC/IEEE Standard for
2302 * Binary Floating-Point Arithmetic.
2303 *
2304 * Usually this only needs to take care of raising invalid exceptions
2305 * and handling the conversion on NaNs.
2306 */
2307
/*
 * Convert parts in place for an AHP (alternative half-precision,
 * per float16_params_ahp) destination, which has no NaN or Inf
 * encodings: NaN becomes a signed zero and Inf becomes the largest
 * finite value, each raising the invalid exception.
 */
static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        /*
         * There is no NaN in the destination format.  Raise Invalid
         * and return a zero with the sign of the input NaN.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_zero;
        break;

    case float_class_inf:
        /*
         * There is no Inf in the destination format.  Raise Invalid
         * and return the maximum normal with the correct sign.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_normal;
        a->exp = float16_params_ahp.exp_max;
        /* All-ones fraction (plus the rounding bit) = maximum normal. */
        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
                                  float16_params_ahp.frac_size + 1);
        break;

    case float_class_normal:
    case float_class_zero:
        break;

    default:
        g_assert_not_reached();
    }
}
2341
/*
 * Same-width float-to-float conversion on canonical parts only needs
 * to canonicalize NaNs (quieting SNaNs and applying default-NaN rules).
 */
static void parts64_float_to_float(FloatParts64 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

/* Identical logic for 128-bit parts. */
static void parts128_float_to_float(FloatParts128 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

/* Dispatch on the parts type (FloatParts64 vs FloatParts128). */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2358
/*
 * Narrow 128-bit parts (b) into 64-bit parts (a).  For normals the
 * fraction is reduced via frac_truncjam (name indicates the discarded
 * low bits are jammed/sticky-ORed into the result's lsb so rounding
 * stays correct).  For NaNs only the high fraction word is kept before
 * canonicalization; other classes carry no fraction.
 */
static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
                                        float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;

    if (a->cls == float_class_normal) {
        frac_truncjam(a, b);
    } else if (is_nan(a->cls)) {
        /* Discard the low bits of the NaN. */
        a->frac = b->frac_hi;
        parts_return_nan(a, s);
    }
}
2374
/*
 * Widen 64-bit parts (b) into 128-bit parts (a).  Widening is exact,
 * so the only post-processing needed is NaN canonicalization.
 */
static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
                                       float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;
    frac_widen(a, b);

    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}
2387
2388 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2389 {
2390 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2391 FloatParts64 p;
2392
2393 float16a_unpack_canonical(&p, a, s, fmt16);
2394 parts_float_to_float(&p, s);
2395 return float32_round_pack_canonical(&p, s);
2396 }
2397
2398 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2399 {
2400 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2401 FloatParts64 p;
2402
2403 float16a_unpack_canonical(&p, a, s, fmt16);
2404 parts_float_to_float(&p, s);
2405 return float64_round_pack_canonical(&p, s);
2406 }
2407
2408 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2409 {
2410 FloatParts64 p;
2411 const FloatFmt *fmt;
2412
2413 float32_unpack_canonical(&p, a, s);
2414 if (ieee) {
2415 parts_float_to_float(&p, s);
2416 fmt = &float16_params;
2417 } else {
2418 parts_float_to_ahp(&p, s);
2419 fmt = &float16_params_ahp;
2420 }
2421 return float16a_round_pack_canonical(&p, s, fmt);
2422 }
2423
2424 static float64 QEMU_SOFTFLOAT_ATTR
2425 soft_float32_to_float64(float32 a, float_status *s)
2426 {
2427 FloatParts64 p;
2428
2429 float32_unpack_canonical(&p, a, s);
2430 parts_float_to_float(&p, s);
2431 return float64_round_pack_canonical(&p, s);
2432 }
2433
2434 float64 float32_to_float64(float32 a, float_status *s)
2435 {
2436 if (likely(float32_is_normal(a))) {
2437 /* Widening conversion can never produce inexact results. */
2438 union_float32 uf;
2439 union_float64 ud;
2440 uf.s = a;
2441 ud.h = uf.h;
2442 return ud.s;
2443 } else if (float32_is_zero(a)) {
2444 return float64_set_sign(float64_zero, float32_is_neg(a));
2445 } else {
2446 return soft_float32_to_float64(a, s);
2447 }
2448 }
2449
2450 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2451 {
2452 FloatParts64 p;
2453 const FloatFmt *fmt;
2454
2455 float64_unpack_canonical(&p, a, s);
2456 if (ieee) {
2457 parts_float_to_float(&p, s);
2458 fmt = &float16_params;
2459 } else {
2460 parts_float_to_ahp(&p, s);
2461 fmt = &float16_params_ahp;
2462 }
2463 return float16a_round_pack_canonical(&p, s, fmt);
2464 }
2465
2466 float32 float64_to_float32(float64 a, float_status *s)
2467 {
2468 FloatParts64 p;
2469
2470 float64_unpack_canonical(&p, a, s);
2471 parts_float_to_float(&p, s);
2472 return float32_round_pack_canonical(&p, s);
2473 }
2474
/*
 * bfloat16 <-> float32/float64 conversions.  All four follow the same
 * unpack -> float_to_float (NaN canonicalization) -> repack pattern;
 * widening from bfloat16 is exact, narrowing to bfloat16 rounds.
 */
float32 bfloat16_to_float32(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float64 bfloat16_to_float64(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 float32_to_bfloat16(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}

bfloat16 float64_to_bfloat16(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}
2510
/*
 * float128 <-> float32/float64 conversions.  Narrowing goes through
 * parts_float_to_float_narrow (sticky truncation of the wide fraction);
 * widening through parts_float_to_float_widen (exact).
 */
float32 float128_to_float32(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float32_round_pack_canonical(&p64, s);
}

float64 float128_to_float64(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float64_round_pack_canonical(&p64, s);
}

float128 float32_to_float128(float32 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float32_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}

float128 float64_to_float128(float64 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float64_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}
2550
2551 /*
2552 * Round to integral value
2553 */
2554
/*
 * Round each format to an integral value in the same format, using the
 * rounding mode from the status word.  The third argument to
 * parts_round_to_int is a scale of 0 (no pre-scaling).
 */
float16 float16_round_to_int(float16 a, float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
    return float16_round_pack_canonical(&p, s);
}

float32 float32_round_to_int(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
    return float32_round_pack_canonical(&p, s);
}

float64 float64_round_to_int(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, s);
}

float128 float128_round_to_int(float128 a, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
    return float128_round_pack_canonical(&p, s);
}
2599
2600 /*
2601 * Floating-point to signed integer conversions
2602 */
2603
/*
 * The _scalbn variants scale the input by 2**scale before converting,
 * rounding with rmode and clamping to the given [min, max] range.
 * NOTE(review): the clamp/invalid-flag behavior lives in
 * parts_float_to_sint, which is not visible in this chunk.
 */
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                              float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

/* float128 variants are static: only used by the wrappers below. */
static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
2738
/*
 * Thin wrappers over the *_scalbn helpers: scale 0, rounding mode from
 * the status word (plain variants) or forced truncation toward zero
 * (_round_to_zero variants).
 */
int8_t float16_to_int8(float16 a, float_status *s)
{
    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float16_to_int32(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float16_to_int64(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float32_to_int16(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float32_to_int32(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float32_to_int64(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float64_to_int16(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float64_to_int32(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float64_to_int64(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float128_to_int32(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float128_to_int64(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

/* Round-to-zero (truncating) variants. */
int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

/* bfloat16 variants of the same wrappers. */
int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2883
2884 /*
2885 * Floating-point to unsigned integer conversions
2886 */
2887
/*
 * Unsigned counterparts of the signed _scalbn conversions: scale the
 * input by 2**scale, round with rmode, clamp to [0, max] via
 * parts_float_to_uint (behavior defined there, not in this chunk).
 */
uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
}

uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

/* float128 variants are static: only used by the wrappers below. */
static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
3022
/*
 * Thin wrappers over the unsigned *_scalbn helpers: scale 0, rounding
 * mode from the status word (plain variants) or forced truncation
 * (_round_to_zero variants).
 */
uint8_t float16_to_uint8(float16 a, float_status *s)
{
    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float128_to_uint32(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float128_to_uint64(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

/* Round-to-zero (truncating) variants. */
uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

/* bfloat16 variants of the same wrappers. */
uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
3167
3168 /*
3169 * Signed integer to floating-point conversions
3170 */
3171
/*
 * Signed integer -> float16.  Only the int64 _scalbn version does real
 * work (parts conversion with scaling by 2**scale); the narrower and
 * plain variants funnel into it, relying on implicit sign extension.
 */
float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return float16_round_pack_canonical(&p, status);
}

float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
3209
3210 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3211 {
3212 FloatParts64 p;
3213
3214 parts64_sint_to_float(&p, a, scale, status);
3215 return float32_round_pack_canonical(&p, status);
3216 }
3217
/* Narrower signed inputs widen to int64_t without loss. */
float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

/* Unscaled (scale == 0) convenience wrappers. */
float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}
3242
/* Convert a * 2**scale to float64; rounding happens in the pack step. */
float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return float64_round_pack_canonical(&p, status);
}

/* Narrower signed inputs widen to int64_t without loss. */
float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

/* Unscaled (scale == 0) convenience wrappers. */
float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}
3275
/* Convert a * 2**scale to bfloat16; rounding happens in the pack step. */
bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}

/* Narrower signed inputs widen to int64_t without loss. */
bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

/* Unscaled (scale == 0) convenience wrappers. */
bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}
3308
/*
 * Signed integer -> float128.  No _scalbn variant exists for this
 * width in SOURCE; the conversion is unscaled.
 */
float128 int64_to_float128(int64_t a, float_status *status)
{
    FloatParts128 p;

    parts_sint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}

float128 int32_to_float128(int32_t a, float_status *status)
{
    return int64_to_float128(a, status);
}
3321
3322 /*
3323 * Unsigned Integer to floating-point conversions
3324 */
3325
/* Convert a * 2**scale to float16; rounding happens in the pack step. */
float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float16_round_pack_canonical(&p, status);
}

/* Narrower unsigned inputs widen to uint64_t without loss. */
float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

/* Unscaled (scale == 0) convenience wrappers. */
float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
3363
/* Convert a * 2**scale to float32; rounding happens in the pack step. */
float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float32_round_pack_canonical(&p, status);
}

/* Narrower unsigned inputs widen to uint64_t without loss. */
float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

/* Unscaled (scale == 0) convenience wrappers. */
float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}
3396
/* Convert a * 2**scale to float64; rounding happens in the pack step. */
float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float64_round_pack_canonical(&p, status);
}

/* Narrower unsigned inputs widen to uint64_t without loss. */
float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

/* Unscaled (scale == 0) convenience wrappers. */
float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}
3429
/* Convert a * 2**scale to bfloat16; rounding happens in the pack step. */
bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}

/* Narrower unsigned inputs widen to uint64_t without loss. */
bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

/* Unscaled (scale == 0) convenience wrappers. */
bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}
3462
/* Unsigned integer -> float128 (unscaled; no _scalbn variant here). */
float128 uint64_to_float128(uint64_t a, float_status *status)
{
    FloatParts128 p;

    parts_uint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}
3470
3471 /*
3472 * Minimum and maximum
3473 */
3474
/*
 * Per-type min/max workers.  Each unpacks both operands to the
 * canonical parts representation, lets parts_minmax() pick the result
 * (flags is a mask of the minmax_* bits selecting min vs max, the
 * *num NaN handling, and magnitude comparison), then repacks.
 */
static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float16_round_pack_canonical(pr, s);
}

static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
                                float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return bfloat16_round_pack_canonical(pr, s);
}

static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float32_round_pack_canonical(pr, s);
}

static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float64_round_pack_canonical(pr, s);
}

static float128 float128_minmax(float128 a, float128 b,
                                float_status *s, int flags)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float128_round_pack_canonical(pr, s);
}
3531
/*
 * MINMAX_1 expands to one public wrapper around <type>_minmax();
 * MINMAX_2 instantiates the six variants for a type: max/min, the
 * *num forms (minmax_isnum), and the magnitude-comparing *nummag
 * forms (minmax_ismag).  See parts_minmax for the flag semantics.
 */
#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

#define MINMAX_2(type) \
    MINMAX_1(type, max, 0) \
    MINMAX_1(type, maxnum, minmax_isnum) \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
    MINMAX_1(type, min, minmax_ismin) \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)

MINMAX_2(float16)
MINMAX_2(bfloat16)
MINMAX_2(float32)
MINMAX_2(float64)
MINMAX_2(float128)

#undef MINMAX_1
#undef MINMAX_2
3552
3553 /*
3554 * Floating point compare
3555 */
3556
/*
 * float16 comparison via the parts layer; is_quiet selects the quiet
 * compare variant (see parts_compare for the NaN signalling rules).
 */
static FloatRelation QEMU_FLATTEN
float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, false);
}

FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, true);
}
3576
/* Softfloat fallback path for float32 comparison. */
static FloatRelation QEMU_SOFTFLOAT_ATTR
float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

/*
 * Hardfloat fast path: use the host's non-trapping <math.h> comparison
 * macros.  Any ordered outcome can be returned directly; the unordered
 * case must fall through to softfloat so the exception flags are set.
 */
static FloatRelation QEMU_FLATTEN
float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    /* May squash denormal inputs, hence compare ua.s/ub.s below. */
    float32_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float32_do_compare(ua.s, ub.s, s, is_quiet);
}

FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, false);
}

FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, true);
}
3626
/* Softfloat fallback path for float64 comparison. */
static FloatRelation QEMU_SOFTFLOAT_ATTR
float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

/*
 * Hardfloat fast path; mirrors float32_hs_compare.  Ordered outcomes
 * return directly, unordered falls through to softfloat for flags.
 */
static FloatRelation QEMU_FLATTEN
float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    /* May squash denormal inputs, hence compare ua.s/ub.s below. */
    float64_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float64_do_compare(ua.s, ub.s, s, is_quiet);
}

FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, false);
}

FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, true);
}
3676
/* bfloat16 comparison via the parts layer (no hardfloat fast path). */
static FloatRelation QEMU_FLATTEN
bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, false);
}

FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, true);
}
3696
/* float128 comparison via the parts layer (no hardfloat fast path). */
static FloatRelation QEMU_FLATTEN
float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
{
    FloatParts128 pa, pb;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

FloatRelation float128_compare(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, false);
}

FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, true);
}
3716
3717 /*
3718 * Scale by 2**N
3719 */
3720
/* Scale a by 2**n: unpack, adjust via parts_scalbn, repack. */
float16 float16_scalbn(float16 a, int n, float_status *status)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float16_round_pack_canonical(&p, status);
}

float32 float32_scalbn(float32 a, int n, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float32_round_pack_canonical(&p, status);
}

float64 float64_scalbn(float64 a, int n, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float64_round_pack_canonical(&p, status);
}

bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 float128_scalbn(float128 a, int n, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float128_round_pack_canonical(&p, status);
}
3765
3766 /*
3767 * Square Root
3768 */
3769
/* float16 square root; always uses the parts (softfloat) path. */
float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float16_params);
    return float16_round_pack_canonical(&p, status);
}

/* Softfloat fallback used when the hardfloat fast path bails out. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float32_params);
    return float32_round_pack_canonical(&p, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float64_params);
    return float64_round_pack_canonical(&p, status);
}
3798
/*
 * float32 square root with a hardfloat fast path: only non-negative
 * zero/normal inputs use the host's sqrtf(); anything else (NaN, inf,
 * denormal, negative) goes to the softfloat fallback, which also
 * handles exception flags.
 */
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        /* Classify via the host FP macros on the reinterpreted value. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}

/* float64 square root; same structure as float32_sqrt above. */
float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
3852
/* bfloat16 and float128 square roots; parts (softfloat) path only. */
bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_sqrt(&p, status, &float128_params);
    return float128_round_pack_canonical(&p, status);
}
3870
3871 /*----------------------------------------------------------------------------
3872 | The pattern for a default generated NaN.
3873 *----------------------------------------------------------------------------*/
3874
/*
 * Build the format's default NaN: parts_default_nan fills a canonical
 * parts value, then the fraction is shifted down from the canonical
 * position to the format's raw field width before packing.
 */
float16 float16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

float128 float128_default_nan(float_status *status)
{
    FloatParts128 p;

    parts_default_nan(&p, status);
    /* 128-bit fraction lives in two words; use the frac_shr helper. */
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}

bfloat16 bfloat16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}
3919
3920 /*----------------------------------------------------------------------------
3921 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3922 *----------------------------------------------------------------------------*/
3923
/*
 * Silence a signalling NaN: unpack raw, shift the fraction up to the
 * canonical position expected by parts_silence_nan, silence it, then
 * shift back down and repack.
 */
float16 float16_silence_nan(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_raw(&p, a);
    p.frac <<= float16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_silence_nan(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_raw(&p, a);
    p.frac <<= float32_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_silence_nan(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_raw(&p, a);
    p.frac <<= float64_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_raw(&p, a);
    p.frac <<= bfloat16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}

float128 float128_silence_nan(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_raw(&p, a);
    /* 128-bit fraction lives in two words; use the frac helpers. */
    frac_shl(&p, float128_params.frac_shift);
    parts_silence_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}
3978
3979 /*----------------------------------------------------------------------------
3980 | If `a' is denormal and we are in flush-to-zero mode then set the
3981 | input-denormal exception and return zero. Otherwise just return the value.
3982 *----------------------------------------------------------------------------*/
3983
3984 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
3985 {
3986 if (p.exp == 0 && p.frac != 0) {
3987 float_raise(float_flag_input_denormal, status);
3988 return true;
3989 }
3990
3991 return false;
3992 }
3993
/*
 * Per-format flush-to-zero of denormal inputs: when the status flag
 * flush_inputs_to_zero is set and a is denormal, return a same-signed
 * zero (input_denormal is raised in parts_squash_denormal); otherwise
 * return a unchanged.
 */
float16 float16_squash_input_denormal(float16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float16_set_sign(float16_zero, p.sign);
        }
    }
    return a;
}

float32 float32_squash_input_denormal(float32 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float32_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float32_set_sign(float32_zero, p.sign);
        }
    }
    return a;
}

float64 float64_squash_input_denormal(float64 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float64_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float64_set_sign(float64_zero, p.sign);
        }
    }
    return a;
}

bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        bfloat16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return bfloat16_set_sign(bfloat16_zero, p.sign);
        }
    }
    return a;
}
4045
4046 /*----------------------------------------------------------------------------
4047 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
4048 | and 7, and returns the properly rounded 32-bit integer corresponding to the
4049 | input. If `zSign' is 1, the input is negated before being converted to an
4050 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
4051 | is simply rounded to an integer, with the inexact exception raised if the
4052 | input cannot be represented exactly as an integer. However, if the fixed-
4053 | point input is too large, the invalid exception is raised and the largest
4054 | positive or negative integer is returned.
4055 *----------------------------------------------------------------------------*/
4056
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    int32_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Choose the value added below the binary point (bits 6..0 of
     * absZ) before truncating: 0x40 rounds to nearest, 0x7f rounds
     * away from zero in the selected direction, 0 truncates.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round away only when the integer part is currently even. */
        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
    }
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    /* Exact tie under nearest-even: clear the LSB to land on even. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        absZ &= ~1;
    }
    z = absZ;
    if ( zSign ) z = - z;
    /* Overflow of int32: result too wide, or sign flipped on negate. */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise(float_flag_invalid, status);
        return zSign ? INT32_MIN : INT32_MAX;
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4104
4105 /*----------------------------------------------------------------------------
4106 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4107 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4108 | and returns the properly rounded 64-bit integer corresponding to the input.
4109 | If `zSign' is 1, the input is negated before being converted to an integer.
4110 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4111 | the inexact exception raised if the input cannot be represented exactly as
4112 | an integer. However, if the fixed-point input is too large, the invalid
4113 | exception is raised and the largest positive or negative integer is
4114 | returned.
4115 *----------------------------------------------------------------------------*/
4116
static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;
    int64_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * absZ1 holds the bits below the binary point; decide whether to
     * round the integer part absZ0 up by one.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Top bit of absZ1 set: fraction is >= 1/2. */
        increment = ((int64_t) absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if ( increment ) {
        ++absZ0;
        /* Wrap to zero means the rounded magnitude exceeds 64 bits. */
        if ( absZ0 == 0 ) goto overflow;
        /* Exact tie under nearest-even: clear the LSB to land on even. */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }
    z = absZ0;
    if ( zSign ) z = - z;
    /* Sign flipped on negate => magnitude did not fit in int64. */
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
 overflow:
        float_raise(float_flag_invalid, status);
        return zSign ? INT64_MIN : INT64_MAX;
    }
    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4166
4167 /*----------------------------------------------------------------------------
4168 | Normalizes the subnormal single-precision floating-point value represented
4169 | by the denormalized significand `aSig'. The normalized exponent and
4170 | significand are stored at the locations pointed to by `zExpPtr' and
4171 | `zSigPtr', respectively.
4172 *----------------------------------------------------------------------------*/
4173
4174 static void
4175 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
4176 {
4177 int8_t shiftCount;
4178
4179 shiftCount = clz32(aSig) - 8;
4180 *zSigPtr = aSig<<shiftCount;
4181 *zExpPtr = 1 - shiftCount;
4182
4183 }
4184
4185 /*----------------------------------------------------------------------------
4186 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4187 | and significand `zSig', and returns the proper single-precision floating-
4188 | point value corresponding to the abstract input. Ordinarily, the abstract
4189 | value is simply rounded and packed into the single-precision format, with
4190 | the inexact exception raised if the abstract input cannot be represented
4191 | exactly. However, if the abstract value is too large, the overflow and
4192 | inexact exceptions are raised and an infinity or maximal finite value is
4193 | returned. If the abstract value is too small, the input value is rounded to
4194 | a subnormal number, and the underflow and inexact exceptions are raised if
4195 | the abstract input cannot be represented exactly as a subnormal single-
4196 | precision floating-point number.
4197 | The input significand `zSig' has its binary point between bits 30
4198 | and 29, which is 7 bits to the left of the usual location. This shifted
4199 | significand must be normalized or smaller. If `zSig' is not normalized,
4200 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4201 | and it must not require rounding. In the usual case that `zSig' is
4202 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4203 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4204 | Binary Floating-Point Arithmetic.
4205 *----------------------------------------------------------------------------*/
4206
static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Choose the value added below the result's LSB (bits 6..0 of
     * zSig, the 7 extra precision bits) before the final shift.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round away only when the result's LSB is currently clear. */
        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
        break;
    }
    roundBits = zSig & 0x7F;
    /* Unsigned compare catches both zExp > 0xFD and zExp < 0. */
    if ( 0xFD <= (uint16_t) zExp ) {
        if ( ( 0xFD < zExp )
             || ( ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /*
             * Overflow: produce infinity when rounding moves us past
             * the largest finite value, else the largest finite value
             * (round-to-odd and truncating modes never overflow to inf).
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat32(zSign, 0, 0);
            }
            /* Tininess detected per the status's before/after choice. */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < 0x80000000);
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>7;
    /* Exact tie under nearest-even: clear the LSB to land on even. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );

}
4283
4284 /*----------------------------------------------------------------------------
4285 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4286 | and significand `zSig', and returns the proper single-precision floating-
4287 | point value corresponding to the abstract input. This routine is just like
4288 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4289 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4290 | floating-point exponent.
4291 *----------------------------------------------------------------------------*/
4292
4293 static float32
4294 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4295 float_status *status)
4296 {
4297 int8_t shiftCount;
4298
4299 shiftCount = clz32(zSig) - 1;
4300 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4301 status);
4302
4303 }
4304
4305 /*----------------------------------------------------------------------------
4306 | Normalizes the subnormal double-precision floating-point value represented
4307 | by the denormalized significand `aSig'. The normalized exponent and
4308 | significand are stored at the locations pointed to by `zExpPtr' and
4309 | `zSigPtr', respectively.
4310 *----------------------------------------------------------------------------*/
4311
static void
normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /*
     * Move the most-significant set bit of aSig into the implicit-one
     * position (bit 52) and report the matching normalized exponent.
     */
    int8_t shift = clz64(aSig) - 11;

    *zExpPtr = 1 - shift;
    *zSigPtr = aSig << shift;
}
4322
4323 /*----------------------------------------------------------------------------
4324 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4325 | double-precision floating-point value, returning the result. After being
4326 | shifted into the proper positions, the three fields are simply added
4327 | together to form the result. This means that any integer portion of `zSig'
4328 | will be added into the exponent. Since a properly normalized significand
4329 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4330 | than the desired result exponent whenever `zSig' is a complete, normalized
4331 | significand.
4332 *----------------------------------------------------------------------------*/
4333
4334 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4335 {
4336
4337 return make_float64(
4338 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4339
4340 }
4341
4342 /*----------------------------------------------------------------------------
4343 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4344 | and significand `zSig', and returns the proper double-precision floating-
4345 | point value corresponding to the abstract input. Ordinarily, the abstract
4346 | value is simply rounded and packed into the double-precision format, with
4347 | the inexact exception raised if the abstract input cannot be represented
4348 | exactly. However, if the abstract value is too large, the overflow and
4349 | inexact exceptions are raised and an infinity or maximal finite value is
4350 | returned. If the abstract value is too small, the input value is rounded to
4351 | a subnormal number, and the underflow and inexact exceptions are raised if
4352 | the abstract input cannot be represented exactly as a subnormal double-
4353 | precision floating-point number.
4354 | The input significand `zSig' has its binary point between bits 62
4355 | and 61, which is 10 bits to the left of the usual location. This shifted
4356 | significand must be normalized or smaller. If `zSig' is not normalized,
4357 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4358 | and it must not require rounding. In the usual case that `zSig' is
4359 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4360 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4361 | Binary Floating-Point Arithmetic.
4362 *----------------------------------------------------------------------------*/
4363
static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Choose the value to add just below the truncation point (bits 9..0
     * are dropped by the final >>10): 0x200 rounds to nearest, 0x3ff
     * rounds away from zero, 0 truncates toward zero.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x200;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x3ff;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x3ff : 0;
        break;
    case float_round_to_odd:
        /* Only round up when the result's LSB (bit 10) is currently 0 */
        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
        break;
    default:
        abort();
    }
    roundBits = zSig & 0x3FF;       /* the bits about to be discarded */
    /* The unsigned compare catches both zExp >= 0x7FD and negative zExp */
    if ( 0x7FD <= (uint16_t) zExp ) {
        if ( ( 0x7FD < zExp )
             || ( ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /*
             * Overflow.  Return infinity when rounding moves away from
             * zero; otherwise a significand of -1 makes the addition in
             * packFloat64 wrap to the largest finite number.
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
        }
        if ( zExp < 0 ) {
            /* Subnormal result (or one that rounds up to smallest normal) */
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat64(zSign, 0, 0);
            }
            /*
             * Tininess is detected either before rounding, or after when
             * the rounded value still falls short of the smallest normal.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    /* Apply the increment, then drop the guard/sticky bits */
    zSig = ( zSig + roundIncrement )>>10;
    /* Exact halfway case under nearest-even: clear the LSB (tie to even) */
    if (!(roundBits ^ 0x200) && roundNearestEven) {
        zSig &= ~1;
    }
    /* A zero significand must encode as true zero (exponent 0) */
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );

}
4439
4440 /*----------------------------------------------------------------------------
4441 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4442 | and significand `zSig', and returns the proper double-precision floating-
4443 | point value corresponding to the abstract input. This routine is just like
4444 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4445 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4446 | floating-point exponent.
4447 *----------------------------------------------------------------------------*/
4448
4449 static float64
4450 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4451 float_status *status)
4452 {
4453 int8_t shiftCount;
4454
4455 shiftCount = clz64(zSig) - 1;
4456 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4457 status);
4458
4459 }
4460
4461 /*----------------------------------------------------------------------------
4462 | Normalizes the subnormal extended double-precision floating-point value
4463 | represented by the denormalized significand `aSig'. The normalized exponent
4464 | and significand are stored at the locations pointed to by `zExpPtr' and
4465 | `zSigPtr', respectively.
4466 *----------------------------------------------------------------------------*/
4467
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /*
     * floatx80 has an explicit integer bit, so normalization shifts the
     * leading set bit all the way up to bit 63.
     */
    int8_t lz = clz64(aSig);

    *zExpPtr = 1 - lz;
    *zSigPtr = aSig << lz;
}
4477
4478 /*----------------------------------------------------------------------------
4479 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4480 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4481 | and returns the proper extended double-precision floating-point value
4482 | corresponding to the abstract input. Ordinarily, the abstract value is
4483 | rounded and packed into the extended double-precision format, with the
4484 | inexact exception raised if the abstract input cannot be represented
4485 | exactly. However, if the abstract value is too large, the overflow and
4486 | inexact exceptions are raised and an infinity or maximal finite value is
4487 | returned. If the abstract value is too small, the input value is rounded to
4488 | a subnormal number, and the underflow and inexact exceptions are raised if
4489 | the abstract input cannot be represented exactly as a subnormal extended
4490 | double-precision floating-point number.
4491 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4492 | the result is rounded to the same number of bits as single or double
4493 | precision, respectively. Otherwise, the result is rounded to the full
4494 | precision of the extended double-precision format.
4495 | The input significand must be normalized or smaller. If the input
4496 | significand is not normalized, `zExp' must be 0; in that case, the result
4497 | returned is a subnormal number, and it must not require rounding. The
4498 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4499 | Floating-Point Arithmetic.
4500 *----------------------------------------------------------------------------*/
4501
floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    FloatRoundMode roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * For reduced precision, pick the half-ulp increment and the mask of
     * significand bits that lie below the rounding point.  Full extended
     * precision is handled separately at the precision80 label.
     */
    switch (roundingPrecision) {
    case floatx80_precision_x:
        goto precision80;
    case floatx80_precision_d:
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
        break;
    case floatx80_precision_s:
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
        break;
    default:
        g_assert_not_reached();
    }
    /* Fold the low 64 significand bits into the sticky bit of zSig0 */
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;  /* the bits about to be discarded */
    /* The unsigned compare catches both zExp >= 0x7FFE and zExp <= 0 */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if ( ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            /* Subnormal result */
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            /*
             * Tininess is detected before rounding, or after when adding
             * the increment does not carry out of the significand
             * (i.e. the rounded value is still below the smallest normal).
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /* A carry into bit 63 means we rounded up to a normal number */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            /* Halfway case under nearest-even: also clear the result LSB */
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    /* Carry out of the significand: renormalize to 1.0 * 2^(zExp+1) */
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    /* Halfway case under nearest-even: also clear the result LSB */
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /*
     * Full 64-bit-significand precision: zSig1 holds the round and
     * sticky bits below the result.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    /* The unsigned compare catches both zExp >= 0x7FFE and zExp <= 0 */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if ( ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Rounding toward zero yields the largest representable value */
            if ( ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            /* Subnormal result */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* Re-derive the increment: zSig1 changed in the shift above */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                /* Exact tie under nearest-even: force the LSB to zero */
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                /* Carry into bit 63: we rounded up to a normal number */
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            /* Significand overflowed: renormalize to 1.0 * 2^(zExp+1) */
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            /* Exact tie under nearest-even: force the LSB to zero */
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}
4691
4692 /*----------------------------------------------------------------------------
4693 | Takes an abstract floating-point value having sign `zSign', exponent
4694 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4695 | and returns the proper extended double-precision floating-point value
4696 | corresponding to the abstract input. This routine is just like
4697 | `roundAndPackFloatx80' except that the input significand does not have to be
4698 | normalized.
4699 *----------------------------------------------------------------------------*/
4700
4701 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
4702 bool zSign, int32_t zExp,
4703 uint64_t zSig0, uint64_t zSig1,
4704 float_status *status)
4705 {
4706 int8_t shiftCount;
4707
4708 if ( zSig0 == 0 ) {
4709 zSig0 = zSig1;
4710 zSig1 = 0;
4711 zExp -= 64;
4712 }
4713 shiftCount = clz64(zSig0);
4714 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4715 zExp -= shiftCount;
4716 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4717 zSig0, zSig1, status);
4718
4719 }
4720
4721 /*----------------------------------------------------------------------------
4722 | Returns the least-significant 64 fraction bits of the quadruple-precision
4723 | floating-point value `a'.
4724 *----------------------------------------------------------------------------*/
4725
4726 static inline uint64_t extractFloat128Frac1( float128 a )
4727 {
4728
4729 return a.low;
4730
4731 }
4732
4733 /*----------------------------------------------------------------------------
4734 | Returns the most-significant 48 fraction bits of the quadruple-precision
4735 | floating-point value `a'.
4736 *----------------------------------------------------------------------------*/
4737
4738 static inline uint64_t extractFloat128Frac0( float128 a )
4739 {
4740
4741 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4742
4743 }
4744
4745 /*----------------------------------------------------------------------------
4746 | Returns the exponent bits of the quadruple-precision floating-point value
4747 | `a'.
4748 *----------------------------------------------------------------------------*/
4749
4750 static inline int32_t extractFloat128Exp( float128 a )
4751 {
4752
4753 return ( a.high>>48 ) & 0x7FFF;
4754
4755 }
4756
4757 /*----------------------------------------------------------------------------
4758 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4759 *----------------------------------------------------------------------------*/
4760
4761 static inline bool extractFloat128Sign(float128 a)
4762 {
4763 return a.high >> 63;
4764 }
4765
4766 /*----------------------------------------------------------------------------
4767 | Normalizes the subnormal quadruple-precision floating-point value
4768 | represented by the denormalized significand formed by the concatenation of
4769 | `aSig0' and `aSig1'. The normalized exponent is stored at the location
4770 | pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4771 | significand are stored at the location pointed to by `zSig0Ptr', and the
4772 | least significant 64 bits of the normalized significand are stored at the
4773 | location pointed to by `zSig1Ptr'.
4774 *----------------------------------------------------------------------------*/
4775
static void
normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shift;

    if (aSig0 != 0) {
        /*
         * The high word already holds bits: shift its leading set bit
         * up to fraction position 112 (bit 48 of the high word).
         */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /* Everything significant is in the low word. */
    shift = clz64(aSig1) - 15;
    if (shift < 0) {
        /* Leading bit is above position 48: split aSig1 across both words */
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    } else {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    }
    *zExpPtr = -shift - 63;
}
4806
4807 /*----------------------------------------------------------------------------
4808 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4809 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4810 | floating-point value, returning the result. After being shifted into the
4811 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4812 | added together to form the most significant 32 bits of the result. This
4813 | means that any integer portion of `zSig0' will be added into the exponent.
4814 | Since a properly normalized significand will have an integer portion equal
4815 | to 1, the `zExp' input should be 1 less than the desired result exponent
4816 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4817 | significand.
4818 *----------------------------------------------------------------------------*/
4819
4820 static inline float128
4821 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4822 {
4823 float128 z;
4824
4825 z.low = zSig1;
4826 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4827 return z;
4828 }
4829
4830 /*----------------------------------------------------------------------------
4831 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4832 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4833 | and `zSig2', and returns the proper quadruple-precision floating-point value
4834 | corresponding to the abstract input. Ordinarily, the abstract value is
4835 | simply rounded and packed into the quadruple-precision format, with the
4836 | inexact exception raised if the abstract input cannot be represented
4837 | exactly. However, if the abstract value is too large, the overflow and
4838 | inexact exceptions are raised and an infinity or maximal finite value is
4839 | returned. If the abstract value is too small, the input value is rounded to
4840 | a subnormal number, and the underflow and inexact exceptions are raised if
4841 | the abstract input cannot be represented exactly as a subnormal quadruple-
4842 | precision floating-point number.
4843 | The input significand must be normalized or smaller. If the input
4844 | significand is not normalized, `zExp' must be 0; in that case, the result
4845 | returned is a subnormal number, and it must not require rounding. In the
4846 | usual case that the input significand is normalized, `zExp' must be 1 less
4847 | than the ``true'' floating-point exponent. The handling of underflow and
4848 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4849 *----------------------------------------------------------------------------*/
4850
static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
                                     uint64_t zSig0, uint64_t zSig1,
                                     uint64_t zSig2, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * zSig2 holds the round/sticky bits below the 128-bit significand;
     * decide whether they force an increment of zSig0:zSig1.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig2 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig2;
        break;
    case float_round_down:
        increment = zSign && zSig2;
        break;
    case float_round_to_odd:
        /* Round to odd: only bump when the result LSB is already 0 */
        increment = !(zSig1 & 0x1) && zSig2;
        break;
    default:
        abort();
    }
    /* The unsigned compare catches both zExp >= 0x7FFD and negative zExp */
    if ( 0x7FFD <= (uint32_t) zExp ) {
        if ( ( 0x7FFD < zExp )
             || ( ( zExp == 0x7FFD )
                  && eq128(
                         UINT64_C(0x0001FFFFFFFFFFFF),
                         UINT64_C(0xFFFFFFFFFFFFFFFF),
                         zSig0,
                         zSig1
                     )
                  && increment
                )
           ) {
            /* Overflow */
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Rounding toward zero yields the largest finite number */
            if ( ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
                 || (roundingMode == float_round_to_odd)
               ) {
                return
                    packFloat128(
                        zSign,
                        0x7FFE,
                        UINT64_C(0x0000FFFFFFFFFFFF),
                        UINT64_C(0xFFFFFFFFFFFFFFFF)
                    );
            }
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( zExp < 0 ) {
            /* Subnormal result */
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat128(zSign, 0, 0, 0);
            }
            /*
             * Tininess is detected before rounding, or after when the
             * rounded value is still below the smallest normal number.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || !increment
                  || lt128(zSig0, zSig1,
                           UINT64_C(0x0001FFFFFFFFFFFF),
                           UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift128ExtraRightJamming(
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
            zExp = 0;
            if (isTiny && zSig2) {
                float_raise(float_flag_underflow, status);
            }
            /* Re-derive the increment: zSig1/zSig2 changed in the shift */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig2 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig2;
                break;
            case float_round_down:
                increment = zSign && zSig2;
                break;
            case float_round_to_odd:
                increment = !(zSig1 & 0x1) && zSig2;
                break;
            default:
                abort();
            }
        }
    }
    if (zSig2) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        /* Exact tie under nearest-even: force the LSB to zero */
        if ((zSig2 + zSig2 == 0) && roundNearestEven) {
            zSig1 &= ~1;
        }
    }
    else {
        /* A zero significand must encode as true zero (exponent 0) */
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    }
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
4962
4963 /*----------------------------------------------------------------------------
4964 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4965 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4966 | returns the proper quadruple-precision floating-point value corresponding
4967 | to the abstract input. This routine is just like `roundAndPackFloat128'
4968 | except that the input significand has fewer bits and does not have to be
4969 | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4970 | point exponent.
4971 *----------------------------------------------------------------------------*/
4972
4973 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
4974 uint64_t zSig0, uint64_t zSig1,
4975 float_status *status)
4976 {
4977 int8_t shiftCount;
4978 uint64_t zSig2;
4979
4980 if ( zSig0 == 0 ) {
4981 zSig0 = zSig1;
4982 zSig1 = 0;
4983 zExp -= 64;
4984 }
4985 shiftCount = clz64(zSig0) - 15;
4986 if ( 0 <= shiftCount ) {
4987 zSig2 = 0;
4988 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4989 }
4990 else {
4991 shift128ExtraRightJamming(
4992 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4993 }
4994 zExp -= shiftCount;
4995 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
4996
4997 }
4998
4999
5000 /*----------------------------------------------------------------------------
5001 | Returns the result of converting the 32-bit two's complement integer `a'
5002 | to the extended double-precision floating-point format. The conversion
5003 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5004 | Arithmetic.
5005 *----------------------------------------------------------------------------*/
5006
5007 floatx80 int32_to_floatx80(int32_t a, float_status *status)
5008 {
5009 bool zSign;
5010 uint32_t absA;
5011 int8_t shiftCount;
5012 uint64_t zSig;
5013
5014 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5015 zSign = ( a < 0 );
5016 absA = zSign ? - a : a;
5017 shiftCount = clz32(absA) + 32;
5018 zSig = absA;
5019 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
5020
5021 }
5022
5023 /*----------------------------------------------------------------------------
5024 | Returns the result of converting the 64-bit two's complement integer `a'
5025 | to the extended double-precision floating-point format. The conversion
5026 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5027 | Arithmetic.
5028 *----------------------------------------------------------------------------*/
5029
5030 floatx80 int64_to_floatx80(int64_t a, float_status *status)
5031 {
5032 bool zSign;
5033 uint64_t absA;
5034 int8_t shiftCount;
5035
5036 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5037 zSign = ( a < 0 );
5038 absA = zSign ? - a : a;
5039 shiftCount = clz64(absA);
5040 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
5041
5042 }
5043
5044 /*----------------------------------------------------------------------------
5045 | Returns the result of converting the single-precision floating-point value
5046 | `a' to the extended double-precision floating-point format. The conversion
5047 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5048 | Arithmetic.
5049 *----------------------------------------------------------------------------*/
5050
floatx80 float32_to_floatx80(float32 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint32_t aSig;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        /* NaN or infinity */
        if (aSig) {
            /* NaN: convert the payload, then mark the result quiet */
            floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        return packFloatx80(aSign,
                            floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        /* Zero or subnormal; subnormals are exactly representable */
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    /* Make the integer bit explicit, rebias the exponent (0x7F -> 0x3FFF),
     * and left-align the 24-bit significand in 64 bits.  The conversion
     * is exact, so no rounding is needed. */
    aSig |= 0x00800000;
    return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );

}
5079
5080 /*----------------------------------------------------------------------------
5081 | Returns the remainder of the single-precision floating-point value `a'
5082 | with respect to the corresponding value `b'. The operation is performed
5083 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5084 *----------------------------------------------------------------------------*/
5085
float32 float32_rem(float32 a, float32 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t q;
    uint64_t aSig64, bSig64, q64;
    uint32_t alternateASig;
    int32_t sigMean;
    a = float32_squash_input_denormal(a, status);
    b = float32_squash_input_denormal(b, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    if ( aExp == 0xFF ) {
        /* a is NaN or infinity; rem(inf, x) is invalid */
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    if ( bExp == 0xFF ) {
        if (bSig) {
            return propagateFloat32NaN(a, b, status);
        }
        /* rem(a, inf) == a for finite a */
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* rem(a, 0) is invalid */
            float_raise(float_flag_invalid, status);
            return float32_default_nan(status);
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        /* rem(0, b) == 0, and |a| < |b| for any subnormal a here */
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the implicit integer bits explicit */
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent gap: one 64/32 division suffices */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            /* |a| is far smaller than |b|: a is already the remainder */
            if ( expDiff < -1 ) return a;
            aSig >>= 1;
        }
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            /* Compute the quotient bits and the matching partial remainder */
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /*
         * Large exponent gap: reduce the remainder iteratively, peeling
         * off up to 62 quotient bits per step with an estimated divide
         * (the estimate may be up to 2 too high, hence the -2 clamp).
         */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (uint64_t) aSig )<<40;
        bSig64 = ( (uint64_t) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        /* Final partial division for the remaining exponent difference */
        expDiff += 64;
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /*
     * Step past the true quotient, then pick whichever of the two
     * candidate remainders is nearer (ties go to the even quotient),
     * giving the IEEE round-to-nearest remainder.
     */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int32_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    /* The remainder may be negative; fold its sign into the result sign */
    zSign = ( (int32_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
}
5181
5182
5183
5184 /*----------------------------------------------------------------------------
5185 | Returns the binary exponential of the single-precision floating-point value
5186 | `a'. The operation is performed according to the IEC/IEEE Standard for
5187 | Binary Floating-Point Arithmetic.
5188 |
5189 | Uses the following identities:
5190 |
5191 | 1. -------------------------------------------------------------------------
5192 | x x*ln(2)
5193 | 2 = e
5194 |
5195 | 2. -------------------------------------------------------------------------
5196 | 2 3 4 5 n
5197 | x x x x x x x
5198 | e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5199 | 1! 2! 3! 4! 5! n!
5200 *----------------------------------------------------------------------------*/
5201
/* Taylor-series coefficients 1/n! for e^x (as raw float64 bit patterns);
 * entry n-1 holds 1/n!, for terms n = 1..15 of the expansion used by
 * float32_exp2() below. */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /* 1: 1/1! */
    const_float64( 0x3fe0000000000000ll ), /* 2: 1/2! */
    const_float64( 0x3fc5555555555555ll ), /* 3: 1/3! */
    const_float64( 0x3fa5555555555555ll ), /* 4: 1/4! */
    const_float64( 0x3f81111111111111ll ), /* 5: 1/5! */
    const_float64( 0x3f56c16c16c16c17ll ), /* 6: 1/6! */
    const_float64( 0x3f2a01a01a01a01all ), /* 7: 1/7! */
    const_float64( 0x3efa01a01a01a01all ), /* 8: 1/8! */
    const_float64( 0x3ec71de3a556c734ll ), /* 9: 1/9! */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10: 1/10! */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11: 1/11! */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12: 1/12! */
    const_float64( 0x3de6124613a86d09ll ), /* 13: 1/13! */
    const_float64( 0x3da93974a8c07c9dll ), /* 14: 1/14! */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15: 1/15! */
};
5220
5221 float32 float32_exp2(float32 a, float_status *status)
5222 {
5223 bool aSign;
5224 int aExp;
5225 uint32_t aSig;
5226 float64 r, x, xn;
5227 int i;
5228 a = float32_squash_input_denormal(a, status);
5229
5230 aSig = extractFloat32Frac( a );
5231 aExp = extractFloat32Exp( a );
5232 aSign = extractFloat32Sign( a );
5233
5234 if ( aExp == 0xFF) {
5235 if (aSig) {
5236 return propagateFloat32NaN(a, float32_zero, status);
5237 }
5238 return (aSign) ? float32_zero : a;
5239 }
5240 if (aExp == 0) {
5241 if (aSig == 0) return float32_one;
5242 }
5243
5244 float_raise(float_flag_inexact, status);
5245
5246 /* ******************************* */
5247 /* using float64 for approximation */
5248 /* ******************************* */
5249 x = float32_to_float64(a, status);
5250 x = float64_mul(x, float64_ln2, status);
5251
5252 xn = x;
5253 r = float64_one;
5254 for (i = 0 ; i < 15 ; i++) {
5255 float64 f;
5256
5257 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5258 r = float64_add(r, f, status);
5259
5260 xn = float64_mul(xn, x, status);
5261 }
5262
5263 return float64_to_float32(r, status);
5264 }
5265
5266 /*----------------------------------------------------------------------------
5267 | Returns the binary log of the single-precision floating-point value `a'.
5268 | The operation is performed according to the IEC/IEEE Standard for Binary
5269 | Floating-Point Arithmetic.
5270 *----------------------------------------------------------------------------*/
5271 float32 float32_log2(float32 a, float_status *status)
5272 {
5273 bool aSign, zSign;
5274 int aExp;
5275 uint32_t aSig, zSig, i;
5276
5277 a = float32_squash_input_denormal(a, status);
5278 aSig = extractFloat32Frac( a );
5279 aExp = extractFloat32Exp( a );
5280 aSign = extractFloat32Sign( a );
5281
5282 if ( aExp == 0 ) {
5283 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5284 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5285 }
5286 if ( aSign ) {
5287 float_raise(float_flag_invalid, status);
5288 return float32_default_nan(status);
5289 }
5290 if ( aExp == 0xFF ) {
5291 if (aSig) {
5292 return propagateFloat32NaN(a, float32_zero, status);
5293 }
5294 return a;
5295 }
5296
5297 aExp -= 0x7F;
5298 aSig |= 0x00800000;
5299 zSign = aExp < 0;
5300 zSig = aExp << 23;
5301
5302 for (i = 1 << 22; i > 0; i >>= 1) {
5303 aSig = ( (uint64_t)aSig * aSig ) >> 23;
5304 if ( aSig & 0x01000000 ) {
5305 aSig >>= 1;
5306 zSig |= i;
5307 }
5308 }
5309
5310 if ( zSign )
5311 zSig = -zSig;
5312
5313 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
5314 }
5315
5316 /*----------------------------------------------------------------------------
5317 | Returns the result of converting the double-precision floating-point value
5318 | `a' to the extended double-precision floating-point format. The conversion
5319 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5320 | Arithmetic.
5321 *----------------------------------------------------------------------------*/
5322
5323 floatx80 float64_to_floatx80(float64 a, float_status *status)
5324 {
5325 bool aSign;
5326 int aExp;
5327 uint64_t aSig;
5328
5329 a = float64_squash_input_denormal(a, status);
5330 aSig = extractFloat64Frac( a );
5331 aExp = extractFloat64Exp( a );
5332 aSign = extractFloat64Sign( a );
5333 if ( aExp == 0x7FF ) {
5334 if (aSig) {
5335 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5336 status);
5337 return floatx80_silence_nan(res, status);
5338 }
5339 return packFloatx80(aSign,
5340 floatx80_infinity_high,
5341 floatx80_infinity_low);
5342 }
5343 if ( aExp == 0 ) {
5344 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5345 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5346 }
5347 return
5348 packFloatx80(
5349 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5350
5351 }
5352
/*----------------------------------------------------------------------------
| Returns the remainder of the double-precision floating-point value `a'
| with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
| (IEEE remainder rounds the quotient to nearest, ties to even, so the
| result may be negative even when both operands are positive.)
*----------------------------------------------------------------------------*/

float64 float64_rem(float64 a, float64 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint64_t aSig, bSig;
    uint64_t q, alternateASig;
    int64_t sigMean;

    a = float64_squash_input_denormal(a, status);
    b = float64_squash_input_denormal(b, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    /* rem(NaN, x), rem(x, NaN) propagate; rem(inf, x) is invalid */
    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    /* rem(x, inf) = x for finite x */
    if ( bExp == 0x7FF ) {
        if (bSig) {
            return propagateFloat64NaN(a, b, status);
        }
        return a;
    }
    /* rem(x, 0) is invalid; normalize a subnormal divisor */
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float64_default_nan(status);
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* make the implicit integer bit explicit and left-justify in 64 bits */
    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
    if ( expDiff < 0 ) {
        /* |a| < |b|/2: a is already the remainder */
        if ( expDiff < -1 ) return a;
        aSig >>= 1;
    }
    /* first quotient bit */
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    expDiff -= 64;
    /* consume 62 quotient bits per iteration while the exponent gap is
     * large; q is backed off by 2 so the estimate never exceeds the true
     * quotient, keeping the (modular) partial remainder consistent */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        /* final partial step: only expDiff quotient bits remain */
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }
    /* step past zero: find the last non-negative partial remainder */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int64_t) aSig );
    /* choose the remainder nearer to zero; on a tie pick the one that
     * makes the quotient even (round-to-nearest-even on the quotient) */
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int64_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);

}
5440
5441 /*----------------------------------------------------------------------------
5442 | Returns the binary log of the double-precision floating-point value `a'.
5443 | The operation is performed according to the IEC/IEEE Standard for Binary
5444 | Floating-Point Arithmetic.
5445 *----------------------------------------------------------------------------*/
5446 float64 float64_log2(float64 a, float_status *status)
5447 {
5448 bool aSign, zSign;
5449 int aExp;
5450 uint64_t aSig, aSig0, aSig1, zSig, i;
5451 a = float64_squash_input_denormal(a, status);
5452
5453 aSig = extractFloat64Frac( a );
5454 aExp = extractFloat64Exp( a );
5455 aSign = extractFloat64Sign( a );
5456
5457 if ( aExp == 0 ) {
5458 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5459 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5460 }
5461 if ( aSign ) {
5462 float_raise(float_flag_invalid, status);
5463 return float64_default_nan(status);
5464 }
5465 if ( aExp == 0x7FF ) {
5466 if (aSig) {
5467 return propagateFloat64NaN(a, float64_zero, status);
5468 }
5469 return a;
5470 }
5471
5472 aExp -= 0x3FF;
5473 aSig |= UINT64_C(0x0010000000000000);
5474 zSign = aExp < 0;
5475 zSig = (uint64_t)aExp << 52;
5476 for (i = 1LL << 51; i > 0; i >>= 1) {
5477 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5478 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
5479 if ( aSig & UINT64_C(0x0020000000000000) ) {
5480 aSig >>= 1;
5481 zSig |= i;
5482 }
5483 }
5484
5485 if ( zSign )
5486 zSig = -zSig;
5487 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
5488 }
5489
5490 /*----------------------------------------------------------------------------
5491 | Returns the result of converting the extended double-precision floating-
5492 | point value `a' to the 32-bit two's complement integer format. The
5493 | conversion is performed according to the IEC/IEEE Standard for Binary
5494 | Floating-Point Arithmetic---which means in particular that the conversion
5495 | is rounded according to the current rounding mode. If `a' is a NaN, the
5496 | largest positive integer is returned. Otherwise, if the conversion
5497 | overflows, the largest integer with the same sign as `a' is returned.
5498 *----------------------------------------------------------------------------*/
5499
5500 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5501 {
5502 bool aSign;
5503 int32_t aExp, shiftCount;
5504 uint64_t aSig;
5505
5506 if (floatx80_invalid_encoding(a)) {
5507 float_raise(float_flag_invalid, status);
5508 return 1 << 31;
5509 }
5510 aSig = extractFloatx80Frac( a );
5511 aExp = extractFloatx80Exp( a );
5512 aSign = extractFloatx80Sign( a );
5513 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5514 shiftCount = 0x4037 - aExp;
5515 if ( shiftCount <= 0 ) shiftCount = 1;
5516 shift64RightJamming( aSig, shiftCount, &aSig );
5517 return roundAndPackInt32(aSign, aSig, status);
5518
5519 }
5520
5521 /*----------------------------------------------------------------------------
5522 | Returns the result of converting the extended double-precision floating-
5523 | point value `a' to the 32-bit two's complement integer format. The
5524 | conversion is performed according to the IEC/IEEE Standard for Binary
5525 | Floating-Point Arithmetic, except that the conversion is always rounded
5526 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5527 | Otherwise, if the conversion overflows, the largest integer with the same
5528 | sign as `a' is returned.
5529 *----------------------------------------------------------------------------*/
5530
5531 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5532 {
5533 bool aSign;
5534 int32_t aExp, shiftCount;
5535 uint64_t aSig, savedASig;
5536 int32_t z;
5537
5538 if (floatx80_invalid_encoding(a)) {
5539 float_raise(float_flag_invalid, status);
5540 return 1 << 31;
5541 }
5542 aSig = extractFloatx80Frac( a );
5543 aExp = extractFloatx80Exp( a );
5544 aSign = extractFloatx80Sign( a );
5545 if ( 0x401E < aExp ) {
5546 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5547 goto invalid;
5548 }
5549 else if ( aExp < 0x3FFF ) {
5550 if (aExp || aSig) {
5551 float_raise(float_flag_inexact, status);
5552 }
5553 return 0;
5554 }
5555 shiftCount = 0x403E - aExp;
5556 savedASig = aSig;
5557 aSig >>= shiftCount;
5558 z = aSig;
5559 if ( aSign ) z = - z;
5560 if ( ( z < 0 ) ^ aSign ) {
5561 invalid:
5562 float_raise(float_flag_invalid, status);
5563 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5564 }
5565 if ( ( aSig<<shiftCount ) != savedASig ) {
5566 float_raise(float_flag_inexact, status);
5567 }
5568 return z;
5569
5570 }
5571
5572 /*----------------------------------------------------------------------------
5573 | Returns the result of converting the extended double-precision floating-
5574 | point value `a' to the 64-bit two's complement integer format. The
5575 | conversion is performed according to the IEC/IEEE Standard for Binary
5576 | Floating-Point Arithmetic---which means in particular that the conversion
5577 | is rounded according to the current rounding mode. If `a' is a NaN,
5578 | the largest positive integer is returned. Otherwise, if the conversion
5579 | overflows, the largest integer with the same sign as `a' is returned.
5580 *----------------------------------------------------------------------------*/
5581
5582 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5583 {
5584 bool aSign;
5585 int32_t aExp, shiftCount;
5586 uint64_t aSig, aSigExtra;
5587
5588 if (floatx80_invalid_encoding(a)) {
5589 float_raise(float_flag_invalid, status);
5590 return 1ULL << 63;
5591 }
5592 aSig = extractFloatx80Frac( a );
5593 aExp = extractFloatx80Exp( a );
5594 aSign = extractFloatx80Sign( a );
5595 shiftCount = 0x403E - aExp;
5596 if ( shiftCount <= 0 ) {
5597 if ( shiftCount ) {
5598 float_raise(float_flag_invalid, status);
5599 if (!aSign || floatx80_is_any_nan(a)) {
5600 return INT64_MAX;
5601 }
5602 return INT64_MIN;
5603 }
5604 aSigExtra = 0;
5605 }
5606 else {
5607 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5608 }
5609 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5610
5611 }
5612
5613 /*----------------------------------------------------------------------------
5614 | Returns the result of converting the extended double-precision floating-
5615 | point value `a' to the 64-bit two's complement integer format. The
5616 | conversion is performed according to the IEC/IEEE Standard for Binary
5617 | Floating-Point Arithmetic, except that the conversion is always rounded
5618 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5619 | Otherwise, if the conversion overflows, the largest integer with the same
5620 | sign as `a' is returned.
5621 *----------------------------------------------------------------------------*/
5622
5623 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5624 {
5625 bool aSign;
5626 int32_t aExp, shiftCount;
5627 uint64_t aSig;
5628 int64_t z;
5629
5630 if (floatx80_invalid_encoding(a)) {
5631 float_raise(float_flag_invalid, status);
5632 return 1ULL << 63;
5633 }
5634 aSig = extractFloatx80Frac( a );
5635 aExp = extractFloatx80Exp( a );
5636 aSign = extractFloatx80Sign( a );
5637 shiftCount = aExp - 0x403E;
5638 if ( 0 <= shiftCount ) {
5639 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5640 if ( ( a.high != 0xC03E ) || aSig ) {
5641 float_raise(float_flag_invalid, status);
5642 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5643 return INT64_MAX;
5644 }
5645 }
5646 return INT64_MIN;
5647 }
5648 else if ( aExp < 0x3FFF ) {
5649 if (aExp | aSig) {
5650 float_raise(float_flag_inexact, status);
5651 }
5652 return 0;
5653 }
5654 z = aSig>>( - shiftCount );
5655 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5656 float_raise(float_flag_inexact, status);
5657 }
5658 if ( aSign ) z = - z;
5659 return z;
5660
5661 }
5662
5663 /*----------------------------------------------------------------------------
5664 | Returns the result of converting the extended double-precision floating-
5665 | point value `a' to the single-precision floating-point format. The
5666 | conversion is performed according to the IEC/IEEE Standard for Binary
5667 | Floating-Point Arithmetic.
5668 *----------------------------------------------------------------------------*/
5669
5670 float32 floatx80_to_float32(floatx80 a, float_status *status)
5671 {
5672 bool aSign;
5673 int32_t aExp;
5674 uint64_t aSig;
5675
5676 if (floatx80_invalid_encoding(a)) {
5677 float_raise(float_flag_invalid, status);
5678 return float32_default_nan(status);
5679 }
5680 aSig = extractFloatx80Frac( a );
5681 aExp = extractFloatx80Exp( a );
5682 aSign = extractFloatx80Sign( a );
5683 if ( aExp == 0x7FFF ) {
5684 if ( (uint64_t) ( aSig<<1 ) ) {
5685 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5686 status);
5687 return float32_silence_nan(res, status);
5688 }
5689 return packFloat32( aSign, 0xFF, 0 );
5690 }
5691 shift64RightJamming( aSig, 33, &aSig );
5692 if ( aExp || aSig ) aExp -= 0x3F81;
5693 return roundAndPackFloat32(aSign, aExp, aSig, status);
5694
5695 }
5696
5697 /*----------------------------------------------------------------------------
5698 | Returns the result of converting the extended double-precision floating-
5699 | point value `a' to the double-precision floating-point format. The
5700 | conversion is performed according to the IEC/IEEE Standard for Binary
5701 | Floating-Point Arithmetic.
5702 *----------------------------------------------------------------------------*/
5703
5704 float64 floatx80_to_float64(floatx80 a, float_status *status)
5705 {
5706 bool aSign;
5707 int32_t aExp;
5708 uint64_t aSig, zSig;
5709
5710 if (floatx80_invalid_encoding(a)) {
5711 float_raise(float_flag_invalid, status);
5712 return float64_default_nan(status);
5713 }
5714 aSig = extractFloatx80Frac( a );
5715 aExp = extractFloatx80Exp( a );
5716 aSign = extractFloatx80Sign( a );
5717 if ( aExp == 0x7FFF ) {
5718 if ( (uint64_t) ( aSig<<1 ) ) {
5719 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5720 status);
5721 return float64_silence_nan(res, status);
5722 }
5723 return packFloat64( aSign, 0x7FF, 0 );
5724 }
5725 shift64RightJamming( aSig, 1, &zSig );
5726 if ( aExp || aSig ) aExp -= 0x3C01;
5727 return roundAndPackFloat64(aSign, aExp, zSig, status);
5728
5729 }
5730
5731 /*----------------------------------------------------------------------------
5732 | Returns the result of converting the extended double-precision floating-
5733 | point value `a' to the quadruple-precision floating-point format. The
5734 | conversion is performed according to the IEC/IEEE Standard for Binary
5735 | Floating-Point Arithmetic.
5736 *----------------------------------------------------------------------------*/
5737
5738 float128 floatx80_to_float128(floatx80 a, float_status *status)
5739 {
5740 bool aSign;
5741 int aExp;
5742 uint64_t aSig, zSig0, zSig1;
5743
5744 if (floatx80_invalid_encoding(a)) {
5745 float_raise(float_flag_invalid, status);
5746 return float128_default_nan(status);
5747 }
5748 aSig = extractFloatx80Frac( a );
5749 aExp = extractFloatx80Exp( a );
5750 aSign = extractFloatx80Sign( a );
5751 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5752 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5753 status);
5754 return float128_silence_nan(res, status);
5755 }
5756 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5757 return packFloat128( aSign, aExp, zSig0, zSig1 );
5758
5759 }
5760
5761 /*----------------------------------------------------------------------------
5762 | Rounds the extended double-precision floating-point value `a'
5763 | to the precision provided by floatx80_rounding_precision and returns the
5764 | result as an extended double-precision floating-point value.
5765 | The operation is performed according to the IEC/IEEE Standard for Binary
5766 | Floating-Point Arithmetic.
5767 *----------------------------------------------------------------------------*/
5768
5769 floatx80 floatx80_round(floatx80 a, float_status *status)
5770 {
5771 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5772 extractFloatx80Sign(a),
5773 extractFloatx80Exp(a),
5774 extractFloatx80Frac(a), 0, status);
5775 }
5776
/*----------------------------------------------------------------------------
| Rounds the extended double-precision floating-point value `a' to an integer,
| and returns the result as an extended double-precision floating-point
| value.  The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t lastBitMask, roundBitsMask;
    floatx80 z;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aExp = extractFloatx80Exp( a );
    /* exponent >= 0x403E: |a| >= 2^63, already integral (or NaN/inf) */
    if ( 0x403E <= aExp ) {
        if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
            return propagateFloatx80NaN(a, a, status);
        }
        return a;
    }
    /* |a| < 1: the result is 0 or +/-1 depending on the rounding mode */
    if ( aExp < 0x3FFF ) {
        if (    ( aExp == 0 )
             && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
            return a;
        }
        float_raise(float_flag_inexact, status);
        aSign = extractFloatx80Sign( a );
        switch (status->float_rounding_mode) {
        case float_round_nearest_even:
            /* round to +/-1 only when |a| > 1/2 (exp 0x3FFE, frac != 0) */
            if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
               ) {
                return
                    packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
            }
            break;
        case float_round_ties_away:
            /* |a| >= 1/2 rounds away from zero to +/-1 */
            if (aExp == 0x3FFE) {
                return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
            }
            break;
        case float_round_down:
            return
                  aSign ?
                      packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
                : packFloatx80( 0, 0, 0 );
        case float_round_up:
            return
                  aSign ? packFloatx80( 1, 0, 0 )
                : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));

        case float_round_to_zero:
            break;
        default:
            g_assert_not_reached();
        }
        /* remaining cases truncate toward zero, preserving the sign */
        return packFloatx80( aSign, 0, 0 );
    }
    /* lastBitMask marks the units bit of the integer part; everything
     * below it (roundBitsMask) is the fraction to be rounded away */
    lastBitMask = 1;
    lastBitMask <<= 0x403E - aExp;
    roundBitsMask = lastBitMask - 1;
    z = a;
    switch (status->float_rounding_mode) {
    case float_round_nearest_even:
        z.low += lastBitMask>>1;
        /* exact tie: clear the units bit to round to even */
        if ((z.low & roundBitsMask) == 0) {
            z.low &= ~lastBitMask;
        }
        break;
    case float_round_ties_away:
        z.low += lastBitMask >> 1;
        break;
    case float_round_to_zero:
        break;
    case float_round_up:
        if (!extractFloatx80Sign(z)) {
            z.low += roundBitsMask;
        }
        break;
    case float_round_down:
        if (extractFloatx80Sign(z)) {
            z.low += roundBitsMask;
        }
        break;
    default:
        abort();
    }
    z.low &= ~ roundBitsMask;
    /* the additions above may carry out of the significand entirely;
     * renormalize by bumping the exponent and restoring the integer bit */
    if ( z.low == 0 ) {
        ++z.high;
        z.low = UINT64_C(0x8000000000000000);
    }
    if (z.low != a.low) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
5879
/*----------------------------------------------------------------------------
| Returns the result of dividing the extended double-precision floating-point
| value `a' by the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    uint64_t rem0, rem1, rem2, term0, term1, term2;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    /* handle NaN operands, inf/inf (invalid) and inf/x (= inf) */
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            goto invalid;
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    /* x/inf = 0 with the quotient's sign */
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return packFloatx80( zSign, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* 0/0 is invalid; x/0 signals division by zero */
            if ( ( aExp | aSig ) == 0 ) {
 invalid:
                float_raise(float_flag_invalid, status);
                return floatx80_default_nan(status);
            }
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(zSign, floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    zExp = aExp - bExp + 0x3FFE;
    rem1 = 0;
    /* pre-shift so the quotient significand lands in [1, 2) */
    if ( bSig <= aSig ) {
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
        ++zExp;
    }
    /* estimate the high 64 quotient bits, then correct the estimate by
     * adding the divisor back while the remainder is negative */
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
    mul64To128( bSig, zSig0, &term0, &term1 );
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
    }
    /* low 64 quotient bits; refine only when close enough to a rounding
     * boundary to matter, and fold any leftover remainder into a sticky
     * bit so rounding is correct */
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
        mul64To128( bSig, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        }
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
5966
/*----------------------------------------------------------------------------
| Returns the remainder of the extended double-precision floating-point value
| `a' with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
| the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
| the absolute value of the integer quotient.
*----------------------------------------------------------------------------*/

floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
                         float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff, aExpOrig;
    uint64_t aSig0, aSig1, bSig;
    uint64_t q, term0, term1, alternateASig0, alternateASig1;

    *quotient = 0;
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    /* keep the original exponent to detect pseudo-denormal inputs below */
    aExpOrig = aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    /* NaN operands propagate; rem(inf, x) is invalid */
    if ( aExp == 0x7FFF ) {
        if (    (uint64_t) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    /* rem(x, inf) = x for finite x */
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if (aExp == 0 && aSig0 >> 63) {
            /*
             * Pseudo-denormal argument must be returned in normalized
             * form.
             */
            return packFloatx80(aSign, 1, aSig0);
        }
        return a;
    }
    /* rem(x, 0) is invalid; normalize a subnormal divisor */
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        /* |a| < |b|: for 'mod' (or |a| < |b|/2) a is already the result */
        if ( mod || expDiff < -1 ) {
            if (aExp == 1 && aExpOrig == 0) {
                /*
                 * Pseudo-denormal argument must be returned in
                 * normalized form.
                 */
                return packFloatx80(aSign, aExp, aSig0);
            }
            return a;
        }
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    /* first quotient bit */
    *quotient = q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    expDiff -= 64;
    /* consume 62 quotient bits per iteration while the exponent gap is
     * large; q is backed off by 2 so the estimate never exceeds the true
     * quotient and the partial remainder stays non-negative */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
        *quotient <<= 62;
        *quotient += q;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        /* final partial step: only expDiff quotient bits remain; correct
         * the underestimate by repeated subtraction of the shifted divisor */
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
        if (expDiff < 64) {
            *quotient <<= expDiff;
        } else {
            *quotient = 0;
        }
        *quotient += q;
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    if (!mod) {
        /* IEEE remainder: if the complementary remainder is nearer to
         * zero (or equidistant with an odd quotient), use it and flip
         * the sign, effectively rounding the quotient to nearest-even */
        sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
        if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
             || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
                  && ( q & 1 ) )
           ) {
            aSig0 = alternateASig0;
            aSig1 = alternateASig1;
            zSign = ! zSign;
            ++*quotient;
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            floatx80_precision_x, zSign, bExp + expDiff, aSig0, aSig1, status);

}
6096
6097 /*----------------------------------------------------------------------------
6098 | Returns the remainder of the extended double-precision floating-point value
6099 | `a' with respect to the corresponding value `b'. The operation is performed
6100 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6101 *----------------------------------------------------------------------------*/
6102
6103 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6104 {
6105 uint64_t quotient;
6106 return floatx80_modrem(a, b, false, &quotient, status);
6107 }
6108
6109 /*----------------------------------------------------------------------------
6110 | Returns the remainder of the extended double-precision floating-point value
6111 | `a' with respect to the corresponding value `b', with the quotient truncated
6112 | toward zero.
6113 *----------------------------------------------------------------------------*/
6114
6115 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6116 {
6117 uint64_t quotient;
6118 return floatx80_modrem(a, b, true, &quotient, status);
6119 }
6120
6121 /*----------------------------------------------------------------------------
6122 | Returns the square root of the extended double-precision floating-point
6123 | value `a'. The operation is performed according to the IEC/IEEE Standard
6124 | for Binary Floating-Point Arithmetic.
6125 *----------------------------------------------------------------------------*/
6126
floatx80 floatx80_sqrt(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    /* Reject non-canonical 80-bit encodings up front. */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN propagates; sqrt(+inf) = +inf; sqrt(-inf) is invalid. */
        if ((uint64_t)(aSig0 << 1)) {
            return propagateFloatx80NaN(a, a, status);
        }
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt of a negative (other than -0, returned as-is) is invalid. */
        if ( ( aExp | aSig0 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        /* +0 -> +0; otherwise bring a subnormal into normalized form. */
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Result exponent is half the unbiased input exponent, re-biased. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    /* Initial ~32-bit root estimate, then one Newton-style refinement
       via a 128/64 division to get a full 64-bit root candidate. */
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* Correct the candidate downward until the remainder a - z*z >= 0. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Estimate the low 64 bits of the root from the remainder. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
        /* Estimate may be slightly high: recompute the full remainder
           and step zSig1 down until it is non-negative; fold any
           leftover bits into the sticky bit for correct rounding. */
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    /* Assemble the 128-bit significand and round at the precision
       currently selected in the status word. */
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= doubleZSig0;
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                0, zExp, zSig0, zSig1, status);
}
6191
6192 /*----------------------------------------------------------------------------
6193 | Returns the result of converting the quadruple-precision floating-point
6194 | value `a' to the extended double-precision floating-point format. The
6195 | conversion is performed according to the IEC/IEEE Standard for Binary
6196 | Floating-Point Arithmetic.
6197 *----------------------------------------------------------------------------*/
6198
6199 floatx80 float128_to_floatx80(float128 a, float_status *status)
6200 {
6201 bool aSign;
6202 int32_t aExp;
6203 uint64_t aSig0, aSig1;
6204
6205 aSig1 = extractFloat128Frac1( a );
6206 aSig0 = extractFloat128Frac0( a );
6207 aExp = extractFloat128Exp( a );
6208 aSign = extractFloat128Sign( a );
6209 if ( aExp == 0x7FFF ) {
6210 if ( aSig0 | aSig1 ) {
6211 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6212 status);
6213 return floatx80_silence_nan(res, status);
6214 }
6215 return packFloatx80(aSign, floatx80_infinity_high,
6216 floatx80_infinity_low);
6217 }
6218 if ( aExp == 0 ) {
6219 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6220 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6221 }
6222 else {
6223 aSig0 |= UINT64_C(0x0001000000000000);
6224 }
6225 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6226 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6227
6228 }
6229
6230 /*----------------------------------------------------------------------------
6231 | Returns the remainder of the quadruple-precision floating-point value `a'
6232 | with respect to the corresponding value `b'. The operation is performed
6233 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6234 *----------------------------------------------------------------------------*/
6235
float128 float128_rem(float128 a, float128 b, float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
    int64_t sigMean0;

    /* Unpack both operands. */
    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    if ( aExp == 0x7FFF ) {
        /* NaN operands propagate; rem(inf, anything) is invalid. */
        if ( ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        /* rem(finite, inf) is the first operand unchanged. */
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        /* rem by zero is invalid; subnormal divisor is normalized. */
        if ( ( bSig0 | bSig1 ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return float128_default_nan(status);
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    expDiff = aExp - bExp;
    /* |a| < |b|/2: a is already the remainder. */
    if ( expDiff < -1 ) return a;
    /* Left-justify both significands (implicit bit made explicit);
       shift a one bit less when its exponent is one below b's. */
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000),
        aSig1,
        15 - ( expDiff < 0 ),
        &aSig0,
        &aSig1
        );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    /* First quotient bit, then reduce the exponent gap in 61-bit
       chunks using an under-estimated 128/64 division each round. */
    q = le128( bSig0, bSig1, aSig0, aSig1 );
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;  /* under-estimate so remainder stays >= 0 */
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        expDiff -= 61;
    }
    /* Final partial step: align remainder and divisor to a common
       fixed-point scale before the last subtraction. */
    if ( -64 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        q >>= - expDiff;
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        expDiff += 52;
        if ( expDiff < 0 ) {
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        }
        else {
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        }
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
    }
    else {
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
    }
    /* Step past zero, keeping the previous (positive) remainder, so we
       can choose the remainder of smallest magnitude below. */
    do {
        alternateASig0 = aSig0;
        alternateASig1 = aSig1;
        ++q;
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    } while ( 0 <= (int64_t) aSig0 );
    /* sigMean = (negative rem) + (positive rem); its sign tells which
       of the two is nearer zero, ties broken toward even quotient. */
    add128(
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
    if ( ( sigMean0 < 0 )
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
    }
    /* If the chosen remainder is negative, negate it and flip the sign. */
    zSign = ( (int64_t) aSig0 < 0 );
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
    return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
                                         status);
}
6336
/*
 * Common comparison core for floatx80_compare{,_quiet}.  Returns a
 * FloatRelation; when is_quiet is set, only signaling NaNs raise the
 * invalid exception, otherwise any NaN operand does.
 */
static inline FloatRelation
floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    /* Non-canonical encodings compare as unordered and raise invalid. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return float_relation_unordered;
    }
    /* Either operand a NaN (max exponent, nonzero fraction bits after
       dropping the explicit integer bit)? */
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
          ( extractFloatx80Frac( a )<<1 ) ) ||
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
          ( extractFloatx80Frac( b )<<1 ) )) {
        if (!is_quiet ||
            floatx80_is_signaling_nan(a, status) ||
            floatx80_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
    if ( aSign != bSign ) {

        /* Opposite signs: equal only if both are zero (either sign);
           otherwise the non-negative operand is the greater one. */
        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
             ( ( a.low | b.low ) == 0 ) ) {
            /* zero case */
            return float_relation_equal;
        } else {
            /* aSign set -> a < b (-1); clear -> a > b (+1). */
            return 1 - (2 * aSign);
        }
    } else {
        /* Normalize pseudo-denormals before comparison. */
        if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
            ++a.high;
        }
        if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
            ++b.high;
        }
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: compare magnitudes as a 128-bit integer and
               invert the outcome for negative operands. */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
6384
6385 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
6386 {
6387 return floatx80_compare_internal(a, b, 0, status);
6388 }
6389
6390 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6391 float_status *status)
6392 {
6393 return floatx80_compare_internal(a, b, 1, status);
6394 }
6395
/*
 * Returns a * 2^n, rounded at the precision currently selected in the
 * status word.  NaNs propagate; infinities and zeros pass through.
 */
floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );

    if ( aExp == 0x7FFF ) {
        /* NaN propagates; +/-inf is unchanged by scaling. */
        if ( aSig<<1 ) {
            return propagateFloatx80NaN(a, a, status);
        }
        return a;
    }

    if (aExp == 0) {
        if (aSig == 0) {
            return a;
        }
        /* Subnormal: biased exponent field 0 denotes true exponent 1,
           so bump it before adding n. */
        aExp++;
    }

    /* Clamp n so aExp + n cannot overflow int32_t; values beyond
       +/-0x10000 saturate to overflow/underflow anyway. */
    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    /* normalizeRoundAndPackFloatx80 handles any resulting overflow,
       underflow, and re-normalization of the significand. */
    aExp += n;
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         aSign, aExp, aSig, 0, status);
}
6434
6435 static void __attribute__((constructor)) softfloat_init(void)
6436 {
6437 union_float64 ua, ub, uc, ur;
6438
6439 if (QEMU_NO_HARDFLOAT) {
6440 return;
6441 }
6442 /*
6443 * Test that the host's FMA is not obviously broken. For example,
6444 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6445 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6446 */
6447 ua.s = 0x0020000000000001ULL;
6448 ub.s = 0x3ca0000000000000ULL;
6449 uc.s = 0x0020000000000000ULL;
6450 ur.h = fma(ua.h, ub.h, uc.h);
6451 if (ur.s != 0x0020000000000001ULL) {
6452 force_soft_fma = true;
6453 }
6454 }