/*
 * Source: QEMU fpu/softfloat.c (mirror_qemu.git, via git.proxmox.com gitweb),
 * captured at commit "softfloat: Convert floatx80_add/sub to FloatParts".
 */
1 /*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43
44 ===============================================================================
45 */
46
47 /* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89
90 /* We only need stdlib for abort() */
91
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98
99 /*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
/*
 * Generate a helper that flushes a denormal input operand to a
 * correctly-signed zero and raises the input_denormal flag.
 * The __NOCHECK variant assumes the caller has already verified that
 * flush_inputs_to_zero is enabled.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK
142
/*
 * Flush one input operand to zero if denormal, but only when
 * flush_inputs_to_zero is set; the common case is an early return.
 */
#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1
155
/* As GEN_INPUT_FLUSH1, for two input operands. */
#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2
169
/* As GEN_INPUT_FLUSH1, for three input operands. */
#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
184
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
/* On all other hosts, prefer the softfloat classification primitives. */
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF 1
#else
# define QEMU_HARDFLOAT_USE_ISINF 0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
/* noinline keeps the (cold) softfloat path out of the hardfloat fast path. */
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234
235 static inline bool can_use_fpu(const float_status *s)
236 {
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242 }
243
244 /*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
/*
 * A float value viewed either as its softfloat bit pattern ('s') or as
 * the host's native floating-point type ('h') for the hardfloat path.
 */
typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

/* Predicates that gate use of the hardfloat path for a pair of operands. */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Two-operand operation, in softfloat and host-native flavors. */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float (*hard_f32_op2_fn)(float a, float b);
typedef double (*hard_f64_op2_fn)(double a, double b);
273
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287 }
288
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297 }
298
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311 }
312
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324 }
325
326 static inline bool f32_is_inf(union_float32 a)
327 {
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332 }
333
334 static inline bool f64_is_inf(union_float64 a)
335 {
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340 }
341
/*
 * Generic two-operand float32 wrapper: run the operation on the host
 * FPU ('hard') when safe, falling back to softfloat ('soft') whenever
 * exception detection would be unreliable.  'pre' validates the inputs
 * for the hardfloat path; 'post' decides whether a tiny result must be
 * recomputed in softfloat to get the flags right.
 */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        /* An infinite result from the hard op means overflow. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        /* Result at or below the minimum normal: redo in softfloat. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
372
/*
 * Generic two-operand float64 wrapper; see float32_gen2 for the
 * hardfloat/softfloat fallback scheme.
 */
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        /* An infinite result from the hard op means overflow. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        /* Result at or below the minimum normal: redo in softfloat. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
403
/*----------------------------------------------------------------------------
| Returns the fraction bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline uint32_t extractFloat32Frac(float32 a)
{
    /* Low 23 bits. */
    return float32_val(a) & 0x007FFFFF;
}

/*----------------------------------------------------------------------------
| Returns the exponent bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline int extractFloat32Exp(float32 a)
{
    /* 8 bits above the fraction. */
    return (float32_val(a) >> 23) & 0xFF;
}

/*----------------------------------------------------------------------------
| Returns the sign bit of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline bool extractFloat32Sign(float32 a)
{
    /* Top bit. */
    return float32_val(a) >> 31;
}
430
/*----------------------------------------------------------------------------
| Returns the fraction bits of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline uint64_t extractFloat64Frac(float64 a)
{
    /* Low 52 bits. */
    return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
}

/*----------------------------------------------------------------------------
| Returns the exponent bits of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline int extractFloat64Exp(float64 a)
{
    /* 11 bits above the fraction. */
    return (float64_val(a) >> 52) & 0x7FF;
}

/*----------------------------------------------------------------------------
| Returns the sign bit of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline bool extractFloat64Sign(float64 a)
{
    /* Top bit. */
    return float64_val(a) >> 63;
}
457
458 /*
459 * Classify a floating point number. Everything above float_class_qnan
460 * is a NaN so cls >= float_class_qnan is any NaN.
461 */
462
/* Packed so a FloatClass fits in one byte inside FloatPartsN. */
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

/* Bit mask corresponding to one FloatClass value, for set-membership tests. */
#define float_cmask(bit) (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    /* Common combinations. */
    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
484
/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
};
494
/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    /* Relies on all NaN classes ordering after float_class_qnan. */
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}
510
511 /*
512 * Structure holding all of the decomposed parts of a float.
513 * The exponent is unbiased and the fraction is normalized.
514 *
515 * The fraction words are stored in big-endian word ordering,
516 * so that truncation from a larger format to a smaller format
517 * can be done simply by ignoring subsequent elements.
518 */
519
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

/* As FloatParts64, with the fraction widened to two 64-bit words. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

/* As FloatParts128, with the fraction widened to four 64-bit words. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;
554
555 /* These apply to the most significant word of each FloatPartsN. */
556 #define DECOMPOSED_BINARY_POINT 63
557 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
558
559 /* Structure holding all of the relevant parameters for a format.
560 * exp_size: the size of the exponent field
561 * exp_bias: the offset applied to the exponent field
562 * exp_max: the maximum normalised exponent
563 * frac_size: the size of the fraction field
564 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
565 * The following are computed based the size of fraction
566 * round_mask: bits below lsb which must be rounded
567 * The following optional modifiers are available:
568 * arm_althp: handle ARM Alternative Half Precision
569 */
typedef struct {
    int exp_size;        /* size of the exponent field, in bits */
    int exp_bias;        /* offset applied to the exponent field */
    int exp_max;         /* maximum normalised exponent */
    int frac_size;       /* size of the fraction field, in bits */
    int frac_shift;      /* shift normalising frac to DECOMPOSED_BINARY_POINT */
    bool arm_althp;      /* handle ARM Alternative Half Precision */
    uint64_t round_mask; /* bits below the lsb which must be rounded */
} FloatFmt;
579
/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size = E,                                      \
    .exp_bias = ((1 << E) - 1) >> 1,                    \
    .exp_max = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size = F,                                     \
    .frac_shift = (-F - 1) & 63,                        \
    .round_mask = (1ull << ((-F - 1) & 63)) - 1

static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* Half precision with the arm_althp modifier set. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

/*
 * floatx80: R selects the rounding precision (23/52/64 fraction bits).
 * At R == 64 the whole fraction is kept, so frac_size is 63 and
 * round_mask covers every bit.
 */
#define FLOATX80_PARAMS(R) \
    FLOAT_PARAMS_(15), \
    .frac_size = R == 64 ? 63 : R, \
    .frac_shift = 0, \
    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1

static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
};
628
/* Unpack a float to parts, but do not canonicalize. */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;

    /* Raw layout, msb first: sign | exponent | fraction. */
    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, f_size + e_size, 1),
        .exp = extract64(raw, f_size, e_size),
        .frac = extract64(raw, 0, f_size)
    };
}
642
/* Per-format wrappers around unpack_raw64(). */
static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}
662
/*
 * Unpack a floatx80: sign and 15-bit exponent live in f.high, while
 * f.low holds the full 64-bit significand (explicit integer bit).
 */
static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
{
    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract32(f.high, 15, 1),
        .exp = extract32(f.high, 0, 15),
        .frac_hi = f.low
    };
}
672
/*
 * Unpack a float128: the high word carries sign, exponent and the top
 * 48 fraction bits; the low word is the remaining fraction.
 */
static void float128_unpack_raw(FloatParts128 *p, float128 f)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}
686
/* Pack a float from parts, but do not canonicalize. */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;
    uint64_t ret;

    /* Inverse of unpack_raw64: sign | exponent | fraction. */
    ret = (uint64_t)p->sign << (f_size + e_size);
    ret = deposit64(ret, f_size, e_size, p->exp);
    ret = deposit64(ret, 0, f_size, p->frac);
    return ret;
}
699
/* Per-format wrappers around pack_raw64(). */
static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}
719
/* Pack a float128 from parts; inverse of float128_unpack_raw. */
static float128 float128_pack_raw(const FloatParts128 *p)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}
731
732 /*----------------------------------------------------------------------------
733 | Functions and definitions to determine: (1) whether tininess for underflow
734 | is detected before or after rounding by default, (2) what (if anything)
735 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
736 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
737 | are propagated from function inputs to output. These details are target-
738 | specific.
739 *----------------------------------------------------------------------------*/
740 #include "softfloat-specialize.c.inc"
741
/*
 * Type-generic dispatch: select the 64-, 128- (or 256-) bit parts
 * implementation from the static type of the first argument.  Below are
 * the forward declarations of the size-specific implementations and the
 * parts_* dispatch macros built on them.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
                                   const FloatFmt *fmt);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
                                    const FloatFmt *fmt);

#define parts_uncanon_normal(A, S, F) \
    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                      int scale, int64_t min, int64_t max,
                                      float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
                           float_status *s, bool q);
static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
                            float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
929
930 /*
931 * Helper functions for softfloat-parts.c.inc, per-size operations.
932 */
933
934 #define FRAC_GENERIC_64_128(NAME, P) \
935 QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
936
937 #define FRAC_GENERIC_64_128_256(NAME, P) \
938 QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
939 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
940
/* R = A + B for 64-bit fractions; returns true on carry-out. */
static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}
945
/* R = A + B for 128-bit fractions; returns true on carry-out. */
static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    /* Carry propagates low limb to high limb. */
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}
953
/* R = A + B for 256-bit fractions; returns true on carry-out. */
static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    /* Carry propagates from the lowest to the highest limb. */
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}
963
964 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
965
/* R = A + C for a 64-bit fraction and immediate C; true on carry-out. */
static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}
970
/* R = A + C for a 128-bit fraction and 64-bit C; true on carry-out. */
static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    /* Reuse C to hold the carry out of the low limb. */
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}
976
977 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
978
/* Set every bit of the fraction of A. */
static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;
}
983
/* Set every bit of the 128-bit fraction of A. */
static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}
988
989 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
990
/* Three-way compare of fractions: -1 if A < B, 0 if equal, +1 if A > B. */
static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
{
    return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
}
995
/* Three-way compare of 128-bit fractions: -1 if A < B, 0 if equal, +1 if A > B. */
static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t ta = a->frac_hi, tb = b->frac_hi;
    /* High limbs decide unless equal; then the low limbs decide. */
    if (ta == tb) {
        ta = a->frac_lo, tb = b->frac_lo;
        if (ta == tb) {
            return 0;
        }
    }
    return ta < tb ? -1 : 1;
}
1007
1008 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
1009
/* Zero the fraction of A. */
static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}
1014
/* Zero the 128-bit fraction of A. */
static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}
1019
1020 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
1021
/*
 * A.frac = A.frac / B.frac, with the lsb jammed if there is any
 * remainder (to raise inexact later).  Returns true if A.frac < B.frac,
 * in which case the caller must decrement the exponent by one.
 */
static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}
1053
/*
 * A = A / B for 128-bit fractions, with the quotient lsb jammed if any
 * remainder survives.  Returns true if A < B, in which case the caller
 * must decrement the exponent by one (cf. frac64_div).
 */
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    /* If A >= B, pre-shift A right so the quotient fits in 128 bits. */
    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}
1100
1101 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1102
/* Return true if the fraction of A is zero. */
static bool frac64_eqz(FloatParts64 *a)
{
    return a->frac == 0;
}
1107
/* Return true if the 128-bit fraction of A is zero. */
static bool frac128_eqz(FloatParts128 *a)
{
    return (a->frac_hi | a->frac_lo) == 0;
}
1112
1113 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1114
/* R = A * B: widening 64 x 64 -> 128-bit fraction multiply. */
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}
1119
/* R = A * B: widening 128 x 128 -> 256-bit fraction multiply. */
static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}
1125
1126 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1127
/* Negate (two's complement) the fraction of A. */
static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}
1132
/* Negate the 128-bit fraction of A, i.e. compute 0 - A with borrow. */
static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}
1139
/* Negate the 256-bit fraction of A, i.e. compute 0 - A with borrow. */
static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}
1148
1149 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1150
/*
 * Shift the fraction left until its msb is set; return the shift count.
 * A zero fraction returns the full width (64).
 */
static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}
1160
/*
 * Shift the 128-bit fraction left until its msb is set; return the
 * shift count.  A zero fraction returns the full width (128).
 */
static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        /* High limb empty: promote the low limb, then account for 64 bits. */
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}
1176
/*
 * Shift the 256-bit fraction left until its msb is set; return the
 * shift count.  A zero fraction returns the full width (256).
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        /* Skip empty high limbs in whole-limb (64-bit) steps. */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    /* Sub-limb shift across all four limbs. */
    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}
1223
1224 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1225
/* Shift the fraction of A left by C bits. */
static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}
1230
/* Shift the 128-bit fraction of A left by C bits (0 <= C < 128). */
static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    /* Whole-limb part of the shift. */
    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    /* Remaining sub-limb shift. */
    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}
1248
1249 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1250
/* Shift the fraction of A right by C bits, discarding shifted-out bits. */
static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}
1255
/* Shift the 128-bit fraction of A right by C bits (0 <= C < 128). */
static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    /* Whole-limb part of the shift. */
    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    /* Remaining sub-limb shift. */
    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}
1273
1274 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1275
/*
 * Shift the fraction of A right by C bits, jamming (OR-ing) any
 * shifted-out bits into the lsb as a sticky bit.
 */
static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            /* Everything shifted out: result is just the sticky bit. */
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}
1289
/*
 * Shift the 128-bit fraction of A right by C bits, jamming any
 * shifted-out bits into the lsb as a sticky bit.
 */
static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        /* Whole-limb step: the entire low limb becomes sticky. */
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Shift of >= 128 bits: only the sticky bit survives. */
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}
1321
/*
 * Shift the 256-bit fraction of A right by C bits, jamming any
 * shifted-out bits into the lsb as a sticky bit.
 */
static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        /* Whole-limb steps; dropped limbs fold into the sticky bit. */
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Shift of >= 256 bits: only the sticky bit survives. */
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}
1363
1364 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1365
/* R = A - B for 64-bit fractions; returns true on borrow-out. */
static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}
1370
/* R = A - B for 128-bit fractions; returns true on borrow-out. */
static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}
1378
/* R = A - B for 256-bit fractions; returns true on borrow-out. */
static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}
1388
1389 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1390
/* Narrow a 128-bit fraction to 64 bits, jamming discarded bits into the lsb. */
static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}
1395
/* Narrow a 256-bit fraction to 128 bits, jamming discarded bits into the lsb. */
static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}
1401
1402 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1403
/* Widen a 64-bit fraction to 128 bits, zero-filling the low limb. */
static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}
1409
/* Widen a 128-bit fraction to 256 bits, zero-filling the low limbs. */
static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}
1417
1418 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1419
/*
 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licenced.
 */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
1443
1444 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1445 #define FloatPartsN glue(FloatParts,N)
1446 #define FloatPartsW glue(FloatParts,W)
1447
1448 #define N 64
1449 #define W 128
1450
1451 #include "softfloat-parts-addsub.c.inc"
1452 #include "softfloat-parts.c.inc"
1453
1454 #undef N
1455 #undef W
1456 #define N 128
1457 #define W 256
1458
1459 #include "softfloat-parts-addsub.c.inc"
1460 #include "softfloat-parts.c.inc"
1461
1462 #undef N
1463 #undef W
1464 #define N 256
1465
1466 #include "softfloat-parts-addsub.c.inc"
1467
1468 #undef N
1469 #undef W
1470 #undef partsN
1471 #undef FloatPartsN
1472 #undef FloatPartsW
1473
1474 /*
1475 * Pack/unpack routines with a specific FloatFmt.
1476 */
1477
/* Unpack a float16-format value F per PARAMS into canonical parts P. */
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}
1484
/* Unpack a float16 into canonical parts P using the standard parameters. */
static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}
1490
/* Unpack a bfloat16 into canonical parts P. */
static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}
1497
/* Round canonical parts P per PARAMS and pack as a float16-format value. */
static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}
1505
/* Round canonical parts P and pack as a float16. */
static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}
1511
/* Round canonical parts P and pack as a bfloat16. */
static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}
1518
/* Unpack a float32 into canonical parts P. */
static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}
1525
/* Round canonical parts P and pack as a float32. */
static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}
1532
/* Unpack a float64 into canonical parts P. */
static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}
1539
/* Round canonical parts P and pack as a float64. */
static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}
1546
/* Unpack a float128 into canonical parts P. */
static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}
1553
/* Round canonical parts P and pack as a float128. */
static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
1560
/*
 * Unpack an 80-bit extended value F into canonical parts P.
 * Returns false if the encoding is invalid (and raises float_flag_invalid).
 */
static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
                                      float_status *s)
{
    /* Ensure rounding precision is set before beginning. */
    switch (s->floatx80_rounding_precision) {
    case floatx80_precision_x:
    case floatx80_precision_d:
    case floatx80_precision_s:
        break;
    default:
        g_assert_not_reached();
    }

    if (unlikely(floatx80_invalid_encoding(f))) {
        float_raise(float_flag_invalid, s);
        return false;
    }

    floatx80_unpack_raw(p, f);

    if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
        parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
    } else {
        /* All-ones exponent: infinity or NaN. */
        /* The explicit integer bit is ignored, after invalid checks. */
        p->frac_hi &= MAKE_64BIT_MASK(0, 63);
        p->cls = (p->frac_hi == 0 ? float_class_inf
                  : parts_is_snan_frac(p->frac_hi, s)
                  ? float_class_snan : float_class_qnan);
    }
    return true;
}
1593
/*
 * Round canonical parts P per the status' floatx80 rounding precision
 * and pack as an 80-bit extended value.
 */
static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
    uint64_t frac;
    int exp;

    switch (p->cls) {
    case float_class_normal:
        if (s->floatx80_rounding_precision == floatx80_precision_x) {
            parts_uncanon_normal(p, s, fmt);
            frac = p->frac_hi;
            exp = p->exp;
        } else {
            /*
             * Reduced (double/single) precision: round via a 64-bit
             * fraction, jamming the discarded low bits first.
             */
            FloatParts64 p64;

            p64.sign = p->sign;
            p64.exp = p->exp;
            frac_truncjam(&p64, p);
            parts_uncanon_normal(&p64, s, fmt);
            frac = p64.frac;
            exp = p64.exp;
        }
        if (exp != fmt->exp_max) {
            break;
        }
        /* rounded to inf -- fall through to set frac correctly */

    case float_class_inf:
        /* x86 and m68k differ in the setting of the integer bit. */
        frac = floatx80_infinity_low;
        exp = fmt->exp_max;
        break;

    case float_class_zero:
        frac = 0;
        exp = 0;
        break;

    case float_class_snan:
    case float_class_qnan:
        /* NaNs have the integer bit set. */
        frac = p->frac_hi | (1ull << 63);
        exp = fmt->exp_max;
        break;

    default:
        g_assert_not_reached();
    }

    return packFloatx80(p->sign, exp, frac);
}
1646
1647 /*
1648 * Addition and subtraction
1649 */
1650
/* Common float16 addition/subtraction; SUBTRACT selects a - b. */
static float16 QEMU_FLATTEN
float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float16_round_pack_canonical(pr, status);
}
1662
/* Return a + b for float16. */
float16 float16_add(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, false);
}
1667
/* Return a - b for float16. */
float16 float16_sub(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, true);
}
1672
/* Softfloat float32 addition/subtraction; SUBTRACT selects a - b. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float32_round_pack_canonical(pr, status);
}
1684
/* Softfloat fallback for float32 addition. */
static float32 soft_f32_add(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, false);
}
1689
/* Softfloat fallback for float32 subtraction. */
static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, true);
}
1694
/* Softfloat float64 addition/subtraction; SUBTRACT selects a - b. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float64_round_pack_canonical(pr, status);
}
1706
/* Softfloat fallback for float64 addition. */
static float64 soft_f64_add(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, false);
}
1711
/* Softfloat fallback for float64 subtraction. */
static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, true);
}
1716
/* Native single-precision addition for the hardfloat fast path. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;
    return sum;
}
1721
/* Native single-precision subtraction for the hardfloat fast path. */
static float hard_f32_sub(float a, float b)
{
    float diff = a - b;
    return diff;
}
1726
/* Native double-precision addition for the hardfloat fast path. */
static double hard_f64_add(double a, double b)
{
    double sum = a + b;
    return sum;
}
1731
/* Native double-precision subtraction for the hardfloat fast path. */
static double hard_f64_sub(double a, double b)
{
    double diff = a - b;
    return diff;
}
1736
1737 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1738 {
1739 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1740 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1741 }
1742 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1743 }
1744
1745 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1746 {
1747 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1748 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1749 } else {
1750 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1751 }
1752 }
1753
/* Dispatch float32 add/sub between the hard and soft implementations. */
static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}
1760
/* Dispatch float64 add/sub between the hard and soft implementations. */
static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}
1767
/* Return a + b for float32. */
float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}
1773
/* Return a - b for float32. */
float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}
1779
/* Return a + b for float64. */
float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}
1785
/* Return a - b for float64. */
float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
1791
/* Common bfloat16 addition/subtraction; SUBTRACT selects a - b. */
static bfloat16 QEMU_FLATTEN
bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return bfloat16_round_pack_canonical(pr, status);
}
1803
/* Return a + b for bfloat16. */
bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, false);
}
1808
/* Return a - b for bfloat16. */
bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, true);
}
1813
/* Common float128 addition/subtraction; SUBTRACT selects a - b. */
static float128 QEMU_FLATTEN
float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float128_round_pack_canonical(pr, status);
}
1825
/* Return a + b for float128. */
float128 float128_add(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, false);
}
1830
/* Return a - b for float128. */
float128 float128_sub(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, true);
}
1835
/*
 * Common floatx80 addition/subtraction; SUBTRACT selects a - b.
 * Invalid encodings raise the invalid flag and yield the default NaN.
 */
static floatx80 QEMU_FLATTEN
floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
{
    FloatParts128 pa, pb, *pr;

    if (!floatx80_unpack_canonical(&pa, a, status) ||
        !floatx80_unpack_canonical(&pb, b, status)) {
        return floatx80_default_nan(status);
    }

    pr = parts_addsub(&pa, &pb, status, subtract);
    return floatx80_round_pack_canonical(pr, status);
}
1849
/* Return a + b for floatx80. */
floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
{
    return floatx80_addsub(a, b, status, false);
}
1854
/* Return a - b for floatx80. */
floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
{
    return floatx80_addsub(a, b, status, true);
}
1859
1860 /*
1861 * Multiplication
1862 */
1863
/* Return a * b for float16. */
float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}
1874
/* Softfloat fallback for float32 multiplication. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}
1886
/* Softfloat fallback for float64 multiplication. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}
1898
/* Native single-precision multiply for the hardfloat fast path. */
static float hard_f32_mul(float a, float b)
{
    float prod = a * b;
    return prod;
}
1903
/* Native double-precision multiply for the hardfloat fast path. */
static double hard_f64_mul(double a, double b)
{
    double prod = a * b;
    return prod;
}
1908
/* Return a * b for float32, using the hardfloat path when safe. */
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}
1915
/* Return a * b for float64, using the hardfloat path when safe. */
float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}
1922
/* Return a * b for bfloat16. */
bfloat16 QEMU_FLATTEN
bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}
1934
/* Return a * b for float128. */
float128 QEMU_FLATTEN
float128_mul(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float128_round_pack_canonical(pr, status);
}
1946
1947 /*
1948 * Fused multiply-add
1949 */
1950
/* Return (a * b) + c for float16, with FLAGS modifying the operation. */
float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
                                    int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    float16_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float16_round_pack_canonical(pr, status);
}
1963
/* Softfloat fallback for float32 fused multiply-add. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    float32_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float32_round_pack_canonical(pr, status);
}
1977
/* Softfloat fallback for float64 fused multiply-add. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float64_round_pack_canonical(pr, status);
}
1991
1992 static bool force_soft_fma;
1993
/*
 * Return (a * b) + c for float32, with FLAGS modifying the operation.
 * Uses the host FPU (fmaf) as a fast path; falls back to softfloat when
 * the status demands it, for the halve-result flag, for non-(zero|normal)
 * operands, or when the native result may have underflowed.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Possible subnormal result: redo in softfloat for exact flags. */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
2064
/*
 * Return (a * b) + c for float64, with FLAGS modifying the operation.
 * Uses the host FPU (fma) as a fast path; falls back to softfloat when
 * the status demands it, for the halve-result flag, for non-(zero|normal)
 * operands, or when the native result may have underflowed.
 */
float64 QEMU_FLATTEN
float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
{
    union_float64 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
        union_float64 up;
        bool prod_sign;

        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float64_set_sign(float64_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float64 ua_orig = ua;
        union_float64 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fma(ua.h, ub.h, uc.h);

        if (unlikely(f64_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
            /*
             * NOTE(review): threshold is FLT_MIN, not DBL_MIN -- this is
             * conservative (it sends more small results to softfloat, never
             * fewer), but confirm the FLT_MIN here is intentional.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float64_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}
2135
2136 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2137 int flags, float_status *status)
2138 {
2139 FloatParts64 pa, pb, pc, *pr;
2140
2141 bfloat16_unpack_canonical(&pa, a, status);
2142 bfloat16_unpack_canonical(&pb, b, status);
2143 bfloat16_unpack_canonical(&pc, c, status);
2144 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2145
2146 return bfloat16_round_pack_canonical(pr, status);
2147 }
2148
2149 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2150 int flags, float_status *status)
2151 {
2152 FloatParts128 pa, pb, pc, *pr;
2153
2154 float128_unpack_canonical(&pa, a, status);
2155 float128_unpack_canonical(&pb, b, status);
2156 float128_unpack_canonical(&pc, c, status);
2157 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2158
2159 return float128_round_pack_canonical(pr, status);
2160 }
2161
2162 /*
2163 * Division
2164 */
2165
2166 float16 float16_div(float16 a, float16 b, float_status *status)
2167 {
2168 FloatParts64 pa, pb, *pr;
2169
2170 float16_unpack_canonical(&pa, a, status);
2171 float16_unpack_canonical(&pb, b, status);
2172 pr = parts_div(&pa, &pb, status);
2173
2174 return float16_round_pack_canonical(pr, status);
2175 }
2176
/* Softfloat fallback for float32 division. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_div(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}

/* Softfloat fallback for float64 division. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_div(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}

/* Host-FPU division, used by the gen2 dispatcher on the fast path. */
static float hard_f32_div(float a, float b)
{
    return a / b;
}

static double hard_f64_div(double a, double b)
{
    return a / b;
}

/*
 * Hardfloat precondition: dividend zero-or-normal, divisor strictly
 * normal — this excludes NaN, Inf, subnormal operands and x/0.
 */
static bool f32_div_pre(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               fpclassify(b.h) == FP_NORMAL;
    }
    return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
}

static bool f64_div_pre(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               fpclassify(b.h) == FP_NORMAL;
    }
    return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
}

/*
 * Hardfloat postcondition on the host result (passed as 'a'; 'b' is
 * unused but required by the generic callback signature): accept any
 * non-zero result, otherwise redo the division in softfloat.
 */
static bool f32_div_post(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        return fpclassify(a.h) != FP_ZERO;
    }
    return !float32_is_zero(a.s);
}

static bool f64_div_post(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return fpclassify(a.h) != FP_ZERO;
    }
    return !float64_is_zero(a.s);
}
2244
/* float32/float64 division: hardfloat fast path, softfloat fallback. */
float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}

float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}

/* bfloat16 and float128 division always use the FloatParts path. */
bfloat16 QEMU_FLATTEN
bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}

float128 QEMU_FLATTEN
float128_div(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float128_round_pack_canonical(pr, status);
}
2282
2283 /*
2284 * Float to Float conversions
2285 *
2286 * Returns the result of converting one float format to another. The
2287 * conversion is performed according to the IEC/IEEE Standard for
2288 * Binary Floating-Point Arithmetic.
2289 *
2290 * Usually this only needs to take care of raising invalid exceptions
2291 * and handling the conversion on NaNs.
2292 */
2293
/*
 * Adjust *a in place for conversion to ARM Alternative Half Precision
 * (AHP), a format with neither NaN nor Inf encodings: NaNs become a
 * signed zero and Inf saturates to the maximum normal, both raising
 * the Invalid exception.
 */
static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        /*
         * There is no NaN in the destination format.  Raise Invalid
         * and return a zero with the sign of the input NaN.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_zero;
        break;

    case float_class_inf:
        /*
         * There is no Inf in the destination format.  Raise Invalid
         * and return the maximum normal with the correct sign.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_normal;
        a->exp = float16_params_ahp.exp_max;
        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
                                  float16_params_ahp.frac_size + 1);
        break;

    case float_class_normal:
    case float_class_zero:
        /* Representable as-is; nothing to do. */
        break;

    default:
        g_assert_not_reached();
    }
}
2327
/* Same-width float-to-float conversion: only NaNs need reprocessing. */
static void parts64_float_to_float(FloatParts64 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

/* 128-bit counterpart of parts64_float_to_float. */
static void parts128_float_to_float(FloatParts128 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

/* Dispatch to the 64- or 128-bit variant based on the type of P. */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2344
/*
 * Narrow 128-bit parts *b into 64-bit parts *a.  Normal fractions are
 * truncated with the low bits jammed (frac_truncjam); NaNs just drop
 * their low payload bits before NaN processing.
 */
static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
                                        float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;

    if (a->cls == float_class_normal) {
        frac_truncjam(a, b);
    } else if (is_nan(a->cls)) {
        /* Discard the low bits of the NaN. */
        a->frac = b->frac_hi;
        parts_return_nan(a, s);
    }
}
2360
/*
 * Widen 64-bit parts *b into 128-bit parts *a, then apply NaN
 * processing if required.
 */
static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
                                       float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;
    frac_widen(a, b);

    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}
2373
/*
 * Half-precision conversions.  'ieee' selects IEEE binary16 versus the
 * ARM alternative half-precision (AHP) interpretation of the source
 * (or destination) bits.
 */
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 p;

    float16a_unpack_canonical(&p, a, s, fmt16);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float64 float16_to_float64(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 p;

    float16a_unpack_canonical(&p, a, s, fmt16);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

float16 float32_to_float16(float32 a, bool ieee, float_status *s)
{
    FloatParts64 p;
    const FloatFmt *fmt;

    float32_unpack_canonical(&p, a, s);
    if (ieee) {
        parts_float_to_float(&p, s);
        fmt = &float16_params;
    } else {
        /* AHP has no NaN/Inf encodings; see parts_float_to_ahp. */
        parts_float_to_ahp(&p, s);
        fmt = &float16_params_ahp;
    }
    return float16a_round_pack_canonical(&p, s, fmt);
}
2409
/* Softfloat fallback for float32 -> float64 conversion. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_float32_to_float64(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

float64 float32_to_float64(float32 a, float_status *s)
{
    if (likely(float32_is_normal(a))) {
        /* Widening conversion can never produce inexact results.  */
        union_float32 uf;
        union_float64 ud;
        uf.s = a;
        ud.h = uf.h;
        return ud.s;
    } else if (float32_is_zero(a)) {
        /* Preserve the sign of zero without a FloatParts round trip. */
        return float64_set_sign(float64_zero, float32_is_neg(a));
    } else {
        /* NaN / Inf / subnormal: take the full softfloat path. */
        return soft_float32_to_float64(a, s);
    }
}
2435
/* 'ieee' selects IEEE binary16 versus ARM AHP for the destination. */
float16 float64_to_float16(float64 a, bool ieee, float_status *s)
{
    FloatParts64 p;
    const FloatFmt *fmt;

    float64_unpack_canonical(&p, a, s);
    if (ieee) {
        parts_float_to_float(&p, s);
        fmt = &float16_params;
    } else {
        parts_float_to_ahp(&p, s);
        fmt = &float16_params_ahp;
    }
    return float16a_round_pack_canonical(&p, s, fmt);
}

float32 float64_to_float32(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

/* bfloat16 conversions follow the same unpack/convert/repack pattern. */
float32 bfloat16_to_float32(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float64 bfloat16_to_float64(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 float32_to_bfloat16(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}

bfloat16 float64_to_bfloat16(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}

/* float128 narrowing uses the truncate-and-jam helper. */
float32 float128_to_float32(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float32_round_pack_canonical(&p64, s);
}

float64 float128_to_float64(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float64_round_pack_canonical(&p64, s);
}

/* Widening to float128. */
float128 float32_to_float128(float32 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float32_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}

float128 float64_to_float128(float64 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float64_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}
2536
2537 /*
2538 * Round to integral value
2539 */
2540
/*
 * Round to an integral value in the same float format, using the
 * rounding mode from *s (scale argument fixed at 0).
 */
float16 float16_round_to_int(float16 a, float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
    return float16_round_pack_canonical(&p, s);
}

float32 float32_round_to_int(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
    return float32_round_pack_canonical(&p, s);
}

float64 float64_round_to_int(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, s);
}

float128 float128_round_to_int(float128 a, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
    return float128_round_pack_canonical(&p, s);
}
2585
2586 /*
2587 * Floating-point to signed integer conversions
2588 */
2589
/*
 * Float -> signed integer, with explicit rounding mode and a scale
 * applied before conversion; saturation bounds select the target width.
 * All variants delegate to parts_float_to_sint.
 */
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                              float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

/* float128 variants exist only for int32/int64 targets; file-local. */
static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
2724
/*
 * Convenience wrappers over the *_scalbn converters above: the plain
 * forms use the rounding mode from *s; the *_round_to_zero forms force
 * truncation.  Scale is always 0.
 */
int8_t float16_to_int8(float16 a, float_status *s)
{
    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float16_to_int32(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float16_to_int64(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float32_to_int16(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float32_to_int32(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float32_to_int64(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float64_to_int16(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float64_to_int32(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float64_to_int64(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float128_to_int32(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float128_to_int64(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2869
2870 /*
2871 * Floating-point to unsigned integer conversions
2872 */
2873
/*
 * Float -> unsigned integer, with explicit rounding mode and a scale
 * applied before conversion; the max bound selects the target width.
 * All variants delegate to parts_float_to_uint.
 */
uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
}

uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

/* float128 variants exist only for uint32/uint64 targets; file-local. */
static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
3008
/*
 * Convenience wrappers over the unsigned *_scalbn converters: plain
 * forms use the rounding mode from *s; *_round_to_zero forms force
 * truncation.  Scale is always 0.
 */
uint8_t float16_to_uint8(float16 a, float_status *s)
{
    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float128_to_uint32(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float128_to_uint64(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
3153
3154 /*
3155 * Signed integer to floating-point conversions
3156 */
3157
/*
 * Signed int -> float16.  The narrower-input variants all funnel into
 * the int64 converter (value-preserving integer promotion); 'scale'
 * is a power-of-two exponent adjustment applied during conversion.
 */
float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return float16_round_pack_canonical(&p, status);
}

float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
3195
3196 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3197 {
3198 FloatParts64 p;
3199
3200 parts64_sint_to_float(&p, a, scale, status);
3201 return float32_round_pack_canonical(&p, status);
3202 }
3203
3204 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3205 {
3206 return int64_to_float32_scalbn(a, scale, status);
3207 }
3208
3209 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3210 {
3211 return int64_to_float32_scalbn(a, scale, status);
3212 }
3213
3214 float32 int64_to_float32(int64_t a, float_status *status)
3215 {
3216 return int64_to_float32_scalbn(a, 0, status);
3217 }
3218
3219 float32 int32_to_float32(int32_t a, float_status *status)
3220 {
3221 return int64_to_float32_scalbn(a, 0, status);
3222 }
3223
3224 float32 int16_to_float32(int16_t a, float_status *status)
3225 {
3226 return int64_to_float32_scalbn(a, 0, status);
3227 }
3228
3229 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3230 {
3231 FloatParts64 p;
3232
3233 parts_sint_to_float(&p, a, scale, status);
3234 return float64_round_pack_canonical(&p, status);
3235 }
3236
3237 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3238 {
3239 return int64_to_float64_scalbn(a, scale, status);
3240 }
3241
3242 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3243 {
3244 return int64_to_float64_scalbn(a, scale, status);
3245 }
3246
3247 float64 int64_to_float64(int64_t a, float_status *status)
3248 {
3249 return int64_to_float64_scalbn(a, 0, status);
3250 }
3251
3252 float64 int32_to_float64(int32_t a, float_status *status)
3253 {
3254 return int64_to_float64_scalbn(a, 0, status);
3255 }
3256
3257 float64 int16_to_float64(int16_t a, float_status *status)
3258 {
3259 return int64_to_float64_scalbn(a, 0, status);
3260 }
3261
3262 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3263 {
3264 FloatParts64 p;
3265
3266 parts_sint_to_float(&p, a, scale, status);
3267 return bfloat16_round_pack_canonical(&p, status);
3268 }
3269
3270 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3271 {
3272 return int64_to_bfloat16_scalbn(a, scale, status);
3273 }
3274
3275 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3276 {
3277 return int64_to_bfloat16_scalbn(a, scale, status);
3278 }
3279
3280 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3281 {
3282 return int64_to_bfloat16_scalbn(a, 0, status);
3283 }
3284
3285 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3286 {
3287 return int64_to_bfloat16_scalbn(a, 0, status);
3288 }
3289
3290 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3291 {
3292 return int64_to_bfloat16_scalbn(a, 0, status);
3293 }
3294
3295 float128 int64_to_float128(int64_t a, float_status *status)
3296 {
3297 FloatParts128 p;
3298
3299 parts_sint_to_float(&p, a, 0, status);
3300 return float128_round_pack_canonical(&p, status);
3301 }
3302
3303 float128 int32_to_float128(int32_t a, float_status *status)
3304 {
3305 return int64_to_float128(a, status);
3306 }
3307
3308 /*
3309 * Unsigned Integer to floating-point conversions
3310 */
3311
3312 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3313 {
3314 FloatParts64 p;
3315
3316 parts_uint_to_float(&p, a, scale, status);
3317 return float16_round_pack_canonical(&p, status);
3318 }
3319
3320 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3321 {
3322 return uint64_to_float16_scalbn(a, scale, status);
3323 }
3324
3325 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3326 {
3327 return uint64_to_float16_scalbn(a, scale, status);
3328 }
3329
3330 float16 uint64_to_float16(uint64_t a, float_status *status)
3331 {
3332 return uint64_to_float16_scalbn(a, 0, status);
3333 }
3334
3335 float16 uint32_to_float16(uint32_t a, float_status *status)
3336 {
3337 return uint64_to_float16_scalbn(a, 0, status);
3338 }
3339
3340 float16 uint16_to_float16(uint16_t a, float_status *status)
3341 {
3342 return uint64_to_float16_scalbn(a, 0, status);
3343 }
3344
3345 float16 uint8_to_float16(uint8_t a, float_status *status)
3346 {
3347 return uint64_to_float16_scalbn(a, 0, status);
3348 }
3349
3350 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3351 {
3352 FloatParts64 p;
3353
3354 parts_uint_to_float(&p, a, scale, status);
3355 return float32_round_pack_canonical(&p, status);
3356 }
3357
3358 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3359 {
3360 return uint64_to_float32_scalbn(a, scale, status);
3361 }
3362
3363 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3364 {
3365 return uint64_to_float32_scalbn(a, scale, status);
3366 }
3367
3368 float32 uint64_to_float32(uint64_t a, float_status *status)
3369 {
3370 return uint64_to_float32_scalbn(a, 0, status);
3371 }
3372
3373 float32 uint32_to_float32(uint32_t a, float_status *status)
3374 {
3375 return uint64_to_float32_scalbn(a, 0, status);
3376 }
3377
3378 float32 uint16_to_float32(uint16_t a, float_status *status)
3379 {
3380 return uint64_to_float32_scalbn(a, 0, status);
3381 }
3382
3383 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3384 {
3385 FloatParts64 p;
3386
3387 parts_uint_to_float(&p, a, scale, status);
3388 return float64_round_pack_canonical(&p, status);
3389 }
3390
3391 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3392 {
3393 return uint64_to_float64_scalbn(a, scale, status);
3394 }
3395
3396 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3397 {
3398 return uint64_to_float64_scalbn(a, scale, status);
3399 }
3400
3401 float64 uint64_to_float64(uint64_t a, float_status *status)
3402 {
3403 return uint64_to_float64_scalbn(a, 0, status);
3404 }
3405
3406 float64 uint32_to_float64(uint32_t a, float_status *status)
3407 {
3408 return uint64_to_float64_scalbn(a, 0, status);
3409 }
3410
3411 float64 uint16_to_float64(uint16_t a, float_status *status)
3412 {
3413 return uint64_to_float64_scalbn(a, 0, status);
3414 }
3415
3416 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3417 {
3418 FloatParts64 p;
3419
3420 parts_uint_to_float(&p, a, scale, status);
3421 return bfloat16_round_pack_canonical(&p, status);
3422 }
3423
3424 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3425 {
3426 return uint64_to_bfloat16_scalbn(a, scale, status);
3427 }
3428
3429 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3430 {
3431 return uint64_to_bfloat16_scalbn(a, scale, status);
3432 }
3433
3434 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3435 {
3436 return uint64_to_bfloat16_scalbn(a, 0, status);
3437 }
3438
3439 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3440 {
3441 return uint64_to_bfloat16_scalbn(a, 0, status);
3442 }
3443
3444 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3445 {
3446 return uint64_to_bfloat16_scalbn(a, 0, status);
3447 }
3448
3449 float128 uint64_to_float128(uint64_t a, float_status *status)
3450 {
3451 FloatParts128 p;
3452
3453 parts_uint_to_float(&p, a, 0, status);
3454 return float128_round_pack_canonical(&p, status);
3455 }
3456
3457 /*
3458 * Minimum and maximum
3459 */
3460
3461 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3462 {
3463 FloatParts64 pa, pb, *pr;
3464
3465 float16_unpack_canonical(&pa, a, s);
3466 float16_unpack_canonical(&pb, b, s);
3467 pr = parts_minmax(&pa, &pb, s, flags);
3468
3469 return float16_round_pack_canonical(pr, s);
3470 }
3471
3472 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3473 float_status *s, int flags)
3474 {
3475 FloatParts64 pa, pb, *pr;
3476
3477 bfloat16_unpack_canonical(&pa, a, s);
3478 bfloat16_unpack_canonical(&pb, b, s);
3479 pr = parts_minmax(&pa, &pb, s, flags);
3480
3481 return bfloat16_round_pack_canonical(pr, s);
3482 }
3483
3484 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
3485 {
3486 FloatParts64 pa, pb, *pr;
3487
3488 float32_unpack_canonical(&pa, a, s);
3489 float32_unpack_canonical(&pb, b, s);
3490 pr = parts_minmax(&pa, &pb, s, flags);
3491
3492 return float32_round_pack_canonical(pr, s);
3493 }
3494
3495 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
3496 {
3497 FloatParts64 pa, pb, *pr;
3498
3499 float64_unpack_canonical(&pa, a, s);
3500 float64_unpack_canonical(&pb, b, s);
3501 pr = parts_minmax(&pa, &pb, s, flags);
3502
3503 return float64_round_pack_canonical(pr, s);
3504 }
3505
3506 static float128 float128_minmax(float128 a, float128 b,
3507 float_status *s, int flags)
3508 {
3509 FloatParts128 pa, pb, *pr;
3510
3511 float128_unpack_canonical(&pa, a, s);
3512 float128_unpack_canonical(&pb, b, s);
3513 pr = parts_minmax(&pa, &pb, s, flags);
3514
3515 return float128_round_pack_canonical(pr, s);
3516 }
3517
/*
 * MINMAX_1 emits one public entry point <type>_<name>() that forwards
 * to the static <type>_minmax() worker with the given flag combination.
 * MINMAX_2 instantiates the six variants for one float type; the flag
 * semantics themselves are implemented by parts_minmax().
 */
#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

#define MINMAX_2(type) \
    MINMAX_1(type, max, 0) \
    MINMAX_1(type, maxnum, minmax_isnum) \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
    MINMAX_1(type, min, minmax_ismin) \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)

MINMAX_2(float16)
MINMAX_2(bfloat16)
MINMAX_2(float32)
MINMAX_2(float64)
MINMAX_2(float128)

#undef MINMAX_1
#undef MINMAX_2
3538
3539 /*
3540 * Floating point compare
3541 */
3542
3543 static FloatRelation QEMU_FLATTEN
3544 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
3545 {
3546 FloatParts64 pa, pb;
3547
3548 float16_unpack_canonical(&pa, a, s);
3549 float16_unpack_canonical(&pb, b, s);
3550 return parts_compare(&pa, &pb, s, is_quiet);
3551 }
3552
3553 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
3554 {
3555 return float16_do_compare(a, b, s, false);
3556 }
3557
3558 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
3559 {
3560 return float16_do_compare(a, b, s, true);
3561 }
3562
3563 static FloatRelation QEMU_SOFTFLOAT_ATTR
3564 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
3565 {
3566 FloatParts64 pa, pb;
3567
3568 float32_unpack_canonical(&pa, a, s);
3569 float32_unpack_canonical(&pb, b, s);
3570 return parts_compare(&pa, &pb, s, is_quiet);
3571 }
3572
/*
 * Hardfloat/softfloat hybrid compare: try the host FPU first via the
 * ISO C <math.h> comparison macros (which do not raise "invalid" for
 * quiet NaN operands), and fall back to the softfloat implementation
 * for the unordered case or when hardfloat is disabled.
 */
static FloatRelation QEMU_FLATTEN
float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    /* Apply input flushing before the host comparison sees the values. */
    float32_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
soft:
    return float32_do_compare(ua.s, ub.s, s, is_quiet);
}
3602
3603 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
3604 {
3605 return float32_hs_compare(a, b, s, false);
3606 }
3607
3608 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
3609 {
3610 return float32_hs_compare(a, b, s, true);
3611 }
3612
3613 static FloatRelation QEMU_SOFTFLOAT_ATTR
3614 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
3615 {
3616 FloatParts64 pa, pb;
3617
3618 float64_unpack_canonical(&pa, a, s);
3619 float64_unpack_canonical(&pb, b, s);
3620 return parts_compare(&pa, &pb, s, is_quiet);
3621 }
3622
/*
 * Hardfloat/softfloat hybrid compare: try the host FPU first via the
 * ISO C <math.h> comparison macros (which do not raise "invalid" for
 * quiet NaN operands), and fall back to the softfloat implementation
 * for the unordered case or when hardfloat is disabled.
 */
static FloatRelation QEMU_FLATTEN
float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    /* Apply input flushing before the host comparison sees the values. */
    float64_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
soft:
    return float64_do_compare(ua.s, ub.s, s, is_quiet);
}
3652
3653 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
3654 {
3655 return float64_hs_compare(a, b, s, false);
3656 }
3657
3658 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
3659 {
3660 return float64_hs_compare(a, b, s, true);
3661 }
3662
3663 static FloatRelation QEMU_FLATTEN
3664 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
3665 {
3666 FloatParts64 pa, pb;
3667
3668 bfloat16_unpack_canonical(&pa, a, s);
3669 bfloat16_unpack_canonical(&pb, b, s);
3670 return parts_compare(&pa, &pb, s, is_quiet);
3671 }
3672
3673 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3674 {
3675 return bfloat16_do_compare(a, b, s, false);
3676 }
3677
3678 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3679 {
3680 return bfloat16_do_compare(a, b, s, true);
3681 }
3682
3683 static FloatRelation QEMU_FLATTEN
3684 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
3685 {
3686 FloatParts128 pa, pb;
3687
3688 float128_unpack_canonical(&pa, a, s);
3689 float128_unpack_canonical(&pb, b, s);
3690 return parts_compare(&pa, &pb, s, is_quiet);
3691 }
3692
3693 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
3694 {
3695 return float128_do_compare(a, b, s, false);
3696 }
3697
3698 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
3699 {
3700 return float128_do_compare(a, b, s, true);
3701 }
3702
3703 /*
3704 * Scale by 2**N
3705 */
3706
3707 float16 float16_scalbn(float16 a, int n, float_status *status)
3708 {
3709 FloatParts64 p;
3710
3711 float16_unpack_canonical(&p, a, status);
3712 parts_scalbn(&p, n, status);
3713 return float16_round_pack_canonical(&p, status);
3714 }
3715
3716 float32 float32_scalbn(float32 a, int n, float_status *status)
3717 {
3718 FloatParts64 p;
3719
3720 float32_unpack_canonical(&p, a, status);
3721 parts_scalbn(&p, n, status);
3722 return float32_round_pack_canonical(&p, status);
3723 }
3724
3725 float64 float64_scalbn(float64 a, int n, float_status *status)
3726 {
3727 FloatParts64 p;
3728
3729 float64_unpack_canonical(&p, a, status);
3730 parts_scalbn(&p, n, status);
3731 return float64_round_pack_canonical(&p, status);
3732 }
3733
3734 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3735 {
3736 FloatParts64 p;
3737
3738 bfloat16_unpack_canonical(&p, a, status);
3739 parts_scalbn(&p, n, status);
3740 return bfloat16_round_pack_canonical(&p, status);
3741 }
3742
3743 float128 float128_scalbn(float128 a, int n, float_status *status)
3744 {
3745 FloatParts128 p;
3746
3747 float128_unpack_canonical(&p, a, status);
3748 parts_scalbn(&p, n, status);
3749 return float128_round_pack_canonical(&p, status);
3750 }
3751
3752 /*
3753 * Square Root
3754 */
3755
3756 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
3757 {
3758 FloatParts64 p;
3759
3760 float16_unpack_canonical(&p, a, status);
3761 parts_sqrt(&p, status, &float16_params);
3762 return float16_round_pack_canonical(&p, status);
3763 }
3764
3765 static float32 QEMU_SOFTFLOAT_ATTR
3766 soft_f32_sqrt(float32 a, float_status *status)
3767 {
3768 FloatParts64 p;
3769
3770 float32_unpack_canonical(&p, a, status);
3771 parts_sqrt(&p, status, &float32_params);
3772 return float32_round_pack_canonical(&p, status);
3773 }
3774
3775 static float64 QEMU_SOFTFLOAT_ATTR
3776 soft_f64_sqrt(float64 a, float_status *status)
3777 {
3778 FloatParts64 p;
3779
3780 float64_unpack_canonical(&p, a, status);
3781 parts_sqrt(&p, status, &float64_params);
3782 return float64_round_pack_canonical(&p, status);
3783 }
3784
/*
 * Hardfloat square root: use the host sqrtf() only for positive-sign
 * zero or normal inputs; all other classes (and negative signs) take
 * the softfloat path.
 */
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        /* Classify with the host FPU's view of the value. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

soft:
    return soft_f32_sqrt(ua.s, s);
}
3811
/*
 * Hardfloat square root: use the host sqrt() only for positive-sign
 * zero or normal inputs; all other classes (and negative signs) take
 * the softfloat path.
 */
float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        /* Classify with the host FPU's view of the value. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

soft:
    return soft_f64_sqrt(ua.s, s);
}
3838
3839 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3840 {
3841 FloatParts64 p;
3842
3843 bfloat16_unpack_canonical(&p, a, status);
3844 parts_sqrt(&p, status, &bfloat16_params);
3845 return bfloat16_round_pack_canonical(&p, status);
3846 }
3847
3848 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
3849 {
3850 FloatParts128 p;
3851
3852 float128_unpack_canonical(&p, a, status);
3853 parts_sqrt(&p, status, &float128_params);
3854 return float128_round_pack_canonical(&p, status);
3855 }
3856
3857 /*----------------------------------------------------------------------------
3858 | The pattern for a default generated NaN.
3859 *----------------------------------------------------------------------------*/
3860
3861 float16 float16_default_nan(float_status *status)
3862 {
3863 FloatParts64 p;
3864
3865 parts_default_nan(&p, status);
3866 p.frac >>= float16_params.frac_shift;
3867 return float16_pack_raw(&p);
3868 }
3869
3870 float32 float32_default_nan(float_status *status)
3871 {
3872 FloatParts64 p;
3873
3874 parts_default_nan(&p, status);
3875 p.frac >>= float32_params.frac_shift;
3876 return float32_pack_raw(&p);
3877 }
3878
3879 float64 float64_default_nan(float_status *status)
3880 {
3881 FloatParts64 p;
3882
3883 parts_default_nan(&p, status);
3884 p.frac >>= float64_params.frac_shift;
3885 return float64_pack_raw(&p);
3886 }
3887
3888 float128 float128_default_nan(float_status *status)
3889 {
3890 FloatParts128 p;
3891
3892 parts_default_nan(&p, status);
3893 frac_shr(&p, float128_params.frac_shift);
3894 return float128_pack_raw(&p);
3895 }
3896
3897 bfloat16 bfloat16_default_nan(float_status *status)
3898 {
3899 FloatParts64 p;
3900
3901 parts_default_nan(&p, status);
3902 p.frac >>= bfloat16_params.frac_shift;
3903 return bfloat16_pack_raw(&p);
3904 }
3905
3906 /*----------------------------------------------------------------------------
3907 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3908 *----------------------------------------------------------------------------*/
3909
3910 float16 float16_silence_nan(float16 a, float_status *status)
3911 {
3912 FloatParts64 p;
3913
3914 float16_unpack_raw(&p, a);
3915 p.frac <<= float16_params.frac_shift;
3916 parts_silence_nan(&p, status);
3917 p.frac >>= float16_params.frac_shift;
3918 return float16_pack_raw(&p);
3919 }
3920
3921 float32 float32_silence_nan(float32 a, float_status *status)
3922 {
3923 FloatParts64 p;
3924
3925 float32_unpack_raw(&p, a);
3926 p.frac <<= float32_params.frac_shift;
3927 parts_silence_nan(&p, status);
3928 p.frac >>= float32_params.frac_shift;
3929 return float32_pack_raw(&p);
3930 }
3931
3932 float64 float64_silence_nan(float64 a, float_status *status)
3933 {
3934 FloatParts64 p;
3935
3936 float64_unpack_raw(&p, a);
3937 p.frac <<= float64_params.frac_shift;
3938 parts_silence_nan(&p, status);
3939 p.frac >>= float64_params.frac_shift;
3940 return float64_pack_raw(&p);
3941 }
3942
3943 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3944 {
3945 FloatParts64 p;
3946
3947 bfloat16_unpack_raw(&p, a);
3948 p.frac <<= bfloat16_params.frac_shift;
3949 parts_silence_nan(&p, status);
3950 p.frac >>= bfloat16_params.frac_shift;
3951 return bfloat16_pack_raw(&p);
3952 }
3953
3954 float128 float128_silence_nan(float128 a, float_status *status)
3955 {
3956 FloatParts128 p;
3957
3958 float128_unpack_raw(&p, a);
3959 frac_shl(&p, float128_params.frac_shift);
3960 parts_silence_nan(&p, status);
3961 frac_shr(&p, float128_params.frac_shift);
3962 return float128_pack_raw(&p);
3963 }
3964
3965 /*----------------------------------------------------------------------------
3966 | If `a' is denormal and we are in flush-to-zero mode then set the
3967 | input-denormal exception and return zero. Otherwise just return the value.
3968 *----------------------------------------------------------------------------*/
3969
3970 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
3971 {
3972 if (p.exp == 0 && p.frac != 0) {
3973 float_raise(float_flag_input_denormal, status);
3974 return true;
3975 }
3976
3977 return false;
3978 }
3979
3980 float16 float16_squash_input_denormal(float16 a, float_status *status)
3981 {
3982 if (status->flush_inputs_to_zero) {
3983 FloatParts64 p;
3984
3985 float16_unpack_raw(&p, a);
3986 if (parts_squash_denormal(p, status)) {
3987 return float16_set_sign(float16_zero, p.sign);
3988 }
3989 }
3990 return a;
3991 }
3992
3993 float32 float32_squash_input_denormal(float32 a, float_status *status)
3994 {
3995 if (status->flush_inputs_to_zero) {
3996 FloatParts64 p;
3997
3998 float32_unpack_raw(&p, a);
3999 if (parts_squash_denormal(p, status)) {
4000 return float32_set_sign(float32_zero, p.sign);
4001 }
4002 }
4003 return a;
4004 }
4005
4006 float64 float64_squash_input_denormal(float64 a, float_status *status)
4007 {
4008 if (status->flush_inputs_to_zero) {
4009 FloatParts64 p;
4010
4011 float64_unpack_raw(&p, a);
4012 if (parts_squash_denormal(p, status)) {
4013 return float64_set_sign(float64_zero, p.sign);
4014 }
4015 }
4016 return a;
4017 }
4018
4019 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4020 {
4021 if (status->flush_inputs_to_zero) {
4022 FloatParts64 p;
4023
4024 bfloat16_unpack_raw(&p, a);
4025 if (parts_squash_denormal(p, status)) {
4026 return bfloat16_set_sign(bfloat16_zero, p.sign);
4027 }
4028 }
4029 return a;
4030 }
4031
4032 /*----------------------------------------------------------------------------
4033 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
4034 | and 7, and returns the properly rounded 32-bit integer corresponding to the
4035 | input. If `zSign' is 1, the input is negated before being converted to an
4036 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
4037 | is simply rounded to an integer, with the inexact exception raised if the
4038 | input cannot be represented exactly as an integer. However, if the fixed-
4039 | point input is too large, the invalid exception is raised and the largest
4040 | positive or negative integer is returned.
4041 *----------------------------------------------------------------------------*/
4042
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    int32_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Choose the increment added below the binary point (bits 6..0). */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round up only when the result bit (bit 7) would be even. */
        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
    }
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    /* Exact tie under nearest-even: clear the low bit to round to even. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        absZ &= ~1;
    }
    z = absZ;
    if ( zSign ) z = - z;
    /* Magnitude beyond 32 bits or sign mismatch: invalid, saturate. */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise(float_flag_invalid, status);
        return zSign ? INT32_MIN : INT32_MAX;
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4090
4091 /*----------------------------------------------------------------------------
4092 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4093 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4094 | and returns the properly rounded 64-bit integer corresponding to the input.
4095 | If `zSign' is 1, the input is negated before being converted to an integer.
4096 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4097 | the inexact exception raised if the input cannot be represented exactly as
4098 | an integer. However, if the fixed-point input is too large, the invalid
4099 | exception is raised and the largest positive or negative integer is
4100 | returned.
4101 *----------------------------------------------------------------------------*/
4102
static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;
    int64_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Decide whether the fraction word absZ1 rounds the integer part up. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Top bit of absZ1 set means the fraction is >= 1/2. */
        increment = ((int64_t) absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if ( increment ) {
        ++absZ0;
        /* Wrap-around on increment: the magnitude exceeds 64 bits. */
        if ( absZ0 == 0 ) goto overflow;
        /* Exact tie under nearest-even: round the low bit to even. */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }
    z = absZ0;
    if ( zSign ) z = - z;
    /* Sign mismatch after negation: magnitude did not fit in int64. */
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
 overflow:
        float_raise(float_flag_invalid, status);
        return zSign ? INT64_MIN : INT64_MAX;
    }
    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4152
4153 /*----------------------------------------------------------------------------
4154 | Normalizes the subnormal single-precision floating-point value represented
4155 | by the denormalized significand `aSig'. The normalized exponent and
4156 | significand are stored at the locations pointed to by `zExpPtr' and
4157 | `zSigPtr', respectively.
4158 *----------------------------------------------------------------------------*/
4159
4160 static void
4161 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
4162 {
4163 int8_t shiftCount;
4164
4165 shiftCount = clz32(aSig) - 8;
4166 *zSigPtr = aSig<<shiftCount;
4167 *zExpPtr = 1 - shiftCount;
4168
4169 }
4170
4171 /*----------------------------------------------------------------------------
4172 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4173 | and significand `zSig', and returns the proper single-precision floating-
4174 | point value corresponding to the abstract input. Ordinarily, the abstract
4175 | value is simply rounded and packed into the single-precision format, with
4176 | the inexact exception raised if the abstract input cannot be represented
4177 | exactly. However, if the abstract value is too large, the overflow and
4178 | inexact exceptions are raised and an infinity or maximal finite value is
4179 | returned. If the abstract value is too small, the input value is rounded to
4180 | a subnormal number, and the underflow and inexact exceptions are raised if
4181 | the abstract input cannot be represented exactly as a subnormal single-
4182 | precision floating-point number.
4183 | The input significand `zSig' has its binary point between bits 30
4184 | and 29, which is 7 bits to the left of the usual location. This shifted
4185 | significand must be normalized or smaller. If `zSig' is not normalized,
4186 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4187 | and it must not require rounding. In the usual case that `zSig' is
4188 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4189 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4190 | Binary Floating-Point Arithmetic.
4191 *----------------------------------------------------------------------------*/
4192
static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Choose the increment added below the binary point (bits 6..0). */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round up only when the result bit (bit 7) would be even. */
        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
        break;
    }
    roundBits = zSig & 0x7F;
    /* Exponent at or beyond the representable range (0xFD..): special. */
    if ( 0xFD <= (uint16_t) zExp ) {
        if ( ( 0xFD < zExp )
             || ( ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /*
             * Overflow: produce infinity, except that round-to-odd and
             * the truncating modes yield the largest finite value
             * (-!overflow_to_inf is all-ones, giving max exp/sig bits).
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
        }
        if ( zExp < 0 ) {
            /* Result is subnormal (or flushed to zero). */
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat32(zSign, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < 0x80000000);
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>7;
    /* Exact tie under nearest-even: clear the low bit to round to even. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );

}
4269
4270 /*----------------------------------------------------------------------------
4271 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4272 | and significand `zSig', and returns the proper single-precision floating-
4273 | point value corresponding to the abstract input. This routine is just like
4274 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4275 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4276 | floating-point exponent.
4277 *----------------------------------------------------------------------------*/
4278
4279 static float32
4280 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4281 float_status *status)
4282 {
4283 int8_t shiftCount;
4284
4285 shiftCount = clz32(zSig) - 1;
4286 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4287 status);
4288
4289 }
4290
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision significand `aSig', storing the
| normalized exponent through `zExpPtr' and the normalized significand
| through `zSigPtr'.
*----------------------------------------------------------------------------*/

static void
normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* 11 = 64 - 53: position the leading bit at the float64 hidden-bit slot. */
    int8_t lz = clz64(aSig) - 11;

    *zExpPtr = 1 - lz;
    *zSigPtr = aSig << lz;
}
4308
4309 /*----------------------------------------------------------------------------
4310 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4311 | double-precision floating-point value, returning the result. After being
4312 | shifted into the proper positions, the three fields are simply added
4313 | together to form the result. This means that any integer portion of `zSig'
4314 | will be added into the exponent. Since a properly normalized significand
4315 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4316 | than the desired result exponent whenever `zSig' is a complete, normalized
4317 | significand.
4318 *----------------------------------------------------------------------------*/
4319
4320 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4321 {
4322
4323 return make_float64(
4324 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4325
4326 }
4327
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper double-precision floating-
| point value corresponding to the abstract input.  Ordinarily, the abstract
| value is simply rounded and packed into the double-precision format, with
| the inexact exception raised if the abstract input cannot be represented
| exactly.  However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned.  If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal double-
| precision floating-point number.
|     The input significand `zSig' has its binary point between bits 62
| and 61, which is 10 bits to the left of the usual location.  This shifted
| significand must be normalized or smaller.  If `zSig' is not normalized,
| `zExp' must be 0; in that case, the result returned is a subnormal number,
| and it must not require rounding.  In the usual case that `zSig' is
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
| The handling of underflow and overflow follows the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * The low 10 bits of zSig will be discarded below; roundIncrement is
     * the bias added before truncation.  0x200 is half the discarded
     * range (round to nearest), 0x3ff rounds away from zero for the
     * appropriate directed modes, 0 truncates.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x200;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x3ff;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x3ff : 0;
        break;
    case float_round_to_odd:
        /* Round up only when the result's LSB (bit 10) is currently 0. */
        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
        break;
    default:
        abort();
    }
    roundBits = zSig & 0x3FF;
    /*
     * The unsigned compare catches both ends at once: zExp >= 0x7FD
     * (possible overflow) and zExp < 0 (which wraps to a large uint16_t,
     * possible underflow to subnormal).
     */
    if ( 0x7FD <= (uint16_t) zExp ) {
        if ( ( 0x7FD < zExp )
             || ( ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /*
             * -(!overflow_to_inf) is either 0 (exponent 0x7FF, sig 0:
             * infinity) or all-ones, which packFloat64's field addition
             * turns into the largest finite number (0x7FE, max fraction).
             */
            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat64(zSign, 0, 0);
            }
            /*
             * Tininess: always if detected before rounding, or if the
             * exponent is clearly below the subnormal boundary, or if
             * rounding does not carry the significand back up to the
             * smallest normal (bit 63 stays clear).
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>10;
    /* Exact tie under nearest-even: clear the LSB to round to even. */
    if (!(roundBits ^ 0x200) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );

}
4425
4426 /*----------------------------------------------------------------------------
4427 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4428 | and significand `zSig', and returns the proper double-precision floating-
4429 | point value corresponding to the abstract input. This routine is just like
4430 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4431 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4432 | floating-point exponent.
4433 *----------------------------------------------------------------------------*/
4434
4435 static float64
4436 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4437 float_status *status)
4438 {
4439 int8_t shiftCount;
4440
4441 shiftCount = clz64(zSig) - 1;
4442 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4443 status);
4444
4445 }
4446
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision significand `aSig',
| storing the normalized exponent through `zExpPtr' and the normalized
| significand through `zSigPtr'.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* floatx80 has an explicit integer bit, so the leading set bit goes
     * all the way up to bit 63. */
    int8_t lz = clz64(aSig);

    *zExpPtr = 1 - lz;
    *zSigPtr = aSig << lz;
}
4463
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input.  Ordinarily, the abstract value is
| rounded and packed into the extended double-precision format, with the
| inexact exception raised if the abstract input cannot be represented
| exactly.  However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned.  If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal extended
| double-precision floating-point number.
|     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
| the result is rounded to the same number of bits as single or double
| precision, respectively.  Otherwise, the result is rounded to the full
| precision of the extended double-precision format.
|     The input significand must be normalized or smaller.  If the input
| significand is not normalized, `zExp' must be 0; in that case, the result
| returned is a subnormal number, and it must not require rounding.  The
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    FloatRoundMode roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * For reduced precision, round within zSig0 using an increment/mask
     * sized for 53 (double) or 24 (single) significant bits; the full
     * 80-bit case is handled separately at `precision80'.
     */
    switch (roundingPrecision) {
    case floatx80_precision_x:
        goto precision80;
    case floatx80_precision_d:
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
        break;
    case floatx80_precision_s:
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
        break;
    default:
        g_assert_not_reached();
    }
    /* Fold any nonzero low-word bits into the sticky bit of zSig0. */
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    /*
     * Unsigned compare catches zExp <= 0 (wraps large) and zExp >= 0x7FFE
     * in one test.
     */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        /* zSig0 + roundIncrement < zSig0 tests for carry-out. */
        if ( ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            /*
             * Tiny unless rounding carries out of the significand
             * (zSig0 <= zSig0 + roundIncrement means no carry-out).
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /* Rounding set the explicit integer bit: result is normal. */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            /* Exact tie under nearest-even: also clear the result's LSB. */
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    /* Carry out of the significand: renormalize to 1000...0, bump exponent. */
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    /* Exact tie under nearest-even: also clear the result's LSB. */
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /*
     * Full 80-bit precision: zSig1 holds the 64 bits that will be
     * discarded; decide whether they round zSig0 up.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if ( ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            /*
             * roundMask = 0 so that the shared overflow path below
             * returns ~roundMask = all-ones as the maximal significand.
             */
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Modes that cannot overflow to infinity on this sign return
             * the largest finite value instead. */
            if ( ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            /* Tiny unless an increment of an all-ones zSig0 would carry
             * back up to the smallest normal. */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* Recompute the increment: zSig1 changed in the shift above. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                /* Exact tie (only the top discarded bit set): round to even. */
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                /* Increment set the integer bit: result became normal. */
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            /* Carry out of the significand: renormalize, bump exponent. */
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            /* Exact tie (only the top discarded bit set): round to even. */
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}
4677
4678 /*----------------------------------------------------------------------------
4679 | Takes an abstract floating-point value having sign `zSign', exponent
4680 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4681 | and returns the proper extended double-precision floating-point value
4682 | corresponding to the abstract input. This routine is just like
4683 | `roundAndPackFloatx80' except that the input significand does not have to be
4684 | normalized.
4685 *----------------------------------------------------------------------------*/
4686
4687 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
4688 bool zSign, int32_t zExp,
4689 uint64_t zSig0, uint64_t zSig1,
4690 float_status *status)
4691 {
4692 int8_t shiftCount;
4693
4694 if ( zSig0 == 0 ) {
4695 zSig0 = zSig1;
4696 zSig1 = 0;
4697 zExp -= 64;
4698 }
4699 shiftCount = clz64(zSig0);
4700 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4701 zExp -= shiftCount;
4702 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4703 zSig0, zSig1, status);
4704
4705 }
4706
4707 /*----------------------------------------------------------------------------
4708 | Returns the least-significant 64 fraction bits of the quadruple-precision
4709 | floating-point value `a'.
4710 *----------------------------------------------------------------------------*/
4711
4712 static inline uint64_t extractFloat128Frac1( float128 a )
4713 {
4714
4715 return a.low;
4716
4717 }
4718
4719 /*----------------------------------------------------------------------------
4720 | Returns the most-significant 48 fraction bits of the quadruple-precision
4721 | floating-point value `a'.
4722 *----------------------------------------------------------------------------*/
4723
4724 static inline uint64_t extractFloat128Frac0( float128 a )
4725 {
4726
4727 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4728
4729 }
4730
4731 /*----------------------------------------------------------------------------
4732 | Returns the exponent bits of the quadruple-precision floating-point value
4733 | `a'.
4734 *----------------------------------------------------------------------------*/
4735
4736 static inline int32_t extractFloat128Exp( float128 a )
4737 {
4738
4739 return ( a.high>>48 ) & 0x7FFF;
4740
4741 }
4742
4743 /*----------------------------------------------------------------------------
4744 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4745 *----------------------------------------------------------------------------*/
4746
4747 static inline bool extractFloat128Sign(float128 a)
4748 {
4749 return a.high >> 63;
4750 }
4751
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision significand formed by the
| concatenation of `aSig0' and `aSig1'.  The normalized exponent is stored
| through `zExpPtr'; the most significant 49 bits of the normalized
| significand through `zSig0Ptr', and the least significant 64 bits through
| `zSig1Ptr'.
*----------------------------------------------------------------------------*/

static void
normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shiftCount;

    if (aSig0 != 0) {
        /* Leading bit is in the high word: shift it up to bit 48. */
        shiftCount = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shiftCount;
        return;
    }

    /* High word is empty: normalize using the low word alone. */
    shiftCount = clz64(aSig1) - 15;
    if (shiftCount >= 0) {
        *zSig0Ptr = aSig1 << shiftCount;
        *zSig1Ptr = 0;
    } else {
        /* Leading bit is above bit 48 of the low word: split across words.
         * (shiftCount & 63) reproduces the original's defined shift. */
        *zSig0Ptr = aSig1 >> -shiftCount;
        *zSig1Ptr = aSig1 << (shiftCount & 63);
    }
    *zExpPtr = -shiftCount - 63;
}
4792
4793 /*----------------------------------------------------------------------------
4794 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4795 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4796 | floating-point value, returning the result. After being shifted into the
4797 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4798 | added together to form the most significant 32 bits of the result. This
4799 | means that any integer portion of `zSig0' will be added into the exponent.
4800 | Since a properly normalized significand will have an integer portion equal
4801 | to 1, the `zExp' input should be 1 less than the desired result exponent
4802 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4803 | significand.
4804 *----------------------------------------------------------------------------*/
4805
4806 static inline float128
4807 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4808 {
4809 float128 z;
4810
4811 z.low = zSig1;
4812 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4813 return z;
4814 }
4815
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and extended significand formed by the concatenation of `zSig0', `zSig1',
| and `zSig2', and returns the proper quadruple-precision floating-point value
| corresponding to the abstract input.  Ordinarily, the abstract value is
| simply rounded and packed into the quadruple-precision format, with the
| inexact exception raised if the abstract input cannot be represented
| exactly.  However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned.  If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal quadruple-
| precision floating-point number.
|     The input significand must be normalized or smaller.  If the input
| significand is not normalized, `zExp' must be 0; in that case, the result
| returned is a subnormal number, and it must not require rounding.  In the
| usual case that the input significand is normalized, `zExp' must be 1 less
| than the ``true'' floating-point exponent.  The handling of underflow and
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
                                     uint64_t zSig0, uint64_t zSig1,
                                     uint64_t zSig2, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * zSig2 holds the bits to be discarded; decide whether they round the
     * 128-bit significand zSig0:zSig1 up by one.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig2 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig2;
        break;
    case float_round_down:
        increment = zSign && zSig2;
        break;
    case float_round_to_odd:
        /* Round up only when the result's LSB is currently 0. */
        increment = !(zSig1 & 0x1) && zSig2;
        break;
    default:
        abort();
    }
    /*
     * Unsigned compare catches zExp >= 0x7FFD (possible overflow) and
     * zExp < 0 (wraps large; possible underflow) in one test.
     */
    if ( 0x7FFD <= (uint32_t) zExp ) {
        /* Overflow if past the max exponent, or at it with an all-ones
         * significand about to be incremented. */
        if ( ( 0x7FFD < zExp )
             || ( ( zExp == 0x7FFD )
                  && eq128(
                         UINT64_C(0x0001FFFFFFFFFFFF),
                         UINT64_C(0xFFFFFFFFFFFFFFFF),
                         zSig0,
                         zSig1
                     )
                  && increment
                )
           ) {
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Modes that cannot overflow to infinity on this sign return
             * the largest finite number instead. */
            if ( ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
                 || (roundingMode == float_round_to_odd)
               ) {
                return
                    packFloat128(
                        zSign,
                        0x7FFE,
                        UINT64_C(0x0000FFFFFFFFFFFF),
                        UINT64_C(0xFFFFFFFFFFFFFFFF)
                    );
            }
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat128(zSign, 0, 0, 0);
            }
            /*
             * Tiny unless incrementing an all-ones significand would
             * carry back up to the smallest normal.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || !increment
                  || lt128(zSig0, zSig1,
                           UINT64_C(0x0001FFFFFFFFFFFF),
                           UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift128ExtraRightJamming(
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
            zExp = 0;
            if (isTiny && zSig2) {
                float_raise(float_flag_underflow, status);
            }
            /* Recompute the increment: zSig1/zSig2 changed in the shift. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig2 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig2;
                break;
            case float_round_down:
                increment = zSign && zSig2;
                break;
            case float_round_to_odd:
                increment = !(zSig1 & 0x1) && zSig2;
                break;
            default:
                abort();
            }
        }
    }
    if (zSig2) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        /* Exact tie (only the top discarded bit set): round to even. */
        if ((zSig2 + zSig2 == 0) && roundNearestEven) {
            zSig1 &= ~1;
        }
    }
    else {
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    }
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
4948
4949 /*----------------------------------------------------------------------------
4950 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4951 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4952 | returns the proper quadruple-precision floating-point value corresponding
4953 | to the abstract input. This routine is just like `roundAndPackFloat128'
4954 | except that the input significand has fewer bits and does not have to be
4955 | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4956 | point exponent.
4957 *----------------------------------------------------------------------------*/
4958
4959 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
4960 uint64_t zSig0, uint64_t zSig1,
4961 float_status *status)
4962 {
4963 int8_t shiftCount;
4964 uint64_t zSig2;
4965
4966 if ( zSig0 == 0 ) {
4967 zSig0 = zSig1;
4968 zSig1 = 0;
4969 zExp -= 64;
4970 }
4971 shiftCount = clz64(zSig0) - 15;
4972 if ( 0 <= shiftCount ) {
4973 zSig2 = 0;
4974 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4975 }
4976 else {
4977 shift128ExtraRightJamming(
4978 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4979 }
4980 zExp -= shiftCount;
4981 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
4982
4983 }
4984
4985
4986 /*----------------------------------------------------------------------------
4987 | Returns the result of converting the 32-bit two's complement integer `a'
4988 | to the extended double-precision floating-point format. The conversion
4989 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4990 | Arithmetic.
4991 *----------------------------------------------------------------------------*/
4992
4993 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4994 {
4995 bool zSign;
4996 uint32_t absA;
4997 int8_t shiftCount;
4998 uint64_t zSig;
4999
5000 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5001 zSign = ( a < 0 );
5002 absA = zSign ? - a : a;
5003 shiftCount = clz32(absA) + 32;
5004 zSig = absA;
5005 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
5006
5007 }
5008
5009 /*----------------------------------------------------------------------------
5010 | Returns the result of converting the 64-bit two's complement integer `a'
5011 | to the extended double-precision floating-point format. The conversion
5012 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5013 | Arithmetic.
5014 *----------------------------------------------------------------------------*/
5015
5016 floatx80 int64_to_floatx80(int64_t a, float_status *status)
5017 {
5018 bool zSign;
5019 uint64_t absA;
5020 int8_t shiftCount;
5021
5022 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5023 zSign = ( a < 0 );
5024 absA = zSign ? - a : a;
5025 shiftCount = clz64(absA);
5026 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
5027
5028 }
5029
/*----------------------------------------------------------------------------
| Returns the result of converting the single-precision floating-point value
| `a' to the extended double-precision floating-point format.  The conversion
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 float32_to_floatx80(float32 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint32_t aSig;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        if (aSig) {
            /* NaN input: convert via the common-NaN path and quiet it. */
            floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        return packFloatx80(aSign,
                            floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
        /* Subnormal input: normalize so the exponent rebias below works. */
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    /* Make the hidden integer bit explicit (floatx80 stores it). */
    aSig |= 0x00800000;
    /*
     * Rebias the exponent (0x3F80 = 0x3FFF - 0x7F) and shift the 24-bit
     * significand to the top of the 64-bit floatx80 significand.
     */
    return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );

}
5065
/*----------------------------------------------------------------------------
| Returns the remainder of the single-precision floating-point value `a'
| with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

float32 float32_rem(float32 a, float32 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t q;
    uint64_t aSig64, bSig64, q64;
    uint32_t alternateASig;
    int32_t sigMean;
    a = float32_squash_input_denormal(a, status);
    b = float32_squash_input_denormal(b, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    /* rem(Inf, y) and rem(NaN, y) are invalid; NaNs propagate. */
    if ( aExp == 0xFF ) {
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    /* rem(x, Inf) = x; rem(x, NaN) propagates the NaN. */
    if ( bExp == 0xFF ) {
        if (bSig) {
            return propagateFloat32NaN(a, b, status);
        }
        return a;
    }
    /* rem(x, 0) is invalid; normalize a subnormal divisor. */
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float32_default_nan(status);
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the hidden integer bits explicit. */
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent difference: 32-bit arithmetic suffices. */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            /* |a| < |b|/2: a is already the remainder. */
            if ( expDiff < -1 ) return a;
            aSig >>= 1;
        }
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            /* One 64/32 division produces the remaining quotient bits. */
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /*
         * Large exponent difference: reduce iteratively, retiring up to
         * 62 quotient bits per step with an estimated 128/64 division.
         */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (uint64_t) aSig )<<40;
        bSig64 = ( (uint64_t) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            /* Back the estimate off by 2 so it never exceeds the true
             * quotient. */
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        /* Final partial step for the leftover exponent difference. */
        expDiff += 64;
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /* Step past the remainder until it goes negative, keeping the last
     * non-negative value as the alternative candidate. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int32_t) aSig );
    /* Pick the candidate closer to zero; on an exact tie, the one giving
     * an even quotient (IEEE remainder rounds the quotient to nearest-even). */
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int32_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
}
5167
5168
5169
5170 /*----------------------------------------------------------------------------
5171 | Returns the binary exponential of the single-precision floating-point value
5172 | `a'. The operation is performed according to the IEC/IEEE Standard for
5173 | Binary Floating-Point Arithmetic.
5174 |
5175 | Uses the following identities:
5176 |
5177 | 1. -------------------------------------------------------------------------
5178 | x x*ln(2)
5179 | 2 = e
5180 |
5181 | 2. -------------------------------------------------------------------------
5182 | 2 3 4 5 n
5183 | x x x x x x x
5184 | e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5185 | 1! 2! 3! 4! 5! n!
5186 *----------------------------------------------------------------------------*/
5187
static const float64 float32_exp2_coefficients[15] =
{
    /* Taylor-series coefficients 1/n! of e^x, n = 1..15, as float64 bit
     * patterns.  Consumed in order by float32_exp2() below. */
    const_float64( 0x3ff0000000000000ll ), /* 1/1!  = 1 */
    const_float64( 0x3fe0000000000000ll ), /* 1/2!  = 0.5 */
    const_float64( 0x3fc5555555555555ll ), /* 1/3! */
    const_float64( 0x3fa5555555555555ll ), /* 1/4! */
    const_float64( 0x3f81111111111111ll ), /* 1/5! */
    const_float64( 0x3f56c16c16c16c17ll ), /* 1/6! */
    const_float64( 0x3f2a01a01a01a01all ), /* 1/7! */
    const_float64( 0x3efa01a01a01a01all ), /* 1/8! */
    const_float64( 0x3ec71de3a556c734ll ), /* 1/9! */
    const_float64( 0x3e927e4fb7789f5cll ), /* 1/10! */
    const_float64( 0x3e5ae64567f544e4ll ), /* 1/11! */
    const_float64( 0x3e21eed8eff8d898ll ), /* 1/12! */
    const_float64( 0x3de6124613a86d09ll ), /* 1/13! */
    const_float64( 0x3da93974a8c07c9dll ), /* 1/14! */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 1/15! */
};
5206
5207 float32 float32_exp2(float32 a, float_status *status)
5208 {
5209 bool aSign;
5210 int aExp;
5211 uint32_t aSig;
5212 float64 r, x, xn;
5213 int i;
5214 a = float32_squash_input_denormal(a, status);
5215
5216 aSig = extractFloat32Frac( a );
5217 aExp = extractFloat32Exp( a );
5218 aSign = extractFloat32Sign( a );
5219
5220 if ( aExp == 0xFF) {
5221 if (aSig) {
5222 return propagateFloat32NaN(a, float32_zero, status);
5223 }
5224 return (aSign) ? float32_zero : a;
5225 }
5226 if (aExp == 0) {
5227 if (aSig == 0) return float32_one;
5228 }
5229
5230 float_raise(float_flag_inexact, status);
5231
5232 /* ******************************* */
5233 /* using float64 for approximation */
5234 /* ******************************* */
5235 x = float32_to_float64(a, status);
5236 x = float64_mul(x, float64_ln2, status);
5237
5238 xn = x;
5239 r = float64_one;
5240 for (i = 0 ; i < 15 ; i++) {
5241 float64 f;
5242
5243 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5244 r = float64_add(r, f, status);
5245
5246 xn = float64_mul(xn, x, status);
5247 }
5248
5249 return float64_to_float32(r, status);
5250 }
5251
5252 /*----------------------------------------------------------------------------
5253 | Returns the binary log of the single-precision floating-point value `a'.
5254 | The operation is performed according to the IEC/IEEE Standard for Binary
5255 | Floating-Point Arithmetic.
5256 *----------------------------------------------------------------------------*/
float32 float32_log2(float32 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint32_t aSig, zSig, i;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    /* log2(+/-0) = -infinity; normalize a subnormal input first. */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    /* log2 of a negative number (including -inf) is invalid. */
    if ( aSign ) {
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    /* NaN propagates; log2(+inf) = +inf. */
    if ( aExp == 0xFF ) {
        if (aSig) {
            return propagateFloat32NaN(a, float32_zero, status);
        }
        return a;
    }

    /* Integer part of the log is the unbiased exponent. */
    aExp -= 0x7F;
    aSig |= 0x00800000;
    zSign = aExp < 0;
    zSig = aExp << 23;

    /*
     * Generate the fraction bits one at a time: repeatedly square the
     * significand; each time the square reaches [2,4) the next result
     * bit is 1 and the significand is renormalized by halving.
     */
    for (i = 1 << 22; i > 0; i >>= 1) {
        aSig = ( (uint64_t)aSig * aSig ) >> 23;
        if ( aSig & 0x01000000 ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    /* Negative exponent: the whole fixed-point result is negated. */
    if ( zSign )
        zSig = -zSig;

    return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
}
5301
5302 /*----------------------------------------------------------------------------
5303 | Returns the result of converting the double-precision floating-point value
5304 | `a' to the extended double-precision floating-point format. The conversion
5305 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5306 | Arithmetic.
5307 *----------------------------------------------------------------------------*/
5308
5309 floatx80 float64_to_floatx80(float64 a, float_status *status)
5310 {
5311 bool aSign;
5312 int aExp;
5313 uint64_t aSig;
5314
5315 a = float64_squash_input_denormal(a, status);
5316 aSig = extractFloat64Frac( a );
5317 aExp = extractFloat64Exp( a );
5318 aSign = extractFloat64Sign( a );
5319 if ( aExp == 0x7FF ) {
5320 if (aSig) {
5321 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5322 status);
5323 return floatx80_silence_nan(res, status);
5324 }
5325 return packFloatx80(aSign,
5326 floatx80_infinity_high,
5327 floatx80_infinity_low);
5328 }
5329 if ( aExp == 0 ) {
5330 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5331 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5332 }
5333 return
5334 packFloatx80(
5335 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5336
5337 }
5338
5339 /*----------------------------------------------------------------------------
5340 | Returns the remainder of the double-precision floating-point value `a'
5341 | with respect to the corresponding value `b'. The operation is performed
5342 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5343 *----------------------------------------------------------------------------*/
5344
float64 float64_rem(float64 a, float64 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint64_t aSig, bSig;
    uint64_t q, alternateASig;
    int64_t sigMean;

    a = float64_squash_input_denormal(a, status);
    b = float64_squash_input_denormal(b, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    /* rem(NaN, x), rem(inf, x) and rem(x, NaN) special cases. */
    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN(a, b, status);
        }
        /* rem(inf, anything) is invalid. */
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( bExp == 0x7FF ) {
        if (bSig) {
            return propagateFloat64NaN(a, b, status);
        }
        /* rem(finite, inf) = the finite operand, exactly. */
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* rem(x, 0) is invalid. */
            float_raise(float_flag_invalid, status);
            return float64_default_nan(status);
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    /* Make the integer bits explicit and left-justify both significands. */
    expDiff = aExp - bExp;
    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
    if ( expDiff < 0 ) {
        /* |a| < |b|/2: the remainder is `a' itself. */
        if ( expDiff < -1 ) return a;
        aSig >>= 1;
    }
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    /* Reduce 62 quotient bits at a time until the exponents align. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;      /* underestimate to keep aSig >= 0 */
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }
    /* Step past zero, then pick the remainder nearest to zero
     * (ties go to the even quotient). */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int64_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int64_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);

}
5426
5427 /*----------------------------------------------------------------------------
5428 | Returns the binary log of the double-precision floating-point value `a'.
5429 | The operation is performed according to the IEC/IEEE Standard for Binary
5430 | Floating-Point Arithmetic.
5431 *----------------------------------------------------------------------------*/
float64 float64_log2(float64 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint64_t aSig, aSig0, aSig1, zSig, i;
    a = float64_squash_input_denormal(a, status);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    /* log2(+/-0) = -infinity; normalize a subnormal input first. */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    /* log2 of a negative number (including -inf) is invalid. */
    if ( aSign ) {
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    /* NaN propagates; log2(+inf) = +inf. */
    if ( aExp == 0x7FF ) {
        if (aSig) {
            return propagateFloat64NaN(a, float64_zero, status);
        }
        return a;
    }

    /* Integer part of the log is the unbiased exponent. */
    aExp -= 0x3FF;
    aSig |= UINT64_C(0x0010000000000000);
    zSign = aExp < 0;
    zSig = (uint64_t)aExp << 52;
    /*
     * Generate fraction bits one at a time by repeated squaring of the
     * significand; each time the square reaches [2,4) the next result
     * bit is 1 and the significand is renormalized by halving.
     */
    for (i = 1LL << 51; i > 0; i >>= 1) {
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
        if ( aSig & UINT64_C(0x0020000000000000) ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    /* Negative exponent: the whole fixed-point result is negated. */
    if ( zSign )
        zSig = -zSig;
    return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
}
5475
5476 /*----------------------------------------------------------------------------
5477 | Returns the result of converting the extended double-precision floating-
5478 | point value `a' to the 32-bit two's complement integer format. The
5479 | conversion is performed according to the IEC/IEEE Standard for Binary
5480 | Floating-Point Arithmetic---which means in particular that the conversion
5481 | is rounded according to the current rounding mode. If `a' is a NaN, the
5482 | largest positive integer is returned. Otherwise, if the conversion
5483 | overflows, the largest integer with the same sign as `a' is returned.
5484 *----------------------------------------------------------------------------*/
5485
5486 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5487 {
5488 bool aSign;
5489 int32_t aExp, shiftCount;
5490 uint64_t aSig;
5491
5492 if (floatx80_invalid_encoding(a)) {
5493 float_raise(float_flag_invalid, status);
5494 return 1 << 31;
5495 }
5496 aSig = extractFloatx80Frac( a );
5497 aExp = extractFloatx80Exp( a );
5498 aSign = extractFloatx80Sign( a );
5499 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5500 shiftCount = 0x4037 - aExp;
5501 if ( shiftCount <= 0 ) shiftCount = 1;
5502 shift64RightJamming( aSig, shiftCount, &aSig );
5503 return roundAndPackInt32(aSign, aSig, status);
5504
5505 }
5506
5507 /*----------------------------------------------------------------------------
5508 | Returns the result of converting the extended double-precision floating-
5509 | point value `a' to the 32-bit two's complement integer format. The
5510 | conversion is performed according to the IEC/IEEE Standard for Binary
5511 | Floating-Point Arithmetic, except that the conversion is always rounded
5512 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5513 | Otherwise, if the conversion overflows, the largest integer with the same
5514 | sign as `a' is returned.
5515 *----------------------------------------------------------------------------*/
5516
5517 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5518 {
5519 bool aSign;
5520 int32_t aExp, shiftCount;
5521 uint64_t aSig, savedASig;
5522 int32_t z;
5523
5524 if (floatx80_invalid_encoding(a)) {
5525 float_raise(float_flag_invalid, status);
5526 return 1 << 31;
5527 }
5528 aSig = extractFloatx80Frac( a );
5529 aExp = extractFloatx80Exp( a );
5530 aSign = extractFloatx80Sign( a );
5531 if ( 0x401E < aExp ) {
5532 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5533 goto invalid;
5534 }
5535 else if ( aExp < 0x3FFF ) {
5536 if (aExp || aSig) {
5537 float_raise(float_flag_inexact, status);
5538 }
5539 return 0;
5540 }
5541 shiftCount = 0x403E - aExp;
5542 savedASig = aSig;
5543 aSig >>= shiftCount;
5544 z = aSig;
5545 if ( aSign ) z = - z;
5546 if ( ( z < 0 ) ^ aSign ) {
5547 invalid:
5548 float_raise(float_flag_invalid, status);
5549 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5550 }
5551 if ( ( aSig<<shiftCount ) != savedASig ) {
5552 float_raise(float_flag_inexact, status);
5553 }
5554 return z;
5555
5556 }
5557
5558 /*----------------------------------------------------------------------------
5559 | Returns the result of converting the extended double-precision floating-
5560 | point value `a' to the 64-bit two's complement integer format. The
5561 | conversion is performed according to the IEC/IEEE Standard for Binary
5562 | Floating-Point Arithmetic---which means in particular that the conversion
5563 | is rounded according to the current rounding mode. If `a' is a NaN,
5564 | the largest positive integer is returned. Otherwise, if the conversion
5565 | overflows, the largest integer with the same sign as `a' is returned.
5566 *----------------------------------------------------------------------------*/
5567
5568 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5569 {
5570 bool aSign;
5571 int32_t aExp, shiftCount;
5572 uint64_t aSig, aSigExtra;
5573
5574 if (floatx80_invalid_encoding(a)) {
5575 float_raise(float_flag_invalid, status);
5576 return 1ULL << 63;
5577 }
5578 aSig = extractFloatx80Frac( a );
5579 aExp = extractFloatx80Exp( a );
5580 aSign = extractFloatx80Sign( a );
5581 shiftCount = 0x403E - aExp;
5582 if ( shiftCount <= 0 ) {
5583 if ( shiftCount ) {
5584 float_raise(float_flag_invalid, status);
5585 if (!aSign || floatx80_is_any_nan(a)) {
5586 return INT64_MAX;
5587 }
5588 return INT64_MIN;
5589 }
5590 aSigExtra = 0;
5591 }
5592 else {
5593 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5594 }
5595 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5596
5597 }
5598
5599 /*----------------------------------------------------------------------------
5600 | Returns the result of converting the extended double-precision floating-
5601 | point value `a' to the 64-bit two's complement integer format. The
5602 | conversion is performed according to the IEC/IEEE Standard for Binary
5603 | Floating-Point Arithmetic, except that the conversion is always rounded
5604 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5605 | Otherwise, if the conversion overflows, the largest integer with the same
5606 | sign as `a' is returned.
5607 *----------------------------------------------------------------------------*/
5608
5609 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5610 {
5611 bool aSign;
5612 int32_t aExp, shiftCount;
5613 uint64_t aSig;
5614 int64_t z;
5615
5616 if (floatx80_invalid_encoding(a)) {
5617 float_raise(float_flag_invalid, status);
5618 return 1ULL << 63;
5619 }
5620 aSig = extractFloatx80Frac( a );
5621 aExp = extractFloatx80Exp( a );
5622 aSign = extractFloatx80Sign( a );
5623 shiftCount = aExp - 0x403E;
5624 if ( 0 <= shiftCount ) {
5625 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5626 if ( ( a.high != 0xC03E ) || aSig ) {
5627 float_raise(float_flag_invalid, status);
5628 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5629 return INT64_MAX;
5630 }
5631 }
5632 return INT64_MIN;
5633 }
5634 else if ( aExp < 0x3FFF ) {
5635 if (aExp | aSig) {
5636 float_raise(float_flag_inexact, status);
5637 }
5638 return 0;
5639 }
5640 z = aSig>>( - shiftCount );
5641 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5642 float_raise(float_flag_inexact, status);
5643 }
5644 if ( aSign ) z = - z;
5645 return z;
5646
5647 }
5648
5649 /*----------------------------------------------------------------------------
5650 | Returns the result of converting the extended double-precision floating-
5651 | point value `a' to the single-precision floating-point format. The
5652 | conversion is performed according to the IEC/IEEE Standard for Binary
5653 | Floating-Point Arithmetic.
5654 *----------------------------------------------------------------------------*/
5655
5656 float32 floatx80_to_float32(floatx80 a, float_status *status)
5657 {
5658 bool aSign;
5659 int32_t aExp;
5660 uint64_t aSig;
5661
5662 if (floatx80_invalid_encoding(a)) {
5663 float_raise(float_flag_invalid, status);
5664 return float32_default_nan(status);
5665 }
5666 aSig = extractFloatx80Frac( a );
5667 aExp = extractFloatx80Exp( a );
5668 aSign = extractFloatx80Sign( a );
5669 if ( aExp == 0x7FFF ) {
5670 if ( (uint64_t) ( aSig<<1 ) ) {
5671 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5672 status);
5673 return float32_silence_nan(res, status);
5674 }
5675 return packFloat32( aSign, 0xFF, 0 );
5676 }
5677 shift64RightJamming( aSig, 33, &aSig );
5678 if ( aExp || aSig ) aExp -= 0x3F81;
5679 return roundAndPackFloat32(aSign, aExp, aSig, status);
5680
5681 }
5682
5683 /*----------------------------------------------------------------------------
5684 | Returns the result of converting the extended double-precision floating-
5685 | point value `a' to the double-precision floating-point format. The
5686 | conversion is performed according to the IEC/IEEE Standard for Binary
5687 | Floating-Point Arithmetic.
5688 *----------------------------------------------------------------------------*/
5689
5690 float64 floatx80_to_float64(floatx80 a, float_status *status)
5691 {
5692 bool aSign;
5693 int32_t aExp;
5694 uint64_t aSig, zSig;
5695
5696 if (floatx80_invalid_encoding(a)) {
5697 float_raise(float_flag_invalid, status);
5698 return float64_default_nan(status);
5699 }
5700 aSig = extractFloatx80Frac( a );
5701 aExp = extractFloatx80Exp( a );
5702 aSign = extractFloatx80Sign( a );
5703 if ( aExp == 0x7FFF ) {
5704 if ( (uint64_t) ( aSig<<1 ) ) {
5705 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5706 status);
5707 return float64_silence_nan(res, status);
5708 }
5709 return packFloat64( aSign, 0x7FF, 0 );
5710 }
5711 shift64RightJamming( aSig, 1, &zSig );
5712 if ( aExp || aSig ) aExp -= 0x3C01;
5713 return roundAndPackFloat64(aSign, aExp, zSig, status);
5714
5715 }
5716
5717 /*----------------------------------------------------------------------------
5718 | Returns the result of converting the extended double-precision floating-
5719 | point value `a' to the quadruple-precision floating-point format. The
5720 | conversion is performed according to the IEC/IEEE Standard for Binary
5721 | Floating-Point Arithmetic.
5722 *----------------------------------------------------------------------------*/
5723
5724 float128 floatx80_to_float128(floatx80 a, float_status *status)
5725 {
5726 bool aSign;
5727 int aExp;
5728 uint64_t aSig, zSig0, zSig1;
5729
5730 if (floatx80_invalid_encoding(a)) {
5731 float_raise(float_flag_invalid, status);
5732 return float128_default_nan(status);
5733 }
5734 aSig = extractFloatx80Frac( a );
5735 aExp = extractFloatx80Exp( a );
5736 aSign = extractFloatx80Sign( a );
5737 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5738 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5739 status);
5740 return float128_silence_nan(res, status);
5741 }
5742 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5743 return packFloat128( aSign, aExp, zSig0, zSig1 );
5744
5745 }
5746
5747 /*----------------------------------------------------------------------------
5748 | Rounds the extended double-precision floating-point value `a'
5749 | to the precision provided by floatx80_rounding_precision and returns the
5750 | result as an extended double-precision floating-point value.
5751 | The operation is performed according to the IEC/IEEE Standard for Binary
5752 | Floating-Point Arithmetic.
5753 *----------------------------------------------------------------------------*/
5754
5755 floatx80 floatx80_round(floatx80 a, float_status *status)
5756 {
5757 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5758 extractFloatx80Sign(a),
5759 extractFloatx80Exp(a),
5760 extractFloatx80Frac(a), 0, status);
5761 }
5762
5763 /*----------------------------------------------------------------------------
5764 | Rounds the extended double-precision floating-point value `a' to an integer,
| and returns the result as an extended double-precision floating-point
5766 | value. The operation is performed according to the IEC/IEEE Standard for
5767 | Binary Floating-Point Arithmetic.
5768 *----------------------------------------------------------------------------*/
5769
5770 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5771 {
5772 bool aSign;
5773 int32_t aExp;
5774 uint64_t lastBitMask, roundBitsMask;
5775 floatx80 z;
5776
5777 if (floatx80_invalid_encoding(a)) {
5778 float_raise(float_flag_invalid, status);
5779 return floatx80_default_nan(status);
5780 }
5781 aExp = extractFloatx80Exp( a );
5782 if ( 0x403E <= aExp ) {
5783 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5784 return propagateFloatx80NaN(a, a, status);
5785 }
5786 return a;
5787 }
5788 if ( aExp < 0x3FFF ) {
5789 if ( ( aExp == 0 )
5790 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5791 return a;
5792 }
5793 float_raise(float_flag_inexact, status);
5794 aSign = extractFloatx80Sign( a );
5795 switch (status->float_rounding_mode) {
5796 case float_round_nearest_even:
5797 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5798 ) {
5799 return
5800 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5801 }
5802 break;
5803 case float_round_ties_away:
5804 if (aExp == 0x3FFE) {
5805 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5806 }
5807 break;
5808 case float_round_down:
5809 return
5810 aSign ?
5811 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5812 : packFloatx80( 0, 0, 0 );
5813 case float_round_up:
5814 return
5815 aSign ? packFloatx80( 1, 0, 0 )
5816 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5817
5818 case float_round_to_zero:
5819 break;
5820 default:
5821 g_assert_not_reached();
5822 }
5823 return packFloatx80( aSign, 0, 0 );
5824 }
5825 lastBitMask = 1;
5826 lastBitMask <<= 0x403E - aExp;
5827 roundBitsMask = lastBitMask - 1;
5828 z = a;
5829 switch (status->float_rounding_mode) {
5830 case float_round_nearest_even:
5831 z.low += lastBitMask>>1;
5832 if ((z.low & roundBitsMask) == 0) {
5833 z.low &= ~lastBitMask;
5834 }
5835 break;
5836 case float_round_ties_away:
5837 z.low += lastBitMask >> 1;
5838 break;
5839 case float_round_to_zero:
5840 break;
5841 case float_round_up:
5842 if (!extractFloatx80Sign(z)) {
5843 z.low += roundBitsMask;
5844 }
5845 break;
5846 case float_round_down:
5847 if (extractFloatx80Sign(z)) {
5848 z.low += roundBitsMask;
5849 }
5850 break;
5851 default:
5852 abort();
5853 }
5854 z.low &= ~ roundBitsMask;
5855 if ( z.low == 0 ) {
5856 ++z.high;
5857 z.low = UINT64_C(0x8000000000000000);
5858 }
5859 if (z.low != a.low) {
5860 float_raise(float_flag_inexact, status);
5861 }
5862 return z;
5863
5864 }
5865
5866 /*----------------------------------------------------------------------------
5867 | Returns the result of multiplying the extended double-precision floating-
5868 | point values `a' and `b'. The operation is performed according to the
5869 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5870 *----------------------------------------------------------------------------*/
5871
floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        /* NaN operands propagate; inf * 0 is invalid. */
        if ( (uint64_t) ( aSig<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( ( bExp | bSig ) == 0 ) goto invalid;
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* 0 * inf is invalid. */
        if ( ( aExp | aSig ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    /* 64x64 -> 128-bit product; renormalize if the top bit is clear. */
    zExp = aExp + bExp - 0x3FFE;
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    if ( 0 < (int64_t) zSig0 ) {
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
        --zExp;
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
5927
5928 /*----------------------------------------------------------------------------
5929 | Returns the result of dividing the extended double-precision floating-point
5930 | value `a' by the corresponding value `b'. The operation is performed
5931 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5932 *----------------------------------------------------------------------------*/
5933
floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    uint64_t rem0, rem1, rem2, term0, term1, term2;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* inf / inf is invalid; inf / finite is inf. */
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            goto invalid;
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                                   floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* finite / inf is a signed zero. */
        return packFloatx80( zSign, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* 0 / 0 is invalid; x / 0 raises divide-by-zero. */
            if ( ( aExp | aSig ) == 0 ) {
 invalid:
                float_raise(float_flag_invalid, status);
                return floatx80_default_nan(status);
            }
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(zSign, floatx80_infinity_high,
                                       floatx80_infinity_low);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    /* Keep the dividend below the divisor so the quotient is < 2. */
    zExp = aExp - bExp + 0x3FFE;
    rem1 = 0;
    if ( bSig <= aSig ) {
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
        ++zExp;
    }
    /* Estimate the high quotient word, then correct it downward. */
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
    mul64To128( bSig, zSig0, &term0, &term1 );
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
    }
    /* Low quotient word; only refine it when it could affect rounding. */
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
        mul64To128( bSig, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        }
        /* Fold any non-zero remainder into the sticky bit. */
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6014
6015 /*----------------------------------------------------------------------------
6016 | Returns the remainder of the extended double-precision floating-point value
6017 | `a' with respect to the corresponding value `b'. The operation is performed
6018 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6019 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6020 | the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6021 | the absolute value of the integer quotient.
6022 *----------------------------------------------------------------------------*/
6023
floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
                         float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff, aExpOrig;
    uint64_t aSig0, aSig1, bSig;
    uint64_t q, term0, term1, alternateASig0, alternateASig1;

    *quotient = 0;
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExpOrig = aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    if ( aExp == 0x7FFF ) {
        /* NaN operands propagate; rem(inf, x) is invalid. */
        if ( (uint64_t) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if (aExp == 0 && aSig0 >> 63) {
            /*
             * Pseudo-denormal argument must be returned in normalized
             * form.
             */
            return packFloatx80(aSign, 1, aSig0);
        }
        /* rem(finite, inf) = the finite operand, exactly. */
        return a;
    }
    if ( bExp == 0 ) {
        /* rem(x, 0) is invalid. */
        if ( bSig == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        /* |a| < |b|/2 (or truncating mode): `a' is already the result. */
        if ( mod || expDiff < -1 ) {
            if (aExp == 1 && aExpOrig == 0) {
                /*
                 * Pseudo-denormal argument must be returned in
                 * normalized form.
                 */
                return packFloatx80(aSign, aExp, aSig0);
            }
            return a;
        }
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    *quotient = q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    /* Reduce 62 quotient bits per iteration until the exponents align. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;      /* underestimate to stay positive */
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
        *quotient <<= 62;
        *quotient += q;
    }
    /* Final partial step for the remaining 0..64 quotient bits. */
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        /* Correct the underestimated quotient upward. */
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
        if (expDiff < 64) {
            *quotient <<= expDiff;
        } else {
            *quotient = 0;
        }
        *quotient += q;
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    if (!mod) {
        /* IEEE remainder: choose the residue nearest to zero, with
         * ties going to the even quotient. */
        sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
        if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
             || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
                  && ( q & 1 ) )
           ) {
            aSig0 = alternateASig0;
            aSig1 = alternateASig1;
            zSign = ! zSign;
            ++*quotient;
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            floatx80_precision_x, zSign, bExp + expDiff, aSig0, aSig1, status);

}
6144
6145 /*----------------------------------------------------------------------------
6146 | Returns the remainder of the extended double-precision floating-point value
6147 | `a' with respect to the corresponding value `b'. The operation is performed
6148 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6149 *----------------------------------------------------------------------------*/
6150
6151 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6152 {
6153 uint64_t quotient;
6154 return floatx80_modrem(a, b, false, &quotient, status);
6155 }
6156
6157 /*----------------------------------------------------------------------------
6158 | Returns the remainder of the extended double-precision floating-point value
6159 | `a' with respect to the corresponding value `b', with the quotient truncated
6160 | toward zero.
6161 *----------------------------------------------------------------------------*/
6162
6163 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6164 {
6165 uint64_t quotient;
6166 return floatx80_modrem(a, b, true, &quotient, status);
6167 }
6168
/*----------------------------------------------------------------------------
| Returns the square root of the extended double-precision floating-point
| value `a'.  The operation is performed according to the IEC/IEEE Standard
| for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_sqrt(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    /* Unsupported floatx80 encodings produce the default NaN. */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN propagates; sqrt(+inf) = +inf; sqrt(-inf) is invalid. */
        if ((uint64_t)(aSig0 << 1)) {
            return propagateFloatx80NaN(a, a, status);
        }
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt of a negative number is invalid, except sqrt(-0) = -0. */
        if ( ( aExp | aSig0 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Result exponent is half the unbiased input exponent, re-biased. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    /* Initial approximation from the top 32 significand bits, then one
     * refinement step via a 128/64 division.  The input significand is
     * pre-shifted by an extra bit when the exponent is odd. */
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* Correct zSig0 downward until the remainder a - zSig0^2 is
     * non-negative. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Compute the next 64 result bits; refine only when the estimate is
     * close enough to a rounding boundary to matter. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        /* Correct zSig1 downward until the remainder is non-negative. */
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        /* Fold any non-zero remainder into the sticky bit. */
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= doubleZSig0;
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                0, zExp, zSig0, zSig1, status);
}
6239
6240 /*----------------------------------------------------------------------------
6241 | Returns the result of converting the quadruple-precision floating-point
6242 | value `a' to the extended double-precision floating-point format. The
6243 | conversion is performed according to the IEC/IEEE Standard for Binary
6244 | Floating-Point Arithmetic.
6245 *----------------------------------------------------------------------------*/
6246
6247 floatx80 float128_to_floatx80(float128 a, float_status *status)
6248 {
6249 bool aSign;
6250 int32_t aExp;
6251 uint64_t aSig0, aSig1;
6252
6253 aSig1 = extractFloat128Frac1( a );
6254 aSig0 = extractFloat128Frac0( a );
6255 aExp = extractFloat128Exp( a );
6256 aSign = extractFloat128Sign( a );
6257 if ( aExp == 0x7FFF ) {
6258 if ( aSig0 | aSig1 ) {
6259 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6260 status);
6261 return floatx80_silence_nan(res, status);
6262 }
6263 return packFloatx80(aSign, floatx80_infinity_high,
6264 floatx80_infinity_low);
6265 }
6266 if ( aExp == 0 ) {
6267 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6268 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6269 }
6270 else {
6271 aSig0 |= UINT64_C(0x0001000000000000);
6272 }
6273 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6274 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6275
6276 }
6277
/*----------------------------------------------------------------------------
| Returns the remainder of the quadruple-precision floating-point value `a'
| with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

float128 float128_rem(float128 a, float128 b, float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
    int64_t sigMean0;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    /* a is NaN or infinity: NaNs propagate, rem(inf, b) is invalid. */
    if ( aExp == 0x7FFF ) {
        if ( ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        /* rem(finite, inf) = a, unchanged. */
        return a;
    }
    if ( bExp == 0 ) {
        /* rem(a, 0) is invalid; otherwise normalize the subnormal b. */
        if ( ( bSig0 | bSig1 ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return float128_default_nan(status);
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    expDiff = aExp - bExp;
    /* |a| well below |b|: the remainder is a itself. */
    if ( expDiff < -1 ) return a;
    /* Make the hidden integer bits explicit and left-align both
     * significands; a gets one bit less shift when its exponent is the
     * smaller, so that aSig stays below 2*bSig. */
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000),
        aSig1,
        15 - ( expDiff < 0 ),
        &aSig0,
        &aSig1
    );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    q = le128( bSig0, bSig1, aSig0, aSig1 );
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    /* Long division: peel off 61 quotient bits per iteration while a
     * large exponent difference remains. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        /* Bias the digit estimate low so the remainder stays non-negative. */
        q = ( 4 < q ) ? q - 4 : 0;
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        expDiff -= 61;
    }
    /* Final partial quotient for the remaining (sub-64-bit) exponent gap. */
    if ( -64 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        q >>= - expDiff;
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        expDiff += 52;
        if ( expDiff < 0 ) {
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        }
        else {
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        }
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
    }
    else {
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
    }
    /* Correction loop: subtract b until the remainder goes negative,
     * keeping the last non-negative remainder as the alternate. */
    do {
        alternateASig0 = aSig0;
        alternateASig1 = aSig1;
        ++q;
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    } while ( 0 <= (int64_t) aSig0 );
    /* Choose between the two candidate remainders by their mean (sum),
     * breaking exact ties toward an even quotient (round-to-nearest-even). */
    add128(
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
    if ( ( sigMean0 < 0 )
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
    }
    /* Negative remainder: negate it and flip the result sign. */
    zSign = ( (int64_t) aSig0 < 0 );
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
    return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
                                         status);
}
6384
/*
 * Compare two floatx80 values.  Returns a FloatRelation (less/equal/
 * greater/unordered).  With is_quiet == false, any NaN operand raises
 * the invalid exception; with is_quiet == true, only signaling NaNs do.
 */
static inline FloatRelation
floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    /* Unsupported encodings compare as unordered and raise invalid. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return float_relation_unordered;
    }
    /* Any NaN operand makes the result unordered. */
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
          ( extractFloatx80Frac( a )<<1 ) ) ||
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
          ( extractFloatx80Frac( b )<<1 ) )) {
        if (!is_quiet ||
            floatx80_is_signaling_nan(a, status) ||
            floatx80_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
    if ( aSign != bSign ) {

        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
             ( ( a.low | b.low ) == 0 ) ) {
            /* zero case: +0 and -0 compare equal */
            return float_relation_equal;
        } else {
            /* Signs differ and not both zero: the positive operand wins. */
            return 1 - (2 * aSign);
        }
    } else {
        /* Normalize pseudo-denormals before comparison. */
        if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
            ++a.high;
        }
        if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
            ++b.high;
        }
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: compare magnitudes, inverting the answer when
             * both operands are negative. */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
6432
6433 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
6434 {
6435 return floatx80_compare_internal(a, b, 0, status);
6436 }
6437
6438 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6439 float_status *status)
6440 {
6441 return floatx80_compare_internal(a, b, 1, status);
6442 }
6443
6444 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
6445 {
6446 bool aSign;
6447 int32_t aExp;
6448 uint64_t aSig;
6449
6450 if (floatx80_invalid_encoding(a)) {
6451 float_raise(float_flag_invalid, status);
6452 return floatx80_default_nan(status);
6453 }
6454 aSig = extractFloatx80Frac( a );
6455 aExp = extractFloatx80Exp( a );
6456 aSign = extractFloatx80Sign( a );
6457
6458 if ( aExp == 0x7FFF ) {
6459 if ( aSig<<1 ) {
6460 return propagateFloatx80NaN(a, a, status);
6461 }
6462 return a;
6463 }
6464
6465 if (aExp == 0) {
6466 if (aSig == 0) {
6467 return a;
6468 }
6469 aExp++;
6470 }
6471
6472 if (n > 0x10000) {
6473 n = 0x10000;
6474 } else if (n < -0x10000) {
6475 n = -0x10000;
6476 }
6477
6478 aExp += n;
6479 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6480 aSign, aExp, aSig, 0, status);
6481 }
6482
6483 static void __attribute__((constructor)) softfloat_init(void)
6484 {
6485 union_float64 ua, ub, uc, ur;
6486
6487 if (QEMU_NO_HARDFLOAT) {
6488 return;
6489 }
6490 /*
6491 * Test that the host's FMA is not obviously broken. For example,
6492 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6493 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6494 */
6495 ua.s = 0x0020000000000001ULL;
6496 ub.s = 0x3ca0000000000000ULL;
6497 uc.s = 0x0020000000000000ULL;
6498 ur.h = fma(ua.h, ub.h, uc.h);
6499 if (ur.s != 0x0020000000000001ULL) {
6500 force_soft_fma = true;
6501 }
6502 }