1 /*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43
44 ===============================================================================
45 */
46
47 /* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89
90 /* We only need stdlib for abort() */
91
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98
99 /*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is neither fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016): 1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that, since exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
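
/*
 * For illustration, the slow approach ruled out above would look
 * something like this with <fenv.h>, paying a flags-register round
 * trip on every operation:
 *
 *     feclearexcept(FE_ALL_EXCEPT);
 *     r = a + b;
 *     if (fetestexcept(FE_INEXACT)) {
 *         float_raise(float_flag_inexact, s);
 *     }
 *
 * The code below avoids this: the hardfloat path is only taken when
 * the guest's inexact flag is already set, so no host flag ever needs
 * to be read back.
 */
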
129 #define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 float_raise(float_flag_input_denormal, s); \
136 } \
137 }
138
139 GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140 GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141 #undef GEN_INPUT_FLUSH__NOCHECK
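
/*
 * For reference, the float32 instantiation above expands to
 * (illustrative, whitespace adjusted):
 *
 *     static inline void float32_input_flush__nocheck(float32 *a,
 *                                                     float_status *s)
 *     {
 *         if (unlikely(float32_is_denormal(*a))) {
 *             *a = float32_set_sign(float32_zero, float32_is_neg(*a));
 *             float_raise(float_flag_input_denormal, s);
 *         }
 *     }
 */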
142
143 #define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152 GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153 GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154 #undef GEN_INPUT_FLUSH1
155
156 #define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166 GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167 GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168 #undef GEN_INPUT_FLUSH2
169
170 #define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181 GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182 GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183 #undef GEN_INPUT_FLUSH3
184
185 /*
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
189 */
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
197 #else
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
204 #endif
205
206 /*
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211 */
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF 1
214 #else
215 # define QEMU_HARDFLOAT_USE_ISINF 0
216 #endif
217
218 /*
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
221 * already set.
222 */
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 # warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226 IEEE implementation
227 # endif
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230 #else
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233 #endif
234
235 static inline bool can_use_fpu(const float_status *s)
236 {
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242 }
243
244 /*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256 typedef union {
257 float32 s;
258 float h;
259 } union_float32;
260
261 typedef union {
262 float64 s;
263 double h;
264 } union_float64;
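
/*
 * The unions above implement type punning between the softfloat bit
 * containers and the host's native types, e.g. (illustrative):
 *
 *     union_float32 u;
 *     u.s = xa;          // store the float32 bit pattern
 *     float host = u.h;  // reinterpret it as a host float
 *
 * Punning through a union (rather than a pointer cast) is the idiom
 * that stays within the aliasing rules GCC and Clang implement, and
 * works because float32 and the host float share an IEEE bit layout.
 */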
265
266 typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267 typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269 typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270 typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271 typedef float (*hard_f32_op2_fn)(float a, float b);
272 typedef double (*hard_f64_op2_fn)(double a, double b);
273
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287 }
288
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297 }
298
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311 }
312
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324 }
325
326 static inline bool f32_is_inf(union_float32 a)
327 {
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332 }
333
334 static inline bool f64_is_inf(union_float64 a)
335 {
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340 }
341
342 static inline float32
343 float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
345 f32_check_fn pre, f32_check_fn post)
346 {
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
363 float_raise(float_flag_overflow, s);
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371 }
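
/*
 * Usage sketch (the real definitions appear further down): float32_add()
 * ultimately calls
 *
 *     float32_gen2(a, b, s, hard_f32_add, soft_f32_add,
 *                  f32_is_zon2, f32_addsubmul_post);
 *
 * pre() (f32_is_zon2 here) rejects operands the host FPU cannot handle
 * transparently; post() (f32_addsubmul_post here) decides, for a tiny
 * result, whether underflow detection must be redone in soft-float.
 */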
372
373 static inline float64
374 float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
376 f64_check_fn pre, f64_check_fn post)
377 {
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
394 float_raise(float_flag_overflow, s);
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402 }
403
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
407
408 static inline uint32_t extractFloat32Frac(float32 a)
409 {
410 return float32_val(a) & 0x007FFFFF;
411 }
412
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
416
417 static inline int extractFloat32Exp(float32 a)
418 {
419 return (float32_val(a) >> 23) & 0xFF;
420 }
421
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
425
426 static inline bool extractFloat32Sign(float32 a)
427 {
428 return float32_val(a) >> 31;
429 }
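
/*
 * Worked example (illustrative): float32 0xC0490FDB (-pi) splits into
 * sign = 1, exp = 0x80 (i.e. +1 after removing the bias of 127), and
 * frac = 0x490FDB.
 */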
430
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
434
435 static inline uint64_t extractFloat64Frac(float64 a)
436 {
437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
438 }
439
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
443
444 static inline int extractFloat64Exp(float64 a)
445 {
446 return (float64_val(a) >> 52) & 0x7FF;
447 }
448
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
452
453 static inline bool extractFloat64Sign(float64 a)
454 {
455 return float64_val(a) >> 63;
456 }
457
458 /*
459 * Classify a floating point number. Everything from float_class_qnan up
460 * is a NaN, so cls >= float_class_qnan tests for any kind of NaN.
461 */
462
463 typedef enum __attribute__ ((__packed__)) {
464 float_class_unclassified,
465 float_class_zero,
466 float_class_normal,
467 float_class_inf,
468 float_class_qnan, /* all NaNs from here */
469 float_class_snan,
470 } FloatClass;
471
472 #define float_cmask(bit) (1u << (bit))
473
474 enum {
475 float_cmask_zero = float_cmask(float_class_zero),
476 float_cmask_normal = float_cmask(float_class_normal),
477 float_cmask_inf = float_cmask(float_class_inf),
478 float_cmask_qnan = float_cmask(float_class_qnan),
479 float_cmask_snan = float_cmask(float_class_snan),
480
481 float_cmask_infzero = float_cmask_zero | float_cmask_inf,
482 float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
483 };
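
/*
 * These masks collapse several class tests into one bit test, e.g.
 * (illustrative, in the style of the pick_nan helpers below):
 *
 *     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 *     if (unlikely(ab_mask & float_cmask_anynan)) {
 *         // at least one of a, b is a (quiet or signaling) NaN
 *     }
 */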
484
485 /* Flags for parts_minmax. */
486 enum {
487 /* Set for minimum; clear for maximum. */
488 minmax_ismin = 1,
489 /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
490 minmax_isnum = 2,
491 /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
492 minmax_ismag = 4,
493 };
494
495 /* Simple helpers for checking if, or what kind of, NaN we have */
496 static inline __attribute__((unused)) bool is_nan(FloatClass c)
497 {
498 return unlikely(c >= float_class_qnan);
499 }
500
501 static inline __attribute__((unused)) bool is_snan(FloatClass c)
502 {
503 return c == float_class_snan;
504 }
505
506 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
507 {
508 return c == float_class_qnan;
509 }
510
511 /*
512 * Structure holding all of the decomposed parts of a float.
513 * The exponent is unbiased and the fraction is normalized.
514 *
515 * The fraction words are stored in big-endian word ordering,
516 * so that truncation from a larger format to a smaller format
517 * can be done simply by ignoring subsequent elements.
518 */
519
520 typedef struct {
521 FloatClass cls;
522 bool sign;
523 int32_t exp;
524 union {
525 /* Routines that know the structure may reference the singular name. */
526 uint64_t frac;
527 /*
528 * Routines expanded with multiple structures reference "hi" and "lo"
529 * depending on the operation. In FloatParts64, "hi" and "lo" are
530 * both the same word and aliased here.
531 */
532 uint64_t frac_hi;
533 uint64_t frac_lo;
534 };
535 } FloatParts64;
536
537 typedef struct {
538 FloatClass cls;
539 bool sign;
540 int32_t exp;
541 uint64_t frac_hi;
542 uint64_t frac_lo;
543 } FloatParts128;
544
545 typedef struct {
546 FloatClass cls;
547 bool sign;
548 int32_t exp;
549 uint64_t frac_hi;
550 uint64_t frac_hm; /* high-middle */
551 uint64_t frac_lm; /* low-middle */
552 uint64_t frac_lo;
553 } FloatParts256;
554
555 /* These apply to the most significant word of each FloatPartsN. */
556 #define DECOMPOSED_BINARY_POINT 63
557 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
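
/*
 * Example (illustrative): the value 1.5 in canonical form has
 * cls = float_class_normal, exp = 0 (unbiased), and
 * frac = DECOMPOSED_IMPLICIT_BIT | (1ull << 62) = 0xC000000000000000,
 * i.e. binary 1.1 with the integer bit sitting at the binary point.
 */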
558
559 /* Structure holding all of the relevant parameters for a format.
560 * exp_size: the size of the exponent field
561 * exp_bias: the offset applied to the exponent field
562 * exp_max: the maximum normalised exponent
563 * frac_size: the size of the fraction field
564 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
565 * The following are computed based on the size of the fraction
566 * round_mask: bits below lsb which must be rounded
567 * The following optional modifiers are available:
568 * arm_althp: handle ARM Alternative Half Precision
569 */
570 typedef struct {
571 int exp_size;
572 int exp_bias;
573 int exp_max;
574 int frac_size;
575 int frac_shift;
576 bool arm_althp;
577 uint64_t round_mask;
578 } FloatFmt;
579
580 /* Expand fields based on the size of exponent and fraction */
581 #define FLOAT_PARAMS_(E) \
582 .exp_size = E, \
583 .exp_bias = ((1 << E) - 1) >> 1, \
584 .exp_max = (1 << E) - 1
585
586 #define FLOAT_PARAMS(E, F) \
587 FLOAT_PARAMS_(E), \
588 .frac_size = F, \
589 .frac_shift = (-F - 1) & 63, \
590 .round_mask = (1ull << ((-F - 1) & 63)) - 1
591
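/*
 * For instance, FLOAT_PARAMS(8, 23) below yields the IEEE single
 * parameters: exp_bias = 127, exp_max = 255, frac_shift = 40
 * (= (-24) & 63), and round_mask = 0xffffffffff -- the 40 bits left
 * below the 23-bit fraction once it is justified against
 * DECOMPOSED_BINARY_POINT.
 */
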
592 static const FloatFmt float16_params = {
593 FLOAT_PARAMS(5, 10)
594 };
595
596 static const FloatFmt float16_params_ahp = {
597 FLOAT_PARAMS(5, 10),
598 .arm_althp = true
599 };
600
601 static const FloatFmt bfloat16_params = {
602 FLOAT_PARAMS(8, 7)
603 };
604
605 static const FloatFmt float32_params = {
606 FLOAT_PARAMS(8, 23)
607 };
608
609 static const FloatFmt float64_params = {
610 FLOAT_PARAMS(11, 52)
611 };
612
613 static const FloatFmt float128_params = {
614 FLOAT_PARAMS(15, 112)
615 };
616
617 #define FLOATX80_PARAMS(R) \
618 FLOAT_PARAMS_(15), \
619 .frac_size = R == 64 ? 63 : R, \
620 .frac_shift = 0, \
621 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
622
623 static const FloatFmt floatx80_params[3] = {
624 [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
625 [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
626 [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
627 };
628
629 /* Unpack a float to parts, but do not canonicalize. */
630 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
631 {
632 const int f_size = fmt->frac_size;
633 const int e_size = fmt->exp_size;
634
635 *r = (FloatParts64) {
636 .cls = float_class_unclassified,
637 .sign = extract64(raw, f_size + e_size, 1),
638 .exp = extract64(raw, f_size, e_size),
639 .frac = extract64(raw, 0, f_size)
640 };
641 }
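
/*
 * Worked example (illustrative): unpacking float32 0x3FC00000 (1.5)
 * with f_size = 23 and e_size = 8 gives sign = extract64(raw, 31, 1)
 * = 0, exp = extract64(raw, 23, 8) = 0x7f and frac =
 * extract64(raw, 0, 23) = 0x400000; canonicalization later removes
 * the bias and left-justifies the fraction.
 */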
642
643 static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
644 {
645 unpack_raw64(p, &float16_params, f);
646 }
647
648 static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
649 {
650 unpack_raw64(p, &bfloat16_params, f);
651 }
652
653 static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
654 {
655 unpack_raw64(p, &float32_params, f);
656 }
657
658 static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
659 {
660 unpack_raw64(p, &float64_params, f);
661 }
662
663 static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
664 {
665 *p = (FloatParts128) {
666 .cls = float_class_unclassified,
667 .sign = extract32(f.high, 15, 1),
668 .exp = extract32(f.high, 0, 15),
669 .frac_hi = f.low
670 };
671 }
672
673 static void float128_unpack_raw(FloatParts128 *p, float128 f)
674 {
675 const int f_size = float128_params.frac_size - 64;
676 const int e_size = float128_params.exp_size;
677
678 *p = (FloatParts128) {
679 .cls = float_class_unclassified,
680 .sign = extract64(f.high, f_size + e_size, 1),
681 .exp = extract64(f.high, f_size, e_size),
682 .frac_hi = extract64(f.high, 0, f_size),
683 .frac_lo = f.low,
684 };
685 }
686
687 /* Pack a float from parts, but do not canonicalize. */
688 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
689 {
690 const int f_size = fmt->frac_size;
691 const int e_size = fmt->exp_size;
692 uint64_t ret;
693
694 ret = (uint64_t)p->sign << (f_size + e_size);
695 ret = deposit64(ret, f_size, e_size, p->exp);
696 ret = deposit64(ret, 0, f_size, p->frac);
697 return ret;
698 }
699
700 static inline float16 float16_pack_raw(const FloatParts64 *p)
701 {
702 return make_float16(pack_raw64(p, &float16_params));
703 }
704
705 static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
706 {
707 return pack_raw64(p, &bfloat16_params);
708 }
709
710 static inline float32 float32_pack_raw(const FloatParts64 *p)
711 {
712 return make_float32(pack_raw64(p, &float32_params));
713 }
714
715 static inline float64 float64_pack_raw(const FloatParts64 *p)
716 {
717 return make_float64(pack_raw64(p, &float64_params));
718 }
719
720 static float128 float128_pack_raw(const FloatParts128 *p)
721 {
722 const int f_size = float128_params.frac_size - 64;
723 const int e_size = float128_params.exp_size;
724 uint64_t hi;
725
726 hi = (uint64_t)p->sign << (f_size + e_size);
727 hi = deposit64(hi, f_size, e_size, p->exp);
728 hi = deposit64(hi, 0, f_size, p->frac_hi);
729 return make_float128(hi, p->frac_lo);
730 }
731
732 /*----------------------------------------------------------------------------
733 | Functions and definitions to determine: (1) whether tininess for underflow
734 | is detected before or after rounding by default, (2) what (if anything)
735 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
736 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
737 | are propagated from function inputs to output. These details are target-
738 | specific.
739 *----------------------------------------------------------------------------*/
740 #include "softfloat-specialize.c.inc"
741
742 #define PARTS_GENERIC_64_128(NAME, P) \
743 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
744
745 #define PARTS_GENERIC_64_128_256(NAME, P) \
746 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
747 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
748
749 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
750 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
751
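/*
 * QEMU_GENERIC dispatches on the static type of the first argument,
 * so, illustratively,
 *
 *     FloatParts64 p;
 *     parts_default_nan(&p, status);
 *
 * resolves to parts64_default_nan(&p, status), while a FloatParts128 *
 * argument would resolve to parts128_default_nan.
 */
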
752 static void parts64_return_nan(FloatParts64 *a, float_status *s);
753 static void parts128_return_nan(FloatParts128 *a, float_status *s);
754
755 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
756
757 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
758 float_status *s);
759 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
760 float_status *s);
761
762 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
763
764 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
765 FloatParts64 *c, float_status *s,
766 int ab_mask, int abc_mask);
767 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
768 FloatParts128 *b,
769 FloatParts128 *c,
770 float_status *s,
771 int ab_mask, int abc_mask);
772
773 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
774 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
775
776 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
777 const FloatFmt *fmt);
778 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
779 const FloatFmt *fmt);
780
781 #define parts_canonicalize(A, S, F) \
782 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
783
784 static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
785 const FloatFmt *fmt);
786 static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
787 const FloatFmt *fmt);
788
789 #define parts_uncanon_normal(A, S, F) \
790 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
791
792 static void parts64_uncanon(FloatParts64 *p, float_status *status,
793 const FloatFmt *fmt);
794 static void parts128_uncanon(FloatParts128 *p, float_status *status,
795 const FloatFmt *fmt);
796
797 #define parts_uncanon(A, S, F) \
798 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
799
800 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
801 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
802 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
803
804 #define parts_add_normal(A, B) \
805 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
806
807 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
808 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
809 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
810
811 #define parts_sub_normal(A, B) \
812 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
813
814 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
815 float_status *s, bool subtract);
816 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
817 float_status *s, bool subtract);
818
819 #define parts_addsub(A, B, S, Z) \
820 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
821
822 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
823 float_status *s);
824 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
825 float_status *s);
826
827 #define parts_mul(A, B, S) \
828 PARTS_GENERIC_64_128(mul, A)(A, B, S)
829
830 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
831 FloatParts64 *c, int flags,
832 float_status *s);
833 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
834 FloatParts128 *c, int flags,
835 float_status *s);
836
837 #define parts_muladd(A, B, C, Z, S) \
838 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
839
840 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
841 float_status *s);
842 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
843 float_status *s);
844
845 #define parts_div(A, B, S) \
846 PARTS_GENERIC_64_128(div, A)(A, B, S)
847
848 static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
849 static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
850
851 #define parts_sqrt(A, S, F) \
852 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
853
854 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
855 int scale, int frac_size);
856 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
857 int scale, int frac_size);
858
859 #define parts_round_to_int_normal(A, R, C, F) \
860 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
861
862 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
863 int scale, float_status *s,
864 const FloatFmt *fmt);
865 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
866 int scale, float_status *s,
867 const FloatFmt *fmt);
868
869 #define parts_round_to_int(A, R, C, S, F) \
870 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
871
872 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
873 int scale, int64_t min, int64_t max,
874 float_status *s);
875 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
876 int scale, int64_t min, int64_t max,
877 float_status *s);
878
879 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
880 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
881
882 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
883 int scale, uint64_t max,
884 float_status *s);
885 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
886 int scale, uint64_t max,
887 float_status *s);
888
889 #define parts_float_to_uint(P, R, Z, M, S) \
890 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
891
892 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
893 int scale, float_status *s);
894 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
895 int scale, float_status *s);
896
897 #define parts_sint_to_float(P, I, Z, S) \
898 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
899
900 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
901 int scale, float_status *s);
902 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
903 int scale, float_status *s);
904
905 #define parts_uint_to_float(P, I, Z, S) \
906 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
907
908 static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
909 float_status *s, int flags);
910 static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
911 float_status *s, int flags);
912
913 #define parts_minmax(A, B, S, F) \
914 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
915
916 static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
917 float_status *s, bool q);
918 static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
919 float_status *s, bool q);
920
921 #define parts_compare(A, B, S, Q) \
922 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
923
924 static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
925 static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
926
927 #define parts_scalbn(A, N, S) \
928 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
929
930 /*
931 * Helper functions for softfloat-parts.c.inc, per-size operations.
932 */
933
934 #define FRAC_GENERIC_64_128(NAME, P) \
935 QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
936
937 #define FRAC_GENERIC_64_128_256(NAME, P) \
938 QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
939 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
940
941 static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
942 {
943 return uadd64_overflow(a->frac, b->frac, &r->frac);
944 }
945
946 static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
947 {
948 bool c = 0;
949 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
950 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
951 return c;
952 }
953
954 static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
955 {
956 bool c = 0;
957 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
958 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
959 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
960 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
961 return c;
962 }
963
964 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
965
966 static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
967 {
968 return uadd64_overflow(a->frac, c, &r->frac);
969 }
970
971 static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
972 {
973 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
974 return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
975 }
976
977 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
978
979 static void frac64_allones(FloatParts64 *a)
980 {
981 a->frac = -1;
982 }
983
984 static void frac128_allones(FloatParts128 *a)
985 {
986 a->frac_hi = a->frac_lo = -1;
987 }
988
989 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
990
991 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
992 {
993 return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
994 }
995
996 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
997 {
998 uint64_t ta = a->frac_hi, tb = b->frac_hi;
999 if (ta == tb) {
1000 ta = a->frac_lo, tb = b->frac_lo;
1001 if (ta == tb) {
1002 return 0;
1003 }
1004 }
1005 return ta < tb ? -1 : 1;
1006 }
1007
1008 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
1009
1010 static void frac64_clear(FloatParts64 *a)
1011 {
1012 a->frac = 0;
1013 }
1014
1015 static void frac128_clear(FloatParts128 *a)
1016 {
1017 a->frac_hi = a->frac_lo = 0;
1018 }
1019
1020 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
1021
1022 static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
1023 {
1024 uint64_t n1, n0, r, q;
1025 bool ret;
1026
1027 /*
1028 * We want a 2*N / N-bit division to produce exactly an N-bit
1029 * result, so that we do not lose any precision and so that we
1030 * do not have to renormalize afterward. If A.frac < B.frac,
1031 * then division would produce an (N-1)-bit result; shift A left
1032 * by one to produce an N-bit result, and return true to
1033 * decrement the exponent to match.
1034 *
1035 * The udiv_qrnnd algorithm that we're using requires normalization,
1036 * i.e. the msb of the denominator must be set, which is already true.
1037 */
1038 ret = a->frac < b->frac;
1039 if (ret) {
1040 n0 = a->frac;
1041 n1 = 0;
1042 } else {
1043 n0 = a->frac >> 1;
1044 n1 = a->frac << 63;
1045 }
1046 q = udiv_qrnnd(&r, n0, n1, b->frac);
1047
1048 /* Set lsb if there is a remainder, to set inexact. */
1049 a->frac = q | (r != 0);
1050
1051 return ret;
1052 }
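
/*
 * Note the jamming trick above: OR-ing (r != 0) into the quotient's
 * lsb records "some nonzero remainder was discarded" so that later
 * rounding sees the result as inexact. Illustratively, 1.0 / 3.0
 * leaves r != 0, forcing the lsb to 1 and eventually raising
 * float_flag_inexact.
 */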
1053
1054 static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1055 {
1056 uint64_t q0, q1, a0, a1, b0, b1;
1057 uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1058 bool ret = false;
1059
1060 a0 = a->frac_hi, a1 = a->frac_lo;
1061 b0 = b->frac_hi, b1 = b->frac_lo;
1062
1063 ret = lt128(a0, a1, b0, b1);
1064 if (!ret) {
1065 a1 = shr_double(a0, a1, 1);
1066 a0 = a0 >> 1;
1067 }
1068
1069 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1070 q0 = estimateDiv128To64(a0, a1, b0);
1071
1072 /*
1073 * Estimate is high because B1 was not included (unless B1 == 0).
1074 * Reduce quotient and increase remainder until remainder is non-negative.
1075 * This loop will execute 0 to 2 times.
1076 */
1077 mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1078 sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1079 while (r0 != 0) {
1080 q0--;
1081 add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1082 }
1083
1084 /* Repeat using the remainder, producing a second word of quotient. */
1085 q1 = estimateDiv128To64(r1, r2, b0);
1086 mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1087 sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1088 while (r1 != 0) {
1089 q1--;
1090 add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1091 }
1092
1093 /* Any remainder indicates inexact; set sticky bit. */
1094 q1 |= (r2 | r3) != 0;
1095
1096 a->frac_hi = q0;
1097 a->frac_lo = q1;
1098 return ret;
1099 }
1100
1101 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1102
1103 static bool frac64_eqz(FloatParts64 *a)
1104 {
1105 return a->frac == 0;
1106 }
1107
1108 static bool frac128_eqz(FloatParts128 *a)
1109 {
1110 return (a->frac_hi | a->frac_lo) == 0;
1111 }
1112
1113 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1114
1115 static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1116 {
1117 mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1118 }
1119
1120 static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1121 {
1122 mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1123 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1124 }
1125
1126 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1127
1128 static void frac64_neg(FloatParts64 *a)
1129 {
1130 a->frac = -a->frac;
1131 }
1132
1133 static void frac128_neg(FloatParts128 *a)
1134 {
1135 bool c = 0;
1136 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1137 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1138 }
1139
1140 static void frac256_neg(FloatParts256 *a)
1141 {
1142 bool c = 0;
1143 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1144 a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1145 a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1146 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1147 }
1148
1149 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1150
1151 static int frac64_normalize(FloatParts64 *a)
1152 {
1153 if (a->frac) {
1154 int shift = clz64(a->frac);
1155 a->frac <<= shift;
1156 return shift;
1157 }
1158 return 64;
1159 }
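
/*
 * Example (illustrative): frac64_normalize on frac = 1 shifts left by
 * clz64(1) = 63, leaving frac = 0x8000000000000000 with the msb at
 * DECOMPOSED_BINARY_POINT, and returns 63 so the caller can adjust
 * the exponent to compensate.
 */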
1160
1161 static int frac128_normalize(FloatParts128 *a)
1162 {
1163 if (a->frac_hi) {
1164 int shl = clz64(a->frac_hi);
1165 a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1166 a->frac_lo <<= shl;
1167 return shl;
1168 } else if (a->frac_lo) {
1169 int shl = clz64(a->frac_lo);
1170 a->frac_hi = a->frac_lo << shl;
1171 a->frac_lo = 0;
1172 return shl + 64;
1173 }
1174 return 128;
1175 }
1176
1177 static int frac256_normalize(FloatParts256 *a)
1178 {
1179 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1180 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1181 int ret, shl;
1182
1183 if (likely(a0)) {
1184 shl = clz64(a0);
1185 if (shl == 0) {
1186 return 0;
1187 }
1188 ret = shl;
1189 } else {
1190 if (a1) {
1191 ret = 64;
1192 a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1193 } else if (a2) {
1194 ret = 128;
1195 a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1196 } else if (a3) {
1197 ret = 192;
1198 a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1199 } else {
1200 ret = 256;
1201 a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1202 goto done;
1203 }
1204 shl = clz64(a0);
1205 if (shl == 0) {
1206 goto done;
1207 }
1208 ret += shl;
1209 }
1210
1211 a0 = shl_double(a0, a1, shl);
1212 a1 = shl_double(a1, a2, shl);
1213 a2 = shl_double(a2, a3, shl);
1214 a3 <<= shl;
1215
1216 done:
1217 a->frac_hi = a0;
1218 a->frac_hm = a1;
1219 a->frac_lm = a2;
1220 a->frac_lo = a3;
1221 return ret;
1222 }
1223
1224 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1225
1226 static void frac64_shl(FloatParts64 *a, int c)
1227 {
1228 a->frac <<= c;
1229 }
1230
1231 static void frac128_shl(FloatParts128 *a, int c)
1232 {
1233 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1234
1235 if (c & 64) {
1236 a0 = a1, a1 = 0;
1237 }
1238
1239 c &= 63;
1240 if (c) {
1241 a0 = shl_double(a0, a1, c);
1242 a1 = a1 << c;
1243 }
1244
1245 a->frac_hi = a0;
1246 a->frac_lo = a1;
1247 }
1248
1249 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1250
1251 static void frac64_shr(FloatParts64 *a, int c)
1252 {
1253 a->frac >>= c;
1254 }
1255
1256 static void frac128_shr(FloatParts128 *a, int c)
1257 {
1258 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1259
1260 if (c & 64) {
1261 a1 = a0, a0 = 0;
1262 }
1263
1264 c &= 63;
1265 if (c) {
1266 a1 = shr_double(a0, a1, c);
1267 a0 = a0 >> c;
1268 }
1269
1270 a->frac_hi = a0;
1271 a->frac_lo = a1;
1272 }
1273
1274 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1275
1276 static void frac64_shrjam(FloatParts64 *a, int c)
1277 {
1278 uint64_t a0 = a->frac;
1279
1280 if (likely(c != 0)) {
1281 if (likely(c < 64)) {
1282 a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1283 } else {
1284 a0 = a0 != 0;
1285 }
1286 a->frac = a0;
1287 }
1288 }
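
/*
 * Worked example (illustrative): shrjam of frac = 0x14 (binary 10100)
 * by c = 3 yields 0x2 | 1 = 0x3: the discarded low bits (100) were
 * nonzero, so the lsb is "jammed" to record the lost precision.
 */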
1289
1290 static void frac128_shrjam(FloatParts128 *a, int c)
1291 {
1292 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1293 uint64_t sticky = 0;
1294
1295 if (unlikely(c == 0)) {
1296 return;
1297 } else if (likely(c < 64)) {
1298 /* nothing */
1299 } else if (likely(c < 128)) {
1300 sticky = a1;
1301 a1 = a0;
1302 a0 = 0;
1303 c &= 63;
1304 if (c == 0) {
1305 goto done;
1306 }
1307 } else {
1308 sticky = a0 | a1;
1309 a0 = a1 = 0;
1310 goto done;
1311 }
1312
1313 sticky |= shr_double(a1, 0, c);
1314 a1 = shr_double(a0, a1, c);
1315 a0 = a0 >> c;
1316
1317 done:
1318 a->frac_lo = a1 | (sticky != 0);
1319 a->frac_hi = a0;
1320 }
1321
1322 static void frac256_shrjam(FloatParts256 *a, int c)
1323 {
1324 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1325 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1326 uint64_t sticky = 0;
1327
1328 if (unlikely(c == 0)) {
1329 return;
1330 } else if (likely(c < 64)) {
1331 /* nothing */
1332 } else if (likely(c < 256)) {
1333 if (unlikely(c & 128)) {
1334 sticky |= a2 | a3;
1335 a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1336 }
1337 if (unlikely(c & 64)) {
1338 sticky |= a3;
1339 a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1340 }
1341 c &= 63;
1342 if (c == 0) {
1343 goto done;
1344 }
1345 } else {
1346 sticky = a0 | a1 | a2 | a3;
1347 a0 = a1 = a2 = a3 = 0;
1348 goto done;
1349 }
1350
1351 sticky |= shr_double(a3, 0, c);
1352 a3 = shr_double(a2, a3, c);
1353 a2 = shr_double(a1, a2, c);
1354 a1 = shr_double(a0, a1, c);
1355 a0 = a0 >> c;
1356
1357 done:
1358 a->frac_lo = a3 | (sticky != 0);
1359 a->frac_lm = a2;
1360 a->frac_hm = a1;
1361 a->frac_hi = a0;
1362 }
1363
1364 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1365
1366 static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1367 {
1368 return usub64_overflow(a->frac, b->frac, &r->frac);
1369 }
1370
1371 static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1372 {
1373 bool c = 0;
1374 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1375 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1376 return c;
1377 }
1378
1379 static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1380 {
1381 bool c = 0;
1382 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1383 r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1384 r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1385 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1386 return c;
1387 }
1388
1389 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1390
1391 static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1392 {
1393 r->frac = a->frac_hi | (a->frac_lo != 0);
1394 }
1395
1396 static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1397 {
1398 r->frac_hi = a->frac_hi;
1399 r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1400 }
1401
1402 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1403
1404 static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1405 {
1406 r->frac_hi = a->frac;
1407 r->frac_lo = 0;
1408 }
1409
1410 static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1411 {
1412 r->frac_hi = a->frac_hi;
1413 r->frac_hm = a->frac_lo;
1414 r->frac_lm = 0;
1415 r->frac_lo = 0;
1416 }
1417
1418 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1419
1420 /*
1421 * Reciprocal sqrt table. 1 bit of exponent, 6 bits of mantissa.
1422 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1423 * and thus MIT licensed.
1424 */
1425 static const uint16_t rsqrt_tab[128] = {
1426 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1427 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1428 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1429 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1430 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1431 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1432 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1433 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1434 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1435 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1436 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1437 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1438 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1439 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1440 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1441 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1442 };
1443
1444 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1445 #define FloatPartsN glue(FloatParts,N)
1446 #define FloatPartsW glue(FloatParts,W)
1447
1448 #define N 64
1449 #define W 128
1450
1451 #include "softfloat-parts-addsub.c.inc"
1452 #include "softfloat-parts.c.inc"
1453
1454 #undef N
1455 #undef W
1456 #define N 128
1457 #define W 256
1458
1459 #include "softfloat-parts-addsub.c.inc"
1460 #include "softfloat-parts.c.inc"
1461
1462 #undef N
1463 #undef W
1464 #define N 256
1465
1466 #include "softfloat-parts-addsub.c.inc"
1467
1468 #undef N
1469 #undef W
1470 #undef partsN
1471 #undef FloatPartsN
1472 #undef FloatPartsW
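
/*
 * The N/W dance above is C's poor-man's template: softfloat-parts.c.inc
 * is textually included once per width, with partsN(NAME) expanding to
 * parts64_NAME, parts128_NAME or parts256_NAME, and FloatPartsW naming
 * the double-width type used for intermediates (FloatParts128 when
 * N = 64, FloatParts256 when N = 128).
 */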
1473
1474 /*
1475 * Pack/unpack routines with a specific FloatFmt.
1476 */
1477
1478 static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1479 float_status *s, const FloatFmt *params)
1480 {
1481 float16_unpack_raw(p, f);
1482 parts_canonicalize(p, s, params);
1483 }
1484
1485 static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1486 float_status *s)
1487 {
1488 float16a_unpack_canonical(p, f, s, &float16_params);
1489 }
1490
1491 static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1492 float_status *s)
1493 {
1494 bfloat16_unpack_raw(p, f);
1495 parts_canonicalize(p, s, &bfloat16_params);
1496 }
1497
1498 static float16 float16a_round_pack_canonical(FloatParts64 *p,
1499 float_status *s,
1500 const FloatFmt *params)
1501 {
1502 parts_uncanon(p, s, params);
1503 return float16_pack_raw(p);
1504 }
1505
1506 static float16 float16_round_pack_canonical(FloatParts64 *p,
1507 float_status *s)
1508 {
1509 return float16a_round_pack_canonical(p, s, &float16_params);
1510 }
1511
1512 static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1513 float_status *s)
1514 {
1515 parts_uncanon(p, s, &bfloat16_params);
1516 return bfloat16_pack_raw(p);
1517 }
1518
1519 static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1520 float_status *s)
1521 {
1522 float32_unpack_raw(p, f);
1523 parts_canonicalize(p, s, &float32_params);
1524 }
1525
1526 static float32 float32_round_pack_canonical(FloatParts64 *p,
1527 float_status *s)
1528 {
1529 parts_uncanon(p, s, &float32_params);
1530 return float32_pack_raw(p);
1531 }
1532
1533 static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1534 float_status *s)
1535 {
1536 float64_unpack_raw(p, f);
1537 parts_canonicalize(p, s, &float64_params);
1538 }
1539
1540 static float64 float64_round_pack_canonical(FloatParts64 *p,
1541 float_status *s)
1542 {
1543 parts_uncanon(p, s, &float64_params);
1544 return float64_pack_raw(p);
1545 }
1546
1547 static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1548 float_status *s)
1549 {
1550 float128_unpack_raw(p, f);
1551 parts_canonicalize(p, s, &float128_params);
1552 }
1553
1554 static float128 float128_round_pack_canonical(FloatParts128 *p,
1555 float_status *s)
1556 {
1557 parts_uncanon(p, s, &float128_params);
1558 return float128_pack_raw(p);
1559 }
1560
1561 /* Returns false if the encoding is invalid. */
1562 static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1563 float_status *s)
1564 {
1565 /* Ensure rounding precision is set before beginning. */
1566 switch (s->floatx80_rounding_precision) {
1567 case floatx80_precision_x:
1568 case floatx80_precision_d:
1569 case floatx80_precision_s:
1570 break;
1571 default:
1572 g_assert_not_reached();
1573 }
1574
1575 if (unlikely(floatx80_invalid_encoding(f))) {
1576 float_raise(float_flag_invalid, s);
1577 return false;
1578 }
1579
1580 floatx80_unpack_raw(p, f);
1581
1582 if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1583 parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1584 } else {
1585 /* The explicit integer bit is ignored, after invalid checks. */
1586 p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1587 p->cls = (p->frac_hi == 0 ? float_class_inf
1588 : parts_is_snan_frac(p->frac_hi, s)
1589 ? float_class_snan : float_class_qnan);
1590 }
1591 return true;
1592 }
1593
1594 static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1595 float_status *s)
1596 {
1597 const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1598 uint64_t frac;
1599 int exp;
1600
1601 switch (p->cls) {
1602 case float_class_normal:
1603 if (s->floatx80_rounding_precision == floatx80_precision_x) {
1604 parts_uncanon_normal(p, s, fmt);
1605 frac = p->frac_hi;
1606 exp = p->exp;
1607 } else {
1608 FloatParts64 p64;
1609
1610 p64.sign = p->sign;
1611 p64.exp = p->exp;
1612 frac_truncjam(&p64, p);
1613 parts_uncanon_normal(&p64, s, fmt);
1614 frac = p64.frac;
1615 exp = p64.exp;
1616 }
1617 if (exp != fmt->exp_max) {
1618 break;
1619 }
1620 /* rounded to inf -- fall through to set frac correctly */
1621
1622 case float_class_inf:
1623 /* x86 and m68k differ in the setting of the integer bit. */
1624 frac = floatx80_infinity_low;
1625 exp = fmt->exp_max;
1626 break;
1627
1628 case float_class_zero:
1629 frac = 0;
1630 exp = 0;
1631 break;
1632
1633 case float_class_snan:
1634 case float_class_qnan:
1635 /* NaNs have the integer bit set. */
1636 frac = p->frac_hi | (1ull << 63);
1637 exp = fmt->exp_max;
1638 break;
1639
1640 default:
1641 g_assert_not_reached();
1642 }
1643
1644 return packFloatx80(p->sign, exp, frac);
1645 }
1646
1647 /*
1648 * Addition and subtraction
1649 */
1650
1651 static float16 QEMU_FLATTEN
1652 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1653 {
1654 FloatParts64 pa, pb, *pr;
1655
1656 float16_unpack_canonical(&pa, a, status);
1657 float16_unpack_canonical(&pb, b, status);
1658 pr = parts_addsub(&pa, &pb, status, subtract);
1659
1660 return float16_round_pack_canonical(pr, status);
1661 }
1662
1663 float16 float16_add(float16 a, float16 b, float_status *status)
1664 {
1665 return float16_addsub(a, b, status, false);
1666 }
1667
1668 float16 float16_sub(float16 a, float16 b, float_status *status)
1669 {
1670 return float16_addsub(a, b, status, true);
1671 }
1672
1673 static float32 QEMU_SOFTFLOAT_ATTR
1674 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1675 {
1676 FloatParts64 pa, pb, *pr;
1677
1678 float32_unpack_canonical(&pa, a, status);
1679 float32_unpack_canonical(&pb, b, status);
1680 pr = parts_addsub(&pa, &pb, status, subtract);
1681
1682 return float32_round_pack_canonical(pr, status);
1683 }
1684
1685 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1686 {
1687 return soft_f32_addsub(a, b, status, false);
1688 }
1689
1690 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1691 {
1692 return soft_f32_addsub(a, b, status, true);
1693 }
1694
1695 static float64 QEMU_SOFTFLOAT_ATTR
1696 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1697 {
1698 FloatParts64 pa, pb, *pr;
1699
1700 float64_unpack_canonical(&pa, a, status);
1701 float64_unpack_canonical(&pb, b, status);
1702 pr = parts_addsub(&pa, &pb, status, subtract);
1703
1704 return float64_round_pack_canonical(pr, status);
1705 }
1706
1707 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1708 {
1709 return soft_f64_addsub(a, b, status, false);
1710 }
1711
1712 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1713 {
1714 return soft_f64_addsub(a, b, status, true);
1715 }
1716
1717 static float hard_f32_add(float a, float b)
1718 {
1719 return a + b;
1720 }
1721
1722 static float hard_f32_sub(float a, float b)
1723 {
1724 return a - b;
1725 }
1726
1727 static double hard_f64_add(double a, double b)
1728 {
1729 return a + b;
1730 }
1731
1732 static double hard_f64_sub(double a, double b)
1733 {
1734 return a - b;
1735 }
1736
1737 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1738 {
1739 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1740 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1741 }
1742 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1743 }
1744
1745 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1746 {
1747 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1748 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1749 } else {
1750 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1751 }
1752 }
1753
1754 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1755 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1756 {
1757 return float32_gen2(a, b, s, hard, soft,
1758 f32_is_zon2, f32_addsubmul_post);
1759 }
1760
1761 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1762 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1763 {
1764 return float64_gen2(a, b, s, hard, soft,
1765 f64_is_zon2, f64_addsubmul_post);
1766 }
1767
1768 float32 QEMU_FLATTEN
1769 float32_add(float32 a, float32 b, float_status *s)
1770 {
1771 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1772 }
1773
1774 float32 QEMU_FLATTEN
1775 float32_sub(float32 a, float32 b, float_status *s)
1776 {
1777 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1778 }
1779
1780 float64 QEMU_FLATTEN
1781 float64_add(float64 a, float64 b, float_status *s)
1782 {
1783 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1784 }
1785
1786 float64 QEMU_FLATTEN
1787 float64_sub(float64 a, float64 b, float_status *s)
1788 {
1789 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1790 }
1791
1792 static bfloat16 QEMU_FLATTEN
1793 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1794 {
1795 FloatParts64 pa, pb, *pr;
1796
1797 bfloat16_unpack_canonical(&pa, a, status);
1798 bfloat16_unpack_canonical(&pb, b, status);
1799 pr = parts_addsub(&pa, &pb, status, subtract);
1800
1801 return bfloat16_round_pack_canonical(pr, status);
1802 }
1803
1804 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1805 {
1806 return bfloat16_addsub(a, b, status, false);
1807 }
1808
1809 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1810 {
1811 return bfloat16_addsub(a, b, status, true);
1812 }
1813
1814 static float128 QEMU_FLATTEN
1815 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1816 {
1817 FloatParts128 pa, pb, *pr;
1818
1819 float128_unpack_canonical(&pa, a, status);
1820 float128_unpack_canonical(&pb, b, status);
1821 pr = parts_addsub(&pa, &pb, status, subtract);
1822
1823 return float128_round_pack_canonical(pr, status);
1824 }
1825
1826 float128 float128_add(float128 a, float128 b, float_status *status)
1827 {
1828 return float128_addsub(a, b, status, false);
1829 }
1830
1831 float128 float128_sub(float128 a, float128 b, float_status *status)
1832 {
1833 return float128_addsub(a, b, status, true);
1834 }
1835
1836 static floatx80 QEMU_FLATTEN
1837 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
1838 {
1839 FloatParts128 pa, pb, *pr;
1840
1841 if (!floatx80_unpack_canonical(&pa, a, status) ||
1842 !floatx80_unpack_canonical(&pb, b, status)) {
1843 return floatx80_default_nan(status);
1844 }
1845
1846 pr = parts_addsub(&pa, &pb, status, subtract);
1847 return floatx80_round_pack_canonical(pr, status);
1848 }
1849
1850 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
1851 {
1852 return floatx80_addsub(a, b, status, false);
1853 }
1854
1855 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
1856 {
1857 return floatx80_addsub(a, b, status, true);
1858 }
1859
1860 /*
1861 * Multiplication
1862 */
1863
1864 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1865 {
1866 FloatParts64 pa, pb, *pr;
1867
1868 float16_unpack_canonical(&pa, a, status);
1869 float16_unpack_canonical(&pb, b, status);
1870 pr = parts_mul(&pa, &pb, status);
1871
1872 return float16_round_pack_canonical(pr, status);
1873 }
1874
1875 static float32 QEMU_SOFTFLOAT_ATTR
1876 soft_f32_mul(float32 a, float32 b, float_status *status)
1877 {
1878 FloatParts64 pa, pb, *pr;
1879
1880 float32_unpack_canonical(&pa, a, status);
1881 float32_unpack_canonical(&pb, b, status);
1882 pr = parts_mul(&pa, &pb, status);
1883
1884 return float32_round_pack_canonical(pr, status);
1885 }
1886
1887 static float64 QEMU_SOFTFLOAT_ATTR
1888 soft_f64_mul(float64 a, float64 b, float_status *status)
1889 {
1890 FloatParts64 pa, pb, *pr;
1891
1892 float64_unpack_canonical(&pa, a, status);
1893 float64_unpack_canonical(&pb, b, status);
1894 pr = parts_mul(&pa, &pb, status);
1895
1896 return float64_round_pack_canonical(pr, status);
1897 }
1898
1899 static float hard_f32_mul(float a, float b)
1900 {
1901 return a * b;
1902 }
1903
1904 static double hard_f64_mul(double a, double b)
1905 {
1906 return a * b;
1907 }
1908
1909 float32 QEMU_FLATTEN
1910 float32_mul(float32 a, float32 b, float_status *s)
1911 {
1912 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
1913 f32_is_zon2, f32_addsubmul_post);
1914 }
1915
1916 float64 QEMU_FLATTEN
1917 float64_mul(float64 a, float64 b, float_status *s)
1918 {
1919 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
1920 f64_is_zon2, f64_addsubmul_post);
1921 }
1922
1923 bfloat16 QEMU_FLATTEN
1924 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1925 {
1926 FloatParts64 pa, pb, *pr;
1927
1928 bfloat16_unpack_canonical(&pa, a, status);
1929 bfloat16_unpack_canonical(&pb, b, status);
1930 pr = parts_mul(&pa, &pb, status);
1931
1932 return bfloat16_round_pack_canonical(pr, status);
1933 }
1934
1935 float128 QEMU_FLATTEN
1936 float128_mul(float128 a, float128 b, float_status *status)
1937 {
1938 FloatParts128 pa, pb, *pr;
1939
1940 float128_unpack_canonical(&pa, a, status);
1941 float128_unpack_canonical(&pb, b, status);
1942 pr = parts_mul(&pa, &pb, status);
1943
1944 return float128_round_pack_canonical(pr, status);
1945 }
1946
1947 floatx80 QEMU_FLATTEN
1948 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
1949 {
1950 FloatParts128 pa, pb, *pr;
1951
1952 if (!floatx80_unpack_canonical(&pa, a, status) ||
1953 !floatx80_unpack_canonical(&pb, b, status)) {
1954 return floatx80_default_nan(status);
1955 }
1956
1957 pr = parts_mul(&pa, &pb, status);
1958 return floatx80_round_pack_canonical(pr, status);
1959 }
1960
1961 /*
1962 * Fused multiply-add
1963 */
1964
1965 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1966 int flags, float_status *status)
1967 {
1968 FloatParts64 pa, pb, pc, *pr;
1969
1970 float16_unpack_canonical(&pa, a, status);
1971 float16_unpack_canonical(&pb, b, status);
1972 float16_unpack_canonical(&pc, c, status);
1973 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1974
1975 return float16_round_pack_canonical(pr, status);
1976 }
1977
1978 static float32 QEMU_SOFTFLOAT_ATTR
1979 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1980 float_status *status)
1981 {
1982 FloatParts64 pa, pb, pc, *pr;
1983
1984 float32_unpack_canonical(&pa, a, status);
1985 float32_unpack_canonical(&pb, b, status);
1986 float32_unpack_canonical(&pc, c, status);
1987 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1988
1989 return float32_round_pack_canonical(pr, status);
1990 }
1991
1992 static float64 QEMU_SOFTFLOAT_ATTR
1993 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1994 float_status *status)
1995 {
1996 FloatParts64 pa, pb, pc, *pr;
1997
1998 float64_unpack_canonical(&pa, a, status);
1999 float64_unpack_canonical(&pb, b, status);
2000 float64_unpack_canonical(&pc, c, status);
2001 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2002
2003 return float64_round_pack_canonical(pr, status);
2004 }
2005
2006 static bool force_soft_fma;
2007
2008 float32 QEMU_FLATTEN
2009 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2010 {
2011 union_float32 ua, ub, uc, ur;
2012
2013 ua.s = xa;
2014 ub.s = xb;
2015 uc.s = xc;
2016
2017 if (unlikely(!can_use_fpu(s))) {
2018 goto soft;
2019 }
2020 if (unlikely(flags & float_muladd_halve_result)) {
2021 goto soft;
2022 }
2023
2024 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2025 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2026 goto soft;
2027 }
2028
2029 if (unlikely(force_soft_fma)) {
2030 goto soft;
2031 }
2032
2033 /*
2034 * When (a || b) == 0, there's no need to check for underflow or overflow,
2035 * since we know the addend is (normal || 0) and the product is 0.
2036 */
2037 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2038 union_float32 up;
2039 bool prod_sign;
2040
2041 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2042 prod_sign ^= !!(flags & float_muladd_negate_product);
2043 up.s = float32_set_sign(float32_zero, prod_sign);
2044
2045 if (flags & float_muladd_negate_c) {
2046 uc.h = -uc.h;
2047 }
2048 ur.h = up.h + uc.h;
2049 } else {
2050 union_float32 ua_orig = ua;
2051 union_float32 uc_orig = uc;
2052
2053 if (flags & float_muladd_negate_product) {
2054 ua.h = -ua.h;
2055 }
2056 if (flags & float_muladd_negate_c) {
2057 uc.h = -uc.h;
2058 }
2059
2060 ur.h = fmaf(ua.h, ub.h, uc.h);
2061
2062 if (unlikely(f32_is_inf(ur))) {
2063 float_raise(float_flag_overflow, s);
2064 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2065 ua = ua_orig;
2066 uc = uc_orig;
2067 goto soft;
2068 }
2069 }
2070 if (flags & float_muladd_negate_result) {
2071 return float32_chs(ur.s);
2072 }
2073 return ur.s;
2074
2075 soft:
2076 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2077 }
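/*
 * Sketch of how the flag bits compose; targets use them to express the
 * fused negate variants without separate sign flips (variable names
 * below are only illustrative):
 *
 *     // r = -(a * b) + c
 *     float32 r = float32_muladd(a, b, c, float_muladd_negate_product, &st);
 *
 *     // r2 = -((a * b) + c)
 *     float32 r2 = float32_muladd(a, b, c, float_muladd_negate_result, &st);
 *
 * Either way there is a single rounding of the infinitely precise
 * a * b + c, which is what distinguishes these from separate mul/add.
 */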
2078
2079 float64 QEMU_FLATTEN
2080 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2081 {
2082 union_float64 ua, ub, uc, ur;
2083
2084 ua.s = xa;
2085 ub.s = xb;
2086 uc.s = xc;
2087
2088 if (unlikely(!can_use_fpu(s))) {
2089 goto soft;
2090 }
2091 if (unlikely(flags & float_muladd_halve_result)) {
2092 goto soft;
2093 }
2094
2095 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2096 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2097 goto soft;
2098 }
2099
2100 if (unlikely(force_soft_fma)) {
2101 goto soft;
2102 }
2103
2104 /*
2105 * When (a || b) == 0, there's no need to check for underflow or overflow,
2106 * since we know the addend is (normal || 0) and the product is 0.
2107 */
2108 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2109 union_float64 up;
2110 bool prod_sign;
2111
2112 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2113 prod_sign ^= !!(flags & float_muladd_negate_product);
2114 up.s = float64_set_sign(float64_zero, prod_sign);
2115
2116 if (flags & float_muladd_negate_c) {
2117 uc.h = -uc.h;
2118 }
2119 ur.h = up.h + uc.h;
2120 } else {
2121 union_float64 ua_orig = ua;
2122 union_float64 uc_orig = uc;
2123
2124 if (flags & float_muladd_negate_product) {
2125 ua.h = -ua.h;
2126 }
2127 if (flags & float_muladd_negate_c) {
2128 uc.h = -uc.h;
2129 }
2130
2131 ur.h = fma(ua.h, ub.h, uc.h);
2132
2133 if (unlikely(f64_is_inf(ur))) {
2134 float_raise(float_flag_overflow, s);
2135 } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
2136 ua = ua_orig;
2137 uc = uc_orig;
2138 goto soft;
2139 }
2140 }
2141 if (flags & float_muladd_negate_result) {
2142 return float64_chs(ur.s);
2143 }
2144 return ur.s;
2145
2146 soft:
2147 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2148 }
2149
2150 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2151 int flags, float_status *status)
2152 {
2153 FloatParts64 pa, pb, pc, *pr;
2154
2155 bfloat16_unpack_canonical(&pa, a, status);
2156 bfloat16_unpack_canonical(&pb, b, status);
2157 bfloat16_unpack_canonical(&pc, c, status);
2158 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2159
2160 return bfloat16_round_pack_canonical(pr, status);
2161 }
2162
2163 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2164 int flags, float_status *status)
2165 {
2166 FloatParts128 pa, pb, pc, *pr;
2167
2168 float128_unpack_canonical(&pa, a, status);
2169 float128_unpack_canonical(&pb, b, status);
2170 float128_unpack_canonical(&pc, c, status);
2171 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2172
2173 return float128_round_pack_canonical(pr, status);
2174 }
2175
2176 /*
2177 * Division
2178 */
2179
2180 float16 float16_div(float16 a, float16 b, float_status *status)
2181 {
2182 FloatParts64 pa, pb, *pr;
2183
2184 float16_unpack_canonical(&pa, a, status);
2185 float16_unpack_canonical(&pb, b, status);
2186 pr = parts_div(&pa, &pb, status);
2187
2188 return float16_round_pack_canonical(pr, status);
2189 }
2190
2191 static float32 QEMU_SOFTFLOAT_ATTR
2192 soft_f32_div(float32 a, float32 b, float_status *status)
2193 {
2194 FloatParts64 pa, pb, *pr;
2195
2196 float32_unpack_canonical(&pa, a, status);
2197 float32_unpack_canonical(&pb, b, status);
2198 pr = parts_div(&pa, &pb, status);
2199
2200 return float32_round_pack_canonical(pr, status);
2201 }
2202
2203 static float64 QEMU_SOFTFLOAT_ATTR
2204 soft_f64_div(float64 a, float64 b, float_status *status)
2205 {
2206 FloatParts64 pa, pb, *pr;
2207
2208 float64_unpack_canonical(&pa, a, status);
2209 float64_unpack_canonical(&pb, b, status);
2210 pr = parts_div(&pa, &pb, status);
2211
2212 return float64_round_pack_canonical(pr, status);
2213 }
2214
2215 static float hard_f32_div(float a, float b)
2216 {
2217 return a / b;
2218 }
2219
2220 static double hard_f64_div(double a, double b)
2221 {
2222 return a / b;
2223 }
2224
2225 static bool f32_div_pre(union_float32 a, union_float32 b)
2226 {
2227 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2228 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2229 fpclassify(b.h) == FP_NORMAL;
2230 }
2231 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2232 }
2233
2234 static bool f64_div_pre(union_float64 a, union_float64 b)
2235 {
2236 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2237 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2238 fpclassify(b.h) == FP_NORMAL;
2239 }
2240 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2241 }
2242
2243 static bool f32_div_post(union_float32 a, union_float32 b)
2244 {
2245 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2246 return fpclassify(a.h) != FP_ZERO;
2247 }
2248 return !float32_is_zero(a.s);
2249 }
2250
2251 static bool f64_div_post(union_float64 a, union_float64 b)
2252 {
2253 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2254 return fpclassify(a.h) != FP_ZERO;
2255 }
2256 return !float64_is_zero(a.s);
2257 }
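/*
 * Sketch of what the guards above mean for concrete inputs (assumes
 * the float32_one/float32_zero/make_float32 helpers from softfloat.h):
 *
 *     float32 q = float32_div(float32_one, make_float32(0x40400000), &st);
 *     // 1.0f / 3.0f: normal / normal passes the pre check, so the
 *     // host FPU divides; can_use_fpu() has already ensured that the
 *     // inexact flag is accrued.
 *
 *     q = float32_div(float32_one, float32_zero, &st);
 *     // divisor is not FP_NORMAL: soft path, which raises
 *     // float_flag_divbyzero and returns +Inf.
 */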
2258
2259 float32 QEMU_FLATTEN
2260 float32_div(float32 a, float32 b, float_status *s)
2261 {
2262 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2263 f32_div_pre, f32_div_post);
2264 }
2265
2266 float64 QEMU_FLATTEN
2267 float64_div(float64 a, float64 b, float_status *s)
2268 {
2269 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2270 f64_div_pre, f64_div_post);
2271 }
2272
2273 bfloat16 QEMU_FLATTEN
2274 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2275 {
2276 FloatParts64 pa, pb, *pr;
2277
2278 bfloat16_unpack_canonical(&pa, a, status);
2279 bfloat16_unpack_canonical(&pb, b, status);
2280 pr = parts_div(&pa, &pb, status);
2281
2282 return bfloat16_round_pack_canonical(pr, status);
2283 }
2284
2285 float128 QEMU_FLATTEN
2286 float128_div(float128 a, float128 b, float_status *status)
2287 {
2288 FloatParts128 pa, pb, *pr;
2289
2290 float128_unpack_canonical(&pa, a, status);
2291 float128_unpack_canonical(&pb, b, status);
2292 pr = parts_div(&pa, &pb, status);
2293
2294 return float128_round_pack_canonical(pr, status);
2295 }
2296
2297 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2298 {
2299 FloatParts128 pa, pb, *pr;
2300
2301 if (!floatx80_unpack_canonical(&pa, a, status) ||
2302 !floatx80_unpack_canonical(&pb, b, status)) {
2303 return floatx80_default_nan(status);
2304 }
2305
2306 pr = parts_div(&pa, &pb, status);
2307 return floatx80_round_pack_canonical(pr, status);
2308 }
2309
2310 /*
2311 * Float to Float conversions
2312 *
2313 * Returns the result of converting one float format to another. The
2314 * conversion is performed according to the IEC/IEEE Standard for
2315 * Binary Floating-Point Arithmetic.
2316 *
2317 * Usually this only needs to take care of raising invalid exceptions
2318 * and handling the conversion of NaNs.
2319 */
2320
2321 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2322 {
2323 switch (a->cls) {
2324 case float_class_qnan:
2325 case float_class_snan:
2326 /*
2327 * There is no NaN in the destination format. Raise Invalid
2328 * and return a zero with the sign of the input NaN.
2329 */
2330 float_raise(float_flag_invalid, s);
2331 a->cls = float_class_zero;
2332 break;
2333
2334 case float_class_inf:
2335 /*
2336 * There is no Inf in the destination format. Raise Invalid
2337 * and return the maximum normal with the correct sign.
2338 */
2339 float_raise(float_flag_invalid, s);
2340 a->cls = float_class_normal;
2341 a->exp = float16_params_ahp.exp_max;
2342 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2343 float16_params_ahp.frac_size + 1);
2344 break;
2345
2346 case float_class_normal:
2347 case float_class_zero:
2348 break;
2349
2350 default:
2351 g_assert_not_reached();
2352 }
2353 }
2354
2355 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2356 {
2357 if (is_nan(a->cls)) {
2358 parts_return_nan(a, s);
2359 }
2360 }
2361
2362 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2363 {
2364 if (is_nan(a->cls)) {
2365 parts_return_nan(a, s);
2366 }
2367 }
2368
2369 #define parts_float_to_float(P, S) \
2370 PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2371
2372 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2373 float_status *s)
2374 {
2375 a->cls = b->cls;
2376 a->sign = b->sign;
2377 a->exp = b->exp;
2378
2379 if (a->cls == float_class_normal) {
2380 frac_truncjam(a, b);
2381 } else if (is_nan(a->cls)) {
2382 /* Discard the low bits of the NaN. */
2383 a->frac = b->frac_hi;
2384 parts_return_nan(a, s);
2385 }
2386 }
2387
2388 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2389 float_status *s)
2390 {
2391 a->cls = b->cls;
2392 a->sign = b->sign;
2393 a->exp = b->exp;
2394 frac_widen(a, b);
2395
2396 if (is_nan(a->cls)) {
2397 parts_return_nan(a, s);
2398 }
2399 }
2400
2401 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2402 {
2403 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2404 FloatParts64 p;
2405
2406 float16a_unpack_canonical(&p, a, s, fmt16);
2407 parts_float_to_float(&p, s);
2408 return float32_round_pack_canonical(&p, s);
2409 }
2410
2411 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2412 {
2413 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2414 FloatParts64 p;
2415
2416 float16a_unpack_canonical(&p, a, s, fmt16);
2417 parts_float_to_float(&p, s);
2418 return float64_round_pack_canonical(&p, s);
2419 }
2420
2421 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2422 {
2423 FloatParts64 p;
2424 const FloatFmt *fmt;
2425
2426 float32_unpack_canonical(&p, a, s);
2427 if (ieee) {
2428 parts_float_to_float(&p, s);
2429 fmt = &float16_params;
2430 } else {
2431 parts_float_to_ahp(&p, s);
2432 fmt = &float16_params_ahp;
2433 }
2434 return float16a_round_pack_canonical(&p, s, fmt);
2435 }
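/*
 * Sketch of the ieee flag: false selects ARM's Alternative Half
 * Precision, which has no Inf/NaN encodings, hence the
 * parts_float_to_ahp() special-casing above.  Assumes float32_infinity
 * from softfloat.h.
 *
 *     float16 h1 = float32_to_float16(float32_infinity, true, &st);
 *     // IEEE: 0x7c00, +Inf
 *     float16 h2 = float32_to_float16(float32_infinity, false, &st);
 *     // AHP: float_flag_invalid, result is the maximum normal 0x7fff
 */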
2436
2437 static float64 QEMU_SOFTFLOAT_ATTR
2438 soft_float32_to_float64(float32 a, float_status *s)
2439 {
2440 FloatParts64 p;
2441
2442 float32_unpack_canonical(&p, a, s);
2443 parts_float_to_float(&p, s);
2444 return float64_round_pack_canonical(&p, s);
2445 }
2446
2447 float64 float32_to_float64(float32 a, float_status *s)
2448 {
2449 if (likely(float32_is_normal(a))) {
2450 /* Widening conversion can never produce inexact results. */
2451 union_float32 uf;
2452 union_float64 ud;
2453 uf.s = a;
2454 ud.h = uf.h;
2455 return ud.s;
2456 } else if (float32_is_zero(a)) {
2457 return float64_set_sign(float64_zero, float32_is_neg(a));
2458 } else {
2459 return soft_float32_to_float64(a, s);
2460 }
2461 }
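/*
 * The fast path above works because widening a normal is always exact,
 * so a plain host float-to-double conversion suffices.  Sketch:
 *
 *     float64 d = float32_to_float64(make_float32(0x3fc00000), &st);
 *     // 1.5f -> 0x3ff8000000000000 (1.5), no flags accrued
 */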
2462
2463 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2464 {
2465 FloatParts64 p;
2466 const FloatFmt *fmt;
2467
2468 float64_unpack_canonical(&p, a, s);
2469 if (ieee) {
2470 parts_float_to_float(&p, s);
2471 fmt = &float16_params;
2472 } else {
2473 parts_float_to_ahp(&p, s);
2474 fmt = &float16_params_ahp;
2475 }
2476 return float16a_round_pack_canonical(&p, s, fmt);
2477 }
2478
2479 float32 float64_to_float32(float64 a, float_status *s)
2480 {
2481 FloatParts64 p;
2482
2483 float64_unpack_canonical(&p, a, s);
2484 parts_float_to_float(&p, s);
2485 return float32_round_pack_canonical(&p, s);
2486 }
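/*
 * Narrowing is the direction that can round, so it always goes through
 * the full unpack/round/pack sequence.  Sketch (make_float64 assumed
 * from softfloat.h):
 *
 *     float32 f = float64_to_float32(make_float64(0x3ff0000000000001ull), &st);
 *     // 1.0 + 2**-52 rounds to 1.0f; float_flag_inexact accrued
 *
 *     f = float64_to_float32(make_float64(0x48d0000000000000ull), &st);
 *     // 2**142 exceeds float32's range: +Inf with overflow|inexact
 */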
2487
2488 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2489 {
2490 FloatParts64 p;
2491
2492 bfloat16_unpack_canonical(&p, a, s);
2493 parts_float_to_float(&p, s);
2494 return float32_round_pack_canonical(&p, s);
2495 }
2496
2497 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2498 {
2499 FloatParts64 p;
2500
2501 bfloat16_unpack_canonical(&p, a, s);
2502 parts_float_to_float(&p, s);
2503 return float64_round_pack_canonical(&p, s);
2504 }
2505
2506 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2507 {
2508 FloatParts64 p;
2509
2510 float32_unpack_canonical(&p, a, s);
2511 parts_float_to_float(&p, s);
2512 return bfloat16_round_pack_canonical(&p, s);
2513 }
2514
2515 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2516 {
2517 FloatParts64 p;
2518
2519 float64_unpack_canonical(&p, a, s);
2520 parts_float_to_float(&p, s);
2521 return bfloat16_round_pack_canonical(&p, s);
2522 }
2523
2524 float32 float128_to_float32(float128 a, float_status *s)
2525 {
2526 FloatParts64 p64;
2527 FloatParts128 p128;
2528
2529 float128_unpack_canonical(&p128, a, s);
2530 parts_float_to_float_narrow(&p64, &p128, s);
2531 return float32_round_pack_canonical(&p64, s);
2532 }
2533
2534 float64 float128_to_float64(float128 a, float_status *s)
2535 {
2536 FloatParts64 p64;
2537 FloatParts128 p128;
2538
2539 float128_unpack_canonical(&p128, a, s);
2540 parts_float_to_float_narrow(&p64, &p128, s);
2541 return float64_round_pack_canonical(&p64, s);
2542 }
2543
2544 float128 float32_to_float128(float32 a, float_status *s)
2545 {
2546 FloatParts64 p64;
2547 FloatParts128 p128;
2548
2549 float32_unpack_canonical(&p64, a, s);
2550 parts_float_to_float_widen(&p128, &p64, s);
2551 return float128_round_pack_canonical(&p128, s);
2552 }
2553
2554 float128 float64_to_float128(float64 a, float_status *s)
2555 {
2556 FloatParts64 p64;
2557 FloatParts128 p128;
2558
2559 float64_unpack_canonical(&p64, a, s);
2560 parts_float_to_float_widen(&p128, &p64, s);
2561 return float128_round_pack_canonical(&p128, s);
2562 }
2563
2564 float32 floatx80_to_float32(floatx80 a, float_status *s)
2565 {
2566 FloatParts64 p64;
2567 FloatParts128 p128;
2568
2569 if (floatx80_unpack_canonical(&p128, a, s)) {
2570 parts_float_to_float_narrow(&p64, &p128, s);
2571 } else {
2572 parts_default_nan(&p64, s);
2573 }
2574 return float32_round_pack_canonical(&p64, s);
2575 }
2576
2577 float64 floatx80_to_float64(floatx80 a, float_status *s)
2578 {
2579 FloatParts64 p64;
2580 FloatParts128 p128;
2581
2582 if (floatx80_unpack_canonical(&p128, a, s)) {
2583 parts_float_to_float_narrow(&p64, &p128, s);
2584 } else {
2585 parts_default_nan(&p64, s);
2586 }
2587 return float64_round_pack_canonical(&p64, s);
2588 }
2589
2590 float128 floatx80_to_float128(floatx80 a, float_status *s)
2591 {
2592 FloatParts128 p;
2593
2594 if (floatx80_unpack_canonical(&p, a, s)) {
2595 parts_float_to_float(&p, s);
2596 } else {
2597 parts_default_nan(&p, s);
2598 }
2599 return float128_round_pack_canonical(&p, s);
2600 }
2601
2602 floatx80 float32_to_floatx80(float32 a, float_status *s)
2603 {
2604 FloatParts64 p64;
2605 FloatParts128 p128;
2606
2607 float32_unpack_canonical(&p64, a, s);
2608 parts_float_to_float_widen(&p128, &p64, s);
2609 return floatx80_round_pack_canonical(&p128, s);
2610 }
2611
2612 floatx80 float64_to_floatx80(float64 a, float_status *s)
2613 {
2614 FloatParts64 p64;
2615 FloatParts128 p128;
2616
2617 float64_unpack_canonical(&p64, a, s);
2618 parts_float_to_float_widen(&p128, &p64, s);
2619 return floatx80_round_pack_canonical(&p128, s);
2620 }
2621
2622 floatx80 float128_to_floatx80(float128 a, float_status *s)
2623 {
2624 FloatParts128 p;
2625
2626 float128_unpack_canonical(&p, a, s);
2627 parts_float_to_float(&p, s);
2628 return floatx80_round_pack_canonical(&p, s);
2629 }
2630
2631 /*
2632 * Round to integral value
2633 */
2634
2635 float16 float16_round_to_int(float16 a, float_status *s)
2636 {
2637 FloatParts64 p;
2638
2639 float16_unpack_canonical(&p, a, s);
2640 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2641 return float16_round_pack_canonical(&p, s);
2642 }
2643
2644 float32 float32_round_to_int(float32 a, float_status *s)
2645 {
2646 FloatParts64 p;
2647
2648 float32_unpack_canonical(&p, a, s);
2649 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2650 return float32_round_pack_canonical(&p, s);
2651 }
2652
2653 float64 float64_round_to_int(float64 a, float_status *s)
2654 {
2655 FloatParts64 p;
2656
2657 float64_unpack_canonical(&p, a, s);
2658 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2659 return float64_round_pack_canonical(&p, s);
2660 }
2661
2662 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2663 {
2664 FloatParts64 p;
2665
2666 bfloat16_unpack_canonical(&p, a, s);
2667 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2668 return bfloat16_round_pack_canonical(&p, s);
2669 }
2670
2671 float128 float128_round_to_int(float128 a, float_status *s)
2672 {
2673 FloatParts128 p;
2674
2675 float128_unpack_canonical(&p, a, s);
2676 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2677 return float128_round_pack_canonical(&p, s);
2678 }
2679
2680 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
2681 {
2682 FloatParts128 p;
2683
2684 if (!floatx80_unpack_canonical(&p, a, status)) {
2685 return floatx80_default_nan(status);
2686 }
2687
2688 parts_round_to_int(&p, status->float_rounding_mode, 0, status,
2689 &floatx80_params[status->floatx80_rounding_precision]);
2690 return floatx80_round_pack_canonical(&p, status);
2691 }
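/*
 * Sketch: the result is still a float, rounded to an integral value
 * under float_rounding_mode (inexact accrues when rounding occurred):
 *
 *     set_float_rounding_mode(float_round_nearest_even, &st);
 *     float64 r = float64_round_to_int(make_float64(0x4004000000000000ull), &st);
 *     // 2.5 -> 2.0 (ties-to-even)
 *
 *     set_float_rounding_mode(float_round_up, &st);
 *     r = float64_round_to_int(make_float64(0x4004000000000000ull), &st);
 *     // 2.5 -> 3.0
 */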
2692
2693 /*
2694 * Floating-point to signed integer conversions
2695 */
2696
2697 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2698 float_status *s)
2699 {
2700 FloatParts64 p;
2701
2702 float16_unpack_canonical(&p, a, s);
2703 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
2704 }
2705
2706 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2707 float_status *s)
2708 {
2709 FloatParts64 p;
2710
2711 float16_unpack_canonical(&p, a, s);
2712 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2713 }
2714
2715 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2716 float_status *s)
2717 {
2718 FloatParts64 p;
2719
2720 float16_unpack_canonical(&p, a, s);
2721 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2722 }
2723
2724 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2725 float_status *s)
2726 {
2727 FloatParts64 p;
2728
2729 float16_unpack_canonical(&p, a, s);
2730 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2731 }
2732
2733 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2734 float_status *s)
2735 {
2736 FloatParts64 p;
2737
2738 float32_unpack_canonical(&p, a, s);
2739 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2740 }
2741
2742 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2743 float_status *s)
2744 {
2745 FloatParts64 p;
2746
2747 float32_unpack_canonical(&p, a, s);
2748 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2749 }
2750
2751 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2752 float_status *s)
2753 {
2754 FloatParts64 p;
2755
2756 float32_unpack_canonical(&p, a, s);
2757 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2758 }
2759
2760 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2761 float_status *s)
2762 {
2763 FloatParts64 p;
2764
2765 float64_unpack_canonical(&p, a, s);
2766 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2767 }
2768
2769 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2770 float_status *s)
2771 {
2772 FloatParts64 p;
2773
2774 float64_unpack_canonical(&p, a, s);
2775 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2776 }
2777
2778 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2779 float_status *s)
2780 {
2781 FloatParts64 p;
2782
2783 float64_unpack_canonical(&p, a, s);
2784 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2785 }
2786
2787 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2788 float_status *s)
2789 {
2790 FloatParts64 p;
2791
2792 bfloat16_unpack_canonical(&p, a, s);
2793 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2794 }
2795
2796 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2797 float_status *s)
2798 {
2799 FloatParts64 p;
2800
2801 bfloat16_unpack_canonical(&p, a, s);
2802 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2803 }
2804
2805 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2806 float_status *s)
2807 {
2808 FloatParts64 p;
2809
2810 bfloat16_unpack_canonical(&p, a, s);
2811 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2812 }
2813
2814 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
2815 int scale, float_status *s)
2816 {
2817 FloatParts128 p;
2818
2819 float128_unpack_canonical(&p, a, s);
2820 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2821 }
2822
2823 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
2824 int scale, float_status *s)
2825 {
2826 FloatParts128 p;
2827
2828 float128_unpack_canonical(&p, a, s);
2829 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2830 }
2831
2832 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
2833 int scale, float_status *s)
2834 {
2835 FloatParts128 p;
2836
2837 if (!floatx80_unpack_canonical(&p, a, s)) {
2838 parts_default_nan(&p, s);
2839 }
2840 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2841 }
2842
2843 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
2844 int scale, float_status *s)
2845 {
2846 FloatParts128 p;
2847
2848 if (!floatx80_unpack_canonical(&p, a, s)) {
2849 parts_default_nan(&p, s);
2850 }
2851 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2852 }
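/*
 * The scale argument multiplies the input by 2**scale before rounding,
 * which is how targets express fixed-point conversions.  Sketch:
 *
 *     // treat 1.5f as a conversion with 8 fraction bits: 1.5 * 2**8
 *     int32_t v = float32_to_int32_scalbn(make_float32(0x3fc00000),
 *                                         float_round_to_zero, 8, &st);
 *     // v == 384
 */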
2853
2854 int8_t float16_to_int8(float16 a, float_status *s)
2855 {
2856 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2857 }
2858
2859 int16_t float16_to_int16(float16 a, float_status *s)
2860 {
2861 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2862 }
2863
2864 int32_t float16_to_int32(float16 a, float_status *s)
2865 {
2866 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2867 }
2868
2869 int64_t float16_to_int64(float16 a, float_status *s)
2870 {
2871 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2872 }
2873
2874 int16_t float32_to_int16(float32 a, float_status *s)
2875 {
2876 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2877 }
2878
2879 int32_t float32_to_int32(float32 a, float_status *s)
2880 {
2881 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2882 }
2883
2884 int64_t float32_to_int64(float32 a, float_status *s)
2885 {
2886 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2887 }
2888
2889 int16_t float64_to_int16(float64 a, float_status *s)
2890 {
2891 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2892 }
2893
2894 int32_t float64_to_int32(float64 a, float_status *s)
2895 {
2896 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2897 }
2898
2899 int64_t float64_to_int64(float64 a, float_status *s)
2900 {
2901 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2902 }
2903
2904 int32_t float128_to_int32(float128 a, float_status *s)
2905 {
2906 return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2907 }
2908
2909 int64_t float128_to_int64(float128 a, float_status *s)
2910 {
2911 return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2912 }
2913
2914 int32_t floatx80_to_int32(floatx80 a, float_status *s)
2915 {
2916 return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2917 }
2918
2919 int64_t floatx80_to_int64(floatx80 a, float_status *s)
2920 {
2921 return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2922 }
2923
2924 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2925 {
2926 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2927 }
2928
2929 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2930 {
2931 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2932 }
2933
2934 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2935 {
2936 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2937 }
2938
2939 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2940 {
2941 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2942 }
2943
2944 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2945 {
2946 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2947 }
2948
2949 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2950 {
2951 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2952 }
2953
2954 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2955 {
2956 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2957 }
2958
2959 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2960 {
2961 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2962 }
2963
2964 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2965 {
2966 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2967 }
2968
2969 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
2970 {
2971 return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
2972 }
2973
2974 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
2975 {
2976 return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
2977 }
2978
2979 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
2980 {
2981 return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
2982 }
2983
2984 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
2985 {
2986 return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
2987 }
2988
2989 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2990 {
2991 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2992 }
2993
2994 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2995 {
2996 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2997 }
2998
2999 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3000 {
3001 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3002 }
3003
3004 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3005 {
3006 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3007 }
3008
3009 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3010 {
3011 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3012 }
3013
3014 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3015 {
3016 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3017 }
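/*
 * Sketch of the range handling common to all of the above: values that
 * do not fit saturate to the destination's min/max and accrue
 * float_flag_invalid, rather than wrapping:
 *
 *     int16_t v = float32_to_int16(make_float32(0x47800000), &st);
 *     // 65536.0f > INT16_MAX: v == INT16_MAX, invalid accrued
 */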
3018
3019 /*
3020 * Floating-point to unsigned integer conversions
3021 */
3022
3023 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3024 float_status *s)
3025 {
3026 FloatParts64 p;
3027
3028 float16_unpack_canonical(&p, a, s);
3029 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3030 }
3031
3032 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3033 float_status *s)
3034 {
3035 FloatParts64 p;
3036
3037 float16_unpack_canonical(&p, a, s);
3038 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3039 }
3040
3041 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3042 float_status *s)
3043 {
3044 FloatParts64 p;
3045
3046 float16_unpack_canonical(&p, a, s);
3047 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3048 }
3049
3050 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3051 float_status *s)
3052 {
3053 FloatParts64 p;
3054
3055 float16_unpack_canonical(&p, a, s);
3056 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3057 }
3058
3059 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3060 float_status *s)
3061 {
3062 FloatParts64 p;
3063
3064 float32_unpack_canonical(&p, a, s);
3065 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3066 }
3067
3068 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3069 float_status *s)
3070 {
3071 FloatParts64 p;
3072
3073 float32_unpack_canonical(&p, a, s);
3074 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3075 }
3076
3077 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3078 float_status *s)
3079 {
3080 FloatParts64 p;
3081
3082 float32_unpack_canonical(&p, a, s);
3083 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3084 }
3085
3086 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3087 float_status *s)
3088 {
3089 FloatParts64 p;
3090
3091 float64_unpack_canonical(&p, a, s);
3092 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3093 }
3094
3095 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3096 float_status *s)
3097 {
3098 FloatParts64 p;
3099
3100 float64_unpack_canonical(&p, a, s);
3101 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3102 }
3103
3104 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3105 float_status *s)
3106 {
3107 FloatParts64 p;
3108
3109 float64_unpack_canonical(&p, a, s);
3110 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3111 }
3112
3113 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3114 int scale, float_status *s)
3115 {
3116 FloatParts64 p;
3117
3118 bfloat16_unpack_canonical(&p, a, s);
3119 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3120 }
3121
3122 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3123 int scale, float_status *s)
3124 {
3125 FloatParts64 p;
3126
3127 bfloat16_unpack_canonical(&p, a, s);
3128 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3129 }
3130
3131 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3132 int scale, float_status *s)
3133 {
3134 FloatParts64 p;
3135
3136 bfloat16_unpack_canonical(&p, a, s);
3137 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3138 }
3139
3140 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3141 int scale, float_status *s)
3142 {
3143 FloatParts128 p;
3144
3145 float128_unpack_canonical(&p, a, s);
3146 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3147 }
3148
3149 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3150 int scale, float_status *s)
3151 {
3152 FloatParts128 p;
3153
3154 float128_unpack_canonical(&p, a, s);
3155 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3156 }
3157
3158 uint8_t float16_to_uint8(float16 a, float_status *s)
3159 {
3160 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3161 }
3162
3163 uint16_t float16_to_uint16(float16 a, float_status *s)
3164 {
3165 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3166 }
3167
3168 uint32_t float16_to_uint32(float16 a, float_status *s)
3169 {
3170 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3171 }
3172
3173 uint64_t float16_to_uint64(float16 a, float_status *s)
3174 {
3175 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3176 }
3177
3178 uint16_t float32_to_uint16(float32 a, float_status *s)
3179 {
3180 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3181 }
3182
3183 uint32_t float32_to_uint32(float32 a, float_status *s)
3184 {
3185 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3186 }
3187
3188 uint64_t float32_to_uint64(float32 a, float_status *s)
3189 {
3190 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3191 }
3192
3193 uint16_t float64_to_uint16(float64 a, float_status *s)
3194 {
3195 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3196 }
3197
3198 uint32_t float64_to_uint32(float64 a, float_status *s)
3199 {
3200 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3201 }
3202
3203 uint64_t float64_to_uint64(float64 a, float_status *s)
3204 {
3205 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3206 }
3207
3208 uint32_t float128_to_uint32(float128 a, float_status *s)
3209 {
3210 return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3211 }
3212
3213 uint64_t float128_to_uint64(float128 a, float_status *s)
3214 {
3215 return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3216 }
3217
3218 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3219 {
3220 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3221 }
3222
3223 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3224 {
3225 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3226 }
3227
3228 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3229 {
3230 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3231 }
3232
3233 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3234 {
3235 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3236 }
3237
3238 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3239 {
3240 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3241 }
3242
3243 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3244 {
3245 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3246 }
3247
3248 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3249 {
3250 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3251 }
3252
3253 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3254 {
3255 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3256 }
3257
3258 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3259 {
3260 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3261 }
3262
3263 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3264 {
3265 return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3266 }
3267
3268 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3269 {
3270 return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3271 }
3272
3273 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3274 {
3275 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3276 }
3277
3278 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3279 {
3280 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3281 }
3282
3283 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3284 {
3285 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3286 }
3287
3288 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3289 {
3290 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3291 }
3292
3293 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3294 {
3295 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3296 }
3297
3298 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3299 {
3300 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3301 }
3302
3303 /*
3304 * Signed integer to floating-point conversions
3305 */
3306
3307 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3308 {
3309 FloatParts64 p;
3310
3311 parts_sint_to_float(&p, a, scale, status);
3312 return float16_round_pack_canonical(&p, status);
3313 }
3314
3315 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3316 {
3317 return int64_to_float16_scalbn(a, scale, status);
3318 }
3319
3320 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3321 {
3322 return int64_to_float16_scalbn(a, scale, status);
3323 }
3324
3325 float16 int64_to_float16(int64_t a, float_status *status)
3326 {
3327 return int64_to_float16_scalbn(a, 0, status);
3328 }
3329
3330 float16 int32_to_float16(int32_t a, float_status *status)
3331 {
3332 return int64_to_float16_scalbn(a, 0, status);
3333 }
3334
3335 float16 int16_to_float16(int16_t a, float_status *status)
3336 {
3337 return int64_to_float16_scalbn(a, 0, status);
3338 }
3339
3340 float16 int8_to_float16(int8_t a, float_status *status)
3341 {
3342 return int64_to_float16_scalbn(a, 0, status);
3343 }
3344
3345 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3346 {
3347 FloatParts64 p;
3348
3349 parts_sint_to_float(&p, a, scale, status);
3350 return float32_round_pack_canonical(&p, status);
3351 }
3352
3353 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3354 {
3355 return int64_to_float32_scalbn(a, scale, status);
3356 }
3357
3358 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3359 {
3360 return int64_to_float32_scalbn(a, scale, status);
3361 }
3362
3363 float32 int64_to_float32(int64_t a, float_status *status)
3364 {
3365 return int64_to_float32_scalbn(a, 0, status);
3366 }
3367
3368 float32 int32_to_float32(int32_t a, float_status *status)
3369 {
3370 return int64_to_float32_scalbn(a, 0, status);
3371 }
3372
3373 float32 int16_to_float32(int16_t a, float_status *status)
3374 {
3375 return int64_to_float32_scalbn(a, 0, status);
3376 }
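/*
 * Sketch: this direction can also round, since an int64 need not fit
 * in float32's 24-bit significand:
 *
 *     float32 f = int64_to_float32(INT64_MAX, &st);
 *     // rounds to 2**63 (9.2233720e18f); float_flag_inexact accrued
 *
 *     f = int32_to_float32(1 << 20, &st);
 *     // fits exactly: no flags
 */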
3377
3378 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3379 {
3380 FloatParts64 p;
3381
3382 parts_sint_to_float(&p, a, scale, status);
3383 return float64_round_pack_canonical(&p, status);
3384 }
3385
3386 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3387 {
3388 return int64_to_float64_scalbn(a, scale, status);
3389 }
3390
3391 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3392 {
3393 return int64_to_float64_scalbn(a, scale, status);
3394 }
3395
3396 float64 int64_to_float64(int64_t a, float_status *status)
3397 {
3398 return int64_to_float64_scalbn(a, 0, status);
3399 }
3400
3401 float64 int32_to_float64(int32_t a, float_status *status)
3402 {
3403 return int64_to_float64_scalbn(a, 0, status);
3404 }
3405
3406 float64 int16_to_float64(int16_t a, float_status *status)
3407 {
3408 return int64_to_float64_scalbn(a, 0, status);
3409 }
3410
3411 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3412 {
3413 FloatParts64 p;
3414
3415 parts_sint_to_float(&p, a, scale, status);
3416 return bfloat16_round_pack_canonical(&p, status);
3417 }
3418
3419 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3420 {
3421 return int64_to_bfloat16_scalbn(a, scale, status);
3422 }
3423
3424 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3425 {
3426 return int64_to_bfloat16_scalbn(a, scale, status);
3427 }
3428
3429 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3430 {
3431 return int64_to_bfloat16_scalbn(a, 0, status);
3432 }
3433
3434 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3435 {
3436 return int64_to_bfloat16_scalbn(a, 0, status);
3437 }
3438
3439 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3440 {
3441 return int64_to_bfloat16_scalbn(a, 0, status);
3442 }
3443
3444 float128 int64_to_float128(int64_t a, float_status *status)
3445 {
3446 FloatParts128 p;
3447
3448 parts_sint_to_float(&p, a, 0, status);
3449 return float128_round_pack_canonical(&p, status);
3450 }
3451
3452 float128 int32_to_float128(int32_t a, float_status *status)
3453 {
3454 return int64_to_float128(a, status);
3455 }
3456
3457 floatx80 int64_to_floatx80(int64_t a, float_status *status)
3458 {
3459 FloatParts128 p;
3460
3461 parts_sint_to_float(&p, a, 0, status);
3462 return floatx80_round_pack_canonical(&p, status);
3463 }
3464
3465 floatx80 int32_to_floatx80(int32_t a, float_status *status)
3466 {
3467 return int64_to_floatx80(a, status);
3468 }
3469
3470 /*
3471 * Unsigned integer to floating-point conversions
3472 */
3473
3474 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3475 {
3476 FloatParts64 p;
3477
3478 parts_uint_to_float(&p, a, scale, status);
3479 return float16_round_pack_canonical(&p, status);
3480 }
3481
3482 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3483 {
3484 return uint64_to_float16_scalbn(a, scale, status);
3485 }
3486
3487 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3488 {
3489 return uint64_to_float16_scalbn(a, scale, status);
3490 }
3491
3492 float16 uint64_to_float16(uint64_t a, float_status *status)
3493 {
3494 return uint64_to_float16_scalbn(a, 0, status);
3495 }
3496
3497 float16 uint32_to_float16(uint32_t a, float_status *status)
3498 {
3499 return uint64_to_float16_scalbn(a, 0, status);
3500 }
3501
3502 float16 uint16_to_float16(uint16_t a, float_status *status)
3503 {
3504 return uint64_to_float16_scalbn(a, 0, status);
3505 }
3506
3507 float16 uint8_to_float16(uint8_t a, float_status *status)
3508 {
3509 return uint64_to_float16_scalbn(a, 0, status);
3510 }
3511
3512 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3513 {
3514 FloatParts64 p;
3515
3516 parts_uint_to_float(&p, a, scale, status);
3517 return float32_round_pack_canonical(&p, status);
3518 }
3519
3520 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3521 {
3522 return uint64_to_float32_scalbn(a, scale, status);
3523 }
3524
3525 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3526 {
3527 return uint64_to_float32_scalbn(a, scale, status);
3528 }
3529
3530 float32 uint64_to_float32(uint64_t a, float_status *status)
3531 {
3532 return uint64_to_float32_scalbn(a, 0, status);
3533 }
3534
3535 float32 uint32_to_float32(uint32_t a, float_status *status)
3536 {
3537 return uint64_to_float32_scalbn(a, 0, status);
3538 }
3539
3540 float32 uint16_to_float32(uint16_t a, float_status *status)
3541 {
3542 return uint64_to_float32_scalbn(a, 0, status);
3543 }
3544
3545 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3546 {
3547 FloatParts64 p;
3548
3549 parts_uint_to_float(&p, a, scale, status);
3550 return float64_round_pack_canonical(&p, status);
3551 }
3552
3553 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3554 {
3555 return uint64_to_float64_scalbn(a, scale, status);
3556 }
3557
3558 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3559 {
3560 return uint64_to_float64_scalbn(a, scale, status);
3561 }
3562
3563 float64 uint64_to_float64(uint64_t a, float_status *status)
3564 {
3565 return uint64_to_float64_scalbn(a, 0, status);
3566 }
3567
3568 float64 uint32_to_float64(uint32_t a, float_status *status)
3569 {
3570 return uint64_to_float64_scalbn(a, 0, status);
3571 }
3572
3573 float64 uint16_to_float64(uint16_t a, float_status *status)
3574 {
3575 return uint64_to_float64_scalbn(a, 0, status);
3576 }
3577
3578 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3579 {
3580 FloatParts64 p;
3581
3582 parts_uint_to_float(&p, a, scale, status);
3583 return bfloat16_round_pack_canonical(&p, status);
3584 }
3585
3586 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3587 {
3588 return uint64_to_bfloat16_scalbn(a, scale, status);
3589 }
3590
3591 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3592 {
3593 return uint64_to_bfloat16_scalbn(a, scale, status);
3594 }
3595
3596 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3597 {
3598 return uint64_to_bfloat16_scalbn(a, 0, status);
3599 }
3600
3601 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3602 {
3603 return uint64_to_bfloat16_scalbn(a, 0, status);
3604 }
3605
3606 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3607 {
3608 return uint64_to_bfloat16_scalbn(a, 0, status);
3609 }
3610
3611 float128 uint64_to_float128(uint64_t a, float_status *status)
3612 {
3613 FloatParts128 p;
3614
3615 parts_uint_to_float(&p, a, 0, status);
3616 return float128_round_pack_canonical(&p, status);
3617 }
3618
3619 /*
3620 * Minimum and maximum
3621 */
3622
3623 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3624 {
3625 FloatParts64 pa, pb, *pr;
3626
3627 float16_unpack_canonical(&pa, a, s);
3628 float16_unpack_canonical(&pb, b, s);
3629 pr = parts_minmax(&pa, &pb, s, flags);
3630
3631 return float16_round_pack_canonical(pr, s);
3632 }
3633
3634 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3635 float_status *s, int flags)
3636 {
3637 FloatParts64 pa, pb, *pr;
3638
3639 bfloat16_unpack_canonical(&pa, a, s);
3640 bfloat16_unpack_canonical(&pb, b, s);
3641 pr = parts_minmax(&pa, &pb, s, flags);
3642
3643 return bfloat16_round_pack_canonical(pr, s);
3644 }
3645
3646 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
3647 {
3648 FloatParts64 pa, pb, *pr;
3649
3650 float32_unpack_canonical(&pa, a, s);
3651 float32_unpack_canonical(&pb, b, s);
3652 pr = parts_minmax(&pa, &pb, s, flags);
3653
3654 return float32_round_pack_canonical(pr, s);
3655 }
3656
3657 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
3658 {
3659 FloatParts64 pa, pb, *pr;
3660
3661 float64_unpack_canonical(&pa, a, s);
3662 float64_unpack_canonical(&pb, b, s);
3663 pr = parts_minmax(&pa, &pb, s, flags);
3664
3665 return float64_round_pack_canonical(pr, s);
3666 }
3667
3668 static float128 float128_minmax(float128 a, float128 b,
3669 float_status *s, int flags)
3670 {
3671 FloatParts128 pa, pb, *pr;
3672
3673 float128_unpack_canonical(&pa, a, s);
3674 float128_unpack_canonical(&pb, b, s);
3675 pr = parts_minmax(&pa, &pb, s, flags);
3676
3677 return float128_round_pack_canonical(pr, s);
3678 }
3679
3680 #define MINMAX_1(type, name, flags) \
3681 type type##_##name(type a, type b, float_status *s) \
3682 { return type##_minmax(a, b, s, flags); }
3683
3684 #define MINMAX_2(type) \
3685 MINMAX_1(type, max, 0) \
3686 MINMAX_1(type, maxnum, minmax_isnum) \
3687 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
3688 MINMAX_1(type, min, minmax_ismin) \
3689 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
3690 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3691
3692 MINMAX_2(float16)
3693 MINMAX_2(bfloat16)
3694 MINMAX_2(float32)
3695 MINMAX_2(float64)
3696 MINMAX_2(float128)
3697
3698 #undef MINMAX_1
3699 #undef MINMAX_2
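/*
 * The macros above stamp out six entry points per type: min, max,
 * minnum, maxnum, minnummag and maxnummag.  Sketch of the IEEE-754
 * minNum/maxNum behaviour the *num variants add (float32_default_nan
 * assumed from softfloat.h):
 *
 *     float32 r = float32_minnum(float32_default_nan(&st), float32_one, &st);
 *     // exactly one quiet NaN operand: minnum returns the number, 1.0f
 *
 *     r = float32_min(float32_default_nan(&st), float32_one, &st);
 *     // plain min propagates the NaN instead
 */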
3700
3701 /*
3702 * Floating point compare
3703 */
3704
3705 static FloatRelation QEMU_FLATTEN
3706 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
3707 {
3708 FloatParts64 pa, pb;
3709
3710 float16_unpack_canonical(&pa, a, s);
3711 float16_unpack_canonical(&pb, b, s);
3712 return parts_compare(&pa, &pb, s, is_quiet);
3713 }
3714
3715 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
3716 {
3717 return float16_do_compare(a, b, s, false);
3718 }
3719
3720 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
3721 {
3722 return float16_do_compare(a, b, s, true);
3723 }
3724
3725 static FloatRelation QEMU_SOFTFLOAT_ATTR
3726 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
3727 {
3728 FloatParts64 pa, pb;
3729
3730 float32_unpack_canonical(&pa, a, s);
3731 float32_unpack_canonical(&pb, b, s);
3732 return parts_compare(&pa, &pb, s, is_quiet);
3733 }
3734
3735 static FloatRelation QEMU_FLATTEN
3736 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
3737 {
3738 union_float32 ua, ub;
3739
3740 ua.s = xa;
3741 ub.s = xb;
3742
3743 if (QEMU_NO_HARDFLOAT) {
3744 goto soft;
3745 }
3746
3747 float32_input_flush2(&ua.s, &ub.s, s);
3748 if (isgreaterequal(ua.h, ub.h)) {
3749 if (isgreater(ua.h, ub.h)) {
3750 return float_relation_greater;
3751 }
3752 return float_relation_equal;
3753 }
3754 if (likely(isless(ua.h, ub.h))) {
3755 return float_relation_less;
3756 }
3757 /*
3758 * The only condition remaining is unordered.
3759 * Fall through to set flags.
3760 */
3761 soft:
3762 return float32_do_compare(ua.s, ub.s, s, is_quiet);
3763 }
3764
3765 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
3766 {
3767 return float32_hs_compare(a, b, s, false);
3768 }
3769
3770 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
3771 {
3772 return float32_hs_compare(a, b, s, true);
3773 }
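/*
 * Sketch: both flavours return float_relation_unordered for NaN
 * operands; they differ only in when float_flag_invalid accrues:
 *
 *     float32 qnan = float32_default_nan(&st);
 *
 *     FloatRelation rel = float32_compare(qnan, float32_one, &st);
 *     // signalling compare: invalid accrues even for a quiet NaN
 *
 *     rel = float32_compare_quiet(qnan, float32_one, &st);
 *     // quiet compare: invalid only for signalling NaNs
 */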
3774
3775 static FloatRelation QEMU_SOFTFLOAT_ATTR
3776 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
3777 {
3778 FloatParts64 pa, pb;
3779
3780 float64_unpack_canonical(&pa, a, s);
3781 float64_unpack_canonical(&pb, b, s);
3782 return parts_compare(&pa, &pb, s, is_quiet);
3783 }
3784
3785 static FloatRelation QEMU_FLATTEN
3786 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
3787 {
3788 union_float64 ua, ub;
3789
3790 ua.s = xa;
3791 ub.s = xb;
3792
3793 if (QEMU_NO_HARDFLOAT) {
3794 goto soft;
3795 }
3796
3797 float64_input_flush2(&ua.s, &ub.s, s);
3798 if (isgreaterequal(ua.h, ub.h)) {
3799 if (isgreater(ua.h, ub.h)) {
3800 return float_relation_greater;
3801 }
3802 return float_relation_equal;
3803 }
3804 if (likely(isless(ua.h, ub.h))) {
3805 return float_relation_less;
3806 }
3807 /*
3808 * The only condition remaining is unordered.
3809 * Fall through to set flags.
3810 */
3811 soft:
3812 return float64_do_compare(ua.s, ub.s, s, is_quiet);
3813 }
3814
3815 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
3816 {
3817 return float64_hs_compare(a, b, s, false);
3818 }
3819
3820 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
3821 {
3822 return float64_hs_compare(a, b, s, true);
3823 }
3824
3825 static FloatRelation QEMU_FLATTEN
3826 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
3827 {
3828 FloatParts64 pa, pb;
3829
3830 bfloat16_unpack_canonical(&pa, a, s);
3831 bfloat16_unpack_canonical(&pb, b, s);
3832 return parts_compare(&pa, &pb, s, is_quiet);
3833 }
3834
3835 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3836 {
3837 return bfloat16_do_compare(a, b, s, false);
3838 }
3839
3840 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3841 {
3842 return bfloat16_do_compare(a, b, s, true);
3843 }
3844
3845 static FloatRelation QEMU_FLATTEN
3846 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
3847 {
3848 FloatParts128 pa, pb;
3849
3850 float128_unpack_canonical(&pa, a, s);
3851 float128_unpack_canonical(&pb, b, s);
3852 return parts_compare(&pa, &pb, s, is_quiet);
3853 }
3854
3855 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
3856 {
3857 return float128_do_compare(a, b, s, false);
3858 }
3859
3860 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
3861 {
3862 return float128_do_compare(a, b, s, true);
3863 }
3864
3865 static FloatRelation QEMU_FLATTEN
3866 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
3867 {
3868 FloatParts128 pa, pb;
3869
3870 if (!floatx80_unpack_canonical(&pa, a, s) ||
3871 !floatx80_unpack_canonical(&pb, b, s)) {
3872 return float_relation_unordered;
3873 }
3874 return parts_compare(&pa, &pb, s, is_quiet);
3875 }
3876
3877 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
3878 {
3879 return floatx80_do_compare(a, b, s, false);
3880 }
3881
3882 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
3883 {
3884 return floatx80_do_compare(a, b, s, true);
3885 }
3886
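/*
 * A minimal usage sketch for the compare entry points, assuming a caller
 * that already owns an initialized float_status 'fs'.  The signaling
 * variants raise float_flag_invalid for any NaN operand; the quiet
 * variants raise it only for signaling NaNs.
 *
 *     FloatRelation r = float32_compare_quiet(a, b, &fs);
 *     if (r == float_relation_unordered) {
 *         ... at least one operand was a NaN ...
 *     } else if (r == float_relation_less) {
 *         ... a < b ...
 *     }
 */
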
3887 /*
3888 * Scale by 2**N
3889 */
3890
3891 float16 float16_scalbn(float16 a, int n, float_status *status)
3892 {
3893 FloatParts64 p;
3894
3895 float16_unpack_canonical(&p, a, status);
3896 parts_scalbn(&p, n, status);
3897 return float16_round_pack_canonical(&p, status);
3898 }
3899
3900 float32 float32_scalbn(float32 a, int n, float_status *status)
3901 {
3902 FloatParts64 p;
3903
3904 float32_unpack_canonical(&p, a, status);
3905 parts_scalbn(&p, n, status);
3906 return float32_round_pack_canonical(&p, status);
3907 }
3908
3909 float64 float64_scalbn(float64 a, int n, float_status *status)
3910 {
3911 FloatParts64 p;
3912
3913 float64_unpack_canonical(&p, a, status);
3914 parts_scalbn(&p, n, status);
3915 return float64_round_pack_canonical(&p, status);
3916 }
3917
3918 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3919 {
3920 FloatParts64 p;
3921
3922 bfloat16_unpack_canonical(&p, a, status);
3923 parts_scalbn(&p, n, status);
3924 return bfloat16_round_pack_canonical(&p, status);
3925 }
3926
3927 float128 float128_scalbn(float128 a, int n, float_status *status)
3928 {
3929 FloatParts128 p;
3930
3931 float128_unpack_canonical(&p, a, status);
3932 parts_scalbn(&p, n, status);
3933 return float128_round_pack_canonical(&p, status);
3934 }
3935
3936 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
3937 {
3938 FloatParts128 p;
3939
3940 if (!floatx80_unpack_canonical(&p, a, status)) {
3941 return floatx80_default_nan(status);
3942 }
3943 parts_scalbn(&p, n, status);
3944 return floatx80_round_pack_canonical(&p, status);
3945 }
3946
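/*
 * Scaling by 2**N only adjusts the exponent, so the result is exact unless
 * the final pack overflows or underflows.  A minimal sketch, assuming a
 * caller-owned float_status 'fs':
 *
 *     float32 eighth = float32_scalbn(float32_one, -3, &fs);
 *     ... exactly 0.125, no exception flags set ...
 */
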
3947 /*
3948 * Square Root
3949 */
3950
3951 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
3952 {
3953 FloatParts64 p;
3954
3955 float16_unpack_canonical(&p, a, status);
3956 parts_sqrt(&p, status, &float16_params);
3957 return float16_round_pack_canonical(&p, status);
3958 }
3959
3960 static float32 QEMU_SOFTFLOAT_ATTR
3961 soft_f32_sqrt(float32 a, float_status *status)
3962 {
3963 FloatParts64 p;
3964
3965 float32_unpack_canonical(&p, a, status);
3966 parts_sqrt(&p, status, &float32_params);
3967 return float32_round_pack_canonical(&p, status);
3968 }
3969
3970 static float64 QEMU_SOFTFLOAT_ATTR
3971 soft_f64_sqrt(float64 a, float_status *status)
3972 {
3973 FloatParts64 p;
3974
3975 float64_unpack_canonical(&p, a, status);
3976 parts_sqrt(&p, status, &float64_params);
3977 return float64_round_pack_canonical(&p, status);
3978 }
3979
3980 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3981 {
3982 union_float32 ua, ur;
3983
3984 ua.s = xa;
3985 if (unlikely(!can_use_fpu(s))) {
3986 goto soft;
3987 }
3988
3989 float32_input_flush1(&ua.s, s);
3990 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3991 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3992 fpclassify(ua.h) == FP_ZERO) ||
3993 signbit(ua.h))) {
3994 goto soft;
3995 }
3996 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3997 float32_is_neg(ua.s))) {
3998 goto soft;
3999 }
4000 ur.h = sqrtf(ua.h);
4001 return ur.s;
4002
4003 soft:
4004 return soft_f32_sqrt(ua.s, s);
4005 }
4006
4007 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4008 {
4009 union_float64 ua, ur;
4010
4011 ua.s = xa;
4012 if (unlikely(!can_use_fpu(s))) {
4013 goto soft;
4014 }
4015
4016 float64_input_flush1(&ua.s, s);
4017 if (QEMU_HARDFLOAT_1F64_USE_FP) {
4018 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4019 fpclassify(ua.h) == FP_ZERO) ||
4020 signbit(ua.h))) {
4021 goto soft;
4022 }
4023 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4024 float64_is_neg(ua.s))) {
4025 goto soft;
4026 }
4027 ur.h = sqrt(ua.h);
4028 return ur.s;
4029
4030 soft:
4031 return soft_f64_sqrt(ua.s, s);
4032 }
4033
4034 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4035 {
4036 FloatParts64 p;
4037
4038 bfloat16_unpack_canonical(&p, a, status);
4039 parts_sqrt(&p, status, &bfloat16_params);
4040 return bfloat16_round_pack_canonical(&p, status);
4041 }
4042
4043 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4044 {
4045 FloatParts128 p;
4046
4047 float128_unpack_canonical(&p, a, status);
4048 parts_sqrt(&p, status, &float128_params);
4049 return float128_round_pack_canonical(&p, status);
4050 }
4051
4052 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4053 {
4054 FloatParts128 p;
4055
4056 if (!floatx80_unpack_canonical(&p, a, s)) {
4057 return floatx80_default_nan(s);
4058 }
4059 parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4060 return floatx80_round_pack_canonical(&p, s);
4061 }
4062
4063 /*----------------------------------------------------------------------------
4064 | The pattern for a default generated NaN.
4065 *----------------------------------------------------------------------------*/
4066
4067 float16 float16_default_nan(float_status *status)
4068 {
4069 FloatParts64 p;
4070
4071 parts_default_nan(&p, status);
4072 p.frac >>= float16_params.frac_shift;
4073 return float16_pack_raw(&p);
4074 }
4075
4076 float32 float32_default_nan(float_status *status)
4077 {
4078 FloatParts64 p;
4079
4080 parts_default_nan(&p, status);
4081 p.frac >>= float32_params.frac_shift;
4082 return float32_pack_raw(&p);
4083 }
4084
4085 float64 float64_default_nan(float_status *status)
4086 {
4087 FloatParts64 p;
4088
4089 parts_default_nan(&p, status);
4090 p.frac >>= float64_params.frac_shift;
4091 return float64_pack_raw(&p);
4092 }
4093
4094 float128 float128_default_nan(float_status *status)
4095 {
4096 FloatParts128 p;
4097
4098 parts_default_nan(&p, status);
4099 frac_shr(&p, float128_params.frac_shift);
4100 return float128_pack_raw(&p);
4101 }
4102
4103 bfloat16 bfloat16_default_nan(float_status *status)
4104 {
4105 FloatParts64 p;
4106
4107 parts_default_nan(&p, status);
4108 p.frac >>= bfloat16_params.frac_shift;
4109 return bfloat16_pack_raw(&p);
4110 }
4111
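/*
 * The exact pattern is per-target (parts_default_nan consults the status
 * and compile-time defaults).  For the common choice of a positive quiet
 * NaN with only the top fraction bit set, the packed results would be:
 *
 *     float32_default_nan(&fs)  ->  0x7FC00000
 *     float64_default_nan(&fs)  ->  0x7FF8000000000000
 */
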
4112 /*----------------------------------------------------------------------------
4116 4113 | Returns a quiet NaN from a signalling NaN for the floating-point value `a'.
4114 *----------------------------------------------------------------------------*/
4115
4116 float16 float16_silence_nan(float16 a, float_status *status)
4117 {
4118 FloatParts64 p;
4119
4120 float16_unpack_raw(&p, a);
4121 p.frac <<= float16_params.frac_shift;
4122 parts_silence_nan(&p, status);
4123 p.frac >>= float16_params.frac_shift;
4124 return float16_pack_raw(&p);
4125 }
4126
4127 float32 float32_silence_nan(float32 a, float_status *status)
4128 {
4129 FloatParts64 p;
4130
4131 float32_unpack_raw(&p, a);
4132 p.frac <<= float32_params.frac_shift;
4133 parts_silence_nan(&p, status);
4134 p.frac >>= float32_params.frac_shift;
4135 return float32_pack_raw(&p);
4136 }
4137
4138 float64 float64_silence_nan(float64 a, float_status *status)
4139 {
4140 FloatParts64 p;
4141
4142 float64_unpack_raw(&p, a);
4143 p.frac <<= float64_params.frac_shift;
4144 parts_silence_nan(&p, status);
4145 p.frac >>= float64_params.frac_shift;
4146 return float64_pack_raw(&p);
4147 }
4148
4149 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4150 {
4151 FloatParts64 p;
4152
4153 bfloat16_unpack_raw(&p, a);
4154 p.frac <<= bfloat16_params.frac_shift;
4155 parts_silence_nan(&p, status);
4156 p.frac >>= bfloat16_params.frac_shift;
4157 return bfloat16_pack_raw(&p);
4158 }
4159
4160 float128 float128_silence_nan(float128 a, float_status *status)
4161 {
4162 FloatParts128 p;
4163
4164 float128_unpack_raw(&p, a);
4165 frac_shl(&p, float128_params.frac_shift);
4166 parts_silence_nan(&p, status);
4167 frac_shr(&p, float128_params.frac_shift);
4168 return float128_pack_raw(&p);
4169 }
4170
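/*
 * On targets where a set most-significant fraction bit means quiet
 * (snan_bit_is_one == 0), silencing amounts to setting that bit, e.g.:
 *
 *     float32_silence_nan(make_float32(0x7F800001), &fs)  ->  0x7FC00001
 */
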
4171 /*----------------------------------------------------------------------------
4172 | If `a' is denormal and we are in flush-to-zero mode then set the
4173 | input-denormal exception and return zero. Otherwise just return the value.
4174 *----------------------------------------------------------------------------*/
4175
4176 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4177 {
4178 if (p.exp == 0 && p.frac != 0) {
4179 float_raise(float_flag_input_denormal, status);
4180 return true;
4181 }
4182
4183 return false;
4184 }
4185
4186 float16 float16_squash_input_denormal(float16 a, float_status *status)
4187 {
4188 if (status->flush_inputs_to_zero) {
4189 FloatParts64 p;
4190
4191 float16_unpack_raw(&p, a);
4192 if (parts_squash_denormal(p, status)) {
4193 return float16_set_sign(float16_zero, p.sign);
4194 }
4195 }
4196 return a;
4197 }
4198
4199 float32 float32_squash_input_denormal(float32 a, float_status *status)
4200 {
4201 if (status->flush_inputs_to_zero) {
4202 FloatParts64 p;
4203
4204 float32_unpack_raw(&p, a);
4205 if (parts_squash_denormal(p, status)) {
4206 return float32_set_sign(float32_zero, p.sign);
4207 }
4208 }
4209 return a;
4210 }
4211
4212 float64 float64_squash_input_denormal(float64 a, float_status *status)
4213 {
4214 if (status->flush_inputs_to_zero) {
4215 FloatParts64 p;
4216
4217 float64_unpack_raw(&p, a);
4218 if (parts_squash_denormal(p, status)) {
4219 return float64_set_sign(float64_zero, p.sign);
4220 }
4221 }
4222 return a;
4223 }
4224
4225 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4226 {
4227 if (status->flush_inputs_to_zero) {
4228 FloatParts64 p;
4229
4230 bfloat16_unpack_raw(&p, a);
4231 if (parts_squash_denormal(p, status)) {
4232 return bfloat16_set_sign(bfloat16_zero, p.sign);
4233 }
4234 }
4235 return a;
4236 }
4237
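/*
 * A worked example, assuming 'fs' has flush_inputs_to_zero set: the
 * smallest positive subnormal collapses to +0 and the input-denormal
 * flag is raised.
 *
 *     float32 r = float32_squash_input_denormal(make_float32(1), &fs);
 *     ... r is +0, fs.float_exception_flags includes float_flag_input_denormal
 */
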
4238 /*----------------------------------------------------------------------------
4239 | Normalizes the subnormal single-precision floating-point value represented
4240 | by the denormalized significand `aSig'. The normalized exponent and
4241 | significand are stored at the locations pointed to by `zExpPtr' and
4242 | `zSigPtr', respectively.
4243 *----------------------------------------------------------------------------*/
4244
4245 static void
4246 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
4247 {
4248 int8_t shiftCount;
4249
4250 shiftCount = clz32(aSig) - 8;
4251 *zSigPtr = aSig<<shiftCount;
4252 *zExpPtr = 1 - shiftCount;
4253
4254 }
4255
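/*
 * A worked example: for the smallest subnormal, aSig == 1, clz32(1) == 31,
 * so shiftCount == 23, *zSigPtr == 0x00800000 (implicit bit in place) and
 * *zExpPtr == 1 - 23 == -22.  The value is preserved, since subnormals
 * behave as if they had exponent 1:
 *
 *     1 * 2**(1-150)  ==  0x00800000 * 2**(-22-150)  ==  2**-149
 */
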
4256 /*----------------------------------------------------------------------------
4257 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4258 | and significand `zSig', and returns the proper single-precision floating-
4259 | point value corresponding to the abstract input. Ordinarily, the abstract
4260 | value is simply rounded and packed into the single-precision format, with
4261 | the inexact exception raised if the abstract input cannot be represented
4262 | exactly. However, if the abstract value is too large, the overflow and
4263 | inexact exceptions are raised and an infinity or maximal finite value is
4264 | returned. If the abstract value is too small, the input value is rounded to
4265 | a subnormal number, and the underflow and inexact exceptions are raised if
4266 | the abstract input cannot be represented exactly as a subnormal single-
4267 | precision floating-point number.
4268 | The input significand `zSig' has its binary point between bits 30
4269 | and 29, which is 7 bits to the left of the usual location. This shifted
4270 | significand must be normalized or smaller. If `zSig' is not normalized,
4271 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4272 | and it must not require rounding. In the usual case that `zSig' is
4273 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4274 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4275 | Binary Floating-Point Arithmetic.
4276 *----------------------------------------------------------------------------*/
4277
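/*
 * A worked round-to-nearest-even example: zSig == 0x40000040 has
 * roundBits == 0x40, an exact tie.  zSig + roundIncrement == 0x40000080,
 * which shifted right by 7 is 0x00800001; the tie is then broken to even
 * by clearing the low bit, giving significand 0x00800000 with the inexact
 * flag raised.
 *
 * On overflow, note the idiom packFloat32(zSign, 0xFF, -!overflow_to_inf):
 * when the mode does not round up to infinity, the added significand of -1
 * subtracts one from the packed infinity 0x7F800000, yielding 0x7F7FFFFF,
 * the largest finite magnitude.
 */
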
4278 static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4279 float_status *status)
4280 {
4281 int8_t roundingMode;
4282 bool roundNearestEven;
4283 int8_t roundIncrement, roundBits;
4284 bool isTiny;
4285
4286 roundingMode = status->float_rounding_mode;
4287 roundNearestEven = ( roundingMode == float_round_nearest_even );
4288 switch (roundingMode) {
4289 case float_round_nearest_even:
4290 case float_round_ties_away:
4291 roundIncrement = 0x40;
4292 break;
4293 case float_round_to_zero:
4294 roundIncrement = 0;
4295 break;
4296 case float_round_up:
4297 roundIncrement = zSign ? 0 : 0x7f;
4298 break;
4299 case float_round_down:
4300 roundIncrement = zSign ? 0x7f : 0;
4301 break;
4302 case float_round_to_odd:
4303 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4304 break;
4305 default:
4306 abort();
4307 break;
4308 }
4309 roundBits = zSig & 0x7F;
4310 if ( 0xFD <= (uint16_t) zExp ) {
4311 if ( ( 0xFD < zExp )
4312 || ( ( zExp == 0xFD )
4313 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
4314 ) {
4315 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4316 roundIncrement != 0;
4317 float_raise(float_flag_overflow | float_flag_inexact, status);
4318 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
4319 }
4320 if ( zExp < 0 ) {
4321 if (status->flush_to_zero) {
4322 float_raise(float_flag_output_denormal, status);
4323 return packFloat32(zSign, 0, 0);
4324 }
4325 isTiny = status->tininess_before_rounding
4326 || (zExp < -1)
4327 || (zSig + roundIncrement < 0x80000000);
4328 shift32RightJamming( zSig, - zExp, &zSig );
4329 zExp = 0;
4330 roundBits = zSig & 0x7F;
4331 if (isTiny && roundBits) {
4332 float_raise(float_flag_underflow, status);
4333 }
4334 if (roundingMode == float_round_to_odd) {
4335 /*
4336 * For round-to-odd case, the roundIncrement depends on
4337 * zSig which just changed.
4338 */
4339 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4340 }
4341 }
4342 }
4343 if (roundBits) {
4344 float_raise(float_flag_inexact, status);
4345 }
4346 zSig = ( zSig + roundIncrement )>>7;
4347 if (!(roundBits ^ 0x40) && roundNearestEven) {
4348 zSig &= ~1;
4349 }
4350 if ( zSig == 0 ) zExp = 0;
4351 return packFloat32( zSign, zExp, zSig );
4352
4353 }
4354
4355 /*----------------------------------------------------------------------------
4356 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4357 | and significand `zSig', and returns the proper single-precision floating-
4358 | point value corresponding to the abstract input. This routine is just like
4359 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4360 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4361 | floating-point exponent.
4362 *----------------------------------------------------------------------------*/
4363
4364 static float32
4365 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4366 float_status *status)
4367 {
4368 int8_t shiftCount;
4369
4370 shiftCount = clz32(zSig) - 1;
4371 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4372 status);
4373
4374 }
4375
4376 /*----------------------------------------------------------------------------
4377 | Normalizes the subnormal double-precision floating-point value represented
4378 | by the denormalized significand `aSig'. The normalized exponent and
4379 | significand are stored at the locations pointed to by `zExpPtr' and
4380 | `zSigPtr', respectively.
4381 *----------------------------------------------------------------------------*/
4382
4383 static void
4384 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
4385 {
4386 int8_t shiftCount;
4387
4388 shiftCount = clz64(aSig) - 11;
4389 *zSigPtr = aSig<<shiftCount;
4390 *zExpPtr = 1 - shiftCount;
4391
4392 }
4393
4394 /*----------------------------------------------------------------------------
4395 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4396 | double-precision floating-point value, returning the result. After being
4397 | shifted into the proper positions, the three fields are simply added
4398 | together to form the result. This means that any integer portion of `zSig'
4399 | will be added into the exponent. Since a properly normalized significand
4400 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4401 | than the desired result exponent whenever `zSig' is a complete, normalized
4402 | significand.
4403 *----------------------------------------------------------------------------*/
4404
4405 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4406 {
4407
4408 return make_float64(
4409 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4410
4411 }
4412
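/*
 * Because the fields are added rather than OR'ed, a full normalized
 * significand carries its integer bit into the exponent; 1.0 can thus be
 * packed either way:
 *
 *     packFloat64(0, 0x3FF, 0)                             -> 0x3FF0000000000000
 *     packFloat64(0, 0x3FE, UINT64_C(0x0010000000000000))  -> 0x3FF0000000000000
 */
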
4413 /*----------------------------------------------------------------------------
4414 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4415 | and significand `zSig', and returns the proper double-precision floating-
4416 | point value corresponding to the abstract input. Ordinarily, the abstract
4417 | value is simply rounded and packed into the double-precision format, with
4418 | the inexact exception raised if the abstract input cannot be represented
4419 | exactly. However, if the abstract value is too large, the overflow and
4420 | inexact exceptions are raised and an infinity or maximal finite value is
4421 | returned. If the abstract value is too small, the input value is rounded to
4422 | a subnormal number, and the underflow and inexact exceptions are raised if
4423 | the abstract input cannot be represented exactly as a subnormal double-
4424 | precision floating-point number.
4425 | The input significand `zSig' has its binary point between bits 62
4426 | and 61, which is 10 bits to the left of the usual location. This shifted
4427 | significand must be normalized or smaller. If `zSig' is not normalized,
4428 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4429 | and it must not require rounding. In the usual case that `zSig' is
4430 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4431 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4432 | Binary Floating-Point Arithmetic.
4433 *----------------------------------------------------------------------------*/
4434
4435 static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4436 float_status *status)
4437 {
4438 int8_t roundingMode;
4439 bool roundNearestEven;
4440 int roundIncrement, roundBits;
4441 bool isTiny;
4442
4443 roundingMode = status->float_rounding_mode;
4444 roundNearestEven = ( roundingMode == float_round_nearest_even );
4445 switch (roundingMode) {
4446 case float_round_nearest_even:
4447 case float_round_ties_away:
4448 roundIncrement = 0x200;
4449 break;
4450 case float_round_to_zero:
4451 roundIncrement = 0;
4452 break;
4453 case float_round_up:
4454 roundIncrement = zSign ? 0 : 0x3ff;
4455 break;
4456 case float_round_down:
4457 roundIncrement = zSign ? 0x3ff : 0;
4458 break;
4459 case float_round_to_odd:
4460 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4461 break;
4462 default:
4463 abort();
4464 }
4465 roundBits = zSig & 0x3FF;
4466 if ( 0x7FD <= (uint16_t) zExp ) {
4467 if ( ( 0x7FD < zExp )
4468 || ( ( zExp == 0x7FD )
4469 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
4470 ) {
4471 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4472 roundIncrement != 0;
4473 float_raise(float_flag_overflow | float_flag_inexact, status);
4474 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
4475 }
4476 if ( zExp < 0 ) {
4477 if (status->flush_to_zero) {
4478 float_raise(float_flag_output_denormal, status);
4479 return packFloat64(zSign, 0, 0);
4480 }
4481 isTiny = status->tininess_before_rounding
4482 || (zExp < -1)
4483 || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
4484 shift64RightJamming( zSig, - zExp, &zSig );
4485 zExp = 0;
4486 roundBits = zSig & 0x3FF;
4487 if (isTiny && roundBits) {
4488 float_raise(float_flag_underflow, status);
4489 }
4490 if (roundingMode == float_round_to_odd) {
4491 /*
4492 * For round-to-odd case, the roundIncrement depends on
4493 * zSig which just changed.
4494 */
4495 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4496 }
4497 }
4498 }
4499 if (roundBits) {
4500 float_raise(float_flag_inexact, status);
4501 }
4502 zSig = ( zSig + roundIncrement )>>10;
4503 if (!(roundBits ^ 0x200) && roundNearestEven) {
4504 zSig &= ~1;
4505 }
4506 if ( zSig == 0 ) zExp = 0;
4507 return packFloat64( zSign, zExp, zSig );
4508
4509 }
4510
4511 /*----------------------------------------------------------------------------
4512 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4513 | and significand `zSig', and returns the proper double-precision floating-
4514 | point value corresponding to the abstract input. This routine is just like
4515 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4516 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4517 | floating-point exponent.
4518 *----------------------------------------------------------------------------*/
4519
4520 static float64
4521 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4522 float_status *status)
4523 {
4524 int8_t shiftCount;
4525
4526 shiftCount = clz64(zSig) - 1;
4527 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4528 status);
4529
4530 }
4531
4532 /*----------------------------------------------------------------------------
4533 | Normalizes the subnormal extended double-precision floating-point value
4534 | represented by the denormalized significand `aSig'. The normalized exponent
4535 | and significand are stored at the locations pointed to by `zExpPtr' and
4536 | `zSigPtr', respectively.
4537 *----------------------------------------------------------------------------*/
4538
4539 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4540 uint64_t *zSigPtr)
4541 {
4542 int8_t shiftCount;
4543
4544 shiftCount = clz64(aSig);
4545 *zSigPtr = aSig<<shiftCount;
4546 *zExpPtr = 1 - shiftCount;
4547 }
4548
4549 /*----------------------------------------------------------------------------
4550 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4551 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4552 | and returns the proper extended double-precision floating-point value
4553 | corresponding to the abstract input. Ordinarily, the abstract value is
4554 | rounded and packed into the extended double-precision format, with the
4555 | inexact exception raised if the abstract input cannot be represented
4556 | exactly. However, if the abstract value is too large, the overflow and
4557 | inexact exceptions are raised and an infinity or maximal finite value is
4558 | returned. If the abstract value is too small, the input value is rounded to
4559 | a subnormal number, and the underflow and inexact exceptions are raised if
4560 | the abstract input cannot be represented exactly as a subnormal extended
4561 | double-precision floating-point number.
4562 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4563 | the result is rounded to the same number of bits as single or double
4564 | precision, respectively. Otherwise, the result is rounded to the full
4565 | precision of the extended double-precision format.
4566 | The input significand must be normalized or smaller. If the input
4567 | significand is not normalized, `zExp' must be 0; in that case, the result
4568 | returned is a subnormal number, and it must not require rounding. The
4569 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4570 | Floating-Point Arithmetic.
4571 *----------------------------------------------------------------------------*/
4572
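/*
 * The reduced-precision masks mirror x87/m68k precision control: with
 * roundMask == 0x7FF the low 11 of the 64 significand bits are discarded,
 * leaving the 53 bits of double precision, and with roundMask ==
 * 0xFFFFFFFFFF the low 40 bits are discarded, leaving the 24 bits of
 * single precision.
 */
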
4573 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4574 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4575 float_status *status)
4576 {
4577 FloatRoundMode roundingMode;
4578 bool roundNearestEven, increment, isTiny;
4579 int64_t roundIncrement, roundMask, roundBits;
4580
4581 roundingMode = status->float_rounding_mode;
4582 roundNearestEven = ( roundingMode == float_round_nearest_even );
4583 switch (roundingPrecision) {
4584 case floatx80_precision_x:
4585 goto precision80;
4586 case floatx80_precision_d:
4587 roundIncrement = UINT64_C(0x0000000000000400);
4588 roundMask = UINT64_C(0x00000000000007FF);
4589 break;
4590 case floatx80_precision_s:
4591 roundIncrement = UINT64_C(0x0000008000000000);
4592 roundMask = UINT64_C(0x000000FFFFFFFFFF);
4593 break;
4594 default:
4595 g_assert_not_reached();
4596 }
4597 zSig0 |= ( zSig1 != 0 );
4598 switch (roundingMode) {
4599 case float_round_nearest_even:
4600 case float_round_ties_away:
4601 break;
4602 case float_round_to_zero:
4603 roundIncrement = 0;
4604 break;
4605 case float_round_up:
4606 roundIncrement = zSign ? 0 : roundMask;
4607 break;
4608 case float_round_down:
4609 roundIncrement = zSign ? roundMask : 0;
4610 break;
4611 default:
4612 abort();
4613 }
4614 roundBits = zSig0 & roundMask;
4615 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4616 if ( ( 0x7FFE < zExp )
4617 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4618 ) {
4619 goto overflow;
4620 }
4621 if ( zExp <= 0 ) {
4622 if (status->flush_to_zero) {
4623 float_raise(float_flag_output_denormal, status);
4624 return packFloatx80(zSign, 0, 0);
4625 }
4626 isTiny = status->tininess_before_rounding
4627 || (zExp < 0 )
4628 || (zSig0 <= zSig0 + roundIncrement);
4629 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4630 zExp = 0;
4631 roundBits = zSig0 & roundMask;
4632 if (isTiny && roundBits) {
4633 float_raise(float_flag_underflow, status);
4634 }
4635 if (roundBits) {
4636 float_raise(float_flag_inexact, status);
4637 }
4638 zSig0 += roundIncrement;
4639 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4640 roundIncrement = roundMask + 1;
4641 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4642 roundMask |= roundIncrement;
4643 }
4644 zSig0 &= ~ roundMask;
4645 return packFloatx80( zSign, zExp, zSig0 );
4646 }
4647 }
4648 if (roundBits) {
4649 float_raise(float_flag_inexact, status);
4650 }
4651 zSig0 += roundIncrement;
4652 if ( zSig0 < roundIncrement ) {
4653 ++zExp;
4654 zSig0 = UINT64_C(0x8000000000000000);
4655 }
4656 roundIncrement = roundMask + 1;
4657 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4658 roundMask |= roundIncrement;
4659 }
4660 zSig0 &= ~ roundMask;
4661 if ( zSig0 == 0 ) zExp = 0;
4662 return packFloatx80( zSign, zExp, zSig0 );
4663 precision80:
4664 switch (roundingMode) {
4665 case float_round_nearest_even:
4666 case float_round_ties_away:
4667 increment = ((int64_t)zSig1 < 0);
4668 break;
4669 case float_round_to_zero:
4670 increment = 0;
4671 break;
4672 case float_round_up:
4673 increment = !zSign && zSig1;
4674 break;
4675 case float_round_down:
4676 increment = zSign && zSig1;
4677 break;
4678 default:
4679 abort();
4680 }
4681 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4682 if ( ( 0x7FFE < zExp )
4683 || ( ( zExp == 0x7FFE )
4684 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4685 && increment
4686 )
4687 ) {
4688 roundMask = 0;
4689 overflow:
4690 float_raise(float_flag_overflow | float_flag_inexact, status);
4691 if ( ( roundingMode == float_round_to_zero )
4692 || ( zSign && ( roundingMode == float_round_up ) )
4693 || ( ! zSign && ( roundingMode == float_round_down ) )
4694 ) {
4695 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4696 }
4697 return packFloatx80(zSign,
4698 floatx80_infinity_high,
4699 floatx80_infinity_low);
4700 }
4701 if ( zExp <= 0 ) {
4702 isTiny = status->tininess_before_rounding
4703 || (zExp < 0)
4704 || !increment
4705 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4706 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4707 zExp = 0;
4708 if (isTiny && zSig1) {
4709 float_raise(float_flag_underflow, status);
4710 }
4711 if (zSig1) {
4712 float_raise(float_flag_inexact, status);
4713 }
4714 switch (roundingMode) {
4715 case float_round_nearest_even:
4716 case float_round_ties_away:
4717 increment = ((int64_t)zSig1 < 0);
4718 break;
4719 case float_round_to_zero:
4720 increment = 0;
4721 break;
4722 case float_round_up:
4723 increment = !zSign && zSig1;
4724 break;
4725 case float_round_down:
4726 increment = zSign && zSig1;
4727 break;
4728 default:
4729 abort();
4730 }
4731 if ( increment ) {
4732 ++zSig0;
4733 if (!(zSig1 << 1) && roundNearestEven) {
4734 zSig0 &= ~1;
4735 }
4736 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4737 }
4738 return packFloatx80( zSign, zExp, zSig0 );
4739 }
4740 }
4741 if (zSig1) {
4742 float_raise(float_flag_inexact, status);
4743 }
4744 if ( increment ) {
4745 ++zSig0;
4746 if ( zSig0 == 0 ) {
4747 ++zExp;
4748 zSig0 = UINT64_C(0x8000000000000000);
4749 }
4750 else {
4751 if (!(zSig1 << 1) && roundNearestEven) {
4752 zSig0 &= ~1;
4753 }
4754 }
4755 }
4756 else {
4757 if ( zSig0 == 0 ) zExp = 0;
4758 }
4759 return packFloatx80( zSign, zExp, zSig0 );
4760
4761 }
4762
4763 /*----------------------------------------------------------------------------
4764 | Takes an abstract floating-point value having sign `zSign', exponent
4765 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4766 | and returns the proper extended double-precision floating-point value
4767 | corresponding to the abstract input. This routine is just like
4768 | `roundAndPackFloatx80' except that the input significand does not have to be
4769 | normalized.
4770 *----------------------------------------------------------------------------*/
4771
4772 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
4773 bool zSign, int32_t zExp,
4774 uint64_t zSig0, uint64_t zSig1,
4775 float_status *status)
4776 {
4777 int8_t shiftCount;
4778
4779 if ( zSig0 == 0 ) {
4780 zSig0 = zSig1;
4781 zSig1 = 0;
4782 zExp -= 64;
4783 }
4784 shiftCount = clz64(zSig0);
4785 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4786 zExp -= shiftCount;
4787 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4788 zSig0, zSig1, status);
4789
4790 }
4791
4792 /*----------------------------------------------------------------------------
4793 | Returns the least-significant 64 fraction bits of the quadruple-precision
4794 | floating-point value `a'.
4795 *----------------------------------------------------------------------------*/
4796
4797 static inline uint64_t extractFloat128Frac1( float128 a )
4798 {
4799
4800 return a.low;
4801
4802 }
4803
4804 /*----------------------------------------------------------------------------
4805 | Returns the most-significant 48 fraction bits of the quadruple-precision
4806 | floating-point value `a'.
4807 *----------------------------------------------------------------------------*/
4808
4809 static inline uint64_t extractFloat128Frac0( float128 a )
4810 {
4811
4812 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4813
4814 }
4815
4816 /*----------------------------------------------------------------------------
4817 | Returns the exponent bits of the quadruple-precision floating-point value
4818 | `a'.
4819 *----------------------------------------------------------------------------*/
4820
4821 static inline int32_t extractFloat128Exp( float128 a )
4822 {
4823
4824 return ( a.high>>48 ) & 0x7FFF;
4825
4826 }
4827
4828 /*----------------------------------------------------------------------------
4829 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4830 *----------------------------------------------------------------------------*/
4831
4832 static inline bool extractFloat128Sign(float128 a)
4833 {
4834 return a.high >> 63;
4835 }
4836
4837 /*----------------------------------------------------------------------------
4838 | Normalizes the subnormal quadruple-precision floating-point value
4839 | represented by the denormalized significand formed by the concatenation of
4840 | `aSig0' and `aSig1'. The normalized exponent is stored at the location
4841 | pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4842 | significand are stored at the location pointed to by `zSig0Ptr', and the
4843 | least significant 64 bits of the normalized significand are stored at the
4844 | location pointed to by `zSig1Ptr'.
4845 *----------------------------------------------------------------------------*/
4846
4847 static void
4848 normalizeFloat128Subnormal(
4849 uint64_t aSig0,
4850 uint64_t aSig1,
4851 int32_t *zExpPtr,
4852 uint64_t *zSig0Ptr,
4853 uint64_t *zSig1Ptr
4854 )
4855 {
4856 int8_t shiftCount;
4857
4858 if ( aSig0 == 0 ) {
4859 shiftCount = clz64(aSig1) - 15;
4860 if ( shiftCount < 0 ) {
4861 *zSig0Ptr = aSig1>>( - shiftCount );
4862 *zSig1Ptr = aSig1<<( shiftCount & 63 );
4863 }
4864 else {
4865 *zSig0Ptr = aSig1<<shiftCount;
4866 *zSig1Ptr = 0;
4867 }
4868 *zExpPtr = - shiftCount - 63;
4869 }
4870 else {
4871 shiftCount = clz64(aSig0) - 15;
4872 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
4873 *zExpPtr = 1 - shiftCount;
4874 }
4875
4876 }
4877
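/*
 * A worked example for the aSig0 == 0 branch: with aSig1 == 1,
 * clz64(1) - 15 == 48, so *zSig0Ptr == UINT64_C(1) << 48 (the implicit bit
 * of the 113-bit significand in place), *zSig1Ptr == 0, and
 * *zExpPtr == -48 - 63 == -111.
 */
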
4878 /*----------------------------------------------------------------------------
4879 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4880 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4881 | floating-point value, returning the result. After being shifted into the
4882 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4886 4883 | added together to form the most significant 64 bits of the result. This
4884 | means that any integer portion of `zSig0' will be added into the exponent.
4885 | Since a properly normalized significand will have an integer portion equal
4886 | to 1, the `zExp' input should be 1 less than the desired result exponent
4887 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4888 | significand.
4889 *----------------------------------------------------------------------------*/
4890
4891 static inline float128
4892 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4893 {
4894 float128 z;
4895
4896 z.low = zSig1;
4897 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4898 return z;
4899 }
4900
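/*
 * For example, 1.0 in quadruple precision:
 *
 *     packFloat128(0, 0x3FFF, 0, 0)  ->  high 0x3FFF000000000000, low 0
 */
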
4901 /*----------------------------------------------------------------------------
4902 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4903 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4904 | and `zSig2', and returns the proper quadruple-precision floating-point value
4905 | corresponding to the abstract input. Ordinarily, the abstract value is
4906 | simply rounded and packed into the quadruple-precision format, with the
4907 | inexact exception raised if the abstract input cannot be represented
4908 | exactly. However, if the abstract value is too large, the overflow and
4909 | inexact exceptions are raised and an infinity or maximal finite value is
4910 | returned. If the abstract value is too small, the input value is rounded to
4911 | a subnormal number, and the underflow and inexact exceptions are raised if
4912 | the abstract input cannot be represented exactly as a subnormal quadruple-
4913 | precision floating-point number.
4914 | The input significand must be normalized or smaller. If the input
4915 | significand is not normalized, `zExp' must be 0; in that case, the result
4916 | returned is a subnormal number, and it must not require rounding. In the
4917 | usual case that the input significand is normalized, `zExp' must be 1 less
4918 | than the ``true'' floating-point exponent. The handling of underflow and
4919 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4920 *----------------------------------------------------------------------------*/
4921
4922 static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
4923 uint64_t zSig0, uint64_t zSig1,
4924 uint64_t zSig2, float_status *status)
4925 {
4926 int8_t roundingMode;
4927 bool roundNearestEven, increment, isTiny;
4928
4929 roundingMode = status->float_rounding_mode;
4930 roundNearestEven = ( roundingMode == float_round_nearest_even );
4931 switch (roundingMode) {
4932 case float_round_nearest_even:
4933 case float_round_ties_away:
4934 increment = ((int64_t)zSig2 < 0);
4935 break;
4936 case float_round_to_zero:
4937 increment = 0;
4938 break;
4939 case float_round_up:
4940 increment = !zSign && zSig2;
4941 break;
4942 case float_round_down:
4943 increment = zSign && zSig2;
4944 break;
4945 case float_round_to_odd:
4946 increment = !(zSig1 & 0x1) && zSig2;
4947 break;
4948 default:
4949 abort();
4950 }
4951 if ( 0x7FFD <= (uint32_t) zExp ) {
4952 if ( ( 0x7FFD < zExp )
4953 || ( ( zExp == 0x7FFD )
4954 && eq128(
4955 UINT64_C(0x0001FFFFFFFFFFFF),
4956 UINT64_C(0xFFFFFFFFFFFFFFFF),
4957 zSig0,
4958 zSig1
4959 )
4960 && increment
4961 )
4962 ) {
4963 float_raise(float_flag_overflow | float_flag_inexact, status);
4964 if ( ( roundingMode == float_round_to_zero )
4965 || ( zSign && ( roundingMode == float_round_up ) )
4966 || ( ! zSign && ( roundingMode == float_round_down ) )
4967 || (roundingMode == float_round_to_odd)
4968 ) {
4969 return
4970 packFloat128(
4971 zSign,
4972 0x7FFE,
4973 UINT64_C(0x0000FFFFFFFFFFFF),
4974 UINT64_C(0xFFFFFFFFFFFFFFFF)
4975 );
4976 }
4977 return packFloat128( zSign, 0x7FFF, 0, 0 );
4978 }
4979 if ( zExp < 0 ) {
4980 if (status->flush_to_zero) {
4981 float_raise(float_flag_output_denormal, status);
4982 return packFloat128(zSign, 0, 0, 0);
4983 }
4984 isTiny = status->tininess_before_rounding
4985 || (zExp < -1)
4986 || !increment
4987 || lt128(zSig0, zSig1,
4988 UINT64_C(0x0001FFFFFFFFFFFF),
4989 UINT64_C(0xFFFFFFFFFFFFFFFF));
4990 shift128ExtraRightJamming(
4991 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4992 zExp = 0;
4993 if (isTiny && zSig2) {
4994 float_raise(float_flag_underflow, status);
4995 }
4996 switch (roundingMode) {
4997 case float_round_nearest_even:
4998 case float_round_ties_away:
4999 increment = ((int64_t)zSig2 < 0);
5000 break;
5001 case float_round_to_zero:
5002 increment = 0;
5003 break;
5004 case float_round_up:
5005 increment = !zSign && zSig2;
5006 break;
5007 case float_round_down:
5008 increment = zSign && zSig2;
5009 break;
5010 case float_round_to_odd:
5011 increment = !(zSig1 & 0x1) && zSig2;
5012 break;
5013 default:
5014 abort();
5015 }
5016 }
5017 }
5018 if (zSig2) {
5019 float_raise(float_flag_inexact, status);
5020 }
5021 if ( increment ) {
5022 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
5023 if ((zSig2 + zSig2 == 0) && roundNearestEven) {
5024 zSig1 &= ~1;
5025 }
5026 }
5027 else {
5028 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
5029 }
5030 return packFloat128( zSign, zExp, zSig0, zSig1 );
5031
5032 }
5033
5034 /*----------------------------------------------------------------------------
5035 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
5036 | and significand formed by the concatenation of `zSig0' and `zSig1', and
5037 | returns the proper quadruple-precision floating-point value corresponding
5038 | to the abstract input. This routine is just like `roundAndPackFloat128'
5039 | except that the input significand has fewer bits and does not have to be
5040 | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
5041 | point exponent.
5042 *----------------------------------------------------------------------------*/
5043
5044 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
5045 uint64_t zSig0, uint64_t zSig1,
5046 float_status *status)
5047 {
5048 int8_t shiftCount;
5049 uint64_t zSig2;
5050
5051 if ( zSig0 == 0 ) {
5052 zSig0 = zSig1;
5053 zSig1 = 0;
5054 zExp -= 64;
5055 }
5056 shiftCount = clz64(zSig0) - 15;
5057 if ( 0 <= shiftCount ) {
5058 zSig2 = 0;
5059 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5060 }
5061 else {
5062 shift128ExtraRightJamming(
5063 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
5064 }
5065 zExp -= shiftCount;
5066 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
5067
5068 }
5069
5070 /*----------------------------------------------------------------------------
5071 | Returns the remainder of the single-precision floating-point value `a'
5072 | with respect to the corresponding value `b'. The operation is performed
5073 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5074 *----------------------------------------------------------------------------*/
5075
5076 float32 float32_rem(float32 a, float32 b, float_status *status)
5077 {
5078 bool aSign, zSign;
5079 int aExp, bExp, expDiff;
5080 uint32_t aSig, bSig;
5081 uint32_t q;
5082 uint64_t aSig64, bSig64, q64;
5083 uint32_t alternateASig;
5084 int32_t sigMean;
5085 a = float32_squash_input_denormal(a, status);
5086 b = float32_squash_input_denormal(b, status);
5087
5088 aSig = extractFloat32Frac( a );
5089 aExp = extractFloat32Exp( a );
5090 aSign = extractFloat32Sign( a );
5091 bSig = extractFloat32Frac( b );
5092 bExp = extractFloat32Exp( b );
5093 if ( aExp == 0xFF ) {
5094 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
5095 return propagateFloat32NaN(a, b, status);
5096 }
5097 float_raise(float_flag_invalid, status);
5098 return float32_default_nan(status);
5099 }
5100 if ( bExp == 0xFF ) {
5101 if (bSig) {
5102 return propagateFloat32NaN(a, b, status);
5103 }
5104 return a;
5105 }
5106 if ( bExp == 0 ) {
5107 if ( bSig == 0 ) {
5108 float_raise(float_flag_invalid, status);
5109 return float32_default_nan(status);
5110 }
5111 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
5112 }
5113 if ( aExp == 0 ) {
5114 if ( aSig == 0 ) return a;
5115 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5116 }
5117 expDiff = aExp - bExp;
5118 aSig |= 0x00800000;
5119 bSig |= 0x00800000;
5120 if ( expDiff < 32 ) {
5121 aSig <<= 8;
5122 bSig <<= 8;
5123 if ( expDiff < 0 ) {
5124 if ( expDiff < -1 ) return a;
5125 aSig >>= 1;
5126 }
5127 q = ( bSig <= aSig );
5128 if ( q ) aSig -= bSig;
5129 if ( 0 < expDiff ) {
5130 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
5131 q >>= 32 - expDiff;
5132 bSig >>= 2;
5133 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5134 }
5135 else {
5136 aSig >>= 2;
5137 bSig >>= 2;
5138 }
5139 }
5140 else {
5141 if ( bSig <= aSig ) aSig -= bSig;
5142 aSig64 = ( (uint64_t) aSig )<<40;
5143 bSig64 = ( (uint64_t) bSig )<<40;
5144 expDiff -= 64;
5145 while ( 0 < expDiff ) {
5146 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5147 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5148 aSig64 = - ( ( bSig * q64 )<<38 );
5149 expDiff -= 62;
5150 }
5151 expDiff += 64;
5152 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5153 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5154 q = q64>>( 64 - expDiff );
5155 bSig <<= 6;
5156 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5157 }
5158 do {
5159 alternateASig = aSig;
5160 ++q;
5161 aSig -= bSig;
5162 } while ( 0 <= (int32_t) aSig );
5163 sigMean = aSig + alternateASig;
5164 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5165 aSig = alternateASig;
5166 }
5167 zSign = ( (int32_t) aSig < 0 );
5168 if ( zSign ) aSig = - aSig;
5169 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
5170 }
5171
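/*
 * IEEE remainder rounds the quotient to nearest-even rather than
 * truncating, so the result can be negative even for positive operands.
 * Worked examples (operands written as decimal values for clarity):
 *
 *     float32_rem(5.0, 3.0)  ->  -1.0   (quotient 5/3 rounds to 2)
 *     float32_rem(5.0, 2.0)  ->   1.0   (quotient 2.5 ties to even, 2)
 */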
5172
5173
5174 /*----------------------------------------------------------------------------
5175 | Returns the binary exponential of the single-precision floating-point value
5176 | `a'. The operation is performed according to the IEC/IEEE Standard for
5177 | Binary Floating-Point Arithmetic.
5178 |
5179 | Uses the following identities:
5180 |
5184 5181 | 1. -------------------------------------------------------------------------
5185 5182 |
5186 5183 |        2**x  =  e**(x * ln(2))
5187 5184 |
5188 5185 | 2. -------------------------------------------------------------------------
5189 5186 |
5190 5187 |        e**x  =  1 + x/1! + x**2/2! + x**3/3! + x**4/4! + x**5/5! + ...
5191 5188 |                   + x**n/n! + ...
5192 5189 |
5190 *----------------------------------------------------------------------------*/
5191
5192 static const float64 float32_exp2_coefficients[15] =
5193 {
5194 const_float64( 0x3ff0000000000000ll ), /* 1 */
5195 const_float64( 0x3fe0000000000000ll ), /* 2 */
5196 const_float64( 0x3fc5555555555555ll ), /* 3 */
5197 const_float64( 0x3fa5555555555555ll ), /* 4 */
5198 const_float64( 0x3f81111111111111ll ), /* 5 */
5199 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5200 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5201 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5202 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5203 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5204 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5205 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5206 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5207 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5208 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5209 };
5210
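/*
 * Entry i of the table above is the nearest float64 to 1/(i+1)!; for
 * instance 0x3fc5555555555555 is the double closest to 1/3! == 1/6.
 */
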
5211 float32 float32_exp2(float32 a, float_status *status)
5212 {
5213 FloatParts64 xp, xnp, tp, rp;
5214 int i;
5215
5216 float32_unpack_canonical(&xp, a, status);
5217 if (unlikely(xp.cls != float_class_normal)) {
5218 switch (xp.cls) {
5219 case float_class_snan:
5220 case float_class_qnan:
5221 parts_return_nan(&xp, status);
5222 return float32_round_pack_canonical(&xp, status);
5223 case float_class_inf:
5224 return xp.sign ? float32_zero : a;
5225 case float_class_zero:
5226 return float32_one;
5227 default:
5228 break;
5229 }
5230 g_assert_not_reached();
5231 }
5232
5233 float_raise(float_flag_inexact, status);
5234
5238 5235 float64_unpack_canonical(&tp, float64_ln2, status);
5236 xp = *parts_mul(&xp, &tp, status);
5237 xnp = xp;
5238
5239 float64_unpack_canonical(&rp, float64_one, status);
5240 for (i = 0 ; i < 15 ; i++) {
5241 float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
5245 5242 rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
5243 xnp = *parts_mul(&xnp, &xp, status);
5244 }
5245
5246 return float32_round_pack_canonical(&rp, status);
5247 }
5248
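/*
 * The loop evaluates identity 2 in double-precision parts: xp holds
 * x * ln(2), xnp steps through its successive powers, and rp accumulates
 * 1 + the sum over n = 1..15 of (x*ln2)**n / n!, which is rounded to
 * float32 once at the end.  float_flag_inexact is raised for every normal
 * input, even the exactly representable integer powers.
 */
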
5249 /*----------------------------------------------------------------------------
5250 | Returns the binary log of the single-precision floating-point value `a'.
5251 | The operation is performed according to the IEC/IEEE Standard for Binary
5252 | Floating-Point Arithmetic.
5253 *----------------------------------------------------------------------------*/
5254 float32 float32_log2(float32 a, float_status *status)
5255 {
5256 bool aSign, zSign;
5257 int aExp;
5258 uint32_t aSig, zSig, i;
5259
5260 a = float32_squash_input_denormal(a, status);
5261 aSig = extractFloat32Frac( a );
5262 aExp = extractFloat32Exp( a );
5263 aSign = extractFloat32Sign( a );
5264
5265 if ( aExp == 0 ) {
5266 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5267 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5268 }
5269 if ( aSign ) {
5270 float_raise(float_flag_invalid, status);
5271 return float32_default_nan(status);
5272 }
5273 if ( aExp == 0xFF ) {
5274 if (aSig) {
5275 return propagateFloat32NaN(a, float32_zero, status);
5276 }
5277 return a;
5278 }
5279
5280 aExp -= 0x7F;
5281 aSig |= 0x00800000;
5282 zSign = aExp < 0;
5283 zSig = aExp << 23;
5284
5285 for (i = 1 << 22; i > 0; i >>= 1) {
5286 aSig = ( (uint64_t)aSig * aSig ) >> 23;
5287 if ( aSig & 0x01000000 ) {
5288 aSig >>= 1;
5289 zSig |= i;
5290 }
5291 }
5292
5293 if ( zSign )
5294 zSig = -zSig;
5295
5296 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
5297 }
5298
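/*
 * This is the classic bit-at-a-time log2: the significand is kept in
 * [1, 2), and each squaring either stays below 2 (next fraction bit 0) or
 * reaches [2, 4) and is halved (next fraction bit 1).  zSig therefore
 * holds log2(a) as a fixed-point value with 23 fraction bits, and passing
 * zExp == 0x85 to normalizeRoundAndPackFloat32 supplies exactly the
 * matching 2**-23 scale.
 */
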
5299 /*----------------------------------------------------------------------------
5300 | Returns the remainder of the double-precision floating-point value `a'
5301 | with respect to the corresponding value `b'. The operation is performed
5302 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5303 *----------------------------------------------------------------------------*/
5304
5305 float64 float64_rem(float64 a, float64 b, float_status *status)
5306 {
5307 bool aSign, zSign;
5308 int aExp, bExp, expDiff;
5309 uint64_t aSig, bSig;
5310 uint64_t q, alternateASig;
5311 int64_t sigMean;
5312
5313 a = float64_squash_input_denormal(a, status);
5314 b = float64_squash_input_denormal(b, status);
5315 aSig = extractFloat64Frac( a );
5316 aExp = extractFloat64Exp( a );
5317 aSign = extractFloat64Sign( a );
5318 bSig = extractFloat64Frac( b );
5319 bExp = extractFloat64Exp( b );
5320 if ( aExp == 0x7FF ) {
5321 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
5322 return propagateFloat64NaN(a, b, status);
5323 }
5324 float_raise(float_flag_invalid, status);
5325 return float64_default_nan(status);
5326 }
5327 if ( bExp == 0x7FF ) {
5328 if (bSig) {
5329 return propagateFloat64NaN(a, b, status);
5330 }
5331 return a;
5332 }
5333 if ( bExp == 0 ) {
5334 if ( bSig == 0 ) {
5335 float_raise(float_flag_invalid, status);
5336 return float64_default_nan(status);
5337 }
5338 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5339 }
5340 if ( aExp == 0 ) {
5341 if ( aSig == 0 ) return a;
5342 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5343 }
5344 expDiff = aExp - bExp;
5345 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5346 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
5347 if ( expDiff < 0 ) {
5348 if ( expDiff < -1 ) return a;
5349 aSig >>= 1;
5350 }
5351 q = ( bSig <= aSig );
5352 if ( q ) aSig -= bSig;
5353 expDiff -= 64;
5354 while ( 0 < expDiff ) {
5355 q = estimateDiv128To64( aSig, 0, bSig );
5356 q = ( 2 < q ) ? q - 2 : 0;
5357 aSig = - ( ( bSig>>2 ) * q );
5358 expDiff -= 62;
5359 }
5360 expDiff += 64;
5361 if ( 0 < expDiff ) {
5362 q = estimateDiv128To64( aSig, 0, bSig );
5363 q = ( 2 < q ) ? q - 2 : 0;
5364 q >>= 64 - expDiff;
5365 bSig >>= 2;
5366 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5367 }
5368 else {
5369 aSig >>= 2;
5370 bSig >>= 2;
5371 }
5372 do {
5373 alternateASig = aSig;
5374 ++q;
5375 aSig -= bSig;
5376 } while ( 0 <= (int64_t) aSig );
5377 sigMean = aSig + alternateASig;
5378 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5379 aSig = alternateASig;
5380 }
5381 zSign = ( (int64_t) aSig < 0 );
5382 if ( zSign ) aSig = - aSig;
5383 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
5384
5385 }
5386
5387 /*----------------------------------------------------------------------------
5388 | Returns the binary log of the double-precision floating-point value `a'.
5389 | The operation is performed according to the IEC/IEEE Standard for Binary
5390 | Floating-Point Arithmetic.
5391 *----------------------------------------------------------------------------*/
5392 float64 float64_log2(float64 a, float_status *status)
5393 {
5394 bool aSign, zSign;
5395 int aExp;
5396 uint64_t aSig, aSig0, aSig1, zSig, i;
5397 a = float64_squash_input_denormal(a, status);
5398
5399 aSig = extractFloat64Frac( a );
5400 aExp = extractFloat64Exp( a );
5401 aSign = extractFloat64Sign( a );
5402
5403 if ( aExp == 0 ) {
5404 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5405 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5406 }
5407 if ( aSign ) {
5408 float_raise(float_flag_invalid, status);
5409 return float64_default_nan(status);
5410 }
5411 if ( aExp == 0x7FF ) {
5412 if (aSig) {
5413 return propagateFloat64NaN(a, float64_zero, status);
5414 }
5415 return a;
5416 }
5417
5418 aExp -= 0x3FF;
5419 aSig |= UINT64_C(0x0010000000000000);
5420 zSign = aExp < 0;
5421 zSig = (uint64_t)aExp << 52;
5422 for (i = 1LL << 51; i > 0; i >>= 1) {
5423 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5424 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
5425 if ( aSig & UINT64_C(0x0020000000000000) ) {
5426 aSig >>= 1;
5427 zSig |= i;
5428 }
5429 }
5430
5431 if ( zSign )
5432 zSig = -zSig;
5433 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
5434 }
5435
5436 /*----------------------------------------------------------------------------
5437 | Rounds the extended double-precision floating-point value `a'
5438 | to the precision provided by floatx80_rounding_precision and returns the
5439 | result as an extended double-precision floating-point value.
5440 | The operation is performed according to the IEC/IEEE Standard for Binary
5441 | Floating-Point Arithmetic.
5442 *----------------------------------------------------------------------------*/
5443
5444 floatx80 floatx80_round(floatx80 a, float_status *status)
5445 {
5446 FloatParts128 p;
5447
5448 if (!floatx80_unpack_canonical(&p, a, status)) {
5449 return floatx80_default_nan(status);
5450 }
5451 return floatx80_round_pack_canonical(&p, status);
5452 }
5453
5454 /*----------------------------------------------------------------------------
5455 | Returns the remainder of the extended double-precision floating-point value
5456 | `a' with respect to the corresponding value `b'. The operation is performed
5457 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
5458 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
5459 | the quotient toward zero instead. '*quotient' is set to the low 64 bits of
5460 | the absolute value of the integer quotient.
5461 *----------------------------------------------------------------------------*/
5462
floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
                         float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff, aExpOrig;
    uint64_t aSig0, aSig1, bSig;
    uint64_t q, term0, term1, alternateASig0, alternateASig1;

    *quotient = 0;
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExpOrig = aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    if ( aExp == 0x7FFF ) {
        if (    (uint64_t) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if (aExp == 0 && aSig0 >> 63) {
            /*
             * Pseudo-denormal argument must be returned in normalized
             * form.
             */
            return packFloatx80(aSign, 1, aSig0);
        }
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        if ( mod || expDiff < -1 ) {
            if (aExp == 1 && aExpOrig == 0) {
                /*
                 * Pseudo-denormal argument must be returned in
                 * normalized form.
                 */
                return packFloatx80(aSign, aExp, aSig0);
            }
            return a;
        }
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    *quotient = q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    expDiff -= 64;
    /*
     * Long division, 62 quotient bits per iteration: estimateDiv128To64()
     * may overestimate by at most 2, so bias the estimate low and keep the
     * following subtraction non-negative.
     */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
        *quotient <<= 62;
        *quotient += q;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        /* Fix up the biased-low estimate with final subtractions. */
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
        if (expDiff < 64) {
            *quotient <<= expDiff;
        } else {
            *quotient = 0;
        }
        *quotient += q;
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    if (!mod) {
        /*
         * IEEE remainder: if b - r is nearer to zero than r (or exactly
         * as near and the quotient is odd), use the negated difference
         * and round the quotient up instead.
         */
        sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
        if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
             || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
                  && ( q & 1 ) )
           ) {
            aSig0 = alternateASig0;
            aSig1 = alternateASig1;
            zSign = ! zSign;
            ++*quotient;
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            floatx80_precision_x, zSign, bExp + expDiff, aSig0, aSig1, status);
}

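/*----------------------------------------------------------------------------
| Illustrative sketch, not part of the original code: floatx80_modrem()
| reports the low 64 bits of the integer quotient because callers such as
| x87 FPREM/FPREM1 emulation need the low three quotient bits for the C1,
| C3 and C0 status flags.  This helper is hypothetical and the flag layout
| (C1 = Q0, C3 = Q1, C0 = Q2) is shown for illustration only.
*----------------------------------------------------------------------------*/

static inline void example_fprem_quotient_flags(floatx80 a, floatx80 b,
                                                bool *c0, bool *c1, bool *c3,
                                                float_status *s)
{
    uint64_t quotient;

    floatx80_modrem(a, b, true, &quotient, s);  /* FPREM truncates */
    *c1 = quotient & 1;                         /* quotient bit 0 */
    *c3 = (quotient >> 1) & 1;                  /* quotient bit 1 */
    *c0 = (quotient >> 2) & 1;                  /* quotient bit 2 */
}
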
/*----------------------------------------------------------------------------
| Returns the remainder of the extended double-precision floating-point value
| `a' with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
{
    uint64_t quotient;
    return floatx80_modrem(a, b, false, &quotient, status);
}

/*----------------------------------------------------------------------------
| Returns the remainder of the extended double-precision floating-point value
| `a' with respect to the corresponding value `b', with the quotient truncated
| toward zero.
*----------------------------------------------------------------------------*/

floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
{
    uint64_t quotient;
    return floatx80_modrem(a, b, true, &quotient, status);
}

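/*----------------------------------------------------------------------------
| Illustrative sketch, not part of the original code: the difference between
| the two entry points above.  With a = 7 and b = 2 the exact quotient is
| 3.5; floatx80_rem() rounds it to the nearest even integer (4) and yields
| 7 - 4*2 = -1, while floatx80_mod() truncates it toward zero (3) and
| yields 7 - 3*2 = +1.  The helper name is hypothetical;
| int32_to_floatx80() is assumed from this library.
*----------------------------------------------------------------------------*/

static inline void example_rem_vs_mod(floatx80 *rem, floatx80 *mod_,
                                      float_status *s)
{
    floatx80 a = int32_to_floatx80(7, s);
    floatx80 b = int32_to_floatx80(2, s);

    *rem  = floatx80_rem(a, b, s);  /* -1.0: quotient 3.5 -> 4 (to even) */
    *mod_ = floatx80_mod(a, b, s);  /* +1.0: quotient 3.5 -> 3 (truncated) */
}
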
/*----------------------------------------------------------------------------
| Returns the remainder of the quadruple-precision floating-point value `a'
| with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

float128 float128_rem(float128 a, float128 b, float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
    int64_t sigMean0;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    if ( aExp == 0x7FFF ) {
        if (    ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        if ( ( bSig0 | bSig1 ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return float128_default_nan(status);
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    expDiff = aExp - bExp;
    if ( expDiff < -1 ) return a;
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000),
        aSig1,
        15 - ( expDiff < 0 ),
        &aSig0,
        &aSig1
    );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    q = le128( bSig0, bSig1, aSig0, aSig1 );
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    expDiff -= 64;
    /* Long division, 61 quotient bits per iteration. */
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        expDiff -= 61;
    }
    if ( -64 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        q >>= - expDiff;
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        expDiff += 52;
        if ( expDiff < 0 ) {
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        }
        else {
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        }
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
    }
    else {
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
    }
    /*
     * The quotient estimate can be slightly low: keep subtracting b until
     * the remainder goes negative, remembering the last non-negative one.
     */
    do {
        alternateASig0 = aSig0;
        alternateASig1 = aSig1;
        ++q;
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    } while ( 0 <= (int64_t) aSig0 );
    /*
     * Pick whichever of the two candidate remainders is nearer to zero,
     * breaking an exact tie toward the even quotient.
     */
    add128( aSig0, aSig1, alternateASig0, alternateASig1,
            (uint64_t *)&sigMean0, &sigMean1 );
    if (    ( sigMean0 < 0 )
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
    }
    zSign = ( (int64_t) aSig0 < 0 );
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
    return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
                                         status);
}
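
/*----------------------------------------------------------------------------
| Illustrative sketch, not part of the original code: because the IEEE
| remainder rounds the quotient to nearest, the result can be negative even
| for positive operands.  With a = 5 and b = 3 the exact quotient 5/3
| rounds to 2, so float128_rem() yields 5 - 2*3 = -1.  The helper name is
| hypothetical; int64_to_float128() is assumed from this library.
*----------------------------------------------------------------------------*/

static inline float128 example_float128_rem_five_three(float_status *s)
{
    float128 five  = int64_to_float128(5, s);
    float128 three = int64_to_float128(3, s);

    /* Evaluates to -1.0: nearest integer to 5/3 is 2, and 5 - 2*3 = -1. */
    return float128_rem(five, three, s);
}
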
static void __attribute__((constructor)) softfloat_init(void)
{
    union_float64 ua, ub, uc, ur;

    if (QEMU_NO_HARDFLOAT) {
        return;
    }
    /*
     * Test that the host's FMA is not obviously broken. For example,
     * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
     * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
     */
    /*
     * Here ua * ub + uc = 2^-1021 * (1 + 2^-52) * 2^-53 + 2^-1021
     *                   = 2^-1021 * (1 + 2^-53 + 2^-105).
     * The 2^-105 term survives only if the multiply-add is truly fused;
     * it forces the default round-to-nearest result up to
     * 2^-1021 * (1 + 2^-52), which is 0x0020000000000001 again.
     */
    ua.s = 0x0020000000000001ULL;
    ub.s = 0x3ca0000000000000ULL;
    uc.s = 0x0020000000000000ULL;
    ur.h = fma(ua.h, ub.h, uc.h);
    if (ur.s != 0x0020000000000001ULL) {
        force_soft_fma = true;
    }
}