]> git.proxmox.com Git - mirror_qemu.git/blob - fpu/softfloat.c
softfloat: Move uint_to_float to softfloat-parts.c.inc
[mirror_qemu.git] / fpu / softfloat.c
1 /*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43
44 ===============================================================================
45 */
46
47 /* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89
90 /* We only need stdlib for abort() */
91
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98
99 /*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
/*
 * Generate a helper that flushes a denormal input to a correctly-signed
 * zero and raises the input-denormal exception.  No check of
 * s->flush_inputs_to_zero is made here; the FLUSH1/2/3 wrappers below
 * perform that check before calling the generated function.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK
142
/*
 * Generate a one-operand flush helper: if input-denormal flushing is
 * enabled in *s, flush *a to a signed zero as needed.
 */
#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1
155
/*
 * Generate a two-operand flush helper: if input-denormal flushing is
 * enabled in *s, flush *a and *b to signed zeroes as needed.
 */
#define GEN_INPUT_FLUSH2(name, soft_t)                              \
    static inline void name(soft_t *a, soft_t *b, float_status *s) \
    {                                                               \
        if (likely(!s->flush_inputs_to_zero)) {                     \
            return;                                                 \
        }                                                           \
        soft_t ## _input_flush__nocheck(a, s);                      \
        soft_t ## _input_flush__nocheck(b, s);                      \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2
169
/*
 * Generate a three-operand flush helper (for fused multiply-add style
 * operations): if input-denormal flushing is enabled in *s, flush
 * *a, *b and *c to signed zeroes as needed.
 */
#define GEN_INPUT_FLUSH3(name, soft_t)                                         \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                          \
        if (likely(!s->flush_inputs_to_zero)) {                                \
            return;                                                            \
        }                                                                      \
        soft_t ## _input_flush__nocheck(a, s);                                 \
        soft_t ## _input_flush__nocheck(b, s);                                 \
        soft_t ## _input_flush__nocheck(c, s);                                 \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
184
185 /*
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
189 */
190 #if defined(__x86_64__)
191 # define QEMU_HARDFLOAT_1F32_USE_FP 0
192 # define QEMU_HARDFLOAT_1F64_USE_FP 1
193 # define QEMU_HARDFLOAT_2F32_USE_FP 0
194 # define QEMU_HARDFLOAT_2F64_USE_FP 1
195 # define QEMU_HARDFLOAT_3F32_USE_FP 0
196 # define QEMU_HARDFLOAT_3F64_USE_FP 1
197 #else
198 # define QEMU_HARDFLOAT_1F32_USE_FP 0
199 # define QEMU_HARDFLOAT_1F64_USE_FP 0
200 # define QEMU_HARDFLOAT_2F32_USE_FP 0
201 # define QEMU_HARDFLOAT_2F64_USE_FP 0
202 # define QEMU_HARDFLOAT_3F32_USE_FP 0
203 # define QEMU_HARDFLOAT_3F64_USE_FP 0
204 #endif
205
206 /*
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211 */
212 #if defined(__x86_64__) || defined(__aarch64__)
213 # define QEMU_HARDFLOAT_USE_ISINF 1
214 #else
215 # define QEMU_HARDFLOAT_USE_ISINF 0
216 #endif
217
218 /*
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
221 * already set.
222 */
223 #if defined(TARGET_PPC) || defined(__FAST_MATH__)
224 # if defined(__FAST_MATH__)
225 # warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226 IEEE implementation
227 # endif
228 # define QEMU_NO_HARDFLOAT 1
229 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230 #else
231 # define QEMU_NO_HARDFLOAT 0
232 # define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233 #endif
234
235 static inline bool can_use_fpu(const float_status *s)
236 {
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242 }
243
244 /*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
/*
 * A float32/float64 viewed either as its softfloat representation (s)
 * or as the host's native FP type (h), so the hardfloat code can move
 * between the two without explicit conversions.
 */
typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

/* Pre-/post-condition checks used by the float{32,64}_gen2 fast paths */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Two-operand operation variants: softfloat fallback and host (hard) */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float (*hard_f32_op2_fn)(float a, float b);
typedef double (*hard_f64_op2_fn)(double a, double b);
273
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287 }
288
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297 }
298
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311 }
312
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324 }
325
326 static inline bool f32_is_inf(union_float32 a)
327 {
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332 }
333
334 static inline bool f64_is_inf(union_float64 a)
335 {
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340 }
341
/*
 * Generic two-operand float32 operation: attempt the host FPU fast
 * path, falling back to the softfloat implementation when necessary.
 *
 * @hard/@soft: host and softfloat implementations of the operation
 * @pre:  checked after input flushing; must hold to use the hard path
 * @post: consulted when the hard result is tiny (|result| <= FLT_MIN);
 *        if it also holds, redo the operation in softfloat
 */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        /* Host overflowed to infinity; only the flag needs fixing up */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        /* Result may be subnormal; defer to softfloat for exact flags */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
372
/*
 * Generic two-operand float64 operation: attempt the host FPU fast
 * path, falling back to the softfloat implementation when necessary.
 * See float32_gen2 for the meaning of @hard, @soft, @pre and @post.
 */
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        /* Host overflowed to infinity; only the flag needs fixing up */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        /* Result may be subnormal; defer to softfloat for exact flags */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
403
/*----------------------------------------------------------------------------
| Returns the fraction bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline uint32_t extractFloat32Frac(float32 a)
{
    return float32_val(a) & 0x007FFFFF;    /* low 23 bits */
}

/*----------------------------------------------------------------------------
| Returns the exponent bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline int extractFloat32Exp(float32 a)
{
    return (float32_val(a) >> 23) & 0xFF;  /* 8 bits above the fraction */
}

/*----------------------------------------------------------------------------
| Returns the sign bit of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline bool extractFloat32Sign(float32 a)
{
    return float32_val(a) >> 31;           /* top bit */
}

/*----------------------------------------------------------------------------
| Returns the fraction bits of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline uint64_t extractFloat64Frac(float64 a)
{
    return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);  /* low 52 bits */
}

/*----------------------------------------------------------------------------
| Returns the exponent bits of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline int extractFloat64Exp(float64 a)
{
    return (float64_val(a) >> 52) & 0x7FF; /* 11 bits above the fraction */
}

/*----------------------------------------------------------------------------
| Returns the sign bit of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/

static inline bool extractFloat64Sign(float64 a)
{
    return float64_val(a) >> 63;           /* top bit */
}
457
458 /*
459 * Classify a floating point number. Everything above float_class_qnan
460 * is a NaN so cls >= float_class_qnan is any NaN.
461 */
462
/*
 * Ordering matters: everything from float_class_qnan upwards is a NaN,
 * so "cls >= float_class_qnan" tests for any NaN (see is_nan below).
 * Packed so FloatPartsN stays small.
 */
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

/* Produce the class-mask bit for a FloatClass value */
#define float_cmask(bit) (1u << (bit))

enum {
    float_cmask_zero = float_cmask(float_class_zero),
    float_cmask_normal = float_cmask(float_class_normal),
    float_cmask_inf = float_cmask(float_class_inf),
    float_cmask_qnan = float_cmask(float_class_qnan),
    float_cmask_snan = float_cmask(float_class_snan),

    /* Compound masks for common tests */
    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
};
484
485
/* Simple helpers for checking if, or what kind of, NaN we have */

/* True for either NaN kind; relies on qnan/snan being last in FloatClass */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}
501
502 /*
503 * Structure holding all of the decomposed parts of a float.
504 * The exponent is unbiased and the fraction is normalized.
505 *
506 * The fraction words are stored in big-endian word ordering,
507 * so that truncation from a larger format to a smaller format
508 * can be done simply by ignoring subsequent elements.
509 */
510
/* Decomposed 64-bit-fraction float (covers float16/bfloat16/32/64). */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;    /* unbiased */
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation. In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

/* Decomposed 128-bit-fraction float (float128); words are big-endian. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

/* 256-bit fraction, used as the wide intermediate for muladd. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;
545
546 /* These apply to the most significant word of each FloatPartsN. */
547 #define DECOMPOSED_BINARY_POINT 63
548 #define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
549
/* Structure holding all of the relevant parameters for a format.
 * exp_size: the size of the exponent field
 * exp_bias: the offset applied to the exponent field
 * exp_max: the maximum normalised exponent
 * frac_size: the size of the fraction field
 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based the size of fraction
 * frac_lsb: least significant bit of fraction
 * frac_lsbm1: the bit below the least significant bit (for rounding)
 * round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 * arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;
575
/*
 * Expand fields based on the size of exponent (E) and fraction (F).
 * The "& 63" forms compute positions modulo 64 so that shift counts
 * stay in range for any F up to 63; (-F - 1) & 63 is the shift that
 * places the fraction's top bit just below DECOMPOSED_BINARY_POINT.
 */
#define FLOAT_PARAMS(E, F)                                      \
    .exp_size       = E,                                        \
    .exp_bias       = ((1 << E) - 1) >> 1,                      \
    .exp_max        = (1 << E) - 1,                             \
    .frac_size      = F,                                        \
    .frac_shift     = (-F - 1) & 63,                            \
    .frac_lsb       = 1ull << ((-F - 1) & 63),                  \
    .frac_lsbm1     = 1ull << ((-F - 2) & 63),                  \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1,            \
    .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1
587
/* Half precision: 5 exponent bits, 10 fraction bits */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* Half precision with the ARM Alternative Half Precision flag set */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* bfloat16: float32's exponent (8 bits) with a truncated 7-bit fraction */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

/* Single precision: 8 exponent bits, 23 fraction bits */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* Double precision: 11 exponent bits, 52 fraction bits */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

/* Quad precision: 15 exponent bits, 112 fraction bits */
static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};
612
613 /* Unpack a float to parts, but do not canonicalize. */
614 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
615 {
616 const int f_size = fmt->frac_size;
617 const int e_size = fmt->exp_size;
618
619 *r = (FloatParts64) {
620 .cls = float_class_unclassified,
621 .sign = extract64(raw, f_size + e_size, 1),
622 .exp = extract64(raw, f_size, e_size),
623 .frac = extract64(raw, 0, f_size)
624 };
625 }
626
/* Per-format wrappers around unpack_raw64 */

static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}
646
647 static void float128_unpack_raw(FloatParts128 *p, float128 f)
648 {
649 const int f_size = float128_params.frac_size - 64;
650 const int e_size = float128_params.exp_size;
651
652 *p = (FloatParts128) {
653 .cls = float_class_unclassified,
654 .sign = extract64(f.high, f_size + e_size, 1),
655 .exp = extract64(f.high, f_size, e_size),
656 .frac_hi = extract64(f.high, 0, f_size),
657 .frac_lo = f.low,
658 };
659 }
660
661 /* Pack a float from parts, but do not canonicalize. */
662 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
663 {
664 const int f_size = fmt->frac_size;
665 const int e_size = fmt->exp_size;
666 uint64_t ret;
667
668 ret = (uint64_t)p->sign << (f_size + e_size);
669 ret = deposit64(ret, f_size, e_size, p->exp);
670 ret = deposit64(ret, 0, f_size, p->frac);
671 return ret;
672 }
673
/* Per-format wrappers around pack_raw64 */

static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}
693
694 static float128 float128_pack_raw(const FloatParts128 *p)
695 {
696 const int f_size = float128_params.frac_size - 64;
697 const int e_size = float128_params.exp_size;
698 uint64_t hi;
699
700 hi = (uint64_t)p->sign << (f_size + e_size);
701 hi = deposit64(hi, f_size, e_size, p->exp);
702 hi = deposit64(hi, 0, f_size, p->frac_hi);
703 return make_float128(hi, p->frac_lo);
704 }
705
706 /*----------------------------------------------------------------------------
707 | Functions and definitions to determine: (1) whether tininess for underflow
708 | is detected before or after rounding by default, (2) what (if anything)
709 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
710 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
711 | are propagated from function inputs to output. These details are target-
712 | specific.
713 *----------------------------------------------------------------------------*/
714 #include "softfloat-specialize.c.inc"
715
/*
 * Dispatch NAME to the parts64_/parts128_(/parts256_) implementation
 * whose FloatPartsN pointer type matches P, using QEMU_GENERIC.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
722
723 #define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
724 #define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
725
726 static void parts64_return_nan(FloatParts64 *a, float_status *s);
727 static void parts128_return_nan(FloatParts128 *a, float_status *s);
728
729 #define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
730
731 static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
732 float_status *s);
733 static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
734 float_status *s);
735
736 #define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
737
738 static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
739 FloatParts64 *c, float_status *s,
740 int ab_mask, int abc_mask);
741 static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
742 FloatParts128 *b,
743 FloatParts128 *c,
744 float_status *s,
745 int ab_mask, int abc_mask);
746
747 #define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
748 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
749
750 static void parts64_canonicalize(FloatParts64 *p, float_status *status,
751 const FloatFmt *fmt);
752 static void parts128_canonicalize(FloatParts128 *p, float_status *status,
753 const FloatFmt *fmt);
754
755 #define parts_canonicalize(A, S, F) \
756 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
757
758 static void parts64_uncanon(FloatParts64 *p, float_status *status,
759 const FloatFmt *fmt);
760 static void parts128_uncanon(FloatParts128 *p, float_status *status,
761 const FloatFmt *fmt);
762
763 #define parts_uncanon(A, S, F) \
764 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
765
766 static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
767 static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
768 static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
769
770 #define parts_add_normal(A, B) \
771 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
772
773 static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
774 static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
775 static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
776
777 #define parts_sub_normal(A, B) \
778 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
779
780 static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
781 float_status *s, bool subtract);
782 static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
783 float_status *s, bool subtract);
784
785 #define parts_addsub(A, B, S, Z) \
786 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
787
788 static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
789 float_status *s);
790 static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
791 float_status *s);
792
793 #define parts_mul(A, B, S) \
794 PARTS_GENERIC_64_128(mul, A)(A, B, S)
795
796 static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
797 FloatParts64 *c, int flags,
798 float_status *s);
799 static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
800 FloatParts128 *c, int flags,
801 float_status *s);
802
803 #define parts_muladd(A, B, C, Z, S) \
804 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
805
806 static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
807 float_status *s);
808 static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
809 float_status *s);
810
811 #define parts_div(A, B, S) \
812 PARTS_GENERIC_64_128(div, A)(A, B, S)
813
814 static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
815 int scale, int frac_size);
816 static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
817 int scale, int frac_size);
818
819 #define parts_round_to_int_normal(A, R, C, F) \
820 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
821
822 static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
823 int scale, float_status *s,
824 const FloatFmt *fmt);
825 static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
826 int scale, float_status *s,
827 const FloatFmt *fmt);
828
829 #define parts_round_to_int(A, R, C, S, F) \
830 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
831
832 static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
833 int scale, int64_t min, int64_t max,
834 float_status *s);
835 static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
836 int scale, int64_t min, int64_t max,
837 float_status *s);
838
839 #define parts_float_to_sint(P, R, Z, MN, MX, S) \
840 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
841
842 static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
843 int scale, uint64_t max,
844 float_status *s);
845 static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
846 int scale, uint64_t max,
847 float_status *s);
848
849 #define parts_float_to_uint(P, R, Z, M, S) \
850 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
851
852 static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
853 int scale, float_status *s);
854 static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
855 int scale, float_status *s);
856
857 #define parts_sint_to_float(P, I, Z, S) \
858 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
859
860 static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
861 int scale, float_status *s);
862 static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
863 int scale, float_status *s);
864
865 #define parts_uint_to_float(P, I, Z, S) \
866 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
867
868 /*
869 * Helper functions for softfloat-parts.c.inc, per-size operations.
870 */
871
872 #define FRAC_GENERIC_64_128(NAME, P) \
873 QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
874
875 #define FRAC_GENERIC_64_128_256(NAME, P) \
876 QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
877 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
878
/*
 * r = a + b over the fraction words; returns true if the addition
 * carried out of the most significant word.
 */
static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    /* Carry ripples from the low word up through the high word. */
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
903
/*
 * Add a 64-bit immediate C to the fraction: R = A + C; returns carry-out.
 * For 128 bits the carry from the low word is folded into the high word.
 */
static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
916
917 static void frac64_allones(FloatParts64 *a)
918 {
919 a->frac = -1;
920 }
921
922 static void frac128_allones(FloatParts128 *a)
923 {
924 a->frac_hi = a->frac_lo = -1;
925 }
926
927 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
928
929 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
930 {
931 return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
932 }
933
934 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
935 {
936 uint64_t ta = a->frac_hi, tb = b->frac_hi;
937 if (ta == tb) {
938 ta = a->frac_lo, tb = b->frac_lo;
939 if (ta == tb) {
940 return 0;
941 }
942 }
943 return ta < tb ? -1 : 1;
944 }
945
946 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
947
/* Zero the fraction of A. */
static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
959
/*
 * Fraction division: A.frac = A.frac / B.frac, with the quotient's lsb
 * used as a sticky bit for any non-zero remainder (so rounding sees
 * "inexact").  Returns true when the result required one extra left
 * shift, i.e. the caller must decrement the exponent.
 */
static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce the an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}

static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    /* As in frac64_div: pre-shift A so the quotient has exactly 128 bits. */
    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1040
1041 static bool frac64_eqz(FloatParts64 *a)
1042 {
1043 return a->frac == 0;
1044 }
1045
1046 static bool frac128_eqz(FloatParts128 *a)
1047 {
1048 return (a->frac_hi | a->frac_lo) == 0;
1049 }
1050
1051 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1052
/*
 * Widening fraction multiply: R (double-width) = A * B.
 * Dispatch is on A, since R is already the wider type.
 */
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1065
/*
 * Two's-complement negate of the fraction: A = -A.
 * Multi-word variants compute 0 - A with a borrow chain from the least
 * significant word upward; statement order matters.
 */
static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1088
1089 static int frac64_normalize(FloatParts64 *a)
1090 {
1091 if (a->frac) {
1092 int shift = clz64(a->frac);
1093 a->frac <<= shift;
1094 return shift;
1095 }
1096 return 64;
1097 }
1098
1099 static int frac128_normalize(FloatParts128 *a)
1100 {
1101 if (a->frac_hi) {
1102 int shl = clz64(a->frac_hi);
1103 a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1104 a->frac_lo <<= shl;
1105 return shl;
1106 } else if (a->frac_lo) {
1107 int shl = clz64(a->frac_lo);
1108 a->frac_hi = a->frac_lo << shl;
1109 a->frac_lo = 0;
1110 return shl + 64;
1111 }
1112 return 128;
1113 }
1114
/*
 * 256-bit normalize: shift the four-word fraction left until the msb of
 * the top word is set; return the total shift.  Whole-word shifts are
 * handled first by rotating the word variables, then a sub-word shift
 * of 1..63 bits is applied across all four words.
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            /* Already normalized. */
            return 0;
        }
        ret = shl;
    } else {
        /* Shift whole words so the first non-zero word lands in a0. */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            /* Entirely zero: nothing to shift in. */
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    /* Sub-word shift, propagating bits between adjacent words. */
    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1163
1164 static void frac64_shl(FloatParts64 *a, int c)
1165 {
1166 a->frac <<= c;
1167 }
1168
1169 static void frac128_shl(FloatParts128 *a, int c)
1170 {
1171 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1172
1173 if (c & 64) {
1174 a0 = a1, a1 = 0;
1175 }
1176
1177 c &= 63;
1178 if (c) {
1179 a0 = shl_double(a0, a1, c);
1180 a1 = a1 << c;
1181 }
1182
1183 a->frac_hi = a0;
1184 a->frac_lo = a1;
1185 }
1186
1187 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1188
1189 static void frac64_shr(FloatParts64 *a, int c)
1190 {
1191 a->frac >>= c;
1192 }
1193
1194 static void frac128_shr(FloatParts128 *a, int c)
1195 {
1196 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1197
1198 if (c & 64) {
1199 a1 = a0, a0 = 0;
1200 }
1201
1202 c &= 63;
1203 if (c) {
1204 a1 = shr_double(a0, a1, c);
1205 a0 = a0 >> c;
1206 }
1207
1208 a->frac_hi = a0;
1209 a->frac_lo = a1;
1210 }
1211
1212 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1213
/*
 * Shift the fraction right by C bits, "jamming" (OR-ing) all bits shifted
 * out into the result's lsb so that rounding still sees inexactness.
 * C may be any non-negative count, including >= the fraction width.
 */
static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            /* Everything shifted out: result is just the sticky bit. */
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}

static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        /* Whole-word shift: the low word is lost into the sticky bits. */
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Shifted out entirely; only the sticky bit survives. */
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    /* Sub-word shift: collect the bits falling off a1 before moving them. */
    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}

static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        /* Whole-word shifts, 128 then 64, accumulating lost words. */
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Shifted out entirely; only the sticky bit survives. */
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    /* Sub-word shift across all four words, lsb-first. */
    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1303
/*
 * Fraction subtraction: R = A - B; returns the borrow-out of the top word.
 * Multi-word variants chain usub64_borrow lsb-first; order is significant.
 */
static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1328
/*
 * Narrow a fraction to half width, jamming all discarded low bits into
 * the result's lsb so rounding still sees inexactness.
 */
static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)

/* Widen a fraction to double width; new low bits are zero (exact). */
static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1357
/*
 * Instantiate the generic parts/addsub templates for each fraction width.
 * N is the width being defined and W the next wider width used for
 * intermediates; the templates paste them into partsN_* / FloatPartsN
 * names via the macros below.
 */
#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN    glue(FloatParts,N)
#define FloatPartsW    glue(FloatParts,W)

/* 64-bit parts, widening to 128 bits. */
#define N 64
#define W 128

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

/* 128-bit parts, widening to 256 bits. */
#undef  N
#undef  W
#define N 128
#define W 256

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

/* 256-bit parts: addsub only, no wider type required. */
#undef  N
#undef  W
#define N 256

#include "softfloat-parts-addsub.c.inc"

#undef  N
#undef  W
#undef  partsN
#undef  FloatPartsN
#undef  FloatPartsW
1387
1388 /*
1389 * Pack/unpack routines with a specific FloatFmt.
1390 */
1391
/*
 * Unpack helpers: raw-decode a float of the given format into FloatParts
 * and canonicalize (classify, normalize subnormals) per float_status.
 */
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}

static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}

static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}

/*
 * Pack helpers: round/un-canonicalize the parts for the target format
 * (raising flags in *s as needed) and re-encode the raw bits.
 */
static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}

static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}

static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}

static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}

static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}

static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}

static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}

static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
1474
1475 /*
1476 * Addition and subtraction
1477 */
1478
1479 static float16 QEMU_FLATTEN
1480 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1481 {
1482 FloatParts64 pa, pb, *pr;
1483
1484 float16_unpack_canonical(&pa, a, status);
1485 float16_unpack_canonical(&pb, b, status);
1486 pr = parts_addsub(&pa, &pb, status, subtract);
1487
1488 return float16_round_pack_canonical(pr, status);
1489 }
1490
1491 float16 float16_add(float16 a, float16 b, float_status *status)
1492 {
1493 return float16_addsub(a, b, status, false);
1494 }
1495
1496 float16 float16_sub(float16 a, float16 b, float_status *status)
1497 {
1498 return float16_addsub(a, b, status, true);
1499 }
1500
1501 static float32 QEMU_SOFTFLOAT_ATTR
1502 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1503 {
1504 FloatParts64 pa, pb, *pr;
1505
1506 float32_unpack_canonical(&pa, a, status);
1507 float32_unpack_canonical(&pb, b, status);
1508 pr = parts_addsub(&pa, &pb, status, subtract);
1509
1510 return float32_round_pack_canonical(pr, status);
1511 }
1512
1513 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1514 {
1515 return soft_f32_addsub(a, b, status, false);
1516 }
1517
1518 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1519 {
1520 return soft_f32_addsub(a, b, status, true);
1521 }
1522
1523 static float64 QEMU_SOFTFLOAT_ATTR
1524 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1525 {
1526 FloatParts64 pa, pb, *pr;
1527
1528 float64_unpack_canonical(&pa, a, status);
1529 float64_unpack_canonical(&pb, b, status);
1530 pr = parts_addsub(&pa, &pb, status, subtract);
1531
1532 return float64_round_pack_canonical(pr, status);
1533 }
1534
1535 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1536 {
1537 return soft_f64_addsub(a, b, status, false);
1538 }
1539
1540 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1541 {
1542 return soft_f64_addsub(a, b, status, true);
1543 }
1544
/* Hardfloat primitives: perform the operation on the host FPU. */
static float hard_f32_add(float a, float b)
{
    return a + b;
}

static float hard_f32_sub(float a, float b)
{
    return a - b;
}

static double hard_f64_add(double a, double b)
{
    return a + b;
}

static double hard_f64_sub(double a, double b)
{
    return a - b;
}
1564
1565 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1566 {
1567 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1568 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1569 }
1570 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1571 }
1572
1573 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1574 {
1575 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1576 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1577 } else {
1578 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1579 }
1580 }
1581
/*
 * Dispatch add/sub through the hardfloat machinery: float*_gen2 tries
 * the host-FPU fast path (guarded by the pre/post predicates) and falls
 * back to the softfloat implementation otherwise.
 */
static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}

static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}

float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}

float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}

float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}

float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
1619
1620 static bfloat16 QEMU_FLATTEN
1621 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1622 {
1623 FloatParts64 pa, pb, *pr;
1624
1625 bfloat16_unpack_canonical(&pa, a, status);
1626 bfloat16_unpack_canonical(&pb, b, status);
1627 pr = parts_addsub(&pa, &pb, status, subtract);
1628
1629 return bfloat16_round_pack_canonical(pr, status);
1630 }
1631
1632 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1633 {
1634 return bfloat16_addsub(a, b, status, false);
1635 }
1636
1637 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1638 {
1639 return bfloat16_addsub(a, b, status, true);
1640 }
1641
1642 static float128 QEMU_FLATTEN
1643 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1644 {
1645 FloatParts128 pa, pb, *pr;
1646
1647 float128_unpack_canonical(&pa, a, status);
1648 float128_unpack_canonical(&pb, b, status);
1649 pr = parts_addsub(&pa, &pb, status, subtract);
1650
1651 return float128_round_pack_canonical(pr, status);
1652 }
1653
1654 float128 float128_add(float128 a, float128 b, float_status *status)
1655 {
1656 return float128_addsub(a, b, status, false);
1657 }
1658
1659 float128 float128_sub(float128 a, float128 b, float_status *status)
1660 {
1661 return float128_addsub(a, b, status, true);
1662 }
1663
1664 /*
1665 * Multiplication
1666 */
1667
1668 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1669 {
1670 FloatParts64 pa, pb, *pr;
1671
1672 float16_unpack_canonical(&pa, a, status);
1673 float16_unpack_canonical(&pb, b, status);
1674 pr = parts_mul(&pa, &pb, status);
1675
1676 return float16_round_pack_canonical(pr, status);
1677 }
1678
1679 static float32 QEMU_SOFTFLOAT_ATTR
1680 soft_f32_mul(float32 a, float32 b, float_status *status)
1681 {
1682 FloatParts64 pa, pb, *pr;
1683
1684 float32_unpack_canonical(&pa, a, status);
1685 float32_unpack_canonical(&pb, b, status);
1686 pr = parts_mul(&pa, &pb, status);
1687
1688 return float32_round_pack_canonical(pr, status);
1689 }
1690
1691 static float64 QEMU_SOFTFLOAT_ATTR
1692 soft_f64_mul(float64 a, float64 b, float_status *status)
1693 {
1694 FloatParts64 pa, pb, *pr;
1695
1696 float64_unpack_canonical(&pa, a, status);
1697 float64_unpack_canonical(&pb, b, status);
1698 pr = parts_mul(&pa, &pb, status);
1699
1700 return float64_round_pack_canonical(pr, status);
1701 }
1702
/* Hardfloat primitives: multiply on the host FPU. */
static float hard_f32_mul(float a, float b)
{
    return a * b;
}

static double hard_f64_mul(double a, double b)
{
    return a * b;
}

/* Public multiply: hardfloat fast path with softfloat fallback. */
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}

float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}
1726
1727 bfloat16 QEMU_FLATTEN
1728 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1729 {
1730 FloatParts64 pa, pb, *pr;
1731
1732 bfloat16_unpack_canonical(&pa, a, status);
1733 bfloat16_unpack_canonical(&pb, b, status);
1734 pr = parts_mul(&pa, &pb, status);
1735
1736 return bfloat16_round_pack_canonical(pr, status);
1737 }
1738
1739 float128 QEMU_FLATTEN
1740 float128_mul(float128 a, float128 b, float_status *status)
1741 {
1742 FloatParts128 pa, pb, *pr;
1743
1744 float128_unpack_canonical(&pa, a, status);
1745 float128_unpack_canonical(&pb, b, status);
1746 pr = parts_mul(&pa, &pb, status);
1747
1748 return float128_round_pack_canonical(pr, status);
1749 }
1750
1751 /*
1752 * Fused multiply-add
1753 */
1754
1755 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1756 int flags, float_status *status)
1757 {
1758 FloatParts64 pa, pb, pc, *pr;
1759
1760 float16_unpack_canonical(&pa, a, status);
1761 float16_unpack_canonical(&pb, b, status);
1762 float16_unpack_canonical(&pc, c, status);
1763 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1764
1765 return float16_round_pack_canonical(pr, status);
1766 }
1767
1768 static float32 QEMU_SOFTFLOAT_ATTR
1769 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1770 float_status *status)
1771 {
1772 FloatParts64 pa, pb, pc, *pr;
1773
1774 float32_unpack_canonical(&pa, a, status);
1775 float32_unpack_canonical(&pb, b, status);
1776 float32_unpack_canonical(&pc, c, status);
1777 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1778
1779 return float32_round_pack_canonical(pr, status);
1780 }
1781
1782 static float64 QEMU_SOFTFLOAT_ATTR
1783 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1784 float_status *status)
1785 {
1786 FloatParts64 pa, pb, pc, *pr;
1787
1788 float64_unpack_canonical(&pa, a, status);
1789 float64_unpack_canonical(&pb, b, status);
1790 float64_unpack_canonical(&pc, c, status);
1791 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1792
1793 return float64_round_pack_canonical(pr, status);
1794 }
1795
1796 static bool force_soft_fma;
1797
/*
 * float32 fused multiply-add with a hardfloat fast path: try the host
 * fmaf() when inputs are flushed zero-or-normal, falling back to
 * soft_f32_muladd for any case the fast path cannot prove correct.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* The halve-result variant has no host equivalent. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* Compute the signed zero product explicitly, then add the addend. */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /* Keep the originals in case we must fall back to softfloat. */
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Possible underflow: redo with softfloat for exact flags. */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
1868
/*
 * float64 fused multiply-add with a hardfloat fast path, mirroring
 * float32_muladd above but using the host fma().
 */
float64 QEMU_FLATTEN
float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
{
    union_float64 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* The halve-result variant has no host equivalent. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
        union_float64 up;
        bool prod_sign;

        /* Compute the signed zero product explicitly, then add the addend. */
        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float64_set_sign(float64_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /* Keep the originals in case we must fall back to softfloat. */
        union_float64 ua_orig = ua;
        union_float64 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fma(ua.h, ub.h, uc.h);

        if (unlikely(f64_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
            /*
             * NOTE(review): FLT_MIN rather than DBL_MIN — presumably a
             * deliberately conservative threshold (falls back to softfloat
             * more often than strictly necessary, which is safe).  Confirm
             * against upstream history before changing.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float64_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}
1939
1940 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1941 int flags, float_status *status)
1942 {
1943 FloatParts64 pa, pb, pc, *pr;
1944
1945 bfloat16_unpack_canonical(&pa, a, status);
1946 bfloat16_unpack_canonical(&pb, b, status);
1947 bfloat16_unpack_canonical(&pc, c, status);
1948 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1949
1950 return bfloat16_round_pack_canonical(pr, status);
1951 }
1952
1953 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1954 int flags, float_status *status)
1955 {
1956 FloatParts128 pa, pb, pc, *pr;
1957
1958 float128_unpack_canonical(&pa, a, status);
1959 float128_unpack_canonical(&pb, b, status);
1960 float128_unpack_canonical(&pc, c, status);
1961 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1962
1963 return float128_round_pack_canonical(pr, status);
1964 }
1965
1966 /*
1967 * Division
1968 */
1969
1970 float16 float16_div(float16 a, float16 b, float_status *status)
1971 {
1972 FloatParts64 pa, pb, *pr;
1973
1974 float16_unpack_canonical(&pa, a, status);
1975 float16_unpack_canonical(&pb, b, status);
1976 pr = parts_div(&pa, &pb, status);
1977
1978 return float16_round_pack_canonical(pr, status);
1979 }
1980
1981 static float32 QEMU_SOFTFLOAT_ATTR
1982 soft_f32_div(float32 a, float32 b, float_status *status)
1983 {
1984 FloatParts64 pa, pb, *pr;
1985
1986 float32_unpack_canonical(&pa, a, status);
1987 float32_unpack_canonical(&pb, b, status);
1988 pr = parts_div(&pa, &pb, status);
1989
1990 return float32_round_pack_canonical(pr, status);
1991 }
1992
1993 static float64 QEMU_SOFTFLOAT_ATTR
1994 soft_f64_div(float64 a, float64 b, float_status *status)
1995 {
1996 FloatParts64 pa, pb, *pr;
1997
1998 float64_unpack_canonical(&pa, a, status);
1999 float64_unpack_canonical(&pb, b, status);
2000 pr = parts_div(&pa, &pb, status);
2001
2002 return float64_round_pack_canonical(pr, status);
2003 }
2004
/* Hardfloat primitives: divide on the host FPU. */
static float hard_f32_div(float a, float b)
{
    return a / b;
}

static double hard_f64_div(double a, double b)
{
    return a / b;
}
2014
2015 static bool f32_div_pre(union_float32 a, union_float32 b)
2016 {
2017 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2018 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2019 fpclassify(b.h) == FP_NORMAL;
2020 }
2021 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2022 }
2023
2024 static bool f64_div_pre(union_float64 a, union_float64 b)
2025 {
2026 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2027 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2028 fpclassify(b.h) == FP_NORMAL;
2029 }
2030 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2031 }
2032
2033 static bool f32_div_post(union_float32 a, union_float32 b)
2034 {
2035 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2036 return fpclassify(a.h) != FP_ZERO;
2037 }
2038 return !float32_is_zero(a.s);
2039 }
2040
2041 static bool f64_div_post(union_float64 a, union_float64 b)
2042 {
2043 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2044 return fpclassify(a.h) != FP_ZERO;
2045 }
2046 return !float64_is_zero(a.s);
2047 }
2048
/* Public divide: hardfloat fast path with softfloat fallback. */
float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}

float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}
2062
2063 bfloat16 QEMU_FLATTEN
2064 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2065 {
2066 FloatParts64 pa, pb, *pr;
2067
2068 bfloat16_unpack_canonical(&pa, a, status);
2069 bfloat16_unpack_canonical(&pb, b, status);
2070 pr = parts_div(&pa, &pb, status);
2071
2072 return bfloat16_round_pack_canonical(pr, status);
2073 }
2074
2075 float128 QEMU_FLATTEN
2076 float128_div(float128 a, float128 b, float_status *status)
2077 {
2078 FloatParts128 pa, pb, *pr;
2079
2080 float128_unpack_canonical(&pa, a, status);
2081 float128_unpack_canonical(&pb, b, status);
2082 pr = parts_div(&pa, &pb, status);
2083
2084 return float128_round_pack_canonical(pr, status);
2085 }
2086
2087 /*
2088 * Float to Float conversions
2089 *
2090 * Returns the result of converting one float format to another. The
2091 * conversion is performed according to the IEC/IEEE Standard for
2092 * Binary Floating-Point Arithmetic.
2093 *
2094 * Usually this only needs to take care of raising invalid exceptions
2095 * and handling the conversion on NaNs.
2096 */
2097
/*
 * Convert parts toward the ARM "alternative half precision" format,
 * which has no NaN or Inf encodings: both are squashed (with Invalid
 * raised) into representable values.
 */
static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        /*
         * There is no NaN in the destination format.  Raise Invalid
         * and return a zero with the sign of the input NaN.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_zero;
        break;

    case float_class_inf:
        /*
         * There is no Inf in the destination format.  Raise Invalid
         * and return the maximum normal with the correct sign.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_normal;
        a->exp = float16_params_ahp.exp_max;
        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
                                  float16_params_ahp.frac_size + 1);
        break;

    case float_class_normal:
    case float_class_zero:
        /* Representable as-is. */
        break;

    default:
        g_assert_not_reached();
    }
}
2131
2132 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2133 {
2134 if (is_nan(a->cls)) {
2135 parts_return_nan(a, s);
2136 }
2137 }
2138
2139 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2140 {
2141 if (is_nan(a->cls)) {
2142 parts_return_nan(a, s);
2143 }
2144 }
2145
/* Dispatch to the 64-bit or 128-bit variant based on the type of P. */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2148
/*
 * Narrowing float-to-float conversion: fill the 64-bit parts *a from
 * the 128-bit parts *b.  Normal numbers have their fraction narrowed
 * with frac_truncjam(); NaNs keep only the high fraction bits before
 * the usual NaN processing.  Other classes carry no fraction payload.
 */
static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
                                        float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;

    if (a->cls == float_class_normal) {
        frac_truncjam(a, b);
    } else if (is_nan(a->cls)) {
        /* Discard the low bits of the NaN. */
        a->frac = b->frac_hi;
        parts_return_nan(a, s);
    }
}
2164
/*
 * Widening float-to-float conversion: fill the 128-bit parts *a from
 * the 64-bit parts *b.  The fraction is widened unconditionally and
 * NaNs then go through the usual NaN processing.
 */
static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
                                       float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;
    frac_widen(a, b);

    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}
2177
/*
 * float16 -> float32.  'ieee' selects whether the input is decoded
 * with the IEEE half-precision parameters or the Arm Alternative
 * Half Precision (AHP) parameters.
 */
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 p;

    float16a_unpack_canonical(&p, a, s, fmt16);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

/* float16 -> float64, with the same 'ieee' input-format selection. */
float64 float16_to_float64(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 p;

    float16a_unpack_canonical(&p, a, s, fmt16);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

/*
 * float32 -> float16.  For AHP output (!ieee), parts_float_to_ahp()
 * replaces NaNs and infinities, which AHP cannot represent.
 */
float16 float32_to_float16(float32 a, bool ieee, float_status *s)
{
    FloatParts64 p;
    const FloatFmt *fmt;

    float32_unpack_canonical(&p, a, s);
    if (ieee) {
        parts_float_to_float(&p, s);
        fmt = &float16_params;
    } else {
        parts_float_to_ahp(&p, s);
        fmt = &float16_params_ahp;
    }
    return float16a_round_pack_canonical(&p, s, fmt);
}
2213
/* Softfloat fallback for float32 -> float64. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_float32_to_float64(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

/*
 * float32 -> float64.  Normal numbers and zeroes are handled inline
 * via a host-float assignment; everything else (inf, NaN, subnormal)
 * takes the softfloat path.
 */
float64 float32_to_float64(float32 a, float_status *s)
{
    if (likely(float32_is_normal(a))) {
        /* Widening conversion can never produce inexact results. */
        union_float32 uf;
        union_float64 ud;
        uf.s = a;
        ud.h = uf.h;
        return ud.s;
    } else if (float32_is_zero(a)) {
        return float64_set_sign(float64_zero, float32_is_neg(a));
    } else {
        return soft_float32_to_float64(a, s);
    }
}
2239
/*
 * float64 -> float16.  As with float32_to_float16, 'ieee' selects
 * between IEEE half precision and the Arm AHP output format.
 */
float16 float64_to_float16(float64 a, bool ieee, float_status *s)
{
    FloatParts64 p;
    const FloatFmt *fmt;

    float64_unpack_canonical(&p, a, s);
    if (ieee) {
        parts_float_to_float(&p, s);
        fmt = &float16_params;
    } else {
        parts_float_to_ahp(&p, s);
        fmt = &float16_params_ahp;
    }
    return float16a_round_pack_canonical(&p, s, fmt);
}

/* float64 -> float32: narrowing via FloatParts64. */
float32 float64_to_float32(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}
2264
/*
 * Conversions between bfloat16 and float32/float64.  All go through
 * the canonical FloatParts64 representation; only NaNs need special
 * treatment (see parts_float_to_float).
 */
float32 bfloat16_to_float32(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float64 bfloat16_to_float64(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 float32_to_bfloat16(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}

bfloat16 float64_to_bfloat16(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}
2300
/*
 * Conversions between float128 and float32/float64.  Narrowing uses
 * parts_float_to_float_narrow to reduce the 128-bit fraction;
 * widening uses parts_float_to_float_widen.
 */
float32 float128_to_float32(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float32_round_pack_canonical(&p64, s);
}

float64 float128_to_float64(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float64_round_pack_canonical(&p64, s);
}

float128 float32_to_float128(float32 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float32_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}

float128 float64_to_float128(float64 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float64_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}
2340
2341 /*
2342 * Round to integral value
2343 */
2344
/*
 * Round to an integral value in the same floating-point format,
 * using the rounding mode currently configured in *s (scale 0).
 */
float16 float16_round_to_int(float16 a, float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
    return float16_round_pack_canonical(&p, s);
}

float32 float32_round_to_int(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
    return float32_round_pack_canonical(&p, s);
}

float64 float64_round_to_int(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, s);
}

float128 float128_round_to_int(float128 a, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
    return float128_round_pack_canonical(&p, s);
}
2389
2390 /*
2391 * Floating-point to signed integer conversions
2392 */
2393
/*
 * Convert float16 to a signed integer.  'scale' is applied before
 * conversion (cf. the _scalbn naming), 'rmode' selects the rounding,
 * and the INTn_MIN/INTn_MAX arguments give parts_float_to_sint the
 * representable range of the result type.
 */
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                              float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
2429
/* As float16_to_int*_scalbn, but for float32 and float64 inputs. */
int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
2483
/*
 * Signed conversions from bfloat16 and float128.  The float128
 * variants are static: only the 0-scale wrappers below are public.
 */
int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
2528
/*
 * Convenience wrappers: convert to a signed integer using the
 * rounding mode currently configured in *s, with no scaling.
 */
int8_t float16_to_int8(float16 a, float_status *s)
{
    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float16_to_int32(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float16_to_int64(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float32_to_int16(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float32_to_int32(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float32_to_int64(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float64_to_int16(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float64_to_int32(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float64_to_int64(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float128_to_int32(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float128_to_int64(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}
2588
/*
 * Convenience wrappers: convert to a signed integer with
 * round-to-zero (truncation), regardless of the mode in *s.
 */
int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2643
/*
 * bfloat16 signed-integer wrappers: first using the current rounding
 * mode from *s, then the round-to-zero (truncating) variants.
 */
int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2673
2674 /*
2675 * Floating-point to unsigned integer conversions
2676 */
2677
/*
 * Convert float16 to an unsigned integer.  'scale' is applied before
 * conversion, 'rmode' selects the rounding, and UINTn_MAX gives
 * parts_float_to_uint the upper bound of the result type.
 */
uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
}

uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
2713
/* As float16_to_uint*_scalbn, but for float32 and float64 inputs. */
uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
2767
/*
 * Unsigned conversions from bfloat16 and float128.  The float128
 * variants are static: only the 0-scale wrappers below are public.
 */
uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
2812
/*
 * Convenience wrappers: convert to an unsigned integer using the
 * rounding mode currently configured in *s, with no scaling.
 */
uint8_t float16_to_uint8(float16 a, float_status *s)
{
    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float128_to_uint32(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float128_to_uint64(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}
2872
/*
 * Convenience wrappers: convert to an unsigned integer with
 * round-to-zero (truncation), regardless of the mode in *s.
 */
uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2927
/*
 * bfloat16 unsigned-integer wrappers: first using the current
 * rounding mode from *s, then the round-to-zero variants.
 */
uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2957
2958 /*
2959 * Signed integer to floating-point conversions
2960 */
2961
/*
 * Signed integer to float16.  Narrower input types simply widen to
 * int64_t and reuse the 64-bit implementation; 'scale' is applied
 * as in scalbn by parts_sint_to_float.
 */
float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return float16_round_pack_canonical(&p, status);
}

float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
2999
3000 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3001 {
3002 FloatParts64 p;
3003
3004 parts64_sint_to_float(&p, a, scale, status);
3005 return float32_round_pack_canonical(&p, status);
3006 }
3007
/* Narrower signed inputs widen to int64_t and share one path. */
float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}
3032
/*
 * Signed integer to float64; narrower inputs widen to int64_t and
 * reuse the 64-bit implementation.
 */
float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return float64_round_pack_canonical(&p, status);
}

float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}
3065
/*
 * Signed integer to bfloat16; narrower inputs widen to int64_t and
 * reuse the 64-bit implementation.
 */
bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}

bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}
3098
/* Signed integer to float128; no scaled (_scalbn) variant exists. */
float128 int64_to_float128(int64_t a, float_status *status)
{
    FloatParts128 p;

    parts_sint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}

float128 int32_to_float128(int32_t a, float_status *status)
{
    return int64_to_float128(a, status);
}
3111
3112 /*
3113 * Unsigned Integer to floating-point conversions
3114 */
3115
/*
 * Unsigned integer to float16.  Narrower input types widen to
 * uint64_t and reuse the 64-bit implementation; 'scale' is applied
 * as in scalbn by parts_uint_to_float.
 */
float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float16_round_pack_canonical(&p, status);
}

float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
3153
/*
 * Unsigned integer to float32; narrower inputs widen to uint64_t and
 * reuse the 64-bit implementation.
 */
float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float32_round_pack_canonical(&p, status);
}

float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}
3186
/*
 * Unsigned integer to float64; narrower inputs widen to uint64_t and
 * reuse the 64-bit implementation.
 */
float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float64_round_pack_canonical(&p, status);
}

float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}
3219
/*
 * Unsigned integer to bfloat16; narrower inputs widen to uint64_t
 * and reuse the 64-bit implementation.
 */
bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}

bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}
3252
/* Convert `a' to float128, with no scaling (scale fixed at 0). */
float128 uint64_to_float128(uint64_t a, float_status *status)
{
    FloatParts128 p;

    parts_uint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}
3260
3261 /* Float Min/Max */
3262 /* min() and max() functions. These can't be implemented as
3263 * 'compare and pick one input' because that would mishandle
3264 * NaNs and +0 vs -0.
3265 *
3266 * minnum() and maxnum() functions. These are similar to the min()
3267 * and max() functions but if one of the arguments is a QNaN and
3268 * the other is numerical then the numerical argument is returned.
3269 * SNaNs will get quietened before being returned.
3270 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3271 * and maxNum() operations. min() and max() are the typical min/max
3272 * semantics provided by many CPUs which predate that specification.
3273 *
 * minnummag() and maxnummag() functions correspond to minNumMag()
 * and maxNumMag() from the IEEE-754 2008.
3276 */
static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
                                  bool ieee, bool ismag, float_status *s)
{
    if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
        if (ieee) {
            /* Takes two floating-point values `a' and `b', one of
             * which is a NaN, and returns the appropriate NaN
             * result. If either `a' or `b' is a signaling NaN,
             * the invalid exception is raised.
             */
            if (is_snan(a.cls) || is_snan(b.cls)) {
                return *parts_pick_nan(&a, &b, s);
            } else if (is_nan(a.cls) && !is_nan(b.cls)) {
                /* minnum/maxnum: one QNaN operand -> return the number. */
                return b;
            } else if (is_nan(b.cls) && !is_nan(a.cls)) {
                return a;
            }
        }
        /* min/max semantics, or both operands NaN: propagate a NaN. */
        return *parts_pick_nan(&a, &b, s);
    } else {
        int a_exp, b_exp;

        /* Flatten the class into a comparable exponent: infinities
         * compare larger than any finite value, zeroes smaller. */
        switch (a.cls) {
        case float_class_normal:
            a_exp = a.exp;
            break;
        case float_class_inf:
            a_exp = INT_MAX;
            break;
        case float_class_zero:
            a_exp = INT_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }
        switch (b.cls) {
        case float_class_normal:
            b_exp = b.exp;
            break;
        case float_class_inf:
            b_exp = INT_MAX;
            break;
        case float_class_zero:
            b_exp = INT_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }

        /* minnummag/maxnummag: order by magnitude first; fall through
         * to the signed compare only when the magnitudes are equal. */
        if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
            bool a_less = a_exp < b_exp;
            if (a_exp == b_exp) {
                a_less = a.frac < b.frac;
            }
            return a_less ^ ismin ? b : a;
        }

        if (a.sign == b.sign) {
            /* Same sign: magnitude ordering, inverted for negatives. */
            bool a_less = a_exp < b_exp;
            if (a_exp == b_exp) {
                a_less = a.frac < b.frac;
            }
            return a.sign ^ a_less ^ ismin ? b : a;
        } else {
            /* Differing signs: the negative operand is the smaller,
             * which also orders -0 below +0. */
            return a.sign ^ ismin ? b : a;
        }
    }
}
3347
3348 #define MINMAX(sz, name, ismin, isiee, ismag) \
3349 float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
3350 float_status *s) \
3351 { \
3352 FloatParts64 pa, pb, pr; \
3353 float ## sz ## _unpack_canonical(&pa, a, s); \
3354 float ## sz ## _unpack_canonical(&pb, b, s); \
3355 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
3356 return float ## sz ## _round_pack_canonical(&pr, s); \
3357 }
3358
3359 MINMAX(16, min, true, false, false)
3360 MINMAX(16, minnum, true, true, false)
3361 MINMAX(16, minnummag, true, true, true)
3362 MINMAX(16, max, false, false, false)
3363 MINMAX(16, maxnum, false, true, false)
3364 MINMAX(16, maxnummag, false, true, true)
3365
3366 MINMAX(32, min, true, false, false)
3367 MINMAX(32, minnum, true, true, false)
3368 MINMAX(32, minnummag, true, true, true)
3369 MINMAX(32, max, false, false, false)
3370 MINMAX(32, maxnum, false, true, false)
3371 MINMAX(32, maxnummag, false, true, true)
3372
3373 MINMAX(64, min, true, false, false)
3374 MINMAX(64, minnum, true, true, false)
3375 MINMAX(64, minnummag, true, true, true)
3376 MINMAX(64, max, false, false, false)
3377 MINMAX(64, maxnum, false, true, false)
3378 MINMAX(64, maxnummag, false, true, true)
3379
3380 #undef MINMAX
3381
3382 #define BF16_MINMAX(name, ismin, isiee, ismag) \
3383 bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
3384 { \
3385 FloatParts64 pa, pb, pr; \
3386 bfloat16_unpack_canonical(&pa, a, s); \
3387 bfloat16_unpack_canonical(&pb, b, s); \
3388 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
3389 return bfloat16_round_pack_canonical(&pr, s); \
3390 }
3391
3392 BF16_MINMAX(min, true, false, false)
3393 BF16_MINMAX(minnum, true, true, false)
3394 BF16_MINMAX(minnummag, true, true, true)
3395 BF16_MINMAX(max, false, false, false)
3396 BF16_MINMAX(maxnum, false, true, false)
3397 BF16_MINMAX(maxnummag, false, true, true)
3398
3399 #undef BF16_MINMAX
3400
3401 /* Floating point compare */
static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
                                    float_status *s)
{
    if (is_nan(a.cls) || is_nan(b.cls)) {
        /* A quiet compare raises invalid only for signaling NaNs;
         * a signaling compare raises it for any NaN operand. */
        if (!is_quiet ||
            a.cls == float_class_snan ||
            b.cls == float_class_snan) {
            float_raise(float_flag_invalid, s);
        }
        return float_relation_unordered;
    }

    if (a.cls == float_class_zero) {
        if (b.cls == float_class_zero) {
            /* +0 and -0 compare equal. */
            return float_relation_equal;
        }
        return b.sign ? float_relation_greater : float_relation_less;
    } else if (b.cls == float_class_zero) {
        return a.sign ? float_relation_less : float_relation_greater;
    }

    /* The only really important thing about infinity is its sign. If
     * both are infinities the sign marks the smallest of the two.
     */
    if (a.cls == float_class_inf) {
        if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
            return float_relation_equal;
        }
        return a.sign ? float_relation_less : float_relation_greater;
    } else if (b.cls == float_class_inf) {
        return b.sign ? float_relation_greater : float_relation_less;
    }

    if (a.sign != b.sign) {
        return a.sign ? float_relation_less : float_relation_greater;
    }

    /* Finite, non-zero, same sign: compare exponents then fractions,
     * with the sense inverted when both operands are negative. */
    if (a.exp == b.exp) {
        if (a.frac == b.frac) {
            return float_relation_equal;
        }
        if (a.sign) {
            return a.frac > b.frac ?
                float_relation_less : float_relation_greater;
        } else {
            return a.frac > b.frac ?
                float_relation_greater : float_relation_less;
        }
    } else {
        if (a.sign) {
            return a.exp > b.exp ? float_relation_less : float_relation_greater;
        } else {
            return a.exp > b.exp ? float_relation_greater : float_relation_less;
        }
    }
}
3458
/* Generate the softfloat compare helpers for float{16,32,64}; the
 * actual ordering logic lives in compare_floats() above. */
#define COMPARE(name, attr, sz)                                         \
static int attr                                                         \
name(float ## sz a, float ## sz b, bool is_quiet, float_status *s)      \
{                                                                       \
    FloatParts64 pa, pb;                                                \
    float ## sz ## _unpack_canonical(&pa, a, s);                        \
    float ## sz ## _unpack_canonical(&pb, b, s);                        \
    return compare_floats(pa, pb, is_quiet, s);                         \
}

COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)

#undef COMPARE

/* Signaling compare: raises invalid for any NaN operand. */
FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return soft_f16_compare(a, b, false, s);
}

/* Quiet compare: raises invalid only for signaling NaNs. */
FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return soft_f16_compare(a, b, true, s);
}
3484
/*
 * Hardfloat fast path for float32 compare: classify ordered operands
 * with the host's non-signaling comparison macros, and fall back to
 * the softfloat implementation for the unordered case (so NaN flag
 * handling happens there) or when hardfloat is disabled.
 */
static FloatRelation QEMU_FLATTEN
f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f32_compare(ua.s, ub.s, is_quiet, s);
}

/* Signaling compare: raises invalid for any NaN operand. */
FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return f32_compare(a, b, false, s);
}

/* Quiet compare: raises invalid only for signaling NaNs. */
FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return f32_compare(a, b, true, s);
}
3523
/*
 * Hardfloat fast path for float64 compare; mirror image of
 * f32_compare() above.
 */
static FloatRelation QEMU_FLATTEN
f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /* The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return soft_f64_compare(ua.s, ub.s, is_quiet, s);
}

/* Signaling compare: raises invalid for any NaN operand. */
FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return f64_compare(a, b, false, s);
}

/* Quiet compare: raises invalid only for signaling NaNs. */
FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return f64_compare(a, b, true, s);
}
3562
/* bfloat16 compare: softfloat only, no hardfloat fast path. */
static FloatRelation QEMU_FLATTEN
soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
{
    FloatParts64 pa, pb;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    return compare_floats(pa, pb, is_quiet, s);
}

/* Signaling compare: raises invalid for any NaN operand. */
FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    return soft_bf16_compare(a, b, false, s);
}

/* Quiet compare: raises invalid only for signaling NaNs. */
FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    return soft_bf16_compare(a, b, true, s);
}
3582
/* Multiply A by 2 raised to the power N. */
static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
{
    if (unlikely(is_nan(a.cls))) {
        /* Propagate (and quieten) NaN inputs. */
        parts_return_nan(&a, s);
    }
    if (a.cls == float_class_normal) {
        /* The largest float type (even though not supported by FloatParts64)
         * is float128, which has a 15 bit exponent. Bounding N to 16 bits
         * still allows rounding to infinity, without allowing overflow
         * within the int32_t that backs FloatParts64.exp.
         */
        n = MIN(MAX(n, -0x10000), 0x10000);
        a.exp += n;
    }
    /* Zero and infinity are unaffected by scaling. */
    return a;
}
3600
/* scalbn entry points: unpack, adjust the exponent via
 * scalbn_decomposed(), and round/pack in the same format. */
float16 float16_scalbn(float16 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float16_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float16_round_pack_canonical(&pr, status);
}

float32 float32_scalbn(float32 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float32_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float32_round_pack_canonical(&pr, status);
}

float64 float64_scalbn(float64 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return float64_round_pack_canonical(&pr, status);
}

bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts64 pa, pr;

    bfloat16_unpack_canonical(&pa, a, status);
    pr = scalbn_decomposed(pa, n, status);
    return bfloat16_round_pack_canonical(&pr, status);
}
3636
3637 /*
3638 * Square Root
3639 *
3640 * The old softfloat code did an approximation step before zeroing in
3641 * on the final result. However for simpleness we just compute the
3642 * square root by iterating down from the implicit bit to enough extra
3643 * bits to ensure we get a correctly rounded result.
3644 *
3645 * This does mean however the calculation is slower than before,
3646 * especially for 64 bit floats.
3647 */
3648
static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
{
    uint64_t a_frac, r_frac, s_frac;
    int bit, last_bit;

    if (is_nan(a.cls)) {
        /* Propagate (and quieten) NaN inputs. */
        parts_return_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_zero) {
        return a;  /* sqrt(+-0) = +-0 */
    }
    if (a.sign) {
        /* Square root of a negative non-zero value is invalid. */
        float_raise(float_flag_invalid, s);
        parts_default_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_inf) {
        return a;  /* sqrt(+inf) = +inf */
    }

    assert(a.cls == float_class_normal);

    /* We need two overflow bits at the top. Adding room for that is a
     * right shift. If the exponent is odd, we can discard the low bit
     * by multiplying the fraction by 2; that's a left shift. Combine
     * those and we shift right by 1 if the exponent is odd, otherwise 2.
     */
    a_frac = a.frac >> (2 - (a.exp & 1));
    a.exp >>= 1;

    /* Bit-by-bit computation of sqrt. */
    r_frac = 0;
    s_frac = 0;

    /* Iterate from implicit bit down to the 3 extra bits to compute a
     * properly rounded result. Remember we've inserted two more bits
     * at the top, so these positions are two less.
     */
    bit = DECOMPOSED_BINARY_POINT - 2;
    last_bit = MAX(p->frac_shift - 4, 0);
    do {
        uint64_t q = 1ULL << bit;
        uint64_t t_frac = s_frac + q;
        if (t_frac <= a_frac) {
            /* This bit of the root is 1: commit it to the result and
             * subtract its contribution from the remainder. */
            s_frac = t_frac + q;
            a_frac -= t_frac;
            r_frac += q;
        }
        a_frac <<= 1;
    } while (--bit >= last_bit);

    /* Undo the right shift done above. If there is any remaining
     * fraction, the result is inexact. Set the sticky bit.
     */
    a.frac = (r_frac << 2) + (a_frac != 0);

    return a;
}
3708
/* Softfloat square root entry points: unpack to canonical parts,
 * compute via sqrt_float(), round/pack in the source format. */
float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
{
    FloatParts64 pa, pr;

    float16_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float16_params);
    return float16_round_pack_canonical(&pr, status);
}

static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a, float_status *status)
{
    FloatParts64 pa, pr;

    float32_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float32_params);
    return float32_round_pack_canonical(&pr, status);
}

static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a, float_status *status)
{
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float64_params);
    return float64_round_pack_canonical(&pr, status);
}
3737
/*
 * Hardfloat fast path for float32 sqrt: only non-negative zero or
 * normal inputs use the host sqrtf(); anything else (NaN, inf,
 * denormal, negative) falls back to softfloat for correct flag and
 * NaN handling.
 */
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        /* Classify via the host FPU... */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        /* ...or via integer inspection of the bit pattern. */
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}
3764
/* Hardfloat fast path for float64 sqrt; mirror of float32_sqrt(). */
float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        /* Classify via the host FPU... */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        /* ...or via integer inspection of the bit pattern. */
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
3791
/* bfloat16 sqrt: softfloat only, no hardfloat fast path. */
bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
    FloatParts64 pa, pr;

    bfloat16_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &bfloat16_params);
    return bfloat16_round_pack_canonical(&pr, status);
}
3800
3801 /*----------------------------------------------------------------------------
3802 | The pattern for a default generated NaN.
3803 *----------------------------------------------------------------------------*/
3804
float16 float16_default_nan(float_status *status)
{
    FloatParts64 p;

    /* Shift the canonical fraction down into this format's field
     * position before raw-packing; same pattern for each type below. */
    parts_default_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

float128 float128_default_nan(float_status *status)
{
    FloatParts128 p;

    parts_default_nan(&p, status);
    /* The 128-bit fraction spans two words; use the frac_shr helper. */
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}

bfloat16 bfloat16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}
3849
3850 /*----------------------------------------------------------------------------
3851 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3852 *----------------------------------------------------------------------------*/
3853
float16 float16_silence_nan(float16 a, float_status *status)
{
    FloatParts64 p;

    /* Shift the NaN payload up to the canonical position so the
     * shared parts_silence_nan() can operate, then shift it back
     * down to repack; same pattern for each type below. */
    float16_unpack_raw(&p, a);
    p.frac <<= float16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_silence_nan(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_raw(&p, a);
    p.frac <<= float32_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_silence_nan(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_raw(&p, a);
    p.frac <<= float64_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_raw(&p, a);
    p.frac <<= bfloat16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}

float128 float128_silence_nan(float128 a, float_status *status)
{
    FloatParts128 p;

    /* 128-bit fraction spans two words; use the frac_shl/shr helpers. */
    float128_unpack_raw(&p, a);
    frac_shl(&p, float128_params.frac_shift);
    parts_silence_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}
3908
3909 /*----------------------------------------------------------------------------
3910 | If `a' is denormal and we are in flush-to-zero mode then set the
3911 | input-denormal exception and return zero. Otherwise just return the value.
3912 *----------------------------------------------------------------------------*/
3913
/* Return true if P (raw-unpacked fields) is denormal — biased
 * exponent zero with a non-zero fraction — raising the
 * input-denormal flag in that case. */
static bool parts_squash_denormal(FloatParts64 p, float_status *status)
{
    if (p.exp == 0 && p.frac != 0) {
        float_raise(float_flag_input_denormal, status);
        return true;
    }

    return false;
}
3923
float16 float16_squash_input_denormal(float16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        /* Raw unpack is enough: only sign/exp/frac fields are read. */
        float16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            /* Preserve the sign of the squashed denormal. */
            return float16_set_sign(float16_zero, p.sign);
        }
    }
    return a;
}

float32 float32_squash_input_denormal(float32 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float32_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float32_set_sign(float32_zero, p.sign);
        }
    }
    return a;
}

float64 float64_squash_input_denormal(float64 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float64_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float64_set_sign(float64_zero, p.sign);
        }
    }
    return a;
}

bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        bfloat16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return bfloat16_set_sign(bfloat16_zero, p.sign);
        }
    }
    return a;
}
3975
3976 /*----------------------------------------------------------------------------
3977 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3978 | and 7, and returns the properly rounded 32-bit integer corresponding to the
3979 | input. If `zSign' is 1, the input is negated before being converted to an
3980 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3981 | is simply rounded to an integer, with the inexact exception raised if the
3982 | input cannot be represented exactly as an integer. However, if the fixed-
3983 | point input is too large, the invalid exception is raised and the largest
3984 | positive or negative integer is returned.
3985 *----------------------------------------------------------------------------*/
3986
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    int32_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* 7 fraction bits below the binary point: pick the increment
     * that implements the rounding mode when added pre-shift. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round away only when it makes the integer LSB odd. */
        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
    }
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        /* Exact tie: clear the LSB to round to even. */
        absZ &= ~1;
    }
    z = absZ;
    if ( zSign ) z = - z;
    /* Magnitude exceeds 32 bits, or the sign flipped by overflow:
     * invalid operation, return the saturated value. */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise(float_flag_invalid, status);
        return zSign ? INT32_MIN : INT32_MAX;
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4034
4035 /*----------------------------------------------------------------------------
4036 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4037 | `absZ1', with binary point between bits 63 and 64 (between the input words),
4038 | and returns the properly rounded 64-bit integer corresponding to the input.
4039 | If `zSign' is 1, the input is negated before being converted to an integer.
4040 | Ordinarily, the fixed-point input is simply rounded to an integer, with
4041 | the inexact exception raised if the input cannot be represented exactly as
4042 | an integer. However, if the fixed-point input is too large, the invalid
4043 | exception is raised and the largest positive or negative integer is
4044 | returned.
4045 *----------------------------------------------------------------------------*/
4046
static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;
    int64_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* absZ1 holds the bits below the binary point: decide whether
     * they round the integer part (absZ0) up. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Fraction >= 1/2 iff the top bit of absZ1 is set. */
        increment = ((int64_t) absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        /* Round away only when it makes the integer LSB odd. */
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if ( increment ) {
        ++absZ0;
        if ( absZ0 == 0 ) goto overflow;
        if (!(absZ1 << 1) && roundNearestEven) {
            /* Exact tie: clear the LSB to round to even. */
            absZ0 &= ~1;
        }
    }
    z = absZ0;
    if ( zSign ) z = - z;
    /* Sign of the result disagreeing with zSign means overflow. */
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
 overflow:
        float_raise(float_flag_invalid, status);
        return zSign ? INT64_MIN : INT64_MAX;
    }
    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4096
4097 /*----------------------------------------------------------------------------
4098 | Normalizes the subnormal single-precision floating-point value represented
4099 | by the denormalized significand `aSig'. The normalized exponent and
4100 | significand are stored at the locations pointed to by `zExpPtr' and
4101 | `zSigPtr', respectively.
4102 *----------------------------------------------------------------------------*/
4103
static void
normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* Left-justify the significand so its leading 1 reaches bit 23,
     * compensating in the returned exponent. */
    int8_t shift = clz32(aSig) - 8;

    *zExpPtr = 1 - shift;
    *zSigPtr = aSig << shift;
}
4114
4115 /*----------------------------------------------------------------------------
4116 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4117 | and significand `zSig', and returns the proper single-precision floating-
4118 | point value corresponding to the abstract input. Ordinarily, the abstract
4119 | value is simply rounded and packed into the single-precision format, with
4120 | the inexact exception raised if the abstract input cannot be represented
4121 | exactly. However, if the abstract value is too large, the overflow and
4122 | inexact exceptions are raised and an infinity or maximal finite value is
4123 | returned. If the abstract value is too small, the input value is rounded to
4124 | a subnormal number, and the underflow and inexact exceptions are raised if
4125 | the abstract input cannot be represented exactly as a subnormal single-
4126 | precision floating-point number.
4127 | The input significand `zSig' has its binary point between bits 30
4128 | and 29, which is 7 bits to the left of the usual location. This shifted
4129 | significand must be normalized or smaller. If `zSig' is not normalized,
4130 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4131 | and it must not require rounding. In the usual case that `zSig' is
4132 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4133 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4134 | Binary Floating-Point Arithmetic.
4135 *----------------------------------------------------------------------------*/
4136
static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* 7 fraction bits below the rounding point: pick the increment
     * that implements the rounding mode when added pre-shift. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
        break;
    }
    roundBits = zSig & 0x7F;
    /* Exponent outside the normal range?  The unsigned cast also
     * catches negative (underflowing) exponents. */
    if ( 0xFD <= (uint16_t) zExp ) {
        if ( ( 0xFD < zExp )
             || ( ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /* Overflow: modes that never round away from zero return
             * the largest finite value instead of infinity; the -1
             * fraction argument wraps the packed infinity down to it. */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat32(zSign, 0, 0);
            }
            /* Tiny if detected before rounding, or if rounding cannot
             * carry the value back up into the normal range. */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < 0x80000000);
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>7;
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        /* Exact tie: clear the LSB to round to even. */
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );

}
4213
4214 /*----------------------------------------------------------------------------
4215 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4216 | and significand `zSig', and returns the proper single-precision floating-
4217 | point value corresponding to the abstract input. This routine is just like
4218 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4219 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4220 | floating-point exponent.
4221 *----------------------------------------------------------------------------*/
4222
4223 static float32
4224 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4225 float_status *status)
4226 {
4227 int8_t shiftCount;
4228
4229 shiftCount = clz32(zSig) - 1;
4230 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4231 status);
4232
4233 }
4234
4235 /*----------------------------------------------------------------------------
4236 | Normalizes the subnormal double-precision floating-point value represented
4237 | by the denormalized significand `aSig'. The normalized exponent and
4238 | significand are stored at the locations pointed to by `zExpPtr' and
4239 | `zSigPtr', respectively.
4240 *----------------------------------------------------------------------------*/
4241
static void
normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* Left-justify the significand so its leading 1 reaches bit 52,
     * compensating in the returned exponent. */
    int8_t shift = clz64(aSig) - 11;

    *zExpPtr = 1 - shift;
    *zSigPtr = aSig << shift;
}
4252
4253 /*----------------------------------------------------------------------------
4254 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4255 | double-precision floating-point value, returning the result. After being
4256 | shifted into the proper positions, the three fields are simply added
4257 | together to form the result. This means that any integer portion of `zSig'
4258 | will be added into the exponent. Since a properly normalized significand
4259 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4260 | than the desired result exponent whenever `zSig' is a complete, normalized
4261 | significand.
4262 *----------------------------------------------------------------------------*/
4263
4264 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4265 {
4266
4267 return make_float64(
4268 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4269
4270 }
4271
4272 /*----------------------------------------------------------------------------
4273 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4274 | and significand `zSig', and returns the proper double-precision floating-
4275 | point value corresponding to the abstract input. Ordinarily, the abstract
4276 | value is simply rounded and packed into the double-precision format, with
4277 | the inexact exception raised if the abstract input cannot be represented
4278 | exactly. However, if the abstract value is too large, the overflow and
4279 | inexact exceptions are raised and an infinity or maximal finite value is
4280 | returned. If the abstract value is too small, the input value is rounded to
4281 | a subnormal number, and the underflow and inexact exceptions are raised if
4282 | the abstract input cannot be represented exactly as a subnormal double-
4283 | precision floating-point number.
4284 | The input significand `zSig' has its binary point between bits 62
4285 | and 61, which is 10 bits to the left of the usual location. This shifted
4286 | significand must be normalized or smaller. If `zSig' is not normalized,
4287 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4288 | and it must not require rounding. In the usual case that `zSig' is
4289 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4290 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4291 | Binary Floating-Point Arithmetic.
4292 *----------------------------------------------------------------------------*/
4293
static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Select the value added to the 10 guard bits before truncation. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x200;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x3ff;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x3ff : 0;
        break;
    case float_round_to_odd:
        /* Round to odd: only bump if the bit that will become the
         * result's LSB (bit 10) is currently even. */
        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
        break;
    default:
        abort();
    }
    roundBits = zSig & 0x3FF;
    /* The unsigned compare catches both too-large exponents and negative
     * ones (which wrap to huge uint16_t values) in a single test. */
    if ( 0x7FD <= (uint16_t) zExp ) {
        if ( ( 0x7FD < zExp )
             || ( ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /* Overflow: return infinity, unless the rounding mode pins the
             * result at the largest finite magnitude instead. */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat64(zSign, 0, 0);
            }
            /* Tininess may be detected before or after rounding, per the
             * target's configuration. */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>10;
    /* Exact halfway case under nearest-even: force the LSB to zero. */
    if (!(roundBits ^ 0x200) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );

}
4369
4370 /*----------------------------------------------------------------------------
4371 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4372 | and significand `zSig', and returns the proper double-precision floating-
4373 | point value corresponding to the abstract input. This routine is just like
4374 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4375 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4376 | floating-point exponent.
4377 *----------------------------------------------------------------------------*/
4378
4379 static float64
4380 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4381 float_status *status)
4382 {
4383 int8_t shiftCount;
4384
4385 shiftCount = clz64(zSig) - 1;
4386 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4387 status);
4388
4389 }
4390
4391 /*----------------------------------------------------------------------------
4392 | Normalizes the subnormal extended double-precision floating-point value
4393 | represented by the denormalized significand `aSig'. The normalized exponent
4394 | and significand are stored at the locations pointed to by `zExpPtr' and
4395 | `zSigPtr', respectively.
4396 *----------------------------------------------------------------------------*/
4397
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* The floatx80 significand carries an explicit integer bit in bit 63,
     * so a full clz64 shift brings the leading one into position. */
    int8_t shift = clz64(aSig);

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4407
4408 /*----------------------------------------------------------------------------
4409 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4410 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4411 | and returns the proper extended double-precision floating-point value
4412 | corresponding to the abstract input. Ordinarily, the abstract value is
4413 | rounded and packed into the extended double-precision format, with the
4414 | inexact exception raised if the abstract input cannot be represented
4415 | exactly. However, if the abstract value is too large, the overflow and
4416 | inexact exceptions are raised and an infinity or maximal finite value is
4417 | returned. If the abstract value is too small, the input value is rounded to
4418 | a subnormal number, and the underflow and inexact exceptions are raised if
4419 | the abstract input cannot be represented exactly as a subnormal extended
4420 | double-precision floating-point number.
4421 | If `roundingPrecision' is 32 or 64, the result is rounded to the same
4422 | number of bits as single or double precision, respectively. Otherwise, the
4423 | result is rounded to the full precision of the extended double-precision
4424 | format.
4425 | The input significand must be normalized or smaller. If the input
4426 | significand is not normalized, `zExp' must be 0; in that case, the result
4427 | returned is a subnormal number, and it must not require rounding. The
4428 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4429 | Floating-Point Arithmetic.
4430 *----------------------------------------------------------------------------*/
4431
floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    if ( roundingPrecision == 80 ) goto precision80;
    /* Reduced-precision modes: roundMask covers the significand bits
     * beyond single (40 bits) or double (11 bits) precision. */
    if ( roundingPrecision == 64 ) {
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
    }
    else if ( roundingPrecision == 32 ) {
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
    }
    else {
        goto precision80;
    }
    /* Fold the low word into zSig0's sticky (least-significant) bit. */
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    /* Single unsigned test for both out-of-range-high and <= 0. */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if ( ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /* Rounding re-set the integer bit: result is a pseudo-denormal
             * promoted to the smallest normal exponent. */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            /* Nearest-even tie: widen the mask to also clear the LSB. */
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    /* Carry out of bit 63: renormalize by bumping the exponent. */
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 /* Full 64-bit-significand path: zSig1 holds the round/sticky bits. */
 precision80:
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if ( ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Modes that never round away from zero clamp to the largest
             * finite value instead of producing infinity. */
            if ( ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* The shift changed zSig1, so the increment decision must be
             * recomputed for the denormalized value. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                /* Tie under nearest-even: force the LSB even. */
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            /* Significand wrapped: renormalize. */
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}
4619
4620 /*----------------------------------------------------------------------------
4621 | Takes an abstract floating-point value having sign `zSign', exponent
4622 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4623 | and returns the proper extended double-precision floating-point value
4624 | corresponding to the abstract input. This routine is just like
4625 | `roundAndPackFloatx80' except that the input significand does not have to be
4626 | normalized.
4627 *----------------------------------------------------------------------------*/
4628
4629 floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4630 bool zSign, int32_t zExp,
4631 uint64_t zSig0, uint64_t zSig1,
4632 float_status *status)
4633 {
4634 int8_t shiftCount;
4635
4636 if ( zSig0 == 0 ) {
4637 zSig0 = zSig1;
4638 zSig1 = 0;
4639 zExp -= 64;
4640 }
4641 shiftCount = clz64(zSig0);
4642 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4643 zExp -= shiftCount;
4644 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4645 zSig0, zSig1, status);
4646
4647 }
4648
4649 /*----------------------------------------------------------------------------
4650 | Returns the least-significant 64 fraction bits of the quadruple-precision
4651 | floating-point value `a'.
4652 *----------------------------------------------------------------------------*/
4653
4654 static inline uint64_t extractFloat128Frac1( float128 a )
4655 {
4656
4657 return a.low;
4658
4659 }
4660
4661 /*----------------------------------------------------------------------------
4662 | Returns the most-significant 48 fraction bits of the quadruple-precision
4663 | floating-point value `a'.
4664 *----------------------------------------------------------------------------*/
4665
4666 static inline uint64_t extractFloat128Frac0( float128 a )
4667 {
4668
4669 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4670
4671 }
4672
4673 /*----------------------------------------------------------------------------
4674 | Returns the exponent bits of the quadruple-precision floating-point value
4675 | `a'.
4676 *----------------------------------------------------------------------------*/
4677
4678 static inline int32_t extractFloat128Exp( float128 a )
4679 {
4680
4681 return ( a.high>>48 ) & 0x7FFF;
4682
4683 }
4684
4685 /*----------------------------------------------------------------------------
4686 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4687 *----------------------------------------------------------------------------*/
4688
4689 static inline bool extractFloat128Sign(float128 a)
4690 {
4691 return a.high >> 63;
4692 }
4693
4694 /*----------------------------------------------------------------------------
4695 | Normalizes the subnormal quadruple-precision floating-point value
4696 | represented by the denormalized significand formed by the concatenation of
4697 | `aSig0' and `aSig1'. The normalized exponent is stored at the location
4698 | pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4699 | significand are stored at the location pointed to by `zSig0Ptr', and the
4700 | least significant 64 bits of the normalized significand are stored at the
4701 | location pointed to by `zSig1Ptr'.
4702 *----------------------------------------------------------------------------*/
4703
static void
normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shiftCount;

    if ( aSig0 == 0 ) {
        /* Fewer than 49 high bits in use: the leading one is somewhere in
         * aSig1.  A negative shiftCount means aSig1's leading one sits
         * above the target bit-48 position, so split it across both words
         * with a right shift instead. */
        shiftCount = clz64(aSig1) - 15;
        if ( shiftCount < 0 ) {
            *zSig0Ptr = aSig1>>( - shiftCount );
            *zSig1Ptr = aSig1<<( shiftCount & 63 );
        }
        else {
            *zSig0Ptr = aSig1<<shiftCount;
            *zSig1Ptr = 0;
        }
        /* Exponent accounts for the missing 64-bit high word as well. */
        *zExpPtr = - shiftCount - 63;
    }
    else {
        /* Leading one is in aSig0: shift it up into bit 48. */
        shiftCount = clz64(aSig0) - 15;
        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
        *zExpPtr = 1 - shiftCount;
    }

}
4734
4735 /*----------------------------------------------------------------------------
4736 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4737 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4738 | floating-point value, returning the result. After being shifted into the
4739 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4740 | added together to form the most significant 32 bits of the result. This
4741 | means that any integer portion of `zSig0' will be added into the exponent.
4742 | Since a properly normalized significand will have an integer portion equal
4743 | to 1, the `zExp' input should be 1 less than the desired result exponent
4744 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4745 | significand.
4746 *----------------------------------------------------------------------------*/
4747
4748 static inline float128
4749 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4750 {
4751 float128 z;
4752
4753 z.low = zSig1;
4754 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4755 return z;
4756 }
4757
4758 /*----------------------------------------------------------------------------
4759 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4760 | and extended significand formed by the concatenation of `zSig0', `zSig1',
4761 | and `zSig2', and returns the proper quadruple-precision floating-point value
4762 | corresponding to the abstract input. Ordinarily, the abstract value is
4763 | simply rounded and packed into the quadruple-precision format, with the
4764 | inexact exception raised if the abstract input cannot be represented
4765 | exactly. However, if the abstract value is too large, the overflow and
4766 | inexact exceptions are raised and an infinity or maximal finite value is
4767 | returned. If the abstract value is too small, the input value is rounded to
4768 | a subnormal number, and the underflow and inexact exceptions are raised if
4769 | the abstract input cannot be represented exactly as a subnormal quadruple-
4770 | precision floating-point number.
4771 | The input significand must be normalized or smaller. If the input
4772 | significand is not normalized, `zExp' must be 0; in that case, the result
4773 | returned is a subnormal number, and it must not require rounding. In the
4774 | usual case that the input significand is normalized, `zExp' must be 1 less
4775 | than the ``true'' floating-point exponent. The handling of underflow and
4776 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4777 *----------------------------------------------------------------------------*/
4778
static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
                                     uint64_t zSig0, uint64_t zSig1,
                                     uint64_t zSig2, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* zSig2 holds the round/sticky bits; decide whether to increment the
     * 128-bit significand zSig0:zSig1 by one ULP. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig2 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig2;
        break;
    case float_round_down:
        increment = zSign && zSig2;
        break;
    case float_round_to_odd:
        /* Round to odd: bump only when the LSB is currently even. */
        increment = !(zSig1 & 0x1) && zSig2;
        break;
    default:
        abort();
    }
    /* Unsigned compare handles both overflow and negative exponents. */
    if ( 0x7FFD <= (uint32_t) zExp ) {
        if ( ( 0x7FFD < zExp )
             || ( ( zExp == 0x7FFD )
                  && eq128(
                         UINT64_C(0x0001FFFFFFFFFFFF),
                         UINT64_C(0xFFFFFFFFFFFFFFFF),
                         zSig0,
                         zSig1
                     )
                  && increment
                )
           ) {
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Modes that never round away from zero clamp to the largest
             * finite value rather than producing infinity. */
            if ( ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
                 || (roundingMode == float_round_to_odd)
               ) {
                return
                    packFloat128(
                        zSign,
                        0x7FFE,
                        UINT64_C(0x0000FFFFFFFFFFFF),
                        UINT64_C(0xFFFFFFFFFFFFFFFF)
                    );
            }
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat128(zSign, 0, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || !increment
                  || lt128(zSig0, zSig1,
                           UINT64_C(0x0001FFFFFFFFFFFF),
                           UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift128ExtraRightJamming(
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
            zExp = 0;
            if (isTiny && zSig2) {
                float_raise(float_flag_underflow, status);
            }
            /* The denormalizing shift changed zSig1/zSig2, so the
             * increment decision must be recomputed. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig2 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig2;
                break;
            case float_round_down:
                increment = zSign && zSig2;
                break;
            case float_round_to_odd:
                increment = !(zSig1 & 0x1) && zSig2;
                break;
            default:
                abort();
            }
        }
    }
    if (zSig2) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        /* Exact tie under nearest-even: force the LSB to zero. */
        if ((zSig2 + zSig2 == 0) && roundNearestEven) {
            zSig1 &= ~1;
        }
    }
    else {
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    }
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
4890
4891 /*----------------------------------------------------------------------------
4892 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4893 | and significand formed by the concatenation of `zSig0' and `zSig1', and
4894 | returns the proper quadruple-precision floating-point value corresponding
4895 | to the abstract input. This routine is just like `roundAndPackFloat128'
4896 | except that the input significand has fewer bits and does not have to be
4897 | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4898 | point exponent.
4899 *----------------------------------------------------------------------------*/
4900
4901 static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
4902 uint64_t zSig0, uint64_t zSig1,
4903 float_status *status)
4904 {
4905 int8_t shiftCount;
4906 uint64_t zSig2;
4907
4908 if ( zSig0 == 0 ) {
4909 zSig0 = zSig1;
4910 zSig1 = 0;
4911 zExp -= 64;
4912 }
4913 shiftCount = clz64(zSig0) - 15;
4914 if ( 0 <= shiftCount ) {
4915 zSig2 = 0;
4916 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4917 }
4918 else {
4919 shift128ExtraRightJamming(
4920 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4921 }
4922 zExp -= shiftCount;
4923 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
4924
4925 }
4926
4927
4928 /*----------------------------------------------------------------------------
4929 | Returns the result of converting the 32-bit two's complement integer `a'
4930 | to the extended double-precision floating-point format. The conversion
4931 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4932 | Arithmetic.
4933 *----------------------------------------------------------------------------*/
4934
4935 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4936 {
4937 bool zSign;
4938 uint32_t absA;
4939 int8_t shiftCount;
4940 uint64_t zSig;
4941
4942 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4943 zSign = ( a < 0 );
4944 absA = zSign ? - a : a;
4945 shiftCount = clz32(absA) + 32;
4946 zSig = absA;
4947 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4948
4949 }
4950
4951 /*----------------------------------------------------------------------------
4952 | Returns the result of converting the 64-bit two's complement integer `a'
4953 | to the extended double-precision floating-point format. The conversion
4954 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4955 | Arithmetic.
4956 *----------------------------------------------------------------------------*/
4957
4958 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4959 {
4960 bool zSign;
4961 uint64_t absA;
4962 int8_t shiftCount;
4963
4964 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4965 zSign = ( a < 0 );
4966 absA = zSign ? - a : a;
4967 shiftCount = clz64(absA);
4968 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4969
4970 }
4971
4972 /*----------------------------------------------------------------------------
4973 | Returns the result of converting the single-precision floating-point value
4974 | `a' to the extended double-precision floating-point format. The conversion
4975 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4976 | Arithmetic.
4977 *----------------------------------------------------------------------------*/
4978
floatx80 float32_to_floatx80(float32 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint32_t aSig;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        if (aSig) {
            /* NaN input: convert the payload and return it quieted. */
            floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        return packFloatx80(aSign,
                            floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    /* Make the integer bit explicit, rebias the exponent
     * (0x3F80 = 0x3FFF - 0x7F) and left-justify the 24-bit significand
     * in the 64-bit field; the conversion is always exact. */
    aSig |= 0x00800000;
    return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );

}
5007
5008 /*----------------------------------------------------------------------------
5009 | Returns the remainder of the single-precision floating-point value `a'
5010 | with respect to the corresponding value `b'. The operation is performed
5011 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5012 *----------------------------------------------------------------------------*/
5013
float32 float32_rem(float32 a, float32 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t q;
    uint64_t aSig64, bSig64, q64;
    uint32_t alternateASig;
    int32_t sigMean;
    a = float32_squash_input_denormal(a, status);
    b = float32_squash_input_denormal(b, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    /* Special cases: NaN operands propagate; rem(inf, x) and rem(x, 0)
     * are invalid; rem(x, inf) == x; rem(0, y) == 0. */
    if ( aExp == 0xFF ) {
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    if ( bExp == 0xFF ) {
        if (bSig) {
            return propagateFloat32NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float32_default_nan(status);
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the implicit integer bits explicit. */
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent gap: one direct quotient step suffices. */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            /* |a| far below |b|: a is already the remainder. */
            if ( expDiff < -1 ) return a;
            aSig >>= 1;
        }
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /* Large exponent gap: reduce 62 quotient bits per iteration using
         * an estimated (never too large after the -2 guard) quotient. */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (uint64_t) aSig )<<40;
        bSig64 = ( (uint64_t) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        expDiff += 64;
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /* Final correction: step past zero, then pick whichever of the last
     * two partial remainders is nearer (ties go to even quotient), which
     * yields the IEEE round-to-nearest remainder. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int32_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int32_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
}
5109
5110
5111
5112 /*----------------------------------------------------------------------------
5113 | Returns the binary exponential of the single-precision floating-point value
5114 | `a'. The operation is performed according to the IEC/IEEE Standard for
5115 | Binary Floating-Point Arithmetic.
5116 |
5117 | Uses the following identities:
5118 |
5119 | 1. -------------------------------------------------------------------------
5120 | x x*ln(2)
5121 | 2 = e
5122 |
5123 | 2. -------------------------------------------------------------------------
5124 | 2 3 4 5 n
5125 | x x x x x x x
5126 | e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5127 | 1! 2! 3! 4! 5! n!
5128 *----------------------------------------------------------------------------*/
5129
/* Taylor-series coefficients 1/n! (n = 1..15) as float64 bit patterns,
 * used by float32_exp2 to evaluate e^x per identity (2) above. */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /* 1 */
    const_float64( 0x3fe0000000000000ll ), /* 2 */
    const_float64( 0x3fc5555555555555ll ), /* 3 */
    const_float64( 0x3fa5555555555555ll ), /* 4 */
    const_float64( 0x3f81111111111111ll ), /* 5 */
    const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
    const_float64( 0x3f2a01a01a01a01all ), /* 7 */
    const_float64( 0x3efa01a01a01a01all ), /* 8 */
    const_float64( 0x3ec71de3a556c734ll ), /* 9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
5148
5149 float32 float32_exp2(float32 a, float_status *status)
5150 {
5151 bool aSign;
5152 int aExp;
5153 uint32_t aSig;
5154 float64 r, x, xn;
5155 int i;
5156 a = float32_squash_input_denormal(a, status);
5157
5158 aSig = extractFloat32Frac( a );
5159 aExp = extractFloat32Exp( a );
5160 aSign = extractFloat32Sign( a );
5161
5162 if ( aExp == 0xFF) {
5163 if (aSig) {
5164 return propagateFloat32NaN(a, float32_zero, status);
5165 }
5166 return (aSign) ? float32_zero : a;
5167 }
5168 if (aExp == 0) {
5169 if (aSig == 0) return float32_one;
5170 }
5171
5172 float_raise(float_flag_inexact, status);
5173
5174 /* ******************************* */
5175 /* using float64 for approximation */
5176 /* ******************************* */
5177 x = float32_to_float64(a, status);
5178 x = float64_mul(x, float64_ln2, status);
5179
5180 xn = x;
5181 r = float64_one;
5182 for (i = 0 ; i < 15 ; i++) {
5183 float64 f;
5184
5185 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5186 r = float64_add(r, f, status);
5187
5188 xn = float64_mul(xn, x, status);
5189 }
5190
5191 return float64_to_float32(r, status);
5192 }
5193
5194 /*----------------------------------------------------------------------------
5195 | Returns the binary log of the single-precision floating-point value `a'.
5196 | The operation is performed according to the IEC/IEEE Standard for Binary
5197 | Floating-Point Arithmetic.
5198 *----------------------------------------------------------------------------*/
float32 float32_log2(float32 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint32_t aSig, zSig, i;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    if ( aExp == 0 ) {
        /* log2(+-0) = -infinity */
        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* log2 of a negative number is invalid */
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    if ( aExp == 0x7FF ) {
        if (aSig) {
            return propagateFloat32NaN(a, float32_zero, status);
        }
        /* log2(+infinity) = +infinity */
        return a;
    }

    /* Integer part of the result is the unbiased exponent; place it
     * above 23 fraction bits that will be computed below. */
    aExp -= 0x7F;
    aSig |= 0x00800000;
    zSign = aExp < 0;
    zSig = aExp << 23;

    /* Generate the fraction bits one at a time: repeatedly square the
     * significand; each time the square reaches 2 or more, the next
     * result bit is 1 and the significand is halved. */
    for (i = 1 << 22; i > 0; i >>= 1) {
        aSig = ( (uint64_t)aSig * aSig ) >> 23;
        if ( aSig & 0x01000000 ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    /* Negative exponent means the result's magnitude was negated. */
    if ( zSign )
        zSig = -zSig;

    return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
}
5243
5244 /*----------------------------------------------------------------------------
5245 | Returns the result of converting the double-precision floating-point value
5246 | `a' to the extended double-precision floating-point format. The conversion
5247 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5248 | Arithmetic.
5249 *----------------------------------------------------------------------------*/
5250
5251 floatx80 float64_to_floatx80(float64 a, float_status *status)
5252 {
5253 bool aSign;
5254 int aExp;
5255 uint64_t aSig;
5256
5257 a = float64_squash_input_denormal(a, status);
5258 aSig = extractFloat64Frac( a );
5259 aExp = extractFloat64Exp( a );
5260 aSign = extractFloat64Sign( a );
5261 if ( aExp == 0x7FF ) {
5262 if (aSig) {
5263 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5264 status);
5265 return floatx80_silence_nan(res, status);
5266 }
5267 return packFloatx80(aSign,
5268 floatx80_infinity_high,
5269 floatx80_infinity_low);
5270 }
5271 if ( aExp == 0 ) {
5272 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5273 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5274 }
5275 return
5276 packFloatx80(
5277 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5278
5279 }
5280
5281 /*----------------------------------------------------------------------------
5282 | Returns the remainder of the double-precision floating-point value `a'
5283 | with respect to the corresponding value `b'. The operation is performed
5284 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5285 *----------------------------------------------------------------------------*/
5286
float64 float64_rem(float64 a, float64 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint64_t aSig, bSig;
    uint64_t q, alternateASig;
    int64_t sigMean;

    a = float64_squash_input_denormal(a, status);
    b = float64_squash_input_denormal(b, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    /* rem(NaN, x), rem(x, NaN) propagate; rem(inf, x) is invalid. */
    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( bExp == 0x7FF ) {
        if (bSig) {
            return propagateFloat64NaN(a, b, status);
        }
        /* rem(finite, inf) = a unchanged */
        return a;
    }
    if ( bExp == 0 ) {
        /* rem(x, 0) is invalid */
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float64_default_nan(status);
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    /* Make the integer bits explicit, left-aligned in 64 bits. */
    expDiff = aExp - bExp;
    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
    if ( expDiff < 0 ) {
        /* |a| < |b|/2: remainder is a itself. */
        if ( expDiff < -1 ) return a;
        aSig >>= 1;
    }
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    /* Reduce the exponent difference 62 bits at a time, keeping a
     * partial remainder via an estimated 128/64 division.  The estimate
     * is lowered by 2 so it can never exceed the true quotient. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }
    /* Step past the true quotient; the loop exits once aSig goes
     * negative, leaving the two candidate remainders. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int64_t) aSig );
    /* Round to nearest (ties to even quotient) between the candidates. */
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int64_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);

}
5368
5369 /*----------------------------------------------------------------------------
5370 | Returns the binary log of the double-precision floating-point value `a'.
5371 | The operation is performed according to the IEC/IEEE Standard for Binary
5372 | Floating-Point Arithmetic.
5373 *----------------------------------------------------------------------------*/
float64 float64_log2(float64 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint64_t aSig, aSig0, aSig1, zSig, i;
    a = float64_squash_input_denormal(a, status);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    if ( aExp == 0 ) {
        /* log2(+-0) = -infinity */
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* log2 of a negative number is invalid */
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    if ( aExp == 0x7FF ) {
        if (aSig) {
            return propagateFloat64NaN(a, float64_zero, status);
        }
        /* log2(+infinity) = +infinity */
        return a;
    }

    /* Integer part of the result is the unbiased exponent, placed above
     * the 52 fraction bits computed below. */
    aExp -= 0x3FF;
    aSig |= UINT64_C(0x0010000000000000);
    zSign = aExp < 0;
    zSig = (uint64_t)aExp << 52;
    /* Generate fraction bits by repeated squaring: when the squared
     * significand reaches 2 or more, emit a 1 bit and halve it. */
    for (i = 1LL << 51; i > 0; i >>= 1) {
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
        if ( aSig & UINT64_C(0x0020000000000000) ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    /* Negative unbiased exponent means the result is negative. */
    if ( zSign )
        zSig = -zSig;
    return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
}
5417
5418 /*----------------------------------------------------------------------------
5419 | Returns the result of converting the extended double-precision floating-
5420 | point value `a' to the 32-bit two's complement integer format. The
5421 | conversion is performed according to the IEC/IEEE Standard for Binary
5422 | Floating-Point Arithmetic---which means in particular that the conversion
5423 | is rounded according to the current rounding mode. If `a' is a NaN, the
5424 | largest positive integer is returned. Otherwise, if the conversion
5425 | overflows, the largest integer with the same sign as `a' is returned.
5426 *----------------------------------------------------------------------------*/
5427
5428 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5429 {
5430 bool aSign;
5431 int32_t aExp, shiftCount;
5432 uint64_t aSig;
5433
5434 if (floatx80_invalid_encoding(a)) {
5435 float_raise(float_flag_invalid, status);
5436 return 1 << 31;
5437 }
5438 aSig = extractFloatx80Frac( a );
5439 aExp = extractFloatx80Exp( a );
5440 aSign = extractFloatx80Sign( a );
5441 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5442 shiftCount = 0x4037 - aExp;
5443 if ( shiftCount <= 0 ) shiftCount = 1;
5444 shift64RightJamming( aSig, shiftCount, &aSig );
5445 return roundAndPackInt32(aSign, aSig, status);
5446
5447 }
5448
5449 /*----------------------------------------------------------------------------
5450 | Returns the result of converting the extended double-precision floating-
5451 | point value `a' to the 32-bit two's complement integer format. The
5452 | conversion is performed according to the IEC/IEEE Standard for Binary
5453 | Floating-Point Arithmetic, except that the conversion is always rounded
5454 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5455 | Otherwise, if the conversion overflows, the largest integer with the same
5456 | sign as `a' is returned.
5457 *----------------------------------------------------------------------------*/
5458
5459 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5460 {
5461 bool aSign;
5462 int32_t aExp, shiftCount;
5463 uint64_t aSig, savedASig;
5464 int32_t z;
5465
5466 if (floatx80_invalid_encoding(a)) {
5467 float_raise(float_flag_invalid, status);
5468 return 1 << 31;
5469 }
5470 aSig = extractFloatx80Frac( a );
5471 aExp = extractFloatx80Exp( a );
5472 aSign = extractFloatx80Sign( a );
5473 if ( 0x401E < aExp ) {
5474 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5475 goto invalid;
5476 }
5477 else if ( aExp < 0x3FFF ) {
5478 if (aExp || aSig) {
5479 float_raise(float_flag_inexact, status);
5480 }
5481 return 0;
5482 }
5483 shiftCount = 0x403E - aExp;
5484 savedASig = aSig;
5485 aSig >>= shiftCount;
5486 z = aSig;
5487 if ( aSign ) z = - z;
5488 if ( ( z < 0 ) ^ aSign ) {
5489 invalid:
5490 float_raise(float_flag_invalid, status);
5491 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5492 }
5493 if ( ( aSig<<shiftCount ) != savedASig ) {
5494 float_raise(float_flag_inexact, status);
5495 }
5496 return z;
5497
5498 }
5499
5500 /*----------------------------------------------------------------------------
5501 | Returns the result of converting the extended double-precision floating-
5502 | point value `a' to the 64-bit two's complement integer format. The
5503 | conversion is performed according to the IEC/IEEE Standard for Binary
5504 | Floating-Point Arithmetic---which means in particular that the conversion
5505 | is rounded according to the current rounding mode. If `a' is a NaN,
5506 | the largest positive integer is returned. Otherwise, if the conversion
5507 | overflows, the largest integer with the same sign as `a' is returned.
5508 *----------------------------------------------------------------------------*/
5509
5510 int64_t floatx80_to_int64(floatx80 a, float_status *status)
5511 {
5512 bool aSign;
5513 int32_t aExp, shiftCount;
5514 uint64_t aSig, aSigExtra;
5515
5516 if (floatx80_invalid_encoding(a)) {
5517 float_raise(float_flag_invalid, status);
5518 return 1ULL << 63;
5519 }
5520 aSig = extractFloatx80Frac( a );
5521 aExp = extractFloatx80Exp( a );
5522 aSign = extractFloatx80Sign( a );
5523 shiftCount = 0x403E - aExp;
5524 if ( shiftCount <= 0 ) {
5525 if ( shiftCount ) {
5526 float_raise(float_flag_invalid, status);
5527 if (!aSign || floatx80_is_any_nan(a)) {
5528 return INT64_MAX;
5529 }
5530 return INT64_MIN;
5531 }
5532 aSigExtra = 0;
5533 }
5534 else {
5535 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5536 }
5537 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5538
5539 }
5540
5541 /*----------------------------------------------------------------------------
5542 | Returns the result of converting the extended double-precision floating-
5543 | point value `a' to the 64-bit two's complement integer format. The
5544 | conversion is performed according to the IEC/IEEE Standard for Binary
5545 | Floating-Point Arithmetic, except that the conversion is always rounded
5546 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5547 | Otherwise, if the conversion overflows, the largest integer with the same
5548 | sign as `a' is returned.
5549 *----------------------------------------------------------------------------*/
5550
5551 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5552 {
5553 bool aSign;
5554 int32_t aExp, shiftCount;
5555 uint64_t aSig;
5556 int64_t z;
5557
5558 if (floatx80_invalid_encoding(a)) {
5559 float_raise(float_flag_invalid, status);
5560 return 1ULL << 63;
5561 }
5562 aSig = extractFloatx80Frac( a );
5563 aExp = extractFloatx80Exp( a );
5564 aSign = extractFloatx80Sign( a );
5565 shiftCount = aExp - 0x403E;
5566 if ( 0 <= shiftCount ) {
5567 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5568 if ( ( a.high != 0xC03E ) || aSig ) {
5569 float_raise(float_flag_invalid, status);
5570 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5571 return INT64_MAX;
5572 }
5573 }
5574 return INT64_MIN;
5575 }
5576 else if ( aExp < 0x3FFF ) {
5577 if (aExp | aSig) {
5578 float_raise(float_flag_inexact, status);
5579 }
5580 return 0;
5581 }
5582 z = aSig>>( - shiftCount );
5583 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5584 float_raise(float_flag_inexact, status);
5585 }
5586 if ( aSign ) z = - z;
5587 return z;
5588
5589 }
5590
5591 /*----------------------------------------------------------------------------
5592 | Returns the result of converting the extended double-precision floating-
5593 | point value `a' to the single-precision floating-point format. The
5594 | conversion is performed according to the IEC/IEEE Standard for Binary
5595 | Floating-Point Arithmetic.
5596 *----------------------------------------------------------------------------*/
5597
5598 float32 floatx80_to_float32(floatx80 a, float_status *status)
5599 {
5600 bool aSign;
5601 int32_t aExp;
5602 uint64_t aSig;
5603
5604 if (floatx80_invalid_encoding(a)) {
5605 float_raise(float_flag_invalid, status);
5606 return float32_default_nan(status);
5607 }
5608 aSig = extractFloatx80Frac( a );
5609 aExp = extractFloatx80Exp( a );
5610 aSign = extractFloatx80Sign( a );
5611 if ( aExp == 0x7FFF ) {
5612 if ( (uint64_t) ( aSig<<1 ) ) {
5613 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5614 status);
5615 return float32_silence_nan(res, status);
5616 }
5617 return packFloat32( aSign, 0xFF, 0 );
5618 }
5619 shift64RightJamming( aSig, 33, &aSig );
5620 if ( aExp || aSig ) aExp -= 0x3F81;
5621 return roundAndPackFloat32(aSign, aExp, aSig, status);
5622
5623 }
5624
5625 /*----------------------------------------------------------------------------
5626 | Returns the result of converting the extended double-precision floating-
5627 | point value `a' to the double-precision floating-point format. The
5628 | conversion is performed according to the IEC/IEEE Standard for Binary
5629 | Floating-Point Arithmetic.
5630 *----------------------------------------------------------------------------*/
5631
5632 float64 floatx80_to_float64(floatx80 a, float_status *status)
5633 {
5634 bool aSign;
5635 int32_t aExp;
5636 uint64_t aSig, zSig;
5637
5638 if (floatx80_invalid_encoding(a)) {
5639 float_raise(float_flag_invalid, status);
5640 return float64_default_nan(status);
5641 }
5642 aSig = extractFloatx80Frac( a );
5643 aExp = extractFloatx80Exp( a );
5644 aSign = extractFloatx80Sign( a );
5645 if ( aExp == 0x7FFF ) {
5646 if ( (uint64_t) ( aSig<<1 ) ) {
5647 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5648 status);
5649 return float64_silence_nan(res, status);
5650 }
5651 return packFloat64( aSign, 0x7FF, 0 );
5652 }
5653 shift64RightJamming( aSig, 1, &zSig );
5654 if ( aExp || aSig ) aExp -= 0x3C01;
5655 return roundAndPackFloat64(aSign, aExp, zSig, status);
5656
5657 }
5658
5659 /*----------------------------------------------------------------------------
5660 | Returns the result of converting the extended double-precision floating-
5661 | point value `a' to the quadruple-precision floating-point format. The
5662 | conversion is performed according to the IEC/IEEE Standard for Binary
5663 | Floating-Point Arithmetic.
5664 *----------------------------------------------------------------------------*/
5665
5666 float128 floatx80_to_float128(floatx80 a, float_status *status)
5667 {
5668 bool aSign;
5669 int aExp;
5670 uint64_t aSig, zSig0, zSig1;
5671
5672 if (floatx80_invalid_encoding(a)) {
5673 float_raise(float_flag_invalid, status);
5674 return float128_default_nan(status);
5675 }
5676 aSig = extractFloatx80Frac( a );
5677 aExp = extractFloatx80Exp( a );
5678 aSign = extractFloatx80Sign( a );
5679 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5680 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5681 status);
5682 return float128_silence_nan(res, status);
5683 }
5684 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5685 return packFloat128( aSign, aExp, zSig0, zSig1 );
5686
5687 }
5688
5689 /*----------------------------------------------------------------------------
5690 | Rounds the extended double-precision floating-point value `a'
5691 | to the precision provided by floatx80_rounding_precision and returns the
5692 | result as an extended double-precision floating-point value.
5693 | The operation is performed according to the IEC/IEEE Standard for Binary
5694 | Floating-Point Arithmetic.
5695 *----------------------------------------------------------------------------*/
5696
5697 floatx80 floatx80_round(floatx80 a, float_status *status)
5698 {
5699 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5700 extractFloatx80Sign(a),
5701 extractFloatx80Exp(a),
5702 extractFloatx80Frac(a), 0, status);
5703 }
5704
5705 /*----------------------------------------------------------------------------
5706 | Rounds the extended double-precision floating-point value `a' to an integer,
5707 | and returns the result as an extended quadruple-precision floating-point
5708 | value. The operation is performed according to the IEC/IEEE Standard for
5709 | Binary Floating-Point Arithmetic.
5710 *----------------------------------------------------------------------------*/
5711
5712 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5713 {
5714 bool aSign;
5715 int32_t aExp;
5716 uint64_t lastBitMask, roundBitsMask;
5717 floatx80 z;
5718
5719 if (floatx80_invalid_encoding(a)) {
5720 float_raise(float_flag_invalid, status);
5721 return floatx80_default_nan(status);
5722 }
5723 aExp = extractFloatx80Exp( a );
5724 if ( 0x403E <= aExp ) {
5725 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5726 return propagateFloatx80NaN(a, a, status);
5727 }
5728 return a;
5729 }
5730 if ( aExp < 0x3FFF ) {
5731 if ( ( aExp == 0 )
5732 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5733 return a;
5734 }
5735 float_raise(float_flag_inexact, status);
5736 aSign = extractFloatx80Sign( a );
5737 switch (status->float_rounding_mode) {
5738 case float_round_nearest_even:
5739 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5740 ) {
5741 return
5742 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5743 }
5744 break;
5745 case float_round_ties_away:
5746 if (aExp == 0x3FFE) {
5747 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5748 }
5749 break;
5750 case float_round_down:
5751 return
5752 aSign ?
5753 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5754 : packFloatx80( 0, 0, 0 );
5755 case float_round_up:
5756 return
5757 aSign ? packFloatx80( 1, 0, 0 )
5758 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5759
5760 case float_round_to_zero:
5761 break;
5762 default:
5763 g_assert_not_reached();
5764 }
5765 return packFloatx80( aSign, 0, 0 );
5766 }
5767 lastBitMask = 1;
5768 lastBitMask <<= 0x403E - aExp;
5769 roundBitsMask = lastBitMask - 1;
5770 z = a;
5771 switch (status->float_rounding_mode) {
5772 case float_round_nearest_even:
5773 z.low += lastBitMask>>1;
5774 if ((z.low & roundBitsMask) == 0) {
5775 z.low &= ~lastBitMask;
5776 }
5777 break;
5778 case float_round_ties_away:
5779 z.low += lastBitMask >> 1;
5780 break;
5781 case float_round_to_zero:
5782 break;
5783 case float_round_up:
5784 if (!extractFloatx80Sign(z)) {
5785 z.low += roundBitsMask;
5786 }
5787 break;
5788 case float_round_down:
5789 if (extractFloatx80Sign(z)) {
5790 z.low += roundBitsMask;
5791 }
5792 break;
5793 default:
5794 abort();
5795 }
5796 z.low &= ~ roundBitsMask;
5797 if ( z.low == 0 ) {
5798 ++z.high;
5799 z.low = UINT64_C(0x8000000000000000);
5800 }
5801 if (z.low != a.low) {
5802 float_raise(float_flag_inexact, status);
5803 }
5804 return z;
5805
5806 }
5807
5808 /*----------------------------------------------------------------------------
5809 | Returns the result of adding the absolute values of the extended double-
5810 | precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5811 | negated before being returned. `zSign' is ignored if the result is a NaN.
5812 | The addition is performed according to the IEC/IEEE Standard for Binary
5813 | Floating-Point Arithmetic.
5814 *----------------------------------------------------------------------------*/
5815
static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* a has the larger exponent: align b's significand to a's. */
        if ( aExp == 0x7FFF ) {
            if ((uint64_t)(aSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        /* A denormal's effective exponent is 1, not 0. */
        if ( bExp == 0 ) --expDiff;
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* b has the larger exponent: align a's significand to b's. */
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( aExp == 0 ) ++expDiff;
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
                /* At least one of the values is a pseudo-denormal,
                 * and there is a carry out of the result. */
                zExp = 1;
                goto shiftRight1;
            }
            if (zSig0 == 0) {
                return packFloatx80(zSign, 0, 0);
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        zExp = aExp;
        /* Same-exponent normals always carry out: shift right below. */
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* Top bit set means no carry out: already normalized. */
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Carry out of bit 63: shift right one, restore the integer bit. */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= UINT64_C(0x8000000000000000);
    ++zExp;
 roundAndPack:
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
5887
5888 /*----------------------------------------------------------------------------
5889 | Returns the result of subtracting the absolute values of the extended
5890 | double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5891 | difference is negated before being returned. `zSign' is ignored if the
5892 | result is a NaN. The subtraction is performed according to the IEC/IEEE
5893 | Standard for Binary Floating-Point Arithmetic.
5894 *----------------------------------------------------------------------------*/
5895
static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponents. */
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Inf - Inf is invalid. */
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        aExp = 1;
        bExp = 1;
    }
    zSig1 = 0;
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Exact cancellation: zero, negative only in round-down mode. */
    return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
 bExpBigger:
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* finite - Inf: infinity with the opposite sign. */
        return packFloatx80(zSign ^ 1, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    /* A denormal's effective exponent is 1, not 0. */
    if ( aExp == 0 ) ++expDiff;
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
    zExp = bExp;
    /* Result takes b's magnitude, so the sign flips. */
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) --expDiff;
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         zSign, zExp, zSig0, zSig1, status);
}
5956
5957 /*----------------------------------------------------------------------------
5958 | Returns the result of adding the extended double-precision floating-point
5959 | values `a' and `b'. The operation is performed according to the IEC/IEEE
5960 | Standard for Binary Floating-Point Arithmetic.
5961 *----------------------------------------------------------------------------*/
5962
5963 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
5964 {
5965 bool aSign, bSign;
5966
5967 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5968 float_raise(float_flag_invalid, status);
5969 return floatx80_default_nan(status);
5970 }
5971 aSign = extractFloatx80Sign( a );
5972 bSign = extractFloatx80Sign( b );
5973 if ( aSign == bSign ) {
5974 return addFloatx80Sigs(a, b, aSign, status);
5975 }
5976 else {
5977 return subFloatx80Sigs(a, b, aSign, status);
5978 }
5979
5980 }
5981
5982 /*----------------------------------------------------------------------------
5983 | Returns the result of subtracting the extended double-precision floating-
5984 | point values `a' and `b'. The operation is performed according to the
5985 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5986 *----------------------------------------------------------------------------*/
5987
5988 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
5989 {
5990 bool aSign, bSign;
5991
5992 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5993 float_raise(float_flag_invalid, status);
5994 return floatx80_default_nan(status);
5995 }
5996 aSign = extractFloatx80Sign( a );
5997 bSign = extractFloatx80Sign( b );
5998 if ( aSign == bSign ) {
5999 return subFloatx80Sigs(a, b, aSign, status);
6000 }
6001 else {
6002 return addFloatx80Sigs(a, b, aSign, status);
6003 }
6004
6005 }
6006
6007 /*----------------------------------------------------------------------------
6008 | Returns the result of multiplying the extended double-precision floating-
6009 | point values `a' and `b'. The operation is performed according to the
6010 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6011 *----------------------------------------------------------------------------*/
6012
floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( aSig<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* Inf * 0 is invalid. */
        if ( ( bExp | bSig ) == 0 ) goto invalid;
        return packFloatx80(zSign, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* 0 * Inf is invalid. */
        if ( ( aExp | aSig ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    /* 64x64 -> 128-bit significand product; renormalize by one bit if
     * the product is below 2^127. */
    zExp = aExp + bExp - 0x3FFE;
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    if ( 0 < (int64_t) zSig0 ) {
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
        --zExp;
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6068
6069 /*----------------------------------------------------------------------------
6070 | Returns the result of dividing the extended double-precision floating-point
6071 | value `a' by the corresponding value `b'. The operation is performed
6072 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6073 *----------------------------------------------------------------------------*/
6074
floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    uint64_t rem0, rem1, rem2, term0, term1, term2;

    /* Non-canonical 80-bit encodings are invalid operands. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    if ( aExp == 0x7FFF ) {
        /* 'a' is NaN or infinity. */
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            /* Inf / Inf is invalid. */
            goto invalid;
        }
        /* Inf / finite = Inf. */
        return packFloatx80(zSign, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* finite / Inf = 0. */
        return packFloatx80( zSign, 0, 0 );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* 0 / 0 is invalid; nonzero / 0 raises divide-by-zero. */
            if ( ( aExp | aSig ) == 0 ) {
 invalid:
                float_raise(float_flag_invalid, status);
                return floatx80_default_nan(status);
            }
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(zSign, floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    /* Tentative quotient exponent; bumped below if aSig >= bSig. */
    zExp = aExp - bExp + 0x3FFE;
    rem1 = 0;
    if ( bSig <= aSig ) {
        /* Pre-shift so the 128/64 division estimate cannot overflow. */
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
        ++zExp;
    }
    /* Estimate the high 64 quotient bits, then correct downward until
     * the remainder is non-negative. */
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
    mul64To128( bSig, zSig0, &term0, &term1 );
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
    }
    /* Low 64 quotient bits; only refine when the estimate is close
     * enough to matter for rounding, and fold any sticky remainder
     * into the last bit. */
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
        mul64To128( bSig, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        }
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6155
6156 /*----------------------------------------------------------------------------
6157 | Returns the remainder of the extended double-precision floating-point value
6158 | `a' with respect to the corresponding value `b'. The operation is performed
6159 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6160 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6161 | the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6162 | the absolute value of the integer quotient.
6163 *----------------------------------------------------------------------------*/
6164
floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
                         float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff, aExpOrig;
    uint64_t aSig0, aSig1, bSig;
    uint64_t q, term0, term1, alternateASig0, alternateASig1;

    *quotient = 0;
    /* Non-canonical 80-bit encodings are invalid operands. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    /* Keep the original exponent to detect pseudo-denormal inputs later. */
    aExpOrig = aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    if ( aExp == 0x7FFF ) {
        /* NaN operands propagate; remainder of infinity is invalid. */
        if ( (uint64_t) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if (aExp == 0 && aSig0 >> 63) {
            /*
             * Pseudo-denormal argument must be returned in normalized
             * form.
             */
            return packFloatx80(aSign, 1, aSig0);
        }
        /* Remainder with respect to infinity leaves 'a' unchanged. */
        return a;
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            /* Remainder by zero is invalid. */
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        /* |a| < |b|/2: quotient is 0, remainder is 'a' itself (always
         * for truncating mod; for IEEE rem only when expDiff < -1). */
        if ( mod || expDiff < -1 ) {
            if (aExp == 1 && aExpOrig == 0) {
                /*
                 * Pseudo-denormal argument must be returned in
                 * normalized form.
                 */
                return packFloatx80(aSign, aExp, aSig0);
            }
            return a;
        }
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    /* First quotient bit. */
    *quotient = q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    /* Long division, 62 quotient bits per iteration: the estimate is
     * lowered by 2 so it never exceeds the true quotient digit. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
        *quotient <<= 62;
        *quotient += q;
    }
    /* Final partial digit; correct the underestimate upward. */
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
        if (expDiff < 64) {
            *quotient <<= expDiff;
        } else {
            *quotient = 0;
        }
        *quotient += q;
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    if (!mod) {
        /* IEEE rem rounds the quotient to nearest-even: if the
         * "other side" remainder is smaller (or equal with odd
         * quotient), take it and flip the sign. */
        sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
        if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
             || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
                  && ( q & 1 ) )
           ) {
            aSig0 = alternateASig0;
            aSig1 = alternateASig1;
            zSign = ! zSign;
            ++*quotient;
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);

}
6285
6286 /*----------------------------------------------------------------------------
6287 | Returns the remainder of the extended double-precision floating-point value
6288 | `a' with respect to the corresponding value `b'. The operation is performed
6289 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6290 *----------------------------------------------------------------------------*/
6291
6292 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6293 {
6294 uint64_t quotient;
6295 return floatx80_modrem(a, b, false, &quotient, status);
6296 }
6297
6298 /*----------------------------------------------------------------------------
6299 | Returns the remainder of the extended double-precision floating-point value
6300 | `a' with respect to the corresponding value `b', with the quotient truncated
6301 | toward zero.
6302 *----------------------------------------------------------------------------*/
6303
6304 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6305 {
6306 uint64_t quotient;
6307 return floatx80_modrem(a, b, true, &quotient, status);
6308 }
6309
6310 /*----------------------------------------------------------------------------
6311 | Returns the square root of the extended double-precision floating-point
6312 | value `a'. The operation is performed according to the IEC/IEEE Standard
6313 | for Binary Floating-Point Arithmetic.
6314 *----------------------------------------------------------------------------*/
6315
floatx80 floatx80_sqrt(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    /* Non-canonical 80-bit encodings are invalid operands. */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig0 << 1)) {
            return propagateFloatx80NaN(a, a, status);
        }
        /* sqrt(+Inf) = +Inf; sqrt(-Inf) is invalid. */
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt(-0) = -0; any other negative value is invalid. */
        if ( ( aExp | aSig0 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Halve the unbiased exponent; odd exponents are absorbed into the
     * significand shift below. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    /* Initial 32-bit root estimate, refined by one Newton-like step
     * through the 128/64 division estimate. */
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* Correct the high root word until the remainder a - z^2 is
     * non-negative. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Low 64 root bits; refine only when near a rounding boundary, and
     * fold any nonzero remainder into the sticky bit. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= doubleZSig0;
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                0, zExp, zSig0, zSig1, status);
}
6380
6381 /*----------------------------------------------------------------------------
6382 | Returns the result of converting the quadruple-precision floating-point
6383 | value `a' to the extended double-precision floating-point format. The
6384 | conversion is performed according to the IEC/IEEE Standard for Binary
6385 | Floating-Point Arithmetic.
6386 *----------------------------------------------------------------------------*/
6387
floatx80 float128_to_floatx80(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        if ( aSig0 | aSig1 ) {
            /* NaN: convert via the common-NaN form, then quieten. */
            floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        return packFloatx80(aSign, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    else {
        /* Make the 113-bit quad significand's integer bit explicit. */
        aSig0 |= UINT64_C(0x0001000000000000);
    }
    /* Left-align the significand for the 64-bit-explicit x80 format;
     * rounding to 80-bit precision happens in roundAndPackFloatx80. */
    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
    return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);

}
6418
6419 /*----------------------------------------------------------------------------
6420 | Returns the remainder of the quadruple-precision floating-point value `a'
6421 | with respect to the corresponding value `b'. The operation is performed
6422 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6423 *----------------------------------------------------------------------------*/
6424
float128 float128_rem(float128 a, float128 b, float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
    int64_t sigMean0;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    if ( aExp == 0x7FFF ) {
        /* NaN operands propagate; remainder of infinity is invalid. */
        if ( ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        /* Remainder with respect to infinity leaves 'a' unchanged. */
        return a;
    }
    if ( bExp == 0 ) {
        if ( ( bSig0 | bSig1 ) == 0 ) {
            /* Remainder by zero is invalid. */
 invalid:
            float_raise(float_flag_invalid, status);
            return float128_default_nan(status);
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    expDiff = aExp - bExp;
    /* |a| < |b|/2: result is 'a' itself. */
    if ( expDiff < -1 ) return a;
    /* Left-align both significands with explicit integer bit; shift 'a'
     * one bit less when its exponent is smaller. */
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000),
        aSig1,
        15 - ( expDiff < 0 ),
        &aSig0,
        &aSig1
    );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    q = le128( bSig0, bSig1, aSig0, aSig1 );
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    /* Long division, 61 quotient bits per iteration; the estimate is
     * lowered by 4 so it never exceeds the true digit. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        expDiff -= 61;
    }
    /* Final partial digit, with operands rescaled to a common frame. */
    if ( -64 < expDiff ) {
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        q >>= - expDiff;
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        expDiff += 52;
        if ( expDiff < 0 ) {
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        }
        else {
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        }
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
    }
    else {
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
    }
    /* Overshoot by repeated subtraction until the remainder goes
     * negative, keeping the last non-negative value. */
    do {
        alternateASig0 = aSig0;
        alternateASig1 = aSig1;
        ++q;
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    } while ( 0 <= (int64_t) aSig0 );
    /* Round-to-nearest-even choice between the two candidate
     * remainders; the cast views sigMean0's storage as uint64_t for
     * add128 (same size/representation — NOTE(review): relies on
     * int64_t/uint64_t layout compatibility). */
    add128(
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
    if ( ( sigMean0 < 0 )
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
    }
    zSign = ( (int64_t) aSig0 < 0 );
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
    return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
                                         status);
}
6525
6526 /*----------------------------------------------------------------------------
6527 | Returns the square root of the quadruple-precision floating-point value `a'.
6528 | The operation is performed according to the IEC/IEEE Standard for Binary
6529 | Floating-Point Arithmetic.
6530 *----------------------------------------------------------------------------*/
6531
float128 float128_sqrt(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        if (aSig0 | aSig1) {
            return propagateFloat128NaN(a, a, status);
        }
        /* sqrt(+Inf) = +Inf; sqrt(-Inf) is invalid. */
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt(-0) = -0; any other negative value is invalid. */
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
 invalid:
        float_raise(float_flag_invalid, status);
        return float128_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    /* Halve the unbiased exponent; odd exponents are absorbed into the
     * significand shift below. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
    aSig0 |= UINT64_C(0x0001000000000000);
    /* Initial 32-bit root estimate, refined via a 128/64 division step. */
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* Correct the high root word until the remainder a - z^2 is
     * non-negative. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Low root bits; refine only near a rounding boundary, folding any
     * nonzero remainder into the sticky bit. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    /* Re-align to quad format with guard/sticky bits in zSig2. */
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
    return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);

}
6593
/*
 * Compare two extended double-precision values. If 'is_quiet' is false
 * any NaN operand raises the invalid exception; if true, only signaling
 * NaNs do. Returns a FloatRelation (less/equal/greater/unordered).
 */
static inline FloatRelation
floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    /* Non-canonical 80-bit encodings are invalid and unordered. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return float_relation_unordered;
    }
    /* Either operand NaN -> unordered. */
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
          ( extractFloatx80Frac( a )<<1 ) ) ||
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
          ( extractFloatx80Frac( b )<<1 ) )) {
        if (!is_quiet ||
            floatx80_is_signaling_nan(a, status) ||
            floatx80_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
    if ( aSign != bSign ) {

        /* Differing signs: equal only for +0 vs -0 (the <<1 drops the
         * sign bit of the combined exponent words). */
        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
             ( ( a.low | b.low ) == 0 ) ) {
            /* zero case */
            return float_relation_equal;
        } else {
            /* Positive 'a' is greater; maps to +1/-1 relation values. */
            return 1 - (2 * aSign);
        }
    } else {
        /* Normalize pseudo-denormals before comparison. */
        if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
            ++a.high;
        }
        if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
            ++b.high;
        }
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: compare magnitudes as 128-bit ints, inverting
             * the result for negative operands. */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
6641
6642 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
6643 {
6644 return floatx80_compare_internal(a, b, 0, status);
6645 }
6646
6647 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6648 float_status *status)
6649 {
6650 return floatx80_compare_internal(a, b, 1, status);
6651 }
6652
/*
 * Compare two quadruple-precision values. If 'is_quiet' is false any
 * NaN operand raises the invalid exception; if true, only signaling
 * NaNs do. Returns a FloatRelation (less/equal/greater/unordered).
 */
static inline FloatRelation
float128_compare_internal(float128 a, float128 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    /* Either operand NaN -> unordered. */
    if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
          ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
        ( ( extractFloat128Exp( b ) == 0x7fff ) &&
          ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
        if (!is_quiet ||
            float128_is_signaling_nan(a, status) ||
            float128_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloat128Sign( a );
    bSign = extractFloat128Sign( b );
    if ( aSign != bSign ) {
        /* Differing signs: equal only for +0 vs -0 (the <<1 drops the
         * sign bit). */
        if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
            /* zero case */
            return float_relation_equal;
        } else {
            /* Positive 'a' is greater; maps to +1/-1 relation values. */
            return 1 - (2 * aSign);
        }
    } else {
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: compare magnitudes as 128-bit ints, inverting
             * the result for negative operands. */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
6687
6688 FloatRelation float128_compare(float128 a, float128 b, float_status *status)
6689 {
6690 return float128_compare_internal(a, b, 0, status);
6691 }
6692
6693 FloatRelation float128_compare_quiet(float128 a, float128 b,
6694 float_status *status)
6695 {
6696 return float128_compare_internal(a, b, 1, status);
6697 }
6698
/*
 * Scale 'a' by 2**n with correct rounding, overflow and underflow
 * handling delegated to normalizeRoundAndPackFloatx80.
 */
floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig;

    /* Non-canonical 80-bit encodings are invalid operands. */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );

    if ( aExp == 0x7FFF ) {
        /* NaN propagates; infinity scales to itself. */
        if ( aSig<<1 ) {
            return propagateFloatx80NaN(a, a, status);
        }
        return a;
    }

    if (aExp == 0) {
        if (aSig == 0) {
            return a;
        }
        /* Denormal: use the effective exponent of 1 so the repack can
         * renormalize. */
        aExp++;
    }

    /* Clamp n so aExp + n cannot overflow int32_t; values beyond this
     * range saturate to overflow/underflow anyway. */
    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    aExp += n;
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         aSign, aExp, aSig, 0, status);
}
6737
/*
 * Scale 'a' by 2**n with correct rounding, overflow and underflow
 * handling delegated to normalizeRoundAndPackFloat128.
 */
float128 float128_scalbn(float128 a, int n, float_status *status)
{
    bool aSign;
    int32_t aExp;
    uint64_t aSig0, aSig1;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN propagates; infinity scales to itself. */
        if ( aSig0 | aSig1 ) {
            return propagateFloat128NaN(a, a, status);
        }
        return a;
    }
    if (aExp != 0) {
        /* Make the implicit integer bit explicit for the repack. */
        aSig0 |= UINT64_C(0x0001000000000000);
    } else if (aSig0 == 0 && aSig1 == 0) {
        return a;
    } else {
        /* Denormal: effective exponent is 1. */
        aExp++;
    }

    /* Clamp n so aExp + n cannot overflow int32_t; values beyond this
     * range saturate to overflow/underflow anyway. */
    if (n > 0x10000) {
        n = 0x10000;
    } else if (n < -0x10000) {
        n = -0x10000;
    }

    /* -1 compensates for the explicit integer bit set above. */
    aExp += n - 1;
    return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
                                          , status);

}
6773
6774 static void __attribute__((constructor)) softfloat_init(void)
6775 {
6776 union_float64 ua, ub, uc, ur;
6777
6778 if (QEMU_NO_HARDFLOAT) {
6779 return;
6780 }
6781 /*
6782 * Test that the host's FMA is not obviously broken. For example,
6783 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6784 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6785 */
6786 ua.s = 0x0020000000000001ULL;
6787 ub.s = 0x3ca0000000000000ULL;
6788 uc.s = 0x0020000000000000ULL;
6789 ur.h = fma(ua.h, ub.h, uc.h);
6790 if (ur.s != 0x0020000000000001ULL) {
6791 force_soft_fma = true;
6792 }
6793 }