/*
 * fpu/softfloat.c — QEMU softfloat implementation.
 */
1 /*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18 /*
19 ===============================================================================
20 This C source file is part of the SoftFloat IEC/IEEE Floating-point
21 Arithmetic Package, Release 2a.
22
23 Written by John R. Hauser. This work was made possible in part by the
24 International Computer Science Institute, located at Suite 600, 1947 Center
25 Street, Berkeley, California 94704. Funding was partially provided by the
26 National Science Foundation under grant MIP-9311980. The original version
27 of this code was written as part of a project to build a fixed-point vector
28 processor in collaboration with the University of California at Berkeley,
29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31 arithmetic/SoftFloat.html'.
32
33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38
39 Derivative works are acceptable, even for commercial purposes, so long as
40 (1) they include prominent notice that the work is derivative, and (2) they
41 include prominent notice akin to these four paragraphs for those parts of
42 this code that are retained.
43
44 ===============================================================================
45 */
46
47 /* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78 /* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
82 /* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
85 #include "qemu/osdep.h"
86 #include <math.h>
87 #include "qemu/bitops.h"
88 #include "fpu/softfloat.h"
89
90 /* We only need stdlib for abort() */
91
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
95 | desired.)
96 *----------------------------------------------------------------------------*/
97 #include "fpu/softfloat-macros.h"
98
99 /*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
/*
 * Generate a helper that unconditionally flushes a denormal input to a
 * zero of the same sign, raising float_flag_input_denormal.  "NOCHECK"
 * because it does not consult s->flush_inputs_to_zero; that test is done
 * by the GEN_INPUT_FLUSH{1,2,3} wrappers below.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK
142
/*
 * One-operand input flush: a no-op unless the status requests
 * flush_inputs_to_zero, otherwise defer to the __nocheck helper above.
 */
#define GEN_INPUT_FLUSH1(name, soft_t)                                  \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1
155
/* Two-operand variant of the conditional input flush above. */
#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2
169
/* Three-operand variant of the conditional input flush above. */
#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
184
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
/* On all other hosts, use the softfloat classification primitives. */
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
/*
 * NOTE(review): the noinline keeps the softfloat slow path out of the
 * hardfloat callers; QEMU_FLATTEN presumably maps to
 * __attribute__((flatten)) -- confirm against qemu/compiler.h.
 */
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234
235 static inline bool can_use_fpu(const float_status *s)
236 {
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242 }
243
244 /*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
/*
 * Unions giving access to the same bits as both the softfloat type (s)
 * and the corresponding host FP type (h).
 */
typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

/* Predicates over two operands, used as the pre/post hardfloat checks. */
typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

/* Softfloat and host implementations of a 2-input operation. */
typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float (*hard_f32_op2_fn)(float a, float b);
typedef double (*hard_f64_op2_fn)(double a, double b);
273
274 /* 2-input is-zero-or-normal */
275 static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276 {
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287 }
288
289 static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290 {
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297 }
298
299 /* 3-input is-zero-or-normal */
300 static inline
301 bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302 {
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311 }
312
313 static inline
314 bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315 {
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324 }
325
326 static inline bool f32_is_inf(union_float32 a)
327 {
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332 }
333
334 static inline bool f64_is_inf(union_float64 a)
335 {
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340 }
341
/*
 * Generic 2-operand float32 wrapper: try the host FPU first, and fall
 * back to the softfloat implementation `soft' whenever the inputs or
 * the result would make hardware exception detection unreliable.
 *
 * `pre' checks the operands before the hardware op; `post' decides, for
 * a tiny result, whether softfloat must redo the computation to get the
 * underflow/inexact flags right.
 */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    /* Hardfloat needs inexact already set and round-to-nearest-even. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        /*
         * NOTE(review): an infinite result is reported as overflow;
         * this presumes `pre' has excluded infinite operands -- the
         * pre checks used by callers (e.g. f32_is_zon2) do so.
         */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        /* Result at or below the normal range: recompute in softfloat. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
372
/*
 * Generic 2-operand float64 wrapper; see float32_gen2 for the strategy.
 */
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    /* Hardfloat needs inexact already set and round-to-nearest-even. */
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        /* Infinite result from finite operands: overflow. */
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        /* Result at or below the normal range: recompute in softfloat. */
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
403
404 /*----------------------------------------------------------------------------
405 | Returns the fraction bits of the single-precision floating-point value `a'.
406 *----------------------------------------------------------------------------*/
407
408 static inline uint32_t extractFloat32Frac(float32 a)
409 {
410 return float32_val(a) & 0x007FFFFF;
411 }
412
413 /*----------------------------------------------------------------------------
414 | Returns the exponent bits of the single-precision floating-point value `a'.
415 *----------------------------------------------------------------------------*/
416
417 static inline int extractFloat32Exp(float32 a)
418 {
419 return (float32_val(a) >> 23) & 0xFF;
420 }
421
422 /*----------------------------------------------------------------------------
423 | Returns the sign bit of the single-precision floating-point value `a'.
424 *----------------------------------------------------------------------------*/
425
426 static inline bool extractFloat32Sign(float32 a)
427 {
428 return float32_val(a) >> 31;
429 }
430
431 /*----------------------------------------------------------------------------
432 | Returns the fraction bits of the double-precision floating-point value `a'.
433 *----------------------------------------------------------------------------*/
434
435 static inline uint64_t extractFloat64Frac(float64 a)
436 {
437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
438 }
439
440 /*----------------------------------------------------------------------------
441 | Returns the exponent bits of the double-precision floating-point value `a'.
442 *----------------------------------------------------------------------------*/
443
444 static inline int extractFloat64Exp(float64 a)
445 {
446 return (float64_val(a) >> 52) & 0x7FF;
447 }
448
449 /*----------------------------------------------------------------------------
450 | Returns the sign bit of the double-precision floating-point value `a'.
451 *----------------------------------------------------------------------------*/
452
453 static inline bool extractFloat64Sign(float64 a)
454 {
455 return float64_val(a) >> 63;
456 }
457
458 /*
459 * Classify a floating point number. Everything above float_class_qnan
460 * is a NaN so cls >= float_class_qnan is any NaN.
461 */
462
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

/* Map a FloatClass to a one-bit mask so classes can be tested as sets. */
#define float_cmask(bit) (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    /* Combined masks for common membership tests. */
    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};

/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
};
494
495 /* Simple helpers for checking if, or what kind of, NaN we have */
496 static inline __attribute__((unused)) bool is_nan(FloatClass c)
497 {
498 return unlikely(c >= float_class_qnan);
499 }
500
501 static inline __attribute__((unused)) bool is_snan(FloatClass c)
502 {
503 return c == float_class_snan;
504 }
505
506 static inline __attribute__((unused)) bool is_qnan(FloatClass c)
507 {
508 return c == float_class_qnan;
509 }
510
/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;   /* most significant fraction word */
    uint64_t frac_lo;   /* least significant fraction word */
} FloatParts128;

/* Decomposed parts with a 256-bit fraction, four 64-bit words wide. */
typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;   /* high-middle */
    uint64_t frac_lm;   /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT 63
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
558
/* Structure holding all of the relevant parameters for a format.
 * exp_size: the size of the exponent field
 * exp_bias: the offset applied to the exponent field
 * exp_max: the maximum normalised exponent
 * frac_size: the size of the fraction field
 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based the size of fraction
 * frac_lsb: least significant bit of fraction
 * frac_lsbm1: the bit below the least significant bit (for rounding)
 * round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 * arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/*
 * Expand fields based on the size of exponent and fraction.
 * (-F - 1) & 63 is the left shift, mod 64, that places the F-bit
 * fraction's top bit at bit 62, i.e. just below DECOMPOSED_BINARY_POINT
 * (e.g. F=23 gives a shift of 40, putting bits 0..22 at bits 40..62).
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = E,                                             \
    .exp_bias       = ((1 << E) - 1) >> 1,                           \
    .exp_max        = (1 << E) - 1,                                  \
    .frac_size      = F,                                             \
    .frac_shift     = (-F - 1) & 63,                                 \
    .frac_lsb       = 1ull << ((-F - 1) & 63),                       \
    .frac_lsbm1     = 1ull << ((-F - 2) & 63),                       \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1,                 \
    .roundeven_mask = (2ull << ((-F - 1) & 63)) - 1

/* IEEE binary16 (half precision). */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* binary16 with the ARM Alternative Half Precision modifier set. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* bfloat16: binary32 exponent range, 7-bit fraction. */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

/* IEEE binary32 (single precision). */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* IEEE binary64 (double precision). */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

/* IEEE binary128 (quadruple precision). */
static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};
621
622 /* Unpack a float to parts, but do not canonicalize. */
623 static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
624 {
625 const int f_size = fmt->frac_size;
626 const int e_size = fmt->exp_size;
627
628 *r = (FloatParts64) {
629 .cls = float_class_unclassified,
630 .sign = extract64(raw, f_size + e_size, 1),
631 .exp = extract64(raw, f_size, e_size),
632 .frac = extract64(raw, 0, f_size)
633 };
634 }
635
/* Unpack a float16 into FloatParts64 without canonicalization. */
static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

/* Unpack a bfloat16 into FloatParts64 without canonicalization. */
static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

/* Unpack a float32 into FloatParts64 without canonicalization. */
static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

/* Unpack a float64 into FloatParts64 without canonicalization. */
static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}
655
656 static void float128_unpack_raw(FloatParts128 *p, float128 f)
657 {
658 const int f_size = float128_params.frac_size - 64;
659 const int e_size = float128_params.exp_size;
660
661 *p = (FloatParts128) {
662 .cls = float_class_unclassified,
663 .sign = extract64(f.high, f_size + e_size, 1),
664 .exp = extract64(f.high, f_size, e_size),
665 .frac_hi = extract64(f.high, 0, f_size),
666 .frac_lo = f.low,
667 };
668 }
669
670 /* Pack a float from parts, but do not canonicalize. */
671 static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
672 {
673 const int f_size = fmt->frac_size;
674 const int e_size = fmt->exp_size;
675 uint64_t ret;
676
677 ret = (uint64_t)p->sign << (f_size + e_size);
678 ret = deposit64(ret, f_size, e_size, p->exp);
679 ret = deposit64(ret, 0, f_size, p->frac);
680 return ret;
681 }
682
/* Pack FloatParts64 into a float16 without canonicalization. */
static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

/* Pack FloatParts64 into a bfloat16 without canonicalization. */
static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

/* Pack FloatParts64 into a float32 without canonicalization. */
static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

/* Pack FloatParts64 into a float64 without canonicalization. */
static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}
702
703 static float128 float128_pack_raw(const FloatParts128 *p)
704 {
705 const int f_size = float128_params.frac_size - 64;
706 const int e_size = float128_params.exp_size;
707 uint64_t hi;
708
709 hi = (uint64_t)p->sign << (f_size + e_size);
710 hi = deposit64(hi, f_size, e_size, p->exp);
711 hi = deposit64(hi, 0, f_size, p->frac_hi);
712 return make_float128(hi, p->frac_lo);
713 }
714
715 /*----------------------------------------------------------------------------
716 | Functions and definitions to determine: (1) whether tininess for underflow
717 | is detected before or after rounding by default, (2) what (if anything)
718 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
719 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
720 | are propagated from function inputs to output. These details are target-
721 | specific.
722 *----------------------------------------------------------------------------*/
723 #include "softfloat-specialize.c.inc"
724
/*
 * Size-generic dispatch for the parts{64,128,256}_* routines.
 * PARTS_GENERIC_64_128 selects the 64- or 128-bit implementation based
 * on the pointer type of P (via QEMU_GENERIC); the _256 variant also
 * handles FloatParts256.
 * NOTE(review): the parts* bodies are presumably expanded from
 * softfloat-parts.c.inc later in this file -- confirm below this chunk.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

/* ab_mask/abc_mask are float_cmask sets describing the operand classes. */
static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                      int scale, int64_t min, int64_t max,
                                      float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

/* flags is a combination of the minmax_is* enumerators above. */
static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
                           float_status *s, bool q);
static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
                            float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)

/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

/* Same QEMU_GENERIC dispatch idea as above, for the frac* helpers. */
#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
909
/* Sum the fractions of A and B into R; return true on carry-out. */
static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}
914
/* Sum the 128-bit fractions of A and B into R; return true on carry-out. */
static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    /* Propagate the carry from the low word into the high word. */
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}
922
/* Sum the 256-bit fractions of A and B into R; return true on carry-out. */
static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    /* Ripple the carry from least- to most-significant word. */
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}
932
933 #define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
934
/* Add the immediate C to A's fraction, into R; return true on carry-out. */
static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}
939
/* Add the immediate C to A's 128-bit fraction, into R; return carry-out. */
static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    /* Low-word carry-out (0 or 1) becomes the high-word addend. */
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}
945
946 #define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
947
948 static void frac64_allones(FloatParts64 *a)
949 {
950 a->frac = -1;
951 }
952
953 static void frac128_allones(FloatParts128 *a)
954 {
955 a->frac_hi = a->frac_lo = -1;
956 }
957
958 #define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
959
960 static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
961 {
962 return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
963 }
964
965 static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
966 {
967 uint64_t ta = a->frac_hi, tb = b->frac_hi;
968 if (ta == tb) {
969 ta = a->frac_lo, tb = b->frac_lo;
970 if (ta == tb) {
971 return 0;
972 }
973 }
974 return ta < tb ? -1 : 1;
975 }
976
977 #define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
978
/* Zero the fraction. */
static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}
983
/* Zero both fraction words. */
static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}
988
989 #define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
990
/*
 * Divide A's fraction by B's, leaving the quotient (with sticky lsb)
 * in A.  Returns true if the result required the extra left shift,
 * i.e. the caller must decrement the exponent by one.
 */
static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}
1022
/*
 * Divide A's 128-bit fraction by B's, leaving the quotient (with
 * sticky lsb) in A.  Returns true if the caller must decrement the
 * exponent by one; see frac64_div for the rationale.
 */
static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}
1069
1070 #define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1071
1072 static bool frac64_eqz(FloatParts64 *a)
1073 {
1074 return a->frac == 0;
1075 }
1076
1077 static bool frac128_eqz(FloatParts128 *a)
1078 {
1079 return (a->frac_hi | a->frac_lo) == 0;
1080 }
1081
1082 #define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
1083
/* Widening multiply: 64x64 -> 128-bit fraction product into R. */
static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}
1088
/* Widening multiply: 128x128 -> 256-bit fraction product into R. */
static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}
1094
1095 #define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1096
/* Two's-complement negate the fraction in place. */
static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}
1101
/* Two's-complement negate the 128-bit fraction in place (0 - A). */
static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}
1108
/* Two's-complement negate the 256-bit fraction in place (0 - A). */
static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}
1117
1118 #define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
1119
/*
 * Left-justify the fraction (msb set) and return the shift applied.
 * A zero fraction returns 64.
 */
static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}
1129
/*
 * Left-justify the 128-bit fraction and return the shift applied.
 * A zero fraction returns 128.
 */
static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        /* High word empty: shift the low word all the way up. */
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}
1145
/*
 * Left-justify the 256-bit fraction and return the shift applied.
 * A zero fraction returns 256.
 */
static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        /* Relabel words so that a0 is the most significant non-zero word. */
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    /* Sub-word shift across the whole 256-bit value. */
    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}
1192
1193 #define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
1194
/* Shift the fraction left by C bits. */
static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}
1199
/* Shift the 128-bit fraction left by C bits (only C's low 7 bits are used). */
static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    /* Whole-word shift first ... */
    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    /* ... then the remaining sub-word shift. */
    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}
1217
1218 #define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1219
/* Shift the fraction right by C bits, discarding shifted-out bits. */
static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}
1224
/* Shift the 128-bit fraction right by C bits (only C's low 7 bits are used). */
static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    /* Whole-word shift first ... */
    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    /* ... then the remaining sub-word shift. */
    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}
1242
1243 #define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1244
/*
 * Shift the fraction right by C bits, jamming any shifted-out bits
 * into the lsb (sticky bit) so inexactness is not lost.
 */
static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            /* shr_double(a0, 0, c) extracts the bits shifted out. */
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            /* Everything shifted out: result is just the sticky bit. */
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}
1258
/*
 * Shift the 128-bit fraction right by C bits, jamming shifted-out
 * bits into the lsb (sticky bit).
 */
static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        /* Whole-word shift: the entire low word becomes sticky. */
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Shift of >= 128: everything collapses into the sticky bit. */
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    /* Sub-word shift; shr_double(a1, 0, c) collects bits shifted out. */
    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}
1290
/*
 * Shift the 256-bit fraction right by C bits, jamming shifted-out
 * bits into the lsb (sticky bit).
 */
static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        /* Whole-word shifts of 128 and/or 64; dropped words go sticky. */
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        /* Shift of >= 256: everything collapses into the sticky bit. */
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    /* Sub-word shift; shr_double(a3, 0, c) collects bits shifted out. */
    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}
1332
1333 #define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1334
/* Subtract B's fraction from A's, into R; return true on borrow-out. */
static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}
1339
/* Subtract B's 128-bit fraction from A's, into R; return true on borrow-out. */
static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    /* Propagate the borrow from the low word into the high word. */
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}
1347
/* Subtract B's 256-bit fraction from A's, into R; return true on borrow-out. */
static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    /* Ripple the borrow from least- to most-significant word. */
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}
1357
1358 #define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1359
/* Narrow 128 -> 64 bits, jamming discarded low bits into the lsb. */
static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}
1364
/* Narrow 256 -> 128 bits, jamming discarded low bits into the lsb. */
static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}
1370
1371 #define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1372
/* Widen 64 -> 128 bits: place the fraction in the high word, zero the rest. */
static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}
1378
/* Widen 128 -> 256 bits: place the fraction in the high words, zero the rest. */
static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}
1386
1387 #define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1388
1389 #define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1390 #define FloatPartsN glue(FloatParts,N)
1391 #define FloatPartsW glue(FloatParts,W)
1392
1393 #define N 64
1394 #define W 128
1395
1396 #include "softfloat-parts-addsub.c.inc"
1397 #include "softfloat-parts.c.inc"
1398
1399 #undef N
1400 #undef W
1401 #define N 128
1402 #define W 256
1403
1404 #include "softfloat-parts-addsub.c.inc"
1405 #include "softfloat-parts.c.inc"
1406
1407 #undef N
1408 #undef W
1409 #define N 256
1410
1411 #include "softfloat-parts-addsub.c.inc"
1412
1413 #undef N
1414 #undef W
1415 #undef partsN
1416 #undef FloatPartsN
1417 #undef FloatPartsW
1418
1419 /*
1420 * Pack/unpack routines with a specific FloatFmt.
1421 */
1422
/* Unpack a float16 into canonical parts, per the given half-precision format. */
static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}
1429
/* Unpack an IEEE float16 into canonical parts. */
static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}
1435
/* Unpack a bfloat16 into canonical parts. */
static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}
1442
/* Round and pack canonical parts into a float16 of the given format. */
static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}
1450
/* Round and pack canonical parts into an IEEE float16. */
static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}
1456
/* Round and pack canonical parts into a bfloat16. */
static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}
1463
/* Unpack a float32 into canonical parts. */
static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}
1470
/* Round and pack canonical parts into a float32. */
static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}
1477
/* Unpack a float64 into canonical parts. */
static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}
1484
/* Round and pack canonical parts into a float64. */
static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}
1491
/* Unpack a float128 into canonical parts. */
static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}
1498
/* Round and pack canonical parts into a float128. */
static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
1505
1506 /*
1507 * Addition and subtraction
1508 */
1509
/* Common float16 add/sub implementation via the generic parts code. */
static float16 QEMU_FLATTEN
float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float16_round_pack_canonical(pr, status);
}
1521
/* float16 addition. */
float16 float16_add(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, false);
}
1526
/* float16 subtraction. */
float16 float16_sub(float16 a, float16 b, float_status *status)
{
    return float16_addsub(a, b, status, true);
}
1531
/* Softfloat fallback for float32 add/sub. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float32_round_pack_canonical(pr, status);
}
1543
/* Softfloat float32 addition. */
static float32 soft_f32_add(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, false);
}
1548
/* Softfloat float32 subtraction. */
static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
{
    return soft_f32_addsub(a, b, status, true);
}
1553
/* Softfloat fallback for float64 add/sub. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float64_round_pack_canonical(pr, status);
}
1565
/* Softfloat float64 addition. */
static float64 soft_f64_add(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, false);
}
1570
/* Softfloat float64 subtraction. */
static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
{
    return soft_f64_addsub(a, b, status, true);
}
1575
/* Host-FPU single-precision addition, for the hardfloat fast path. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;
    return sum;
}
1580
/* Host-FPU single-precision subtraction, for the hardfloat fast path. */
static float hard_f32_sub(float a, float b)
{
    float diff = a - b;
    return diff;
}
1585
/* Host-FPU double-precision addition, for the hardfloat fast path. */
static double hard_f64_add(double a, double b)
{
    double sum = a + b;
    return sum;
}
1590
/* Host-FPU double-precision subtraction, for the hardfloat fast path. */
static double hard_f64_sub(double a, double b)
{
    double diff = a - b;
    return diff;
}
1595
1596 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1597 {
1598 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1599 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1600 }
1601 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1602 }
1603
1604 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1605 {
1606 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1607 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1608 } else {
1609 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1610 }
1611 }
1612
/* Dispatch f32 add/sub between the host-FPU fast path and softfloat. */
static float32 float32_addsub(float32 a, float32 b, float_status *s,
                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
    return float32_gen2(a, b, s, hard, soft,
                        f32_is_zon2, f32_addsubmul_post);
}
1619
/* Dispatch f64 add/sub between the host-FPU fast path and softfloat. */
static float64 float64_addsub(float64 a, float64 b, float_status *s,
                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
    return float64_gen2(a, b, s, hard, soft,
                        f64_is_zon2, f64_addsubmul_post);
}
1626
/* float32 addition. */
float32 QEMU_FLATTEN
float32_add(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
}
1632
/* float32 subtraction. */
float32 QEMU_FLATTEN
float32_sub(float32 a, float32 b, float_status *s)
{
    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
}
1638
/* float64 addition. */
float64 QEMU_FLATTEN
float64_add(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
}
1644
/* float64 subtraction. */
float64 QEMU_FLATTEN
float64_sub(float64 a, float64 b, float_status *s)
{
    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
}
1650
/* Common bfloat16 add/sub implementation via the generic parts code. */
static bfloat16 QEMU_FLATTEN
bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return bfloat16_round_pack_canonical(pr, status);
}
1662
/* bfloat16 addition. */
bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, false);
}
1667
/* bfloat16 subtraction. */
bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
{
    return bfloat16_addsub(a, b, status, true);
}
1672
/* Common float128 add/sub implementation via the generic parts code. */
static float128 QEMU_FLATTEN
float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_addsub(&pa, &pb, status, subtract);

    return float128_round_pack_canonical(pr, status);
}
1684
/* float128 addition. */
float128 float128_add(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, false);
}
1689
/* float128 subtraction. */
float128 float128_sub(float128 a, float128 b, float_status *status)
{
    return float128_addsub(a, b, status, true);
}
1694
1695 /*
1696 * Multiplication
1697 */
1698
/* float16 multiplication via the generic parts code. */
float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}
1709
/* Softfloat fallback for float32 multiplication. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_mul(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}
1721
/* Softfloat fallback for float64 multiplication. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_mul(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}
1733
/* Host-FPU single-precision multiplication, for the hardfloat fast path. */
static float hard_f32_mul(float a, float b)
{
    float prod = a * b;
    return prod;
}
1738
/* Host-FPU double-precision multiplication, for the hardfloat fast path. */
static double hard_f64_mul(double a, double b)
{
    double prod = a * b;
    return prod;
}
1743
/* float32 multiplication, host-FPU fast path with softfloat fallback. */
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
                        f32_is_zon2, f32_addsubmul_post);
}
1750
/* float64 multiplication, host-FPU fast path with softfloat fallback. */
float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
                        f64_is_zon2, f64_addsubmul_post);
}
1757
/* bfloat16 multiplication via the generic parts code. */
bfloat16 QEMU_FLATTEN
bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}
1769
/* float128 multiplication via the generic parts code. */
float128 QEMU_FLATTEN
float128_mul(float128 a, float128 b, float_status *status)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    pr = parts_mul(&pa, &pb, status);

    return float128_round_pack_canonical(pr, status);
}
1781
1782 /*
1783 * Fused multiply-add
1784 */
1785
/* float16 fused multiply-add, with float_muladd_* flag variants. */
float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
                                    int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    float16_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float16_round_pack_canonical(pr, status);
}
1798
/* Softfloat fallback for float32 fused multiply-add. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    float32_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float32_round_pack_canonical(pr, status);
}
1812
/* Softfloat fallback for float64 fused multiply-add. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
                float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    float64_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float64_round_pack_canonical(pr, status);
}
1826
1827 static bool force_soft_fma;
1828
/*
 * float32 fused multiply-add: try the host fmaf() fast path,
 * falling back to softfloat whenever hardfloat preconditions do not
 * hold or the host result needs flag-accurate recomputation.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* Halving has no host equivalent; always take the soft path. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* Compute the correctly signed zero product by hand. */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /* Tiny result: redo in softfloat with the original inputs. */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
1899
/*
 * float64 fused multiply-add: try the host fma() fast path,
 * falling back to softfloat whenever hardfloat preconditions do not
 * hold or the host result needs flag-accurate recomputation.
 */
float64 QEMU_FLATTEN
float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
{
    union_float64 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    /* Halving has no host equivalent; always take the soft path. */
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
        union_float64 up;
        bool prod_sign;

        /* Compute the correctly signed zero product by hand. */
        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float64_set_sign(float64_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        union_float64 ua_orig = ua;
        union_float64 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fma(ua.h, ub.h, uc.h);

        if (unlikely(f64_is_inf(ur))) {
            float_raise(float_flag_overflow, s);
        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
            /*
             * NOTE(review): the tiny-result threshold here is FLT_MIN,
             * not DBL_MIN, matching the f32 version above.  This is
             * conservative (more results than necessary are redone in
             * softfloat) — confirm whether DBL_MIN was intended.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float64_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
}
1970
/* bfloat16 fused multiply-add, with float_muladd_* flag variants. */
bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
                                      int flags, float_status *status)
{
    FloatParts64 pa, pb, pc, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    bfloat16_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return bfloat16_round_pack_canonical(pr, status);
}
1983
/* float128 fused multiply-add, with float_muladd_* flag variants. */
float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
                                      int flags, float_status *status)
{
    FloatParts128 pa, pb, pc, *pr;

    float128_unpack_canonical(&pa, a, status);
    float128_unpack_canonical(&pb, b, status);
    float128_unpack_canonical(&pc, c, status);
    pr = parts_muladd(&pa, &pb, &pc, flags, status);

    return float128_round_pack_canonical(pr, status);
}
1996
1997 /*
1998 * Division
1999 */
2000
/* float16 division via the generic parts code. */
float16 float16_div(float16 a, float16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, status);
    float16_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float16_round_pack_canonical(pr, status);
}
2011
/* Softfloat fallback for float32 division. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_div(float32 a, float32 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, status);
    float32_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float32_round_pack_canonical(pr, status);
}
2023
/* Softfloat fallback for float64 division. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_div(float64 a, float64 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, status);
    float64_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return float64_round_pack_canonical(pr, status);
}
2035
/* Host-FPU single-precision division, for the hardfloat fast path. */
static float hard_f32_div(float a, float b)
{
    float quot = a / b;
    return quot;
}
2040
/* Host-FPU double-precision division, for the hardfloat fast path. */
static double hard_f64_div(double a, double b)
{
    double quot = a / b;
    return quot;
}
2045
2046 static bool f32_div_pre(union_float32 a, union_float32 b)
2047 {
2048 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2049 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2050 fpclassify(b.h) == FP_NORMAL;
2051 }
2052 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2053 }
2054
2055 static bool f64_div_pre(union_float64 a, union_float64 b)
2056 {
2057 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2058 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2059 fpclassify(b.h) == FP_NORMAL;
2060 }
2061 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2062 }
2063
2064 static bool f32_div_post(union_float32 a, union_float32 b)
2065 {
2066 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2067 return fpclassify(a.h) != FP_ZERO;
2068 }
2069 return !float32_is_zero(a.s);
2070 }
2071
2072 static bool f64_div_post(union_float64 a, union_float64 b)
2073 {
2074 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2075 return fpclassify(a.h) != FP_ZERO;
2076 }
2077 return !float64_is_zero(a.s);
2078 }
2079
/* float32 division, host-FPU fast path with softfloat fallback. */
float32 QEMU_FLATTEN
float32_div(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
                        f32_div_pre, f32_div_post);
}
2086
/* float64 division, host-FPU fast path with softfloat fallback. */
float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
                        f64_div_pre, f64_div_post);
}
2093
/* bfloat16 division via the generic parts code. */
bfloat16 QEMU_FLATTEN
bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, status);
    bfloat16_unpack_canonical(&pb, b, status);
    pr = parts_div(&pa, &pb, status);

    return bfloat16_round_pack_canonical(pr, status);
}
2105
2106 float128 QEMU_FLATTEN
2107 float128_div(float128 a, float128 b, float_status *status)
2108 {
2109 FloatParts128 pa, pb, *pr;
2110
2111 float128_unpack_canonical(&pa, a, status);
2112 float128_unpack_canonical(&pb, b, status);
2113 pr = parts_div(&pa, &pb, status);
2114
2115 return float128_round_pack_canonical(pr, status);
2116 }
2117
2118 /*
2119 * Float to Float conversions
2120 *
2121 * Returns the result of converting one float format to another. The
2122 * conversion is performed according to the IEC/IEEE Standard for
2123 * Binary Floating-Point Arithmetic.
2124 *
2125 * Usually this only needs to take care of raising invalid exceptions
2126 * and handling the conversion on NaNs.
2127 */
2128
/*
 * Adjust the canonical parts in A for packing into the Arm
 * Alternative Half Precision (AHP) format, which encodes neither
 * Inf nor NaN: NaNs collapse to a signed zero and Inf saturates to
 * the maximum normal, each raising Invalid.  Normals and zeros pass
 * through unchanged.
 */
static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        /*
         * There is no NaN in the destination format. Raise Invalid
         * and return a zero with the sign of the input NaN.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_zero;
        break;

    case float_class_inf:
        /*
         * There is no Inf in the destination format. Raise Invalid
         * and return the maximum normal with the correct sign.
         */
        float_raise(float_flag_invalid, s);
        a->cls = float_class_normal;
        a->exp = float16_params_ahp.exp_max;
        /* All-ones fraction, including the implicit bit position. */
        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
                                  float16_params_ahp.frac_size + 1);
        break;

    case float_class_normal:
    case float_class_zero:
        break;

    default:
        g_assert_not_reached();
    }
}
2162
/*
 * Same-width float-to-float parts conversion: everything passes
 * through unchanged except NaNs, which are handed to
 * parts_return_nan (raising flags as needed — see that helper).
 */
static void parts64_float_to_float(FloatParts64 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

/* 128-bit variant of parts64_float_to_float. */
static void parts128_float_to_float(FloatParts128 *a, float_status *s)
{
    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}

/* Dispatch on the parts type (FloatParts64 vs FloatParts128). */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2179
/*
 * Narrow 128-bit parts B into 64-bit parts A.  For normals the
 * fraction is reduced by frac_truncjam (truncation that folds the
 * discarded low bits back in — see that helper); for NaNs only the
 * high 64 fraction bits (the payload) are kept before the NaN is
 * processed by parts_return_nan.  Other classes carry no fraction.
 */
static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
                                        float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;

    if (a->cls == float_class_normal) {
        frac_truncjam(a, b);
    } else if (is_nan(a->cls)) {
        /* Discard the low bits of the NaN. */
        a->frac = b->frac_hi;
        parts_return_nan(a, s);
    }
}
2195
/*
 * Widen 64-bit parts B into 128-bit parts A.  The fraction is
 * extended by frac_widen (widening is exact, so no rounding state
 * is needed); NaNs are then processed by parts_return_nan.
 */
static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
                                       float_status *s)
{
    a->cls = b->cls;
    a->sign = b->sign;
    a->exp = b->exp;
    frac_widen(a, b);

    if (is_nan(a->cls)) {
        parts_return_nan(a, s);
    }
}
2208
/*
 * Half to single precision.  'ieee' selects whether the input is
 * interpreted as IEEE half precision or as Arm Alternative Half
 * Precision (AHP, which has no Inf/NaN encodings).
 */
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 p;

    float16a_unpack_canonical(&p, a, s, fmt16);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

/* Half to double precision; 'ieee' as for float16_to_float32. */
float64 float16_to_float64(float16 a, bool ieee, float_status *s)
{
    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
    FloatParts64 p;

    float16a_unpack_canonical(&p, a, s, fmt16);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}
2228
2229 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2230 {
2231 FloatParts64 p;
2232 const FloatFmt *fmt;
2233
2234 float32_unpack_canonical(&p, a, s);
2235 if (ieee) {
2236 parts_float_to_float(&p, s);
2237 fmt = &float16_params;
2238 } else {
2239 parts_float_to_ahp(&p, s);
2240 fmt = &float16_params_ahp;
2241 }
2242 return float16a_round_pack_canonical(&p, s, fmt);
2243 }
2244
/*
 * Softfloat slow path for float32_to_float64: handles the inputs
 * the fast path rejects (NaNs, infinities, subnormals) via the
 * canonical parts representation.
 */
static float64 QEMU_SOFTFLOAT_ATTR
soft_float32_to_float64(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}
2254
2255 float64 float32_to_float64(float32 a, float_status *s)
2256 {
2257 if (likely(float32_is_normal(a))) {
2258 /* Widening conversion can never produce inexact results. */
2259 union_float32 uf;
2260 union_float64 ud;
2261 uf.s = a;
2262 ud.h = uf.h;
2263 return ud.s;
2264 } else if (float32_is_zero(a)) {
2265 return float64_set_sign(float64_zero, float32_is_neg(a));
2266 } else {
2267 return soft_float32_to_float64(a, s);
2268 }
2269 }
2270
2271 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2272 {
2273 FloatParts64 p;
2274 const FloatFmt *fmt;
2275
2276 float64_unpack_canonical(&p, a, s);
2277 if (ieee) {
2278 parts_float_to_float(&p, s);
2279 fmt = &float16_params;
2280 } else {
2281 parts_float_to_ahp(&p, s);
2282 fmt = &float16_params_ahp;
2283 }
2284 return float16a_round_pack_canonical(&p, s, fmt);
2285 }
2286
/*
 * Same-parts-width conversions between float32, float64 and
 * bfloat16: unpack to canonical FloatParts64, apply the generic
 * float-to-float NaN handling, and repack (rounding as needed) in
 * the destination format.
 */
float32 float64_to_float32(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

/* bfloat16 widens exactly into float32 apart from NaN handling. */
float32 bfloat16_to_float32(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float32_round_pack_canonical(&p, s);
}

float64 bfloat16_to_float64(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 float32_to_bfloat16(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}

bfloat16 float64_to_bfloat16(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_float_to_float(&p, s);
    return bfloat16_round_pack_canonical(&p, s);
}
2331
/*
 * Conversions crossing the 64/128-bit parts boundary: narrowing
 * from float128 uses parts_float_to_float_narrow (fraction
 * truncate-and-jam), widening to float128 uses
 * parts_float_to_float_widen (exact).
 */
float32 float128_to_float32(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float32_round_pack_canonical(&p64, s);
}

float64 float128_to_float64(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return float64_round_pack_canonical(&p64, s);
}

float128 float32_to_float128(float32 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float32_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}

float128 float64_to_float128(float64 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float64_unpack_canonical(&p64, a, s);
    parts_float_to_float_widen(&p128, &p64, s);
    return float128_round_pack_canonical(&p128, s);
}
2371
2372 /*
2373 * Round to integral value
2374 */
2375
/*
 * Round each format to an integral value in the same format, using
 * the rounding mode currently selected in *s (scale argument 0).
 */
float16 float16_round_to_int(float16 a, float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
    return float16_round_pack_canonical(&p, s);
}

float32 float32_round_to_int(float32 a, float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
    return float32_round_pack_canonical(&p, s);
}

float64 float64_round_to_int(float64 a, float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
    return float64_round_pack_canonical(&p, s);
}

bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
    return bfloat16_round_pack_canonical(&p, s);
}

float128 float128_round_to_int(float128 a, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
    return float128_round_pack_canonical(&p, s);
}
2420
2421 /*
2422 * Floating-point to signed integer conversions
2423 */
2424
/*
 * float16 -> signed integer with explicit rounding mode and a
 * power-of-2 scale applied during conversion (scalbn-style).  The
 * destination type's min/max are passed to parts_float_to_sint,
 * which owns the out-of-range and NaN handling.
 */
int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                              float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
}

int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
2459 }
2460
/*
 * float32/float64 -> signed integer with explicit rounding mode and
 * power-of-2 scale; bounds handling in parts_float_to_sint.
 */
int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}

int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
2514
/*
 * bfloat16 -> signed integer with explicit rounding mode and
 * power-of-2 scale; bounds handling in parts_float_to_sint.
 */
int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
}

int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
                                 float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
2541
/*
 * float128 -> signed integer with explicit rounding mode and
 * power-of-2 scale; static because only the public wrappers below
 * use fixed mode/scale combinations.
 */
static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
}

static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
                                        int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
}
2559
/*
 * Convenience wrappers for float -> signed integer conversion: use
 * either the rounding mode from the status argument or explicit
 * round-to-zero, always with scale 0.
 */
int8_t float16_to_int8(float16 a, float_status *s)
{
    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float16_to_int16(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float16_to_int32(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float16_to_int64(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float32_to_int16(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float32_to_int32(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float32_to_int64(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t float64_to_int16(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float64_to_int32(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float64_to_int64(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t float128_to_int32(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t float128_to_int64(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

/* Round-to-zero (truncating) variants. */
int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
}

int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2674
/*
 * bfloat16 -> signed integer convenience wrappers: current rounding
 * mode or round-to-zero, scale 0.
 */
int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
}

int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
}

int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
}

int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
}

int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
}

int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
}
2704
2705 /*
2706 * Floating-point to unsigned integer conversions
2707 */
2708
/*
 * float16 -> unsigned integer with explicit rounding mode and
 * power-of-2 scale; only the destination max is needed (the min is
 * implicitly 0) and out-of-range handling lives in
 * parts_float_to_uint.
 */
uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
}

uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
2744
/*
 * float32/float64 -> unsigned integer with explicit rounding mode
 * and power-of-2 scale; bounds handling in parts_float_to_uint.
 */
uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
                                  float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
2798
/*
 * bfloat16/float128 -> unsigned integer with explicit rounding mode
 * and power-of-2 scale; the float128 variants are static, used only
 * by the public wrappers below.
 */
uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
}

uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
                                   int scale, float_status *s)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}

static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
}

static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
                                          int scale, float_status *s)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, s);
    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
}
2843
/*
 * Convenience wrappers for float -> unsigned integer conversion:
 * use either the rounding mode from the status argument or explicit
 * round-to-zero, always with scale 0.
 */
uint8_t float16_to_uint8(float16 a, float_status *s)
{
    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float16_to_uint16(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float16_to_uint32(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float16_to_uint64(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float32_to_uint16(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float32_to_uint32(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float32_to_uint64(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t float64_to_uint16(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float64_to_uint32(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float64_to_uint64(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t float128_to_uint32(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t float128_to_uint64(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

/* Round-to-zero (truncating) variants. */
uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2958
/*
 * bfloat16 -> unsigned integer convenience wrappers: current
 * rounding mode or round-to-zero, scale 0.
 */
uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}

uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}

uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}

uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}

uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}

uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2988
2989 /*
2990 * Signed integer to floating-point conversions
2991 */
2992
/*
 * Signed integer -> float16.  The narrower-input variants simply
 * sign-extend through the int64 entry point; 'scale' is a
 * power-of-2 scaling applied during conversion.
 */
float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return float16_round_pack_canonical(&p, status);
}

float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}

float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}

float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
3030
3031 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3032 {
3033 FloatParts64 p;
3034
3035 parts64_sint_to_float(&p, a, scale, status);
3036 return float32_round_pack_canonical(&p, status);
3037 }
3038
/*
 * Narrower signed integer -> float32 conversions, all funnelled
 * through int64_to_float32_scalbn via sign extension.
 */
float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}

float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}

float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}
3063
/*
 * Signed integer -> float64; narrower inputs sign-extend through
 * the int64 entry point.
 */
float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return float64_round_pack_canonical(&p, status);
}

float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}

float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}

float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}
3096
/*
 * Signed integer -> bfloat16; narrower inputs sign-extend through
 * the int64 entry point.
 */
bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_sint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}

bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}
3129
/*
 * Signed integer -> float128 (no scaled variant is needed; every
 * int64 is exactly representable in the 113-bit significand, so the
 * conversion is exact — NOTE(review): exactness follows from the
 * format, not from code visible here).
 */
float128 int64_to_float128(int64_t a, float_status *status)
{
    FloatParts128 p;

    parts_sint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}

float128 int32_to_float128(int32_t a, float_status *status)
{
    return int64_to_float128(a, status);
}
3142
3143 /*
3144 * Unsigned Integer to floating-point conversions
3145 */
3146
/*
 * Unsigned integer -> float16; narrower inputs zero-extend through
 * the uint64 entry point.
 */
float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float16_round_pack_canonical(&p, status);
}

float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}

float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}

float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
3184
/*
 * Unsigned integer -> float32; narrower inputs zero-extend through
 * the uint64 entry point.
 */
float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float32_round_pack_canonical(&p, status);
}

float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}

float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}

float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}
3217
/*
 * Convert the unsigned integer @a, scaled by 2**@scale, to float64,
 * rounding according to @status.
 */
float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return float64_round_pack_canonical(&p, status);
}

/* Narrower unsigned inputs widen losslessly; share the 64-bit path. */
float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}

/* Unscaled (scale == 0) convenience wrappers. */
float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}

float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}
3250
/*
 * Convert the unsigned integer @a, scaled by 2**@scale, to bfloat16,
 * rounding according to @status.
 */
bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
{
    FloatParts64 p;

    parts_uint_to_float(&p, a, scale, status);
    return bfloat16_round_pack_canonical(&p, status);
}

/* Narrower unsigned inputs widen losslessly; share the 64-bit path. */
bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}

/* Unscaled (scale == 0) convenience wrappers. */
bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}

bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}
3283
/*
 * Convert unsigned 64-bit @a to float128.  No _scalbn variant exists
 * for this format, so the scale argument is fixed at 0.
 */
float128 uint64_to_float128(uint64_t a, float_status *status)
{
    FloatParts128 p;

    parts_uint_to_float(&p, a, 0, status);
    return float128_round_pack_canonical(&p, status);
}
3291
3292 /*
3293 * Minimum and maximum
3294 */
3295
/*
 * Shared implementation behind the public min/max entry points:
 * unpack both operands and dispatch to parts_minmax() with @flags,
 * a combination of minmax_ismin / minmax_isnum / minmax_ismag.
 */
static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float16_round_pack_canonical(pr, s);
}

static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
                                float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return bfloat16_round_pack_canonical(pr, s);
}

static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float32_round_pack_canonical(pr, s);
}

static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
{
    FloatParts64 pa, pb, *pr;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float64_round_pack_canonical(pr, s);
}

static float128 float128_minmax(float128 a, float128 b,
                                float_status *s, int flags)
{
    FloatParts128 pa, pb, *pr;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    pr = parts_minmax(&pa, &pb, s, flags);

    return float128_round_pack_canonical(pr, s);
}
3352
/*
 * Instantiate the public entry points.  MINMAX_1 emits one wrapper
 * around the per-format *_minmax helper; MINMAX_2 emits the full set
 * of six min/max variants for a given type.
 */
#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

#define MINMAX_2(type) \
    MINMAX_1(type, max, 0) \
    MINMAX_1(type, maxnum, minmax_isnum) \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
    MINMAX_1(type, min, minmax_ismin) \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)

MINMAX_2(float16)
MINMAX_2(bfloat16)
MINMAX_2(float32)
MINMAX_2(float64)
MINMAX_2(float128)

#undef MINMAX_1
#undef MINMAX_2
3373
3374 /*
3375 * Floating point compare
3376 */
3377
/*
 * Compare @a and @b, returning a FloatRelation.  When @is_quiet is
 * true, only signalling NaNs raise the invalid-operation flag.
 */
static FloatRelation QEMU_FLATTEN
float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float16_unpack_canonical(&pa, a, s);
    float16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

/* Signalling compare: any NaN operand raises invalid. */
FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, false);
}

/* Quiet compare: only signalling NaNs raise invalid. */
FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, true);
}
3397
/* Softfloat fallback for float32 comparison. */
static FloatRelation QEMU_SOFTFLOAT_ATTR
float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float32_unpack_canonical(&pa, a, s);
    float32_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

/*
 * Hardfloat fast path: classify the relation with the host's
 * unordered-aware <math.h> comparison macros, which never raise
 * host exceptions on quiet NaNs.  The unordered case (and a build
 * with hardfloat disabled) falls through to softfloat so that the
 * guest exception flags are raised correctly.
 */
static FloatRelation QEMU_FLATTEN
float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
{
    union_float32 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float32_do_compare(ua.s, ub.s, s, is_quiet);
}

FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, false);
}

FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, true);
}
3447
/* Softfloat fallback for float64 comparison. */
static FloatRelation QEMU_SOFTFLOAT_ATTR
float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    float64_unpack_canonical(&pa, a, s);
    float64_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

/*
 * Hardfloat fast path; mirrors float32_hs_compare().  Unordered
 * operands fall through to softfloat so the guest flags are set.
 */
static FloatRelation QEMU_FLATTEN
float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
{
    union_float64 ua, ub;

    ua.s = xa;
    ub.s = xb;

    if (QEMU_NO_HARDFLOAT) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (isgreaterequal(ua.h, ub.h)) {
        if (isgreater(ua.h, ub.h)) {
            return float_relation_greater;
        }
        return float_relation_equal;
    }
    if (likely(isless(ua.h, ub.h))) {
        return float_relation_less;
    }
    /*
     * The only condition remaining is unordered.
     * Fall through to set flags.
     */
 soft:
    return float64_do_compare(ua.s, ub.s, s, is_quiet);
}

FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, false);
}

FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, true);
}
3497
/*
 * Compare @a and @b, returning a FloatRelation.  When @is_quiet is
 * true, only signalling NaNs raise the invalid-operation flag.
 */
static FloatRelation QEMU_FLATTEN
bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
{
    FloatParts64 pa, pb;

    bfloat16_unpack_canonical(&pa, a, s);
    bfloat16_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

/* Signalling compare: any NaN operand raises invalid. */
FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, false);
}

/* Quiet compare: only signalling NaNs raise invalid. */
FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, true);
}
3517
/*
 * Compare @a and @b, returning a FloatRelation.  When @is_quiet is
 * true, only signalling NaNs raise the invalid-operation flag.
 */
static FloatRelation QEMU_FLATTEN
float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
{
    FloatParts128 pa, pb;

    float128_unpack_canonical(&pa, a, s);
    float128_unpack_canonical(&pb, b, s);
    return parts_compare(&pa, &pb, s, is_quiet);
}

/* Signalling compare: any NaN operand raises invalid. */
FloatRelation float128_compare(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, false);
}

/* Quiet compare: only signalling NaNs raise invalid. */
FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, true);
}
3537
3538 /*
3539 * Scale by 2**N
3540 */
3541
/*
 * Multiply @a by 2**@n, rounding per @status.  All formats share the
 * same shape: unpack, adjust the exponent in parts_scalbn(), repack.
 */
float16 float16_scalbn(float16 a, int n, float_status *status)
{
    FloatParts64 p;

    float16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float16_round_pack_canonical(&p, status);
}

float32 float32_scalbn(float32 a, int n, float_status *status)
{
    FloatParts64 p;

    float32_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float32_round_pack_canonical(&p, status);
}

float64 float64_scalbn(float64 a, int n, float_status *status)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float64_round_pack_canonical(&p, status);
}

bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return bfloat16_round_pack_canonical(&p, status);
}

float128 float128_scalbn(float128 a, int n, float_status *status)
{
    FloatParts128 p;

    float128_unpack_canonical(&p, a, status);
    parts_scalbn(&p, n, status);
    return float128_round_pack_canonical(&p, status);
}
3586
3587 /*
3588 * Square Root
3589 *
3590 * The old softfloat code did an approximation step before zeroing in
 * on the final result. However for simplicity we just compute the
3592 * square root by iterating down from the implicit bit to enough extra
3593 * bits to ensure we get a correctly rounded result.
3594 *
3595 * This does mean however the calculation is slower than before,
3596 * especially for 64 bit floats.
3597 */
3598
/*
 * Bit-by-bit square root of decomposed value @a.  @p identifies the
 * target format and is used only to decide how many extra result bits
 * must be computed for correct rounding.  Special operands (NaN, zero,
 * negative, +inf) are resolved up front.
 */
static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
{
    uint64_t a_frac, r_frac, s_frac;
    int bit, last_bit;

    if (is_nan(a.cls)) {
        /* Propagate the NaN, silencing/replacing it as required. */
        parts_return_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_zero) {
        return a;  /* sqrt(+-0) = +-0 */
    }
    if (a.sign) {
        /* sqrt of a negative non-zero value: invalid op, default NaN. */
        float_raise(float_flag_invalid, s);
        parts_default_nan(&a, s);
        return a;
    }
    if (a.cls == float_class_inf) {
        return a;  /* sqrt(+inf) = +inf */
    }

    assert(a.cls == float_class_normal);

    /* We need two overflow bits at the top.  Adding room for that is a
     * right shift.  If the exponent is odd, we can discard the low bit
     * by multiplying the fraction by 2; that's a left shift.  Combine
     * those and we shift right by 1 if the exponent is odd, otherwise 2.
     */
    a_frac = a.frac >> (2 - (a.exp & 1));
    a.exp >>= 1;

    /* Bit-by-bit computation of sqrt.  */
    r_frac = 0;
    s_frac = 0;

    /* Iterate from implicit bit down to the 3 extra bits to compute a
     * properly rounded result.  Remember we've inserted two more bits
     * at the top, so these positions are two less.
     */
    bit = DECOMPOSED_BINARY_POINT - 2;
    last_bit = MAX(p->frac_shift - 4, 0);
    do {
        /* Classic restoring-sqrt step: try setting the next result bit
         * and keep it only if the trial subtraction does not go negative.
         */
        uint64_t q = 1ULL << bit;
        uint64_t t_frac = s_frac + q;
        if (t_frac <= a_frac) {
            s_frac = t_frac + q;
            a_frac -= t_frac;
            r_frac += q;
        }
        a_frac <<= 1;
    } while (--bit >= last_bit);

    /* Undo the right shift done above.  If there is any remaining
     * fraction, the result is inexact.  Set the sticky bit.
     */
    a.frac = (r_frac << 2) + (a_frac != 0);

    return a;
}
3658
/* Square root of a float16, via the generic parts algorithm. */
float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
{
    FloatParts64 pa, pr;

    float16_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float16_params);
    return float16_round_pack_canonical(&pr, status);
}

/* Softfloat fallback for float32_sqrt()'s hardfloat fast path. */
static float32 QEMU_SOFTFLOAT_ATTR
soft_f32_sqrt(float32 a, float_status *status)
{
    FloatParts64 pa, pr;

    float32_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float32_params);
    return float32_round_pack_canonical(&pr, status);
}

/* Softfloat fallback for float64_sqrt()'s hardfloat fast path. */
static float64 QEMU_SOFTFLOAT_ATTR
soft_f64_sqrt(float64 a, float_status *status)
{
    FloatParts64 pa, pr;

    float64_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &float64_params);
    return float64_round_pack_canonical(&pr, status);
}
3687
/*
 * Square root of a float32.  Uses the host sqrtf() when the operand is
 * a non-negative zero or normal number and the FP state permits it;
 * all other cases (NaN, inf, denormal, negative) go to softfloat.
 */
float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
{
    union_float32 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F32_USE_FP) {
        /* Classify with host FP macros when that is the faster option. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
                        float32_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrtf(ua.h);
    return ur.s;

 soft:
    return soft_f32_sqrt(ua.s, s);
}
3714
/*
 * Square root of a float64; mirrors float32_sqrt().  Only non-negative
 * zero/normal operands take the host sqrt() fast path.
 */
float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
{
    union_float64 ua, ur;

    ua.s = xa;
    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush1(&ua.s, s);
    if (QEMU_HARDFLOAT_1F64_USE_FP) {
        /* Classify with host FP macros when that is the faster option. */
        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
                       fpclassify(ua.h) == FP_ZERO) ||
                     signbit(ua.h))) {
            goto soft;
        }
    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
                        float64_is_neg(ua.s))) {
        goto soft;
    }
    ur.h = sqrt(ua.h);
    return ur.s;

 soft:
    return soft_f64_sqrt(ua.s, s);
}
3741
/* Square root of a bfloat16, via the generic parts algorithm. */
bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
{
    FloatParts64 pa, pr;

    bfloat16_unpack_canonical(&pa, a, status);
    pr = sqrt_float(pa, status, &bfloat16_params);
    return bfloat16_round_pack_canonical(&pr, status);
}
3750
3751 /*----------------------------------------------------------------------------
3752 | The pattern for a default generated NaN.
3753 *----------------------------------------------------------------------------*/
3754
/*
 * Build the target's default NaN pattern in decomposed form, then
 * shift the fraction down from the decomposed binary point to the
 * format's field position before packing raw bits.
 */
float16 float16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

float128 float128_default_nan(float_status *status)
{
    FloatParts128 p;

    parts_default_nan(&p, status);
    /* 128-bit fraction lives in two words; use the wide shift helper. */
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}

bfloat16 bfloat16_default_nan(float_status *status)
{
    FloatParts64 p;

    parts_default_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}
3799
3800 /*----------------------------------------------------------------------------
3801 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3802 *----------------------------------------------------------------------------*/
3803
/*
 * Turn a signalling NaN into the corresponding quiet NaN.  The raw
 * fraction is shifted up to the decomposed binary point so that
 * parts_silence_nan() can operate format-independently, then shifted
 * back down for repacking.
 */
float16 float16_silence_nan(float16 a, float_status *status)
{
    FloatParts64 p;

    float16_unpack_raw(&p, a);
    p.frac <<= float16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float16_params.frac_shift;
    return float16_pack_raw(&p);
}

float32 float32_silence_nan(float32 a, float_status *status)
{
    FloatParts64 p;

    float32_unpack_raw(&p, a);
    p.frac <<= float32_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float32_params.frac_shift;
    return float32_pack_raw(&p);
}

float64 float64_silence_nan(float64 a, float_status *status)
{
    FloatParts64 p;

    float64_unpack_raw(&p, a);
    p.frac <<= float64_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= float64_params.frac_shift;
    return float64_pack_raw(&p);
}

bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
{
    FloatParts64 p;

    bfloat16_unpack_raw(&p, a);
    p.frac <<= bfloat16_params.frac_shift;
    parts_silence_nan(&p, status);
    p.frac >>= bfloat16_params.frac_shift;
    return bfloat16_pack_raw(&p);
}

float128 float128_silence_nan(float128 a, float_status *status)
{
    FloatParts128 p;

    float128_unpack_raw(&p, a);
    /* 128-bit fraction lives in two words; use the wide shift helpers. */
    frac_shl(&p, float128_params.frac_shift);
    parts_silence_nan(&p, status);
    frac_shr(&p, float128_params.frac_shift);
    return float128_pack_raw(&p);
}
3858
3859 /*----------------------------------------------------------------------------
3860 | If `a' is denormal and we are in flush-to-zero mode then set the
3861 | input-denormal exception and return zero. Otherwise just return the value.
3862 *----------------------------------------------------------------------------*/
3863
3864 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
3865 {
3866 if (p.exp == 0 && p.frac != 0) {
3867 float_raise(float_flag_input_denormal, status);
3868 return true;
3869 }
3870
3871 return false;
3872 }
3873
/*
 * If flush-inputs-to-zero is enabled and @a is denormal, raise the
 * input-denormal flag and return a correctly-signed zero; otherwise
 * return @a unchanged.
 */
float16 float16_squash_input_denormal(float16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float16_set_sign(float16_zero, p.sign);
        }
    }
    return a;
}

float32 float32_squash_input_denormal(float32 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float32_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float32_set_sign(float32_zero, p.sign);
        }
    }
    return a;
}

float64 float64_squash_input_denormal(float64 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        float64_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return float64_set_sign(float64_zero, p.sign);
        }
    }
    return a;
}

bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
{
    if (status->flush_inputs_to_zero) {
        FloatParts64 p;

        bfloat16_unpack_raw(&p, a);
        if (parts_squash_denormal(p, status)) {
            return bfloat16_set_sign(bfloat16_zero, p.sign);
        }
    }
    return a;
}
3925
3926 /*----------------------------------------------------------------------------
3927 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3928 | and 7, and returns the properly rounded 32-bit integer corresponding to the
3929 | input. If `zSign' is 1, the input is negated before being converted to an
3930 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3931 | is simply rounded to an integer, with the inexact exception raised if the
3932 | input cannot be represented exactly as an integer. However, if the fixed-
3933 | point input is too large, the invalid exception is raised and the largest
3934 | positive or negative integer is returned.
3935 *----------------------------------------------------------------------------*/
3936
/* See the block comment above for the full contract (SoftFloat-2a). */
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    int32_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Choose the increment added below the binary point (bit 6). */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Round so that the result's low bit ends up set. */
        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
    }
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    /* Exact tie under nearest-even: clear the low bit. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        absZ &= ~1;
    }
    z = absZ;
    if ( zSign ) z = - z;
    /* Out of int32 range (or sign flipped by the negation): saturate. */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise(float_flag_invalid, status);
        return zSign ? INT32_MIN : INT32_MAX;
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
3984
3985 /*----------------------------------------------------------------------------
3986 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3987 | `absZ1', with binary point between bits 63 and 64 (between the input words),
3988 | and returns the properly rounded 64-bit integer corresponding to the input.
3989 | If `zSign' is 1, the input is negated before being converted to an integer.
3990 | Ordinarily, the fixed-point input is simply rounded to an integer, with
3991 | the inexact exception raised if the input cannot be represented exactly as
3992 | an integer. However, if the fixed-point input is too large, the invalid
3993 | exception is raised and the largest positive or negative integer is
3994 | returned.
3995 *----------------------------------------------------------------------------*/
3996
/* See the block comment above for the full contract (SoftFloat-2a). */
static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment;
    int64_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* absZ1 holds the fraction below the binary point; decide whether
     * to round absZ0 up by one.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Round up when the fraction is >= 1/2 (top bit of absZ1 set). */
        increment = ((int64_t) absZ1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && absZ1;
        break;
    case float_round_down:
        increment = zSign && absZ1;
        break;
    case float_round_to_odd:
        increment = !(absZ0 & 1) && absZ1;
        break;
    default:
        abort();
    }
    if ( increment ) {
        ++absZ0;
        if ( absZ0 == 0 ) goto overflow;
        /* Exact tie under nearest-even: clear the low bit. */
        if (!(absZ1 << 1) && roundNearestEven) {
            absZ0 &= ~1;
        }
    }
    z = absZ0;
    if ( zSign ) z = - z;
    /* Sign flipped by negation means the magnitude overflowed int64. */
    if ( z && ( ( z < 0 ) ^ zSign ) ) {
 overflow:
        float_raise(float_flag_invalid, status);
        return zSign ? INT64_MIN : INT64_MAX;
    }
    if (absZ1) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
4046
4047 /*----------------------------------------------------------------------------
4048 | Normalizes the subnormal single-precision floating-point value represented
4049 | by the denormalized significand `aSig'. The normalized exponent and
4050 | significand are stored at the locations pointed to by `zExpPtr' and
4051 | `zSigPtr', respectively.
4052 *----------------------------------------------------------------------------*/
4053
4054 static void
4055 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
4056 {
4057 int8_t shiftCount;
4058
4059 shiftCount = clz32(aSig) - 8;
4060 *zSigPtr = aSig<<shiftCount;
4061 *zExpPtr = 1 - shiftCount;
4062
4063 }
4064
4065 /*----------------------------------------------------------------------------
4066 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4067 | and significand `zSig', and returns the proper single-precision floating-
4068 | point value corresponding to the abstract input. Ordinarily, the abstract
4069 | value is simply rounded and packed into the single-precision format, with
4070 | the inexact exception raised if the abstract input cannot be represented
4071 | exactly. However, if the abstract value is too large, the overflow and
4072 | inexact exceptions are raised and an infinity or maximal finite value is
4073 | returned. If the abstract value is too small, the input value is rounded to
4074 | a subnormal number, and the underflow and inexact exceptions are raised if
4075 | the abstract input cannot be represented exactly as a subnormal single-
4076 | precision floating-point number.
4077 | The input significand `zSig' has its binary point between bits 30
4078 | and 29, which is 7 bits to the left of the usual location. This shifted
4079 | significand must be normalized or smaller. If `zSig' is not normalized,
4080 | `zExp' must be 0; in that case, the result returned is a subnormal number,
4081 | and it must not require rounding. In the usual case that `zSig' is
4082 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4083 | The handling of underflow and overflow follows the IEC/IEEE Standard for
4084 | Binary Floating-Point Arithmetic.
4085 *----------------------------------------------------------------------------*/
4086
/* See the block comment above for the full contract (SoftFloat-2a). */
static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Choose the increment added below the 7 guard bits. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
        break;
    }
    roundBits = zSig & 0x7F;
    /* Exponent at or beyond the overflow/underflow boundaries?
     * (The unsigned cast makes negative zExp compare large.)
     */
    if ( 0xFD <= (uint16_t) zExp ) {
        if ( ( 0xFD < zExp )
             || ( ( zExp == 0xFD )
                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /* Overflow.  Round-to-odd and the truncating modes produce
             * the maximal finite value instead of infinity.
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat32(zSign, 0, 0);
            }
            /* Tininess may be detected before or after rounding. */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < 0x80000000);
            shift32RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x7F;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>7;
    /* Exact tie under nearest-even: clear the low bit. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        zSig &= ~1;
    }
    if ( zSig == 0 ) zExp = 0;
    return packFloat32( zSign, zExp, zSig );

}
4163
4164 /*----------------------------------------------------------------------------
4165 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4166 | and significand `zSig', and returns the proper single-precision floating-
4167 | point value corresponding to the abstract input. This routine is just like
4168 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4169 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4170 | floating-point exponent.
4171 *----------------------------------------------------------------------------*/
4172
4173 static float32
4174 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4175 float_status *status)
4176 {
4177 int8_t shiftCount;
4178
4179 shiftCount = clz32(zSig) - 1;
4180 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4181 status);
4182
4183 }
4184
4185 /*----------------------------------------------------------------------------
4186 | Normalizes the subnormal double-precision floating-point value represented
4187 | by the denormalized significand `aSig'. The normalized exponent and
4188 | significand are stored at the locations pointed to by `zExpPtr' and
4189 | `zSigPtr', respectively.
4190 *----------------------------------------------------------------------------*/
4191
4192 static void
4193 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
4194 {
4195 int8_t shiftCount;
4196
4197 shiftCount = clz64(aSig) - 11;
4198 *zSigPtr = aSig<<shiftCount;
4199 *zExpPtr = 1 - shiftCount;
4200
4201 }
4202
4203 /*----------------------------------------------------------------------------
4204 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4205 | double-precision floating-point value, returning the result. After being
4206 | shifted into the proper positions, the three fields are simply added
4207 | together to form the result. This means that any integer portion of `zSig'
4208 | will be added into the exponent. Since a properly normalized significand
4209 | will have an integer portion equal to 1, the `zExp' input should be 1 less
4210 | than the desired result exponent whenever `zSig' is a complete, normalized
4211 | significand.
4212 *----------------------------------------------------------------------------*/
4213
4214 static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4215 {
4216
4217 return make_float64(
4218 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4219
4220 }
4221
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand `zSig', and returns the proper double-precision floating-
| point value corresponding to the abstract input. Ordinarily, the abstract
| value is simply rounded and packed into the double-precision format, with
| the inexact exception raised if the abstract input cannot be represented
| exactly. However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned. If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal double-
| precision floating-point number.
| The input significand `zSig' has its binary point between bits 62
| and 61, which is 10 bits to the left of the usual location. This shifted
| significand must be normalized or smaller. If `zSig' is not normalized,
| `zExp' must be 0; in that case, the result returned is a subnormal number,
| and it must not require rounding. In the usual case that `zSig' is
| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
| The handling of underflow and overflow follows the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
                                   float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int roundIncrement, roundBits;
    bool isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Ten round/sticky bits sit below the result: 0x200 is a half-ulp
     * (nearest rounding) and 0x3ff rounds away from zero.
     */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        roundIncrement = 0x200;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : 0x3ff;
        break;
    case float_round_down:
        roundIncrement = zSign ? 0x3ff : 0;
        break;
    case float_round_to_odd:
        /* Round up only when that makes the result's low bit odd. */
        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
        break;
    default:
        abort();
    }
    roundBits = zSig & 0x3FF;
    /* Unsigned compare catches both overflow range and negative zExp. */
    if ( 0x7FD <= (uint16_t) zExp ) {
        if ( ( 0x7FD < zExp )
             || ( ( zExp == 0x7FD )
                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
           ) {
            /*
             * Overflow: produce infinity, or the largest finite number
             * for modes that never round up to infinity (includes
             * round-to-odd). The -(!overflow_to_inf) trick yields an
             * all-ones significand (max finite) or 0 (infinity).
             */
            bool overflow_to_inf = roundingMode != float_round_to_odd &&
                                   roundIncrement != 0;
            float_raise(float_flag_overflow | float_flag_inexact, status);
            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat64(zSign, 0, 0);
            }
            /* Tininess test per the configured before/after-rounding rule. */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
            /* Denormalize, jamming shifted-out bits into the sticky bit. */
            shift64RightJamming( zSig, - zExp, &zSig );
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundingMode == float_round_to_odd) {
                /*
                 * For round-to-odd case, the roundIncrement depends on
                 * zSig which just changed.
                 */
                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
            }
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig = ( zSig + roundIncrement )>>10;
    /* Ties-to-even: clear the low bit when the discarded part was exactly half. */
    if (!(roundBits ^ 0x200) && roundNearestEven) {
        zSig &= ~1;
    }
    /* A zero significand must pack as a true zero (exponent 0). */
    if ( zSig == 0 ) zExp = 0;
    return packFloat64( zSign, zExp, zSig );

}
4319
4320 /*----------------------------------------------------------------------------
4321 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4322 | and significand `zSig', and returns the proper double-precision floating-
4323 | point value corresponding to the abstract input. This routine is just like
4324 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4325 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4326 | floating-point exponent.
4327 *----------------------------------------------------------------------------*/
4328
4329 static float64
4330 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4331 float_status *status)
4332 {
4333 int8_t shiftCount;
4334
4335 shiftCount = clz64(zSig) - 1;
4336 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4337 status);
4338
4339 }
4340
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision significand `aSig'.
| The normalized significand is stored through `zSigPtr' and the matching
| (normalized) exponent through `zExpPtr'.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t lead = clz64(aSig);

    /* Shift the leading one into bit 63 and rebase the exponent. */
    *zSigPtr = aSig << lead;
    *zExpPtr = 1 - lead;
}
4357
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input. Ordinarily, the abstract value is
| rounded and packed into the extended double-precision format, with the
| inexact exception raised if the abstract input cannot be represented
| exactly. However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned. If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal extended
| double-precision floating-point number.
| If `roundingPrecision' is 32 or 64, the result is rounded to the same
| number of bits as single or double precision, respectively. Otherwise, the
| result is rounded to the full precision of the extended double-precision
| format.
| The input significand must be normalized or smaller. If the input
| significand is not normalized, `zExp' must be 0; in that case, the result
| returned is a subnormal number, and it must not require rounding. The
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    if ( roundingPrecision == 80 ) goto precision80;
    /* Reduced precision: choose half-ulp increment and discard mask. */
    if ( roundingPrecision == 64 ) {
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
    }
    else if ( roundingPrecision == 32 ) {
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
    }
    else {
        goto precision80;
    }
    /* Fold the low word into the sticky (least) bit of the high word. */
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    /* Unsigned trick: true when zExp <= 0 or zExp >= 0x7FFE. */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if ( ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            /* Tininess per the configured before/after-rounding rule. */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /* Rounding may carry into the integer bit: back to exponent 1. */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            /* Ties-to-even: widen the mask to clear the result's low bit. */
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    if ( zSig0 < roundIncrement ) {
        /* Carry out of bit 63: renormalize to 1.0 * 2^(zExp+1). */
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    /* Ties-to-even at reduced precision: also clear the result's low bit. */
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /* Full precision: zSig1 holds the round and sticky bits. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if ( ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            roundMask = 0;
 overflow:
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /* Directed modes that cannot reach infinity get the max finite. */
            if ( ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* Recompute the increment: zSig1 changed while denormalizing. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                /* Exact tie: force the low bit even. */
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            /* Significand wrapped: step the exponent, restore the MSB. */
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            /* Exact tie: force the low bit even. */
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}
4569
4570 /*----------------------------------------------------------------------------
4571 | Takes an abstract floating-point value having sign `zSign', exponent
4572 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4573 | and returns the proper extended double-precision floating-point value
4574 | corresponding to the abstract input. This routine is just like
4575 | `roundAndPackFloatx80' except that the input significand does not have to be
4576 | normalized.
4577 *----------------------------------------------------------------------------*/
4578
4579 floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4580 bool zSign, int32_t zExp,
4581 uint64_t zSig0, uint64_t zSig1,
4582 float_status *status)
4583 {
4584 int8_t shiftCount;
4585
4586 if ( zSig0 == 0 ) {
4587 zSig0 = zSig1;
4588 zSig1 = 0;
4589 zExp -= 64;
4590 }
4591 shiftCount = clz64(zSig0);
4592 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4593 zExp -= shiftCount;
4594 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4595 zSig0, zSig1, status);
4596
4597 }
4598
4599 /*----------------------------------------------------------------------------
4600 | Returns the least-significant 64 fraction bits of the quadruple-precision
4601 | floating-point value `a'.
4602 *----------------------------------------------------------------------------*/
4603
4604 static inline uint64_t extractFloat128Frac1( float128 a )
4605 {
4606
4607 return a.low;
4608
4609 }
4610
4611 /*----------------------------------------------------------------------------
4612 | Returns the most-significant 48 fraction bits of the quadruple-precision
4613 | floating-point value `a'.
4614 *----------------------------------------------------------------------------*/
4615
4616 static inline uint64_t extractFloat128Frac0( float128 a )
4617 {
4618
4619 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4620
4621 }
4622
4623 /*----------------------------------------------------------------------------
4624 | Returns the exponent bits of the quadruple-precision floating-point value
4625 | `a'.
4626 *----------------------------------------------------------------------------*/
4627
4628 static inline int32_t extractFloat128Exp( float128 a )
4629 {
4630
4631 return ( a.high>>48 ) & 0x7FFF;
4632
4633 }
4634
4635 /*----------------------------------------------------------------------------
4636 | Returns the sign bit of the quadruple-precision floating-point value `a'.
4637 *----------------------------------------------------------------------------*/
4638
4639 static inline bool extractFloat128Sign(float128 a)
4640 {
4641 return a.high >> 63;
4642 }
4643
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision significand formed by the
| concatenation of `aSig0' and `aSig1'. The normalized exponent is stored
| through `zExpPtr'; the most-significant 49 bits of the normalized
| significand go through `zSig0Ptr' and the least-significant 64 bits
| through `zSig1Ptr'.
*----------------------------------------------------------------------------*/

static void
normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shift;

    if (aSig0 != 0) {
        /* Leading one is in the high word: shift it up to bit 48. */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /* High word is empty: the whole significand lives in aSig1. */
    shift = clz64(aSig1) - 15;
    if (shift < 0) {
        /* Leading one is above bit 48 of aSig1: split across both words. */
        *zSig0Ptr = aSig1 >> -shift;
        *zSig1Ptr = aSig1 << (shift & 63);
    } else {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    }
    *zExpPtr = -shift - 63;
}
4684
4685 /*----------------------------------------------------------------------------
4686 | Packs the sign `zSign', the exponent `zExp', and the significand formed
4687 | by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4688 | floating-point value, returning the result. After being shifted into the
4689 | proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4690 | added together to form the most significant 32 bits of the result. This
4691 | means that any integer portion of `zSig0' will be added into the exponent.
4692 | Since a properly normalized significand will have an integer portion equal
4693 | to 1, the `zExp' input should be 1 less than the desired result exponent
4694 | whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4695 | significand.
4696 *----------------------------------------------------------------------------*/
4697
4698 static inline float128
4699 packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4700 {
4701 float128 z;
4702
4703 z.low = zSig1;
4704 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4705 return z;
4706 }
4707
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and extended significand formed by the concatenation of `zSig0', `zSig1',
| and `zSig2', and returns the proper quadruple-precision floating-point value
| corresponding to the abstract input. Ordinarily, the abstract value is
| simply rounded and packed into the quadruple-precision format, with the
| inexact exception raised if the abstract input cannot be represented
| exactly. However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned. If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal quadruple-
| precision floating-point number.
| The input significand must be normalized or smaller. If the input
| significand is not normalized, `zExp' must be 0; in that case, the result
| returned is a subnormal number, and it must not require rounding. In the
| usual case that the input significand is normalized, `zExp' must be 1 less
| than the ``true'' floating-point exponent. The handling of underflow and
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
                                     uint64_t zSig0, uint64_t zSig1,
                                     uint64_t zSig2, float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* zSig2 carries the round and sticky bits below the 113-bit result. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        increment = ((int64_t)zSig2 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig2;
        break;
    case float_round_down:
        increment = zSign && zSig2;
        break;
    case float_round_to_odd:
        /* Increment only when it would make the low bit odd. */
        increment = !(zSig1 & 0x1) && zSig2;
        break;
    default:
        abort();
    }
    /* Unsigned compare catches both overflow range and negative zExp. */
    if ( 0x7FFD <= (uint32_t) zExp ) {
        if ( ( 0x7FFD < zExp )
             || ( ( zExp == 0x7FFD )
                  && eq128(
                         UINT64_C(0x0001FFFFFFFFFFFF),
                         UINT64_C(0xFFFFFFFFFFFFFFFF),
                         zSig0,
                         zSig1
                     )
                  && increment
                )
           ) {
            float_raise(float_flag_overflow | float_flag_inexact, status);
            /*
             * Modes that never round up to infinity (includes
             * round-to-odd) return the largest finite number.
             */
            if ( ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
                 || (roundingMode == float_round_to_odd)
               ) {
                return
                    packFloat128(
                        zSign,
                        0x7FFE,
                        UINT64_C(0x0000FFFFFFFFFFFF),
                        UINT64_C(0xFFFFFFFFFFFFFFFF)
                    );
            }
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( zExp < 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloat128(zSign, 0, 0, 0);
            }
            /* Tininess per the configured before/after-rounding rule. */
            isTiny = status->tininess_before_rounding
                  || (zExp < -1)
                  || !increment
                  || lt128(zSig0, zSig1,
                           UINT64_C(0x0001FFFFFFFFFFFF),
                           UINT64_C(0xFFFFFFFFFFFFFFFF));
            /* Denormalize, jamming shifted-out bits into zSig2 (sticky). */
            shift128ExtraRightJamming(
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
            zExp = 0;
            if (isTiny && zSig2) {
                float_raise(float_flag_underflow, status);
            }
            /* Recompute the increment: zSig1/zSig2 changed above. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig2 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig2;
                break;
            case float_round_down:
                increment = zSign && zSig2;
                break;
            case float_round_to_odd:
                increment = !(zSig1 & 0x1) && zSig2;
                break;
            default:
                abort();
            }
        }
    }
    if (zSig2) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        /* Exact tie under nearest-even: force the low bit even. */
        if ((zSig2 + zSig2 == 0) && roundNearestEven) {
            zSig1 &= ~1;
        }
    }
    else {
        /* A zero significand must pack as a true zero. */
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    }
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}
4840
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and significand formed by the concatenation of `zSig0' and `zSig1', and
| returns the proper quadruple-precision floating-point value corresponding
| to the abstract input. This routine is just like `roundAndPackFloat128'
| except that the input significand has fewer bits and does not have to be
| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
| point exponent.
*----------------------------------------------------------------------------*/

static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
                                              uint64_t zSig0, uint64_t zSig1,
                                              float_status *status)
{
    int8_t shiftCount;
    uint64_t zSig2;

    if ( zSig0 == 0 ) {
        /* Whole significand is in the low word: promote it 64 bits up. */
        zSig0 = zSig1;
        zSig1 = 0;
        zExp -= 64;
    }
    /* Position the leading one at bit 48 of the high word. */
    shiftCount = clz64(zSig0) - 15;
    if ( 0 <= shiftCount ) {
        zSig2 = 0;
        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    }
    else {
        /* Too many bits: shift right, collecting the excess into zSig2. */
        shift128ExtraRightJamming(
            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
    }
    zExp -= shiftCount;
    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);

}
4876
4877
4878 /*----------------------------------------------------------------------------
4879 | Returns the result of converting the 32-bit two's complement integer `a'
4880 | to the extended double-precision floating-point format. The conversion
4881 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4882 | Arithmetic.
4883 *----------------------------------------------------------------------------*/
4884
4885 floatx80 int32_to_floatx80(int32_t a, float_status *status)
4886 {
4887 bool zSign;
4888 uint32_t absA;
4889 int8_t shiftCount;
4890 uint64_t zSig;
4891
4892 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4893 zSign = ( a < 0 );
4894 absA = zSign ? - a : a;
4895 shiftCount = clz32(absA) + 32;
4896 zSig = absA;
4897 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4898
4899 }
4900
4901 /*----------------------------------------------------------------------------
4902 | Returns the result of converting the 64-bit two's complement integer `a'
4903 | to the extended double-precision floating-point format. The conversion
4904 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
4905 | Arithmetic.
4906 *----------------------------------------------------------------------------*/
4907
4908 floatx80 int64_to_floatx80(int64_t a, float_status *status)
4909 {
4910 bool zSign;
4911 uint64_t absA;
4912 int8_t shiftCount;
4913
4914 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4915 zSign = ( a < 0 );
4916 absA = zSign ? - a : a;
4917 shiftCount = clz64(absA);
4918 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4919
4920 }
4921
/*----------------------------------------------------------------------------
| Returns the result of converting the single-precision floating-point value
| `a' to the extended double-precision floating-point format. The conversion
| is performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic.
*----------------------------------------------------------------------------*/

floatx80 float32_to_floatx80(float32 a, float_status *status)
{
    bool aSign;
    int aExp;
    uint32_t aSig;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    if ( aExp == 0xFF ) {
        /* NaN: convert and silence; infinity: pass through. */
        if (aSig) {
            floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
                                               status);
            return floatx80_silence_nan(res, status);
        }
        return packFloatx80(aSign,
                            floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
        /* Subnormal input: normalize before widening. */
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    /* Make the implicit integer bit explicit, rebias, left-justify to 64. */
    aSig |= 0x00800000;
    return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );

}
4957
/*----------------------------------------------------------------------------
| Returns the remainder of the single-precision floating-point value `a'
| with respect to the corresponding value `b'. The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/

float32 float32_rem(float32 a, float32 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint32_t aSig, bSig;
    uint32_t q;
    uint64_t aSig64, bSig64, q64;
    uint32_t alternateASig;
    int32_t sigMean;
    a = float32_squash_input_denormal(a, status);
    b = float32_squash_input_denormal(b, status);

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    /* rem(NaN, x), rem(x, NaN) -> NaN; rem(inf, x) is invalid. */
    if ( aExp == 0xFF ) {
        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
            return propagateFloat32NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    if ( bExp == 0xFF ) {
        if (bSig) {
            return propagateFloat32NaN(a, b, status);
        }
        /* rem(finite, inf) = a exactly. */
        return a;
    }
    if ( bExp == 0 ) {
        /* rem(x, 0) is invalid. */
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float32_default_nan(status);
        }
        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make the implicit integer bits explicit. */
    aSig |= 0x00800000;
    bSig |= 0x00800000;
    if ( expDiff < 32 ) {
        /* Small exponent difference: one 64/32 division suffices. */
        aSig <<= 8;
        bSig <<= 8;
        if ( expDiff < 0 ) {
            /* |a| far smaller than |b|: a is already the remainder. */
            if ( expDiff < -1 ) return a;
            aSig >>= 1;
        }
        q = ( bSig <= aSig );
        if ( q ) aSig -= bSig;
        if ( 0 < expDiff ) {
            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
            q >>= 32 - expDiff;
            bSig >>= 2;
            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
        }
        else {
            aSig >>= 2;
            bSig >>= 2;
        }
    }
    else {
        /* Large exponent difference: reduce iteratively with an
         * estimated 128/64 division, 62 quotient bits per pass. */
        if ( bSig <= aSig ) aSig -= bSig;
        aSig64 = ( (uint64_t) aSig )<<40;
        bSig64 = ( (uint64_t) bSig )<<40;
        expDiff -= 64;
        while ( 0 < expDiff ) {
            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
            /* Bias the estimate low so the partial remainder stays >= 0. */
            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
            aSig64 = - ( ( bSig * q64 )<<38 );
            expDiff -= 62;
        }
        expDiff += 64;
        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
        q = q64>>( 64 - expDiff );
        bSig <<= 6;
        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
    }
    /* Step past zero to find the remainder nearest to zero (ties to
     * even quotient), per the IEEE remainder definition. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int32_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int32_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
}
5059
5060
5061
/*----------------------------------------------------------------------------
| Returns the binary exponential of the single-precision floating-point value
| `a'. The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
|
| Uses the following identities:
|
| 1. -------------------------------------------------------------------------
| x x*ln(2)
| 2 = e
|
| 2. -------------------------------------------------------------------------
| 2 3 4 5 n
| x x x x x x x
| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
| 1! 2! 3! 4! 5! n!
*----------------------------------------------------------------------------*/

/* Taylor-series coefficients: entry i is 1/(i+1)! as a float64 constant. */
static const float64 float32_exp2_coefficients[15] =
{
    const_float64( 0x3ff0000000000000ll ), /* 1 */
    const_float64( 0x3fe0000000000000ll ), /* 2 */
    const_float64( 0x3fc5555555555555ll ), /* 3 */
    const_float64( 0x3fa5555555555555ll ), /* 4 */
    const_float64( 0x3f81111111111111ll ), /* 5 */
    const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
    const_float64( 0x3f2a01a01a01a01all ), /* 7 */
    const_float64( 0x3efa01a01a01a01all ), /* 8 */
    const_float64( 0x3ec71de3a556c734ll ), /* 9 */
    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
    const_float64( 0x3de6124613a86d09ll ), /* 13 */
    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
};
5098
5099 float32 float32_exp2(float32 a, float_status *status)
5100 {
5101 bool aSign;
5102 int aExp;
5103 uint32_t aSig;
5104 float64 r, x, xn;
5105 int i;
5106 a = float32_squash_input_denormal(a, status);
5107
5108 aSig = extractFloat32Frac( a );
5109 aExp = extractFloat32Exp( a );
5110 aSign = extractFloat32Sign( a );
5111
5112 if ( aExp == 0xFF) {
5113 if (aSig) {
5114 return propagateFloat32NaN(a, float32_zero, status);
5115 }
5116 return (aSign) ? float32_zero : a;
5117 }
5118 if (aExp == 0) {
5119 if (aSig == 0) return float32_one;
5120 }
5121
5122 float_raise(float_flag_inexact, status);
5123
5124 /* ******************************* */
5125 /* using float64 for approximation */
5126 /* ******************************* */
5127 x = float32_to_float64(a, status);
5128 x = float64_mul(x, float64_ln2, status);
5129
5130 xn = x;
5131 r = float64_one;
5132 for (i = 0 ; i < 15 ; i++) {
5133 float64 f;
5134
5135 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5136 r = float64_add(r, f, status);
5137
5138 xn = float64_mul(xn, x, status);
5139 }
5140
5141 return float64_to_float32(r, status);
5142 }
5143
/*----------------------------------------------------------------------------
| Returns the binary log of the single-precision floating-point value `a'.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float32 float32_log2(float32 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint32_t aSig, zSig, i;

    a = float32_squash_input_denormal(a, status);
    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    if ( aExp == 0 ) {
        /* log2(+/-0) = -infinity. */
        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
    }
    if ( aSign ) {
        /* log2 of a negative number is invalid. */
        float_raise(float_flag_invalid, status);
        return float32_default_nan(status);
    }
    if ( aExp == 0xFF ) {
        if (aSig) {
            return propagateFloat32NaN(a, float32_zero, status);
        }
        /* log2(+inf) = +inf. */
        return a;
    }

    /* Integer part of the result is the unbiased exponent. */
    aExp -= 0x7F;
    aSig |= 0x00800000;
    zSign = aExp < 0;
    zSig = aExp << 23;

    /*
     * Fraction bits, one per iteration: repeatedly square the
     * significand; each time it reaches 2 or more, the next result
     * bit is 1 and the significand is halved.
     */
    for (i = 1 << 22; i > 0; i >>= 1) {
        aSig = ( (uint64_t)aSig * aSig ) >> 23;
        if ( aSig & 0x01000000 ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    /* Result is stored as a magnitude; negate when the log is negative. */
    if ( zSign )
        zSig = -zSig;

    return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
}
5193
5194 /*----------------------------------------------------------------------------
5195 | Returns the result of converting the double-precision floating-point value
5196 | `a' to the extended double-precision floating-point format. The conversion
5197 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
5198 | Arithmetic.
5199 *----------------------------------------------------------------------------*/
5200
5201 floatx80 float64_to_floatx80(float64 a, float_status *status)
5202 {
5203 bool aSign;
5204 int aExp;
5205 uint64_t aSig;
5206
5207 a = float64_squash_input_denormal(a, status);
5208 aSig = extractFloat64Frac( a );
5209 aExp = extractFloat64Exp( a );
5210 aSign = extractFloat64Sign( a );
5211 if ( aExp == 0x7FF ) {
5212 if (aSig) {
5213 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5214 status);
5215 return floatx80_silence_nan(res, status);
5216 }
5217 return packFloatx80(aSign,
5218 floatx80_infinity_high,
5219 floatx80_infinity_low);
5220 }
5221 if ( aExp == 0 ) {
5222 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5223 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5224 }
5225 return
5226 packFloatx80(
5227 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5228
5229 }
5230
5231 /*----------------------------------------------------------------------------
5232 | Returns the remainder of the double-precision floating-point value `a'
5233 | with respect to the corresponding value `b'. The operation is performed
5234 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5235 *----------------------------------------------------------------------------*/
5236
float64 float64_rem(float64 a, float64 b, float_status *status)
{
    bool aSign, zSign;
    int aExp, bExp, expDiff;
    uint64_t aSig, bSig;
    uint64_t q, alternateASig;
    int64_t sigMean;

    a = float64_squash_input_denormal(a, status);
    b = float64_squash_input_denormal(b, status);
    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    bSig = extractFloat64Frac( b );
    bExp = extractFloat64Exp( b );
    /* rem(NaN, x), rem(x, NaN) propagate; rem(inf, x) is invalid. */
    if ( aExp == 0x7FF ) {
        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
            return propagateFloat64NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    /* rem(x, inf) == x for finite x. */
    if ( bExp == 0x7FF ) {
        if (bSig) {
            return propagateFloat64NaN(a, b, status);
        }
        return a;
    }
    /* rem(x, 0) is invalid; subnormal operands are normalized. */
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            float_raise(float_flag_invalid, status);
            return float64_default_nan(status);
        }
        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return a;
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    expDiff = aExp - bExp;
    /* Make integer bits explicit; shift so the significands fill 64 bits. */
    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
    if ( expDiff < 0 ) {
        /* |a| < |b|/2: a is already the remainder. */
        if ( expDiff < -1 ) return a;
        aSig >>= 1;
    }
    q = ( bSig <= aSig );
    if ( q ) aSig -= bSig;
    /* Reduce the exponent difference 62 bits at a time, keeping
     * aSig = |a| mod (|b| * 2^k) via an estimated quotient that is
     * deliberately biased low (q - 2) so the remainder stays positive. */
    expDiff -= 64;
    while ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        aSig = - ( ( bSig>>2 ) * q );
        expDiff -= 62;
    }
    /* Final partial reduction for the remaining 0..64 bits. */
    expDiff += 64;
    if ( 0 < expDiff ) {
        q = estimateDiv128To64( aSig, 0, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        bSig >>= 2;
        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
    }
    else {
        aSig >>= 2;
        bSig >>= 2;
    }
    /* Step q up until the remainder goes negative, then pick whichever of
     * the last two remainders is nearer to zero (ties go to even q),
     * giving the round-to-nearest remainder required by IEEE 754. */
    do {
        alternateASig = aSig;
        ++q;
        aSig -= bSig;
    } while ( 0 <= (int64_t) aSig );
    sigMean = aSig + alternateASig;
    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
        aSig = alternateASig;
    }
    zSign = ( (int64_t) aSig < 0 );
    if ( zSign ) aSig = - aSig;
    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);

}
5318
5319 /*----------------------------------------------------------------------------
5320 | Returns the binary log of the double-precision floating-point value `a'.
5321 | The operation is performed according to the IEC/IEEE Standard for Binary
5322 | Floating-Point Arithmetic.
5323 *----------------------------------------------------------------------------*/
float64 float64_log2(float64 a, float_status *status)
{
    bool aSign, zSign;
    int aExp;
    uint64_t aSig, aSig0, aSig1, zSig, i;
    a = float64_squash_input_denormal(a, status);

    aSig = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    /* log2(+/-0) = -inf; subnormals are normalized before the digit loop. */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
    }
    /* log2 of a negative number is invalid. */
    if ( aSign ) {
        float_raise(float_flag_invalid, status);
        return float64_default_nan(status);
    }
    /* NaN propagates; log2(+inf) = +inf. */
    if ( aExp == 0x7FF ) {
        if (aSig) {
            return propagateFloat64NaN(a, float64_zero, status);
        }
        return a;
    }

    /* Unbias the exponent (the integer part of the log) and make the
     * significand's integer bit explicit. */
    aExp -= 0x3FF;
    aSig |= UINT64_C(0x0010000000000000);
    zSign = aExp < 0;
    zSig = (uint64_t)aExp << 52;
    /* Generate fraction bits one at a time: squaring the significand
     * doubles the remaining log; if the square reaches 2 the current
     * bit is 1 and the value is halved back into [1, 2). */
    for (i = 1LL << 51; i > 0; i >>= 1) {
        mul64To128( aSig, aSig, &aSig0, &aSig1 );
        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
        if ( aSig & UINT64_C(0x0020000000000000) ) {
            aSig >>= 1;
            zSig |= i;
        }
    }

    if ( zSign )
        zSig = -zSig;
    return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
}
5367
5368 /*----------------------------------------------------------------------------
5369 | Returns the result of converting the extended double-precision floating-
5370 | point value `a' to the 32-bit two's complement integer format. The
5371 | conversion is performed according to the IEC/IEEE Standard for Binary
5372 | Floating-Point Arithmetic---which means in particular that the conversion
5373 | is rounded according to the current rounding mode. If `a' is a NaN, the
5374 | largest positive integer is returned. Otherwise, if the conversion
5375 | overflows, the largest integer with the same sign as `a' is returned.
5376 *----------------------------------------------------------------------------*/
5377
5378 int32_t floatx80_to_int32(floatx80 a, float_status *status)
5379 {
5380 bool aSign;
5381 int32_t aExp, shiftCount;
5382 uint64_t aSig;
5383
5384 if (floatx80_invalid_encoding(a)) {
5385 float_raise(float_flag_invalid, status);
5386 return 1 << 31;
5387 }
5388 aSig = extractFloatx80Frac( a );
5389 aExp = extractFloatx80Exp( a );
5390 aSign = extractFloatx80Sign( a );
5391 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5392 shiftCount = 0x4037 - aExp;
5393 if ( shiftCount <= 0 ) shiftCount = 1;
5394 shift64RightJamming( aSig, shiftCount, &aSig );
5395 return roundAndPackInt32(aSign, aSig, status);
5396
5397 }
5398
5399 /*----------------------------------------------------------------------------
5400 | Returns the result of converting the extended double-precision floating-
5401 | point value `a' to the 32-bit two's complement integer format. The
5402 | conversion is performed according to the IEC/IEEE Standard for Binary
5403 | Floating-Point Arithmetic, except that the conversion is always rounded
5404 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5405 | Otherwise, if the conversion overflows, the largest integer with the same
5406 | sign as `a' is returned.
5407 *----------------------------------------------------------------------------*/
5408
5409 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5410 {
5411 bool aSign;
5412 int32_t aExp, shiftCount;
5413 uint64_t aSig, savedASig;
5414 int32_t z;
5415
5416 if (floatx80_invalid_encoding(a)) {
5417 float_raise(float_flag_invalid, status);
5418 return 1 << 31;
5419 }
5420 aSig = extractFloatx80Frac( a );
5421 aExp = extractFloatx80Exp( a );
5422 aSign = extractFloatx80Sign( a );
5423 if ( 0x401E < aExp ) {
5424 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5425 goto invalid;
5426 }
5427 else if ( aExp < 0x3FFF ) {
5428 if (aExp || aSig) {
5429 float_raise(float_flag_inexact, status);
5430 }
5431 return 0;
5432 }
5433 shiftCount = 0x403E - aExp;
5434 savedASig = aSig;
5435 aSig >>= shiftCount;
5436 z = aSig;
5437 if ( aSign ) z = - z;
5438 if ( ( z < 0 ) ^ aSign ) {
5439 invalid:
5440 float_raise(float_flag_invalid, status);
5441 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5442 }
5443 if ( ( aSig<<shiftCount ) != savedASig ) {
5444 float_raise(float_flag_inexact, status);
5445 }
5446 return z;
5447
5448 }
5449
5450 /*----------------------------------------------------------------------------
5451 | Returns the result of converting the extended double-precision floating-
5452 | point value `a' to the 64-bit two's complement integer format. The
5453 | conversion is performed according to the IEC/IEEE Standard for Binary
5454 | Floating-Point Arithmetic---which means in particular that the conversion
5455 | is rounded according to the current rounding mode. If `a' is a NaN,
5456 | the largest positive integer is returned. Otherwise, if the conversion
5457 | overflows, the largest integer with the same sign as `a' is returned.
5458 *----------------------------------------------------------------------------*/
5459
int64_t floatx80_to_int64(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig, aSigExtra;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return 1ULL << 63;
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    shiftCount = 0x403E - aExp;
    if ( shiftCount <= 0 ) {
        /* shiftCount < 0: the value has more than 63 integer bits, so the
         * conversion overflows (NaNs land here too and yield INT64_MAX). */
        if ( shiftCount ) {
            float_raise(float_flag_invalid, status);
            if (!aSign || floatx80_is_any_nan(a)) {
                return INT64_MAX;
            }
            return INT64_MIN;
        }
        /* shiftCount == 0: significand is already an exact 64-bit value;
         * only -2^63 survives roundAndPackInt64 without overflowing. */
        aSigExtra = 0;
    }
    else {
        /* Split into integer part and round/sticky bits for rounding. */
        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
    }
    return roundAndPackInt64(aSign, aSig, aSigExtra, status);

}
5490
5491 /*----------------------------------------------------------------------------
5492 | Returns the result of converting the extended double-precision floating-
5493 | point value `a' to the 64-bit two's complement integer format. The
5494 | conversion is performed according to the IEC/IEEE Standard for Binary
5495 | Floating-Point Arithmetic, except that the conversion is always rounded
5496 | toward zero. If `a' is a NaN, the largest positive integer is returned.
5497 | Otherwise, if the conversion overflows, the largest integer with the same
5498 | sign as `a' is returned.
5499 *----------------------------------------------------------------------------*/
5500
int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, shiftCount;
    uint64_t aSig;
    int64_t z;

    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return 1ULL << 63;
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    shiftCount = aExp - 0x403E;
    if ( 0 <= shiftCount ) {
        /* 64 or more integer bits: overflow, except for exactly -2^63
         * (high word 0xC03E with only the integer bit set). */
        aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
        if ( ( a.high != 0xC03E ) || aSig ) {
            float_raise(float_flag_invalid, status);
            /* Positive overflow and NaNs give the largest positive value. */
            if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
                return INT64_MAX;
            }
        }
        return INT64_MIN;
    }
    /* Magnitude below 1 truncates to 0; inexact unless exactly zero. */
    else if ( aExp < 0x3FFF ) {
        if (aExp | aSig) {
            float_raise(float_flag_inexact, status);
        }
        return 0;
    }
    z = aSig>>( - shiftCount );
    /* Any bits shifted out mean the truncation was inexact. */
    if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
        float_raise(float_flag_inexact, status);
    }
    if ( aSign ) z = - z;
    return z;

}
5540
5541 /*----------------------------------------------------------------------------
5542 | Returns the result of converting the extended double-precision floating-
5543 | point value `a' to the single-precision floating-point format. The
5544 | conversion is performed according to the IEC/IEEE Standard for Binary
5545 | Floating-Point Arithmetic.
5546 *----------------------------------------------------------------------------*/
5547
5548 float32 floatx80_to_float32(floatx80 a, float_status *status)
5549 {
5550 bool aSign;
5551 int32_t aExp;
5552 uint64_t aSig;
5553
5554 if (floatx80_invalid_encoding(a)) {
5555 float_raise(float_flag_invalid, status);
5556 return float32_default_nan(status);
5557 }
5558 aSig = extractFloatx80Frac( a );
5559 aExp = extractFloatx80Exp( a );
5560 aSign = extractFloatx80Sign( a );
5561 if ( aExp == 0x7FFF ) {
5562 if ( (uint64_t) ( aSig<<1 ) ) {
5563 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5564 status);
5565 return float32_silence_nan(res, status);
5566 }
5567 return packFloat32( aSign, 0xFF, 0 );
5568 }
5569 shift64RightJamming( aSig, 33, &aSig );
5570 if ( aExp || aSig ) aExp -= 0x3F81;
5571 return roundAndPackFloat32(aSign, aExp, aSig, status);
5572
5573 }
5574
5575 /*----------------------------------------------------------------------------
5576 | Returns the result of converting the extended double-precision floating-
5577 | point value `a' to the double-precision floating-point format. The
5578 | conversion is performed according to the IEC/IEEE Standard for Binary
5579 | Floating-Point Arithmetic.
5580 *----------------------------------------------------------------------------*/
5581
5582 float64 floatx80_to_float64(floatx80 a, float_status *status)
5583 {
5584 bool aSign;
5585 int32_t aExp;
5586 uint64_t aSig, zSig;
5587
5588 if (floatx80_invalid_encoding(a)) {
5589 float_raise(float_flag_invalid, status);
5590 return float64_default_nan(status);
5591 }
5592 aSig = extractFloatx80Frac( a );
5593 aExp = extractFloatx80Exp( a );
5594 aSign = extractFloatx80Sign( a );
5595 if ( aExp == 0x7FFF ) {
5596 if ( (uint64_t) ( aSig<<1 ) ) {
5597 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5598 status);
5599 return float64_silence_nan(res, status);
5600 }
5601 return packFloat64( aSign, 0x7FF, 0 );
5602 }
5603 shift64RightJamming( aSig, 1, &zSig );
5604 if ( aExp || aSig ) aExp -= 0x3C01;
5605 return roundAndPackFloat64(aSign, aExp, zSig, status);
5606
5607 }
5608
5609 /*----------------------------------------------------------------------------
5610 | Returns the result of converting the extended double-precision floating-
5611 | point value `a' to the quadruple-precision floating-point format. The
5612 | conversion is performed according to the IEC/IEEE Standard for Binary
5613 | Floating-Point Arithmetic.
5614 *----------------------------------------------------------------------------*/
5615
5616 float128 floatx80_to_float128(floatx80 a, float_status *status)
5617 {
5618 bool aSign;
5619 int aExp;
5620 uint64_t aSig, zSig0, zSig1;
5621
5622 if (floatx80_invalid_encoding(a)) {
5623 float_raise(float_flag_invalid, status);
5624 return float128_default_nan(status);
5625 }
5626 aSig = extractFloatx80Frac( a );
5627 aExp = extractFloatx80Exp( a );
5628 aSign = extractFloatx80Sign( a );
5629 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5630 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5631 status);
5632 return float128_silence_nan(res, status);
5633 }
5634 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5635 return packFloat128( aSign, aExp, zSig0, zSig1 );
5636
5637 }
5638
5639 /*----------------------------------------------------------------------------
5640 | Rounds the extended double-precision floating-point value `a'
5641 | to the precision provided by floatx80_rounding_precision and returns the
5642 | result as an extended double-precision floating-point value.
5643 | The operation is performed according to the IEC/IEEE Standard for Binary
5644 | Floating-Point Arithmetic.
5645 *----------------------------------------------------------------------------*/
5646
5647 floatx80 floatx80_round(floatx80 a, float_status *status)
5648 {
5649 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5650 extractFloatx80Sign(a),
5651 extractFloatx80Exp(a),
5652 extractFloatx80Frac(a), 0, status);
5653 }
5654
5655 /*----------------------------------------------------------------------------
5656 | Rounds the extended double-precision floating-point value `a' to an integer,
5657 | and returns the result as an extended quadruple-precision floating-point
5658 | value. The operation is performed according to the IEC/IEEE Standard for
5659 | Binary Floating-Point Arithmetic.
5660 *----------------------------------------------------------------------------*/
5661
5662 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5663 {
5664 bool aSign;
5665 int32_t aExp;
5666 uint64_t lastBitMask, roundBitsMask;
5667 floatx80 z;
5668
5669 if (floatx80_invalid_encoding(a)) {
5670 float_raise(float_flag_invalid, status);
5671 return floatx80_default_nan(status);
5672 }
5673 aExp = extractFloatx80Exp( a );
5674 if ( 0x403E <= aExp ) {
5675 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5676 return propagateFloatx80NaN(a, a, status);
5677 }
5678 return a;
5679 }
5680 if ( aExp < 0x3FFF ) {
5681 if ( ( aExp == 0 )
5682 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5683 return a;
5684 }
5685 float_raise(float_flag_inexact, status);
5686 aSign = extractFloatx80Sign( a );
5687 switch (status->float_rounding_mode) {
5688 case float_round_nearest_even:
5689 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5690 ) {
5691 return
5692 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5693 }
5694 break;
5695 case float_round_ties_away:
5696 if (aExp == 0x3FFE) {
5697 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5698 }
5699 break;
5700 case float_round_down:
5701 return
5702 aSign ?
5703 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5704 : packFloatx80( 0, 0, 0 );
5705 case float_round_up:
5706 return
5707 aSign ? packFloatx80( 1, 0, 0 )
5708 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5709
5710 case float_round_to_zero:
5711 break;
5712 default:
5713 g_assert_not_reached();
5714 }
5715 return packFloatx80( aSign, 0, 0 );
5716 }
5717 lastBitMask = 1;
5718 lastBitMask <<= 0x403E - aExp;
5719 roundBitsMask = lastBitMask - 1;
5720 z = a;
5721 switch (status->float_rounding_mode) {
5722 case float_round_nearest_even:
5723 z.low += lastBitMask>>1;
5724 if ((z.low & roundBitsMask) == 0) {
5725 z.low &= ~lastBitMask;
5726 }
5727 break;
5728 case float_round_ties_away:
5729 z.low += lastBitMask >> 1;
5730 break;
5731 case float_round_to_zero:
5732 break;
5733 case float_round_up:
5734 if (!extractFloatx80Sign(z)) {
5735 z.low += roundBitsMask;
5736 }
5737 break;
5738 case float_round_down:
5739 if (extractFloatx80Sign(z)) {
5740 z.low += roundBitsMask;
5741 }
5742 break;
5743 default:
5744 abort();
5745 }
5746 z.low &= ~ roundBitsMask;
5747 if ( z.low == 0 ) {
5748 ++z.high;
5749 z.low = UINT64_C(0x8000000000000000);
5750 }
5751 if (z.low != a.low) {
5752 float_raise(float_flag_inexact, status);
5753 }
5754 return z;
5755
5756 }
5757
5758 /*----------------------------------------------------------------------------
5759 | Returns the result of adding the absolute values of the extended double-
5760 | precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5761 | negated before being returned. `zSign' is ignored if the result is a NaN.
5762 | The addition is performed according to the IEC/IEEE Standard for Binary
5763 | Floating-Point Arithmetic.
5764 *----------------------------------------------------------------------------*/
5765
static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) {
        /* a dominates: NaN/inf short-circuit, else align b down to a's
         * exponent, keeping the shifted-out bits in zSig1. */
        if ( aExp == 0x7FFF ) {
            if ((uint64_t)(aSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        if ( bExp == 0 ) --expDiff;   /* subnormal b has effective exp 1 */
        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* b dominates: mirror image of the case above. */
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( aExp == 0 ) ++expDiff;   /* subnormal a has effective exp 1 */
        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
        zExp = bExp;
    }
    else {
        /* Equal exponents. */
        if ( aExp == 0x7FFF ) {
            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
                return propagateFloatx80NaN(a, b, status);
            }
            return a;
        }
        zSig1 = 0;
        zSig0 = aSig + bSig;
        if ( aExp == 0 ) {
            if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
                /* At least one of the values is a pseudo-denormal,
                 * and there is a carry out of the result. */
                zExp = 1;
                goto shiftRight1;
            }
            if (zSig0 == 0) {
                return packFloatx80(zSign, 0, 0);
            }
            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
            goto roundAndPack;
        }
        /* Both normal with equal exponents: the sum always carries out,
         * so fall through to the right shift below. */
        zExp = aExp;
        goto shiftRight1;
    }
    zSig0 = aSig + bSig;
    /* No carry out (integer bit still set): result already normalized. */
    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
 shiftRight1:
    /* Carry out of bit 63: shift right one and bump the exponent. */
    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= UINT64_C(0x8000000000000000);
    ++zExp;
 roundAndPack:
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
5837
5838 /*----------------------------------------------------------------------------
5839 | Returns the result of subtracting the absolute values of the extended
5840 | double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5841 | difference is negated before being returned. `zSign' is ignored if the
5842 | result is a NaN. The subtraction is performed according to the IEC/IEEE
5843 | Standard for Binary Floating-Point Arithmetic.
5844 *----------------------------------------------------------------------------*/
5845
static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
                                float_status *status)
{
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    int32_t expDiff;

    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    expDiff = aExp - bExp;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponents: inf - inf is invalid; NaNs propagate. */
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    /* Subnormals have effective exponent 1. */
    if ( aExp == 0 ) {
        aExp = 1;
        bExp = 1;
    }
    zSig1 = 0;
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Exact cancellation: zero, negative only in round-down mode. */
    return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
 bExpBigger:
    /* |b| > |a|: result takes b's magnitude and the opposite sign. */
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return packFloatx80(zSign ^ 1, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( aExp == 0 ) ++expDiff;   /* subnormal a has effective exp 1 */
    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
 bBigger:
    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
    zExp = bExp;
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    /* |a| > |b|: align b down to a's exponent, then subtract. */
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) --expDiff;   /* subnormal b has effective exp 1 */
    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
 aBigger:
    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
    zExp = aExp;
 normalizeRoundAndPack:
    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
                                         zSign, zExp, zSig0, zSig1, status);
}
5906
5907 /*----------------------------------------------------------------------------
5908 | Returns the result of adding the extended double-precision floating-point
5909 | values `a' and `b'. The operation is performed according to the IEC/IEEE
5910 | Standard for Binary Floating-Point Arithmetic.
5911 *----------------------------------------------------------------------------*/
5912
5913 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
5914 {
5915 bool aSign, bSign;
5916
5917 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5918 float_raise(float_flag_invalid, status);
5919 return floatx80_default_nan(status);
5920 }
5921 aSign = extractFloatx80Sign( a );
5922 bSign = extractFloatx80Sign( b );
5923 if ( aSign == bSign ) {
5924 return addFloatx80Sigs(a, b, aSign, status);
5925 }
5926 else {
5927 return subFloatx80Sigs(a, b, aSign, status);
5928 }
5929
5930 }
5931
5932 /*----------------------------------------------------------------------------
5933 | Returns the result of subtracting the extended double-precision floating-
5934 | point values `a' and `b'. The operation is performed according to the
5935 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5936 *----------------------------------------------------------------------------*/
5937
5938 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
5939 {
5940 bool aSign, bSign;
5941
5942 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5943 float_raise(float_flag_invalid, status);
5944 return floatx80_default_nan(status);
5945 }
5946 aSign = extractFloatx80Sign( a );
5947 bSign = extractFloatx80Sign( b );
5948 if ( aSign == bSign ) {
5949 return subFloatx80Sigs(a, b, aSign, status);
5950 }
5951 else {
5952 return addFloatx80Sigs(a, b, aSign, status);
5953 }
5954
5955 }
5956
5957 /*----------------------------------------------------------------------------
5958 | Returns the result of multiplying the extended double-precision floating-
5959 | point values `a' and `b'. The operation is performed according to the
5960 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5961 *----------------------------------------------------------------------------*/
5962
floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    /* NaNs propagate; inf * 0 is invalid; inf * finite = inf. */
    if ( aExp == 0x7FFF ) {
        if ( (uint64_t) ( aSig<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( ( bExp | bSig ) == 0 ) goto invalid;
        return packFloatx80(zSign, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( ( aExp | aSig ) == 0 ) {
 invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    /* Zeros and subnormals. */
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    if ( bExp == 0 ) {
        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    /* 64x64 -> 128-bit product; renormalize if the top bit is clear
     * (product of two normalized significands is in [1, 4)). */
    zExp = aExp + bExp - 0x3FFE;
    mul64To128( aSig, bSig, &zSig0, &zSig1 );
    if ( 0 < (int64_t) zSig0 ) {
        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
        --zExp;
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6018
6019 /*----------------------------------------------------------------------------
6020 | Returns the result of dividing the extended double-precision floating-point
6021 | value `a' by the corresponding value `b'. The operation is performed
6022 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6023 *----------------------------------------------------------------------------*/
6024
floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
    bool aSign, bSign, zSign;
    int32_t aExp, bExp, zExp;
    uint64_t aSig, bSig, zSig0, zSig1;
    uint64_t rem0, rem1, rem2, term0, term1, term2;

    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    bSign = extractFloatx80Sign( b );
    zSign = aSign ^ bSign;
    /* NaNs propagate; inf/inf is invalid; inf/finite = inf. */
    if ( aExp == 0x7FFF ) {
        if ((uint64_t)(aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if ( bExp == 0x7FFF ) {
            if ((uint64_t)(bSig << 1)) {
                return propagateFloatx80NaN(a, b, status);
            }
            goto invalid;
        }
        return packFloatx80(zSign, floatx80_infinity_high,
                            floatx80_infinity_low);
    }
    /* finite/inf = 0. */
    if ( bExp == 0x7FFF ) {
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return packFloatx80( zSign, 0, 0 );
    }
    /* 0/0 is invalid; finite/0 raises divide-by-zero and returns inf. */
    if ( bExp == 0 ) {
        if ( bSig == 0 ) {
            if ( ( aExp | aSig ) == 0 ) {
 invalid:
                float_raise(float_flag_invalid, status);
                return floatx80_default_nan(status);
            }
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(zSign, floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
    }
    zExp = aExp - bExp + 0x3FFE;
    rem1 = 0;
    /* Ensure the dividend significand is below the divisor so the
     * estimated quotient digits fit in 64 bits. */
    if ( bSig <= aSig ) {
        shift128Right( aSig, 0, 1, &aSig, &rem1 );
        ++zExp;
    }
    /* First 64 quotient bits: estimate, multiply back, and correct the
     * estimate downward until the remainder is non-negative. */
    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
    mul64To128( bSig, zSig0, &term0, &term1 );
    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
    }
    /* Second 64 quotient bits; only refine when the low bits are so small
     * that the estimate could be off, and fold any nonzero remainder into
     * the sticky bit. */
    zSig1 = estimateDiv128To64( rem1, 0, bSig );
    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
        mul64To128( bSig, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
        }
        zSig1 |= ( ( rem1 | rem2 ) != 0 );
    }
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                zSign, zExp, zSig0, zSig1, status);
}
6105
6106 /*----------------------------------------------------------------------------
6107 | Returns the remainder of the extended double-precision floating-point value
6108 | `a' with respect to the corresponding value `b'. The operation is performed
6109 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6110 | if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6111 | the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6112 | the absolute value of the integer quotient.
6113 *----------------------------------------------------------------------------*/
6114
floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
                         float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff, aExpOrig;
    uint64_t aSig0, aSig1, bSig;
    uint64_t q, term0, term1, alternateASig0, alternateASig1;

    *quotient = 0;
    /* Non-canonical x87 encodings are treated as invalid operands. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExpOrig = aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    bSig = extractFloatx80Frac( b );
    bExp = extractFloatx80Exp( b );
    if ( aExp == 0x7FFF ) {
        /* 'a' is NaN or infinity: NaNs propagate; Inf rem x is invalid. */
        if ( (uint64_t) ( aSig0<<1 )
             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        /* x rem Inf == x (a NaN 'b' propagates instead). */
        if ((uint64_t)(bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        if (aExp == 0 && aSig0 >> 63) {
            /*
             * Pseudo-denormal argument must be returned in normalized
             * form.
             */
            return packFloatx80(aSign, 1, aSig0);
        }
        return a;
    }
    if ( bExp == 0 ) {
        /* x rem 0 is invalid; otherwise normalize the subnormal divisor. */
        if ( bSig == 0 ) {
        invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return a;
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if ( expDiff < 0 ) {
        /*
         * |a| is small enough that the (truncating or round-to-nearest)
         * quotient is zero, so 'a' itself is the result.
         */
        if ( mod || expDiff < -1 ) {
            if (aExp == 1 && aExpOrig == 0) {
                /*
                 * Pseudo-denormal argument must be returned in
                 * normalized form.
                 */
                return packFloatx80(aSign, aExp, aSig0);
            }
            return a;
        }
        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
        expDiff = 0;
    }
    /* First quotient bit; keep accumulating the low 64 bits in *quotient. */
    *quotient = q = ( bSig <= aSig0 );
    if ( q ) aSig0 -= bSig;
    expDiff -= 64;
    while ( 0 < expDiff ) {
        /* Produce 62 quotient bits per iteration by estimated division. */
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        /* Under-estimate by 2 so the partial remainder stays non-negative. */
        q = ( 2 < q ) ? q - 2 : 0;
        mul64To128( bSig, q, &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
        expDiff -= 62;
        *quotient <<= 62;
        *quotient += q;
    }
    expDiff += 64;
    if ( 0 < expDiff ) {
        /* Final partial quotient of fewer than 64 bits. */
        q = estimateDiv128To64( aSig0, aSig1, bSig );
        q = ( 2 < q ) ? q - 2 : 0;
        q >>= 64 - expDiff;
        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
        /* Fix up the under-estimate: subtract while divisor still fits. */
        while ( le128( term0, term1, aSig0, aSig1 ) ) {
            ++q;
            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
        }
        if (expDiff < 64) {
            *quotient <<= expDiff;
        } else {
            *quotient = 0;
        }
        *quotient += q;
    }
    else {
        term1 = 0;
        term0 = bSig;
    }
    if (!mod) {
        /*
         * IEEE remainder: if the complementary remainder (divisor minus
         * current remainder) is smaller in magnitude — ties going to the
         * even quotient — use it and flip the result's sign.
         */
        sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
        if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
             || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
                  && ( q & 1 ) )
           ) {
            aSig0 = alternateASig0;
            aSig1 = alternateASig1;
            zSign = ! zSign;
            ++*quotient;
        }
    }
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);

}
6235
6236 /*----------------------------------------------------------------------------
6237 | Returns the remainder of the extended double-precision floating-point value
6238 | `a' with respect to the corresponding value `b'. The operation is performed
6239 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6240 *----------------------------------------------------------------------------*/
6241
6242 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6243 {
6244 uint64_t quotient;
6245 return floatx80_modrem(a, b, false, &quotient, status);
6246 }
6247
6248 /*----------------------------------------------------------------------------
6249 | Returns the remainder of the extended double-precision floating-point value
6250 | `a' with respect to the corresponding value `b', with the quotient truncated
6251 | toward zero.
6252 *----------------------------------------------------------------------------*/
6253
6254 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6255 {
6256 uint64_t quotient;
6257 return floatx80_modrem(a, b, true, &quotient, status);
6258 }
6259
6260 /*----------------------------------------------------------------------------
6261 | Returns the square root of the extended double-precision floating-point
6262 | value `a'. The operation is performed according to the IEC/IEEE Standard
6263 | for Binary Floating-Point Arithmetic.
6264 *----------------------------------------------------------------------------*/
6265
floatx80 floatx80_sqrt(floatx80 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    /* Non-canonical x87 encodings are invalid operands. */
    if (floatx80_invalid_encoding(a)) {
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN propagates; sqrt(+Inf) is +Inf; sqrt(-Inf) is invalid. */
        if ((uint64_t)(aSig0 << 1)) {
            return propagateFloatx80NaN(a, a, status);
        }
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt of a negative value other than -0 is invalid. */
        if ( ( aExp | aSig0 ) == 0 ) return a;
    invalid:
        float_raise(float_flag_invalid, status);
        return floatx80_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Result exponent is half the input's unbiased exponent, rebiased. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    /* 32-bit seed estimate of the root, then refine toward 64 bits. */
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* Correct the estimate downward until the remainder is non-negative. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Compute 64 further root bits so rounding can be done exactly. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
        /* Near a rounding boundary: form the exact remainder and adjust. */
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        /* Sticky bit: any non-zero remainder affects rounding. */
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= doubleZSig0;
    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                0, zExp, zSig0, zSig1, status);
}
6330
6331 /*----------------------------------------------------------------------------
6332 | Returns the result of converting the quadruple-precision floating-point
6333 | value `a' to the extended double-precision floating-point format. The
6334 | conversion is performed according to the IEC/IEEE Standard for Binary
6335 | Floating-Point Arithmetic.
6336 *----------------------------------------------------------------------------*/
6337
6338 floatx80 float128_to_floatx80(float128 a, float_status *status)
6339 {
6340 bool aSign;
6341 int32_t aExp;
6342 uint64_t aSig0, aSig1;
6343
6344 aSig1 = extractFloat128Frac1( a );
6345 aSig0 = extractFloat128Frac0( a );
6346 aExp = extractFloat128Exp( a );
6347 aSign = extractFloat128Sign( a );
6348 if ( aExp == 0x7FFF ) {
6349 if ( aSig0 | aSig1 ) {
6350 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6351 status);
6352 return floatx80_silence_nan(res, status);
6353 }
6354 return packFloatx80(aSign, floatx80_infinity_high,
6355 floatx80_infinity_low);
6356 }
6357 if ( aExp == 0 ) {
6358 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6359 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6360 }
6361 else {
6362 aSig0 |= UINT64_C(0x0001000000000000);
6363 }
6364 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6365 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6366
6367 }
6368
6369 /*----------------------------------------------------------------------------
6370 | Returns the remainder of the quadruple-precision floating-point value `a'
6371 | with respect to the corresponding value `b'. The operation is performed
6372 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6373 *----------------------------------------------------------------------------*/
6374
float128 float128_rem(float128 a, float128 b, float_status *status)
{
    bool aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
    int64_t sigMean0;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    bSig1 = extractFloat128Frac1( b );
    bSig0 = extractFloat128Frac0( b );
    bExp = extractFloat128Exp( b );
    if ( aExp == 0x7FFF ) {
        /* 'a' is NaN or infinity: NaNs propagate; Inf rem x is invalid. */
        if ( ( aSig0 | aSig1 )
             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
            return propagateFloat128NaN(a, b, status);
        }
        goto invalid;
    }
    if ( bExp == 0x7FFF ) {
        /* x rem Inf == x (a NaN 'b' propagates instead). */
        if (bSig0 | bSig1) {
            return propagateFloat128NaN(a, b, status);
        }
        return a;
    }
    if ( bExp == 0 ) {
        /* x rem 0 is invalid; otherwise normalize the subnormal divisor. */
        if ( ( bSig0 | bSig1 ) == 0 ) {
        invalid:
            float_raise(float_flag_invalid, status);
            return float128_default_nan(status);
        }
        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return a;
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    expDiff = aExp - bExp;
    /* |a| < |b|/2: 'a' is already the IEEE remainder. */
    if ( expDiff < -1 ) return a;
    shortShift128Left(
        aSig0 | UINT64_C(0x0001000000000000),
        aSig1,
        15 - ( expDiff < 0 ),
        &aSig0,
        &aSig1
    );
    shortShift128Left(
        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
    /* First quotient bit. */
    q = le128( bSig0, bSig1, aSig0, aSig1 );
    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    expDiff -= 64;
    while ( 0 < expDiff ) {
        /* Produce 61 quotient bits per iteration by estimated division. */
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        /* Under-estimate by 4 so the partial remainder stays non-negative. */
        q = ( 4 < q ) ? q - 4 : 0;
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
        expDiff -= 61;
    }
    if ( -64 < expDiff ) {
        /* Final partial quotient; realign operands before subtracting. */
        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
        q = ( 4 < q ) ? q - 4 : 0;
        q >>= - expDiff;
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
        expDiff += 52;
        if ( expDiff < 0 ) {
            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
        }
        else {
            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
        }
        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
    }
    else {
        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
    }
    /*
     * Subtract the divisor until the remainder turns negative, keeping the
     * last non-negative candidate; then choose whichever candidate is
     * closer, with ties going to the even quotient (IEEE remainder rule).
     */
    do {
        alternateASig0 = aSig0;
        alternateASig1 = aSig1;
        ++q;
        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
    } while ( 0 <= (int64_t) aSig0 );
    add128(
        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
    if ( ( sigMean0 < 0 )
         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
        aSig0 = alternateASig0;
        aSig1 = alternateASig1;
    }
    /* A negative remainder flips the result's sign relative to 'a'. */
    zSign = ( (int64_t) aSig0 < 0 );
    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
    return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
                                         status);
}
6475
6476 /*----------------------------------------------------------------------------
6477 | Returns the square root of the quadruple-precision floating-point value `a'.
6478 | The operation is performed according to the IEC/IEEE Standard for Binary
6479 | Floating-Point Arithmetic.
6480 *----------------------------------------------------------------------------*/
6481
float128 float128_sqrt(float128 a, float_status *status)
{
    bool aSign;
    int32_t aExp, zExp;
    uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN propagates; sqrt(+Inf) is +Inf; sqrt(-Inf) is invalid. */
        if (aSig0 | aSig1) {
            return propagateFloat128NaN(a, a, status);
        }
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* sqrt of a negative value other than -0 is invalid. */
        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
    invalid:
        float_raise(float_flag_invalid, status);
        return float128_default_nan(status);
    }
    if ( aExp == 0 ) {
        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
    }
    /* Result exponent is half the input's unbiased exponent, rebiased. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
    aSig0 |= UINT64_C(0x0001000000000000);
    /* 32-bit seed estimate of the root, then refine toward 64 bits. */
    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* Correct the estimate downward until the remainder is non-negative. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (int64_t) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Compute 64 further root bits so rounding can be done exactly. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
        /* Near a rounding boundary: form the exact remainder and adjust. */
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (int64_t) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        /* Sticky bit: any non-zero remainder affects rounding. */
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
    return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);

}
6543
/*
 * Compare 'a' and 'b'; 'is_quiet' selects a quiet comparison, which raises
 * the invalid flag only for signaling NaNs (a signaling comparison raises
 * it for any NaN operand).
 */
static inline FloatRelation
floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
                          float_status *status)
{
    bool aSign, bSign;

    /* Non-canonical x87 encodings compare unordered and raise invalid. */
    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
        float_raise(float_flag_invalid, status);
        return float_relation_unordered;
    }
    /* Any NaN operand makes the comparison unordered. */
    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
          ( extractFloatx80Frac( a )<<1 ) ) ||
        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
          ( extractFloatx80Frac( b )<<1 ) )) {
        if (!is_quiet ||
            floatx80_is_signaling_nan(a, status) ||
            floatx80_is_signaling_nan(b, status)) {
            float_raise(float_flag_invalid, status);
        }
        return float_relation_unordered;
    }
    aSign = extractFloatx80Sign( a );
    bSign = extractFloatx80Sign( b );
    if ( aSign != bSign ) {

        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
             ( ( a.low | b.low ) == 0 ) ) {
            /* zero case */
            return float_relation_equal;
        } else {
            /* Signs differ: the non-negative operand is the greater one. */
            return 1 - (2 * aSign);
        }
    } else {
        /* Normalize pseudo-denormals before comparison. */
        if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
            ++a.high;
        }
        if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
            ++b.high;
        }
        if (a.low == b.low && a.high == b.high) {
            return float_relation_equal;
        } else {
            /* Same sign: compare magnitudes; a set sign reverses the order. */
            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
        }
    }
}
6591
6592 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
6593 {
6594 return floatx80_compare_internal(a, b, 0, status);
6595 }
6596
6597 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6598 float_status *status)
6599 {
6600 return floatx80_compare_internal(a, b, 1, status);
6601 }
6602
6603 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
6604 {
6605 bool aSign;
6606 int32_t aExp;
6607 uint64_t aSig;
6608
6609 if (floatx80_invalid_encoding(a)) {
6610 float_raise(float_flag_invalid, status);
6611 return floatx80_default_nan(status);
6612 }
6613 aSig = extractFloatx80Frac( a );
6614 aExp = extractFloatx80Exp( a );
6615 aSign = extractFloatx80Sign( a );
6616
6617 if ( aExp == 0x7FFF ) {
6618 if ( aSig<<1 ) {
6619 return propagateFloatx80NaN(a, a, status);
6620 }
6621 return a;
6622 }
6623
6624 if (aExp == 0) {
6625 if (aSig == 0) {
6626 return a;
6627 }
6628 aExp++;
6629 }
6630
6631 if (n > 0x10000) {
6632 n = 0x10000;
6633 } else if (n < -0x10000) {
6634 n = -0x10000;
6635 }
6636
6637 aExp += n;
6638 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6639 aSign, aExp, aSig, 0, status);
6640 }
6641
6642 static void __attribute__((constructor)) softfloat_init(void)
6643 {
6644 union_float64 ua, ub, uc, ur;
6645
6646 if (QEMU_NO_HARDFLOAT) {
6647 return;
6648 }
6649 /*
6650 * Test that the host's FMA is not obviously broken. For example,
6651 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6652 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6653 */
6654 ua.s = 0x0020000000000001ULL;
6655 ub.s = 0x3ca0000000000000ULL;
6656 uc.s = 0x0020000000000000ULL;
6657 ur.h = fma(ua.h, ub.h, uc.h);
6658 if (ur.s != 0x0020000000000001ULL) {
6659 force_soft_fma = true;
6660 }
6661 }