/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

/*
===============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

/* BSD licensing:
 * Copyright (c) 2006, Fabrice Bellard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

/* softfloat (and in particular the code in softfloat-specialize.h) is
 * target-dependent and needs the TARGET_* macros.
 */
#include "qemu/osdep.h"
#include <math.h>
#include "qemu/bitops.h"
#include "fpu/softfloat.h"

/* We only need stdlib for abort() */

/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations. (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
#include "fpu/softfloat-macros.h"

/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, FP instruction semantics are similar but not identical, particularly
 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 * exception flags is not trivial: reading the host's flags register with a
 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is neither fast nor pleasant to work
 * with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is < the minimum normal.
 */
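/*
 * Rough pseudocode sketch of the resulting fast path for a two-operand op
 * (illustration only; the real dispatch logic is float32_gen2() and
 * float64_gen2() below, and the helper names here are placeholders):
 *
 *   if (can_use_fpu(s) && both_inputs_zero_or_normal) {
 *       r = a + b;                           // compute on the host FPU
 *       if (isinf(r)) {
 *           float_raise(float_flag_overflow, s);  // detect from the result
 *       } else if (fabs(r) <= min_normal && !known_exact) {
 *           goto soft;           // possible underflow: redo in soft-fp
 *       }
 *   } else {
 *       goto soft;               // NaN/denormal/other rounding mode: soft-fp
 *   }
 */
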
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK
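
/*
 * For reference (illustration only, not compiled), the float32 instantiation
 * above expands to approximately:
 *
 *   static inline void float32_input_flush__nocheck(float32 *a, float_status *s)
 *   {
 *       if (unlikely(float32_is_denormal(*a))) {
 *           *a = float32_set_sign(float32_zero, float32_is_neg(*a));
 *           float_raise(float_flag_input_denormal, s);
 *       }
 *   }
 */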

#define GEN_INPUT_FLUSH1(name, soft_t)                                  \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1

#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2

#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3

/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF 1
#else
# define QEMU_HARDFLOAT_USE_ISINF 0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif

static inline bool can_use_fpu(const float_status *s)
{
    if (QEMU_NO_HARDFLOAT) {
        return false;
    }
    return likely(s->float_exception_flags & float_flag_inexact &&
                  s->float_rounding_mode == float_round_nearest_even);
}

/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float (*hard_f32_op2_fn)(float a, float b);
typedef double (*hard_f64_op2_fn)(double a, double b);

/* 2-input is-zero-or-normal */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}

static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}

/* 3-input is-zero-or-normal */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}

static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}

static inline bool f32_is_inf(union_float32 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float32_is_infinity(a.s);
}

static inline bool f64_is_inf(union_float64 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float64_is_infinity(a.s);
}

static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

#define float_cmask(bit) (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
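
/*
 * For example (illustration only): with a normal and an infinite operand,
 * ab_mask = float_cmask_normal | float_cmask_inf = 0x0c, so a test such as
 * (ab_mask & float_cmask_anynan) is 0 and NaN propagation can be skipped,
 * while an inf and a zero operand give ab_mask == float_cmask_infzero,
 * which the muladd helpers use to spot the invalid inf * 0 case.
 */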

/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
};

/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}

/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */
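/*
 * For example, narrowing a FloatParts128 {frac_hi, frac_lo} to a
 * FloatParts64 only needs frac_hi, with frac_lo folded into the sticky
 * bit; see frac64_truncjam() below.
 */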

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation. In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT  63
#define DECOMPOSED_IMPLICIT_BIT  (1ull << DECOMPOSED_BINARY_POINT)

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction:
 *   round_mask: bits below lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    uint64_t round_mask;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                       \
    .exp_size   = E,                           \
    .exp_bias   = ((1 << E) - 1) >> 1,         \
    .exp_max    = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                     \
    FLOAT_PARAMS_(E),                          \
    .frac_size  = F,                           \
    .frac_shift = (-F - 1) & 63,               \
    .round_mask = (1ull << ((-F - 1) & 63)) - 1
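
/*
 * Worked example (illustration only): FLOAT_PARAMS(8, 23), used for float32
 * below, yields exp_bias = 127, exp_max = 255, and
 * frac_shift = (-24) & 63 = 40, so the 23-bit fraction shifted left by 40
 * occupies bits 62..40, just below DECOMPOSED_IMPLICIT_BIT at bit 63;
 * round_mask = (1ull << 40) - 1 covers the bits discarded when repacking.
 */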

static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

#define FLOATX80_PARAMS(R)              \
    FLOAT_PARAMS_(15),                  \
    .frac_size = R == 64 ? 63 : R,      \
    .frac_shift = 0,                    \
    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1

static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
};

/* Unpack a float to parts, but do not canonicalize. */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;

    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, f_size + e_size, 1),
        .exp = extract64(raw, f_size, e_size),
        .frac = extract64(raw, 0, f_size)
    };
}
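
/*
 * E.g. for float32 (f_size = 23, e_size = 8), this pulls the sign from
 * bit 31, the biased exponent from bits 30..23, and the fraction from
 * bits 22..0 of the raw encoding.
 */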

static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}

static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
{
    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract32(f.high, 15, 1),
        .exp = extract32(f.high, 0, 15),
        .frac_hi = f.low
    };
}

static void float128_unpack_raw(FloatParts128 *p, float128 f)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}

/* Pack a float from parts, but do not canonicalize. */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;
    uint64_t ret;

    ret = (uint64_t)p->sign << (f_size + e_size);
    ret = deposit64(ret, f_size, e_size, p->exp);
    ret = deposit64(ret, 0, f_size, p->frac);
    return ret;
}

static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}

static float128 float128_pack_raw(const FloatParts128 *p)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}

/*----------------------------------------------------------------------------
| Functions and definitions to determine: (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output. These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.c.inc"

#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)

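/*
 * Illustration only: the generic macros above dispatch on the pointer type
 * of the parts argument, so with FloatParts64 *p,
 * parts_canonicalize(p, s, fmt) resolves to parts64_canonicalize(p, s, fmt),
 * and with FloatParts128 *p it resolves to parts128_canonicalize().
 */
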
#define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
                                   const FloatFmt *fmt);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
                                    const FloatFmt *fmt);

#define parts_uncanon_normal(A, S, F) \
    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
                                    uint64_t *mod_quot, float_status *s);
static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
                                      uint64_t *mod_quot, float_status *s);

#define parts_modrem(A, B, Q, S) \
    PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)

static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                      int scale, int64_t min, int64_t max,
                                      float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
                           float_status *s, bool q);
static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
                            float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)

static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_log2(A, S, F) \
    PARTS_GENERIC_64_128(log2, A)(A, S, F)

/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)

static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)

static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)

static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;
}

static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}

#define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)

static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
{
    return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
}

static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t ta = a->frac_hi, tb = b->frac_hi;
    if (ta == tb) {
        ta = a->frac_lo, tb = b->frac_lo;
        if (ta == tb) {
            return 0;
        }
    }
    return ta < tb ? -1 : 1;
}

#define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)

static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)

static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward. If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}

static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)

static bool frac64_eqz(FloatParts64 *a)
{
    return a->frac == 0;
}

static bool frac128_eqz(FloatParts128 *a)
{
    return (a->frac_hi | a->frac_lo) == 0;
}

#define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)

static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)

static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)

static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}

static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}

static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)

static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
{
    uint64_t a0, a1, b0, t0, t1, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac;
    a1 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        a0 >>= 1;
        exp_diff = 0;
    }

    b0 = b->frac;
    quot = q = b0 <= a0;
    if (q) {
        a0 -= b0;
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? q - 2 : 0;
        mul64To128(b0, q, &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(a0, a1, 62, &a0, &a1);
        exp_diff -= 62;
        quot = (quot << 62) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
        while (le128(t0, t1, a0, a1)) {
            ++q;
            sub128(a0, a1, t0, t1, &a0, &a1);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub128(t0, t1, a0, a1, &t0, &t1);
        if (lt128(t0, t1, a0, a1) ||
            (eq128(t0, t1, a0, a1) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift128Left(a0, a1, shift, &a0, &a1);
    } else if (likely(a1)) {
        shift = clz64(a1);
        a0 = a1 << shift;
        a1 = 0;
        shift += 64;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac = a0 | (a1 != 0);
}

static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
                           uint64_t *mod_quot)
{
    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac_hi;
    a1 = a->frac_lo;
    a2 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        shift128Right(a0, a1, 1, &a0, &a1);
        exp_diff = 0;
    }

    b0 = b->frac_hi;
    b1 = b->frac_lo;

    quot = q = le128(b0, b1, a0, a1);
    if (q) {
        sub128(a0, a1, b0, b1, &a0, &a1);
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? q - 4 : 0;
        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
        exp_diff -= 61;
        quot = (quot << 61) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
        while (le192(t0, t1, t2, a0, a1, a2)) {
            ++q;
            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = b1;
        t2 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
        if (lt192(t0, t1, t2, a0, a1, a2) ||
            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a2 = t2;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
    } else if (likely(a1)) {
        shift = clz64(a1);
        shortShift128Left(a1, a2, shift, &a0, &a1);
        a2 = 0;
        shift += 64;
    } else if (likely(a2)) {
        shift = clz64(a2);
        a0 = a2 << shift;
        a1 = a2 = 0;
        shift += 128;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac_hi = a0;
    a->frac_lo = a1 | (a2 != 0);
}

#define frac_modrem(A, B, Q) FRAC_GENERIC_64_128(modrem, A)(A, B, Q)

static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)

static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}

static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)

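/*
 * "Shift right and jam": like a plain right shift, but any nonzero bits
 * shifted out are ORed into the least significant (sticky) bit so that
 * rounding still sees that the value was inexact. Illustration only:
 * with frac = 0b10001 and c = 3, a plain shift gives 0b10, while shrjam
 * gives 0b11 because a nonzero bit was discarded.
 */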
static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}

static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}

static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)

static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)

static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)

static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

#define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)

/*
 * Reciprocal sqrt table. 1 bit of exponent, 6 bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licensed.
 */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};

#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN    glue(FloatParts,N)
#define FloatPartsW    glue(FloatParts,W)

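/*
 * Illustration only: with N = 64 and W = 128 as defined below, the includes
 * instantiate partsN(addsub) as parts64_addsub operating on FloatPartsN =
 * FloatParts64, widening where needed to FloatPartsW = FloatParts128; the
 * same templates are then re-included with N = 128 / W = 256, and with
 * N = 256 for the add/sub helpers alone.
 */
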
#define N 64
#define W 128

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef N
#undef W
#define N 128
#define W 256

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef N
#undef W
#define N 256

#include "softfloat-parts-addsub.c.inc"

#undef N
#undef W
#undef partsN
#undef FloatPartsN
#undef FloatPartsW

/*
 * Pack/unpack routines with a specific FloatFmt.
 */

static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}

static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}

static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}

static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}

static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}

static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}

static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}

static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}

static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}

static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}

static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}

/* Returns false if the encoding is invalid. */
static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
                                      float_status *s)
{
    /* Ensure rounding precision is set before beginning. */
    switch (s->floatx80_rounding_precision) {
    case floatx80_precision_x:
    case floatx80_precision_d:
    case floatx80_precision_s:
        break;
    default:
        g_assert_not_reached();
    }

    if (unlikely(floatx80_invalid_encoding(f))) {
        float_raise(float_flag_invalid, s);
        return false;
    }

    floatx80_unpack_raw(p, f);

    if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
        parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
    } else {
        /* The explicit integer bit is ignored, after invalid checks. */
        p->frac_hi &= MAKE_64BIT_MASK(0, 63);
        p->cls = (p->frac_hi == 0 ? float_class_inf
                  : parts_is_snan_frac(p->frac_hi, s)
                  ? float_class_snan : float_class_qnan);
    }
    return true;
}

static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
    uint64_t frac;
    int exp;

    switch (p->cls) {
    case float_class_normal:
        if (s->floatx80_rounding_precision == floatx80_precision_x) {
            parts_uncanon_normal(p, s, fmt);
            frac = p->frac_hi;
            exp = p->exp;
        } else {
            FloatParts64 p64;

            p64.sign = p->sign;
            p64.exp = p->exp;
            frac_truncjam(&p64, p);
            parts_uncanon_normal(&p64, s, fmt);
            frac = p64.frac;
            exp = p64.exp;
        }
        if (exp != fmt->exp_max) {
            break;
        }
        /* rounded to inf -- fall through to set frac correctly */

    case float_class_inf:
        /* x86 and m68k differ in the setting of the integer bit. */
        frac = floatx80_infinity_low;
        exp = fmt->exp_max;
        break;

    case float_class_zero:
        frac = 0;
        exp = 0;
        break;

    case float_class_snan:
    case float_class_qnan:
        /* NaNs have the integer bit set. */
        frac = p->frac_hi | (1ull << 63);
        exp = fmt->exp_max;
        break;

    default:
        g_assert_not_reached();
    }

    return packFloatx80(p->sign, exp, frac);
}

1787 /*
1788 * Addition and subtraction
1789 */
1790
1791 static float16 QEMU_FLATTEN
1792 float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1793 {
1794 FloatParts64 pa, pb, *pr;
1795
1796 float16_unpack_canonical(&pa, a, status);
1797 float16_unpack_canonical(&pb, b, status);
1798 pr = parts_addsub(&pa, &pb, status, subtract);
1799
1800 return float16_round_pack_canonical(pr, status);
1801 }
1802
1803 float16 float16_add(float16 a, float16 b, float_status *status)
1804 {
1805 return float16_addsub(a, b, status, false);
1806 }
1807
1808 float16 float16_sub(float16 a, float16 b, float_status *status)
1809 {
1810 return float16_addsub(a, b, status, true);
1811 }
1812
1813 static float32 QEMU_SOFTFLOAT_ATTR
1814 soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1815 {
1816 FloatParts64 pa, pb, *pr;
1817
1818 float32_unpack_canonical(&pa, a, status);
1819 float32_unpack_canonical(&pb, b, status);
1820 pr = parts_addsub(&pa, &pb, status, subtract);
1821
1822 return float32_round_pack_canonical(pr, status);
1823 }
1824
1825 static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1826 {
1827 return soft_f32_addsub(a, b, status, false);
1828 }
1829
1830 static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1831 {
1832 return soft_f32_addsub(a, b, status, true);
1833 }
1834
1835 static float64 QEMU_SOFTFLOAT_ATTR
1836 soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1837 {
1838 FloatParts64 pa, pb, *pr;
1839
1840 float64_unpack_canonical(&pa, a, status);
1841 float64_unpack_canonical(&pb, b, status);
1842 pr = parts_addsub(&pa, &pb, status, subtract);
1843
1844 return float64_round_pack_canonical(pr, status);
1845 }
1846
1847 static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1848 {
1849 return soft_f64_addsub(a, b, status, false);
1850 }
1851
1852 static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1853 {
1854 return soft_f64_addsub(a, b, status, true);
1855 }
1856
1857 static float hard_f32_add(float a, float b)
1858 {
1859 return a + b;
1860 }
1861
1862 static float hard_f32_sub(float a, float b)
1863 {
1864 return a - b;
1865 }
1866
1867 static double hard_f64_add(double a, double b)
1868 {
1869 return a + b;
1870 }
1871
1872 static double hard_f64_sub(double a, double b)
1873 {
1874 return a - b;
1875 }
1876
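/*
 * Editorial note: the *_addsubmul_post predicates below are consulted by
 * the float32_gen2()/float64_gen2() fast paths when the hardware result
 * is tiny enough that it might have underflowed.  A tiny or zero result
 * computed from two zero inputs is exact and may be kept; any other tiny
 * result is recomputed in softfloat so that the underflow and inexact
 * flags come out right.
 */
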
1877 static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1878 {
1879 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1880 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1881 }
1882 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1883 }
1884
1885 static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1886 {
1887 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1888 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1889 } else {
1890 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1891 }
1892 }
1893
1894 static float32 float32_addsub(float32 a, float32 b, float_status *s,
1895 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1896 {
1897 return float32_gen2(a, b, s, hard, soft,
1898 f32_is_zon2, f32_addsubmul_post);
1899 }
1900
1901 static float64 float64_addsub(float64 a, float64 b, float_status *s,
1902 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1903 {
1904 return float64_gen2(a, b, s, hard, soft,
1905 f64_is_zon2, f64_addsubmul_post);
1906 }
1907
1908 float32 QEMU_FLATTEN
1909 float32_add(float32 a, float32 b, float_status *s)
1910 {
1911 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1912 }
1913
1914 float32 QEMU_FLATTEN
1915 float32_sub(float32 a, float32 b, float_status *s)
1916 {
1917 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1918 }
1919
1920 float64 QEMU_FLATTEN
1921 float64_add(float64 a, float64 b, float_status *s)
1922 {
1923 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1924 }
1925
1926 float64 QEMU_FLATTEN
1927 float64_sub(float64 a, float64 b, float_status *s)
1928 {
1929 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1930 }
1931
1932 static bfloat16 QEMU_FLATTEN
1933 bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1934 {
1935 FloatParts64 pa, pb, *pr;
1936
1937 bfloat16_unpack_canonical(&pa, a, status);
1938 bfloat16_unpack_canonical(&pb, b, status);
1939 pr = parts_addsub(&pa, &pb, status, subtract);
1940
1941 return bfloat16_round_pack_canonical(pr, status);
1942 }
1943
1944 bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1945 {
1946 return bfloat16_addsub(a, b, status, false);
1947 }
1948
1949 bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1950 {
1951 return bfloat16_addsub(a, b, status, true);
1952 }
1953
1954 static float128 QEMU_FLATTEN
1955 float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1956 {
1957 FloatParts128 pa, pb, *pr;
1958
1959 float128_unpack_canonical(&pa, a, status);
1960 float128_unpack_canonical(&pb, b, status);
1961 pr = parts_addsub(&pa, &pb, status, subtract);
1962
1963 return float128_round_pack_canonical(pr, status);
1964 }
1965
1966 float128 float128_add(float128 a, float128 b, float_status *status)
1967 {
1968 return float128_addsub(a, b, status, false);
1969 }
1970
1971 float128 float128_sub(float128 a, float128 b, float_status *status)
1972 {
1973 return float128_addsub(a, b, status, true);
1974 }
1975
1976 static floatx80 QEMU_FLATTEN
1977 floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
1978 {
1979 FloatParts128 pa, pb, *pr;
1980
1981 if (!floatx80_unpack_canonical(&pa, a, status) ||
1982 !floatx80_unpack_canonical(&pb, b, status)) {
1983 return floatx80_default_nan(status);
1984 }
1985
1986 pr = parts_addsub(&pa, &pb, status, subtract);
1987 return floatx80_round_pack_canonical(pr, status);
1988 }
1989
1990 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
1991 {
1992 return floatx80_addsub(a, b, status, false);
1993 }
1994
1995 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
1996 {
1997 return floatx80_addsub(a, b, status, true);
1998 }
1999
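/*
 * Illustrative sketch (editorial addition; the example_* helper is not
 * part of QEMU): callers observe IEEE exception flags through the
 * float_status they pass in, using the flag and field names from
 * include/fpu/softfloat-types.h.
 */
static bool example_f64_add_was_inexact(float64 a, float64 b)
{
    float_status st = { 0 };

    st.float_rounding_mode = float_round_nearest_even;
    float64_add(a, b, &st);
    return (st.float_exception_flags & float_flag_inexact) != 0;
}
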
2000 /*
2001 * Multiplication
2002 */
2003
2004 float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2005 {
2006 FloatParts64 pa, pb, *pr;
2007
2008 float16_unpack_canonical(&pa, a, status);
2009 float16_unpack_canonical(&pb, b, status);
2010 pr = parts_mul(&pa, &pb, status);
2011
2012 return float16_round_pack_canonical(pr, status);
2013 }
2014
2015 static float32 QEMU_SOFTFLOAT_ATTR
2016 soft_f32_mul(float32 a, float32 b, float_status *status)
2017 {
2018 FloatParts64 pa, pb, *pr;
2019
2020 float32_unpack_canonical(&pa, a, status);
2021 float32_unpack_canonical(&pb, b, status);
2022 pr = parts_mul(&pa, &pb, status);
2023
2024 return float32_round_pack_canonical(pr, status);
2025 }
2026
2027 static float64 QEMU_SOFTFLOAT_ATTR
2028 soft_f64_mul(float64 a, float64 b, float_status *status)
2029 {
2030 FloatParts64 pa, pb, *pr;
2031
2032 float64_unpack_canonical(&pa, a, status);
2033 float64_unpack_canonical(&pb, b, status);
2034 pr = parts_mul(&pa, &pb, status);
2035
2036 return float64_round_pack_canonical(pr, status);
2037 }
2038
2039 static float hard_f32_mul(float a, float b)
2040 {
2041 return a * b;
2042 }
2043
2044 static double hard_f64_mul(double a, double b)
2045 {
2046 return a * b;
2047 }
2048
2049 float32 QEMU_FLATTEN
2050 float32_mul(float32 a, float32 b, float_status *s)
2051 {
2052 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2053 f32_is_zon2, f32_addsubmul_post);
2054 }
2055
2056 float64 QEMU_FLATTEN
2057 float64_mul(float64 a, float64 b, float_status *s)
2058 {
2059 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2060 f64_is_zon2, f64_addsubmul_post);
2061 }
2062
2063 bfloat16 QEMU_FLATTEN
2064 bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2065 {
2066 FloatParts64 pa, pb, *pr;
2067
2068 bfloat16_unpack_canonical(&pa, a, status);
2069 bfloat16_unpack_canonical(&pb, b, status);
2070 pr = parts_mul(&pa, &pb, status);
2071
2072 return bfloat16_round_pack_canonical(pr, status);
2073 }
2074
2075 float128 QEMU_FLATTEN
2076 float128_mul(float128 a, float128 b, float_status *status)
2077 {
2078 FloatParts128 pa, pb, *pr;
2079
2080 float128_unpack_canonical(&pa, a, status);
2081 float128_unpack_canonical(&pb, b, status);
2082 pr = parts_mul(&pa, &pb, status);
2083
2084 return float128_round_pack_canonical(pr, status);
2085 }
2086
2087 floatx80 QEMU_FLATTEN
2088 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2089 {
2090 FloatParts128 pa, pb, *pr;
2091
2092 if (!floatx80_unpack_canonical(&pa, a, status) ||
2093 !floatx80_unpack_canonical(&pb, b, status)) {
2094 return floatx80_default_nan(status);
2095 }
2096
2097 pr = parts_mul(&pa, &pb, status);
2098 return floatx80_round_pack_canonical(pr, status);
2099 }
2100
2101 /*
2102 * Fused multiply-add
2103 */
2104
2105 float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2106 int flags, float_status *status)
2107 {
2108 FloatParts64 pa, pb, pc, *pr;
2109
2110 float16_unpack_canonical(&pa, a, status);
2111 float16_unpack_canonical(&pb, b, status);
2112 float16_unpack_canonical(&pc, c, status);
2113 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2114
2115 return float16_round_pack_canonical(pr, status);
2116 }
2117
2118 static float32 QEMU_SOFTFLOAT_ATTR
2119 soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2120 float_status *status)
2121 {
2122 FloatParts64 pa, pb, pc, *pr;
2123
2124 float32_unpack_canonical(&pa, a, status);
2125 float32_unpack_canonical(&pb, b, status);
2126 float32_unpack_canonical(&pc, c, status);
2127 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2128
2129 return float32_round_pack_canonical(pr, status);
2130 }
2131
2132 static float64 QEMU_SOFTFLOAT_ATTR
2133 soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2134 float_status *status)
2135 {
2136 FloatParts64 pa, pb, pc, *pr;
2137
2138 float64_unpack_canonical(&pa, a, status);
2139 float64_unpack_canonical(&pb, b, status);
2140 float64_unpack_canonical(&pc, c, status);
2141 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2142
2143 return float64_round_pack_canonical(pr, status);
2144 }
2145
2146 static bool force_soft_fma;
2147
2148 float32 QEMU_FLATTEN
2149 float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2150 {
2151 union_float32 ua, ub, uc, ur;
2152
2153 ua.s = xa;
2154 ub.s = xb;
2155 uc.s = xc;
2156
2157 if (unlikely(!can_use_fpu(s))) {
2158 goto soft;
2159 }
2160 if (unlikely(flags & float_muladd_halve_result)) {
2161 goto soft;
2162 }
2163
2164 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2165 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2166 goto soft;
2167 }
2168
2169 if (unlikely(force_soft_fma)) {
2170 goto soft;
2171 }
2172
2173 /*
2174 * When either a or b is zero, the product is exactly zero: there is
2175 * no under/overflow to check, and the addend is known to be normal or 0.
2176 */
2177 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2178 union_float32 up;
2179 bool prod_sign;
2180
2181 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2182 prod_sign ^= !!(flags & float_muladd_negate_product);
2183 up.s = float32_set_sign(float32_zero, prod_sign);
2184
2185 if (flags & float_muladd_negate_c) {
2186 uc.h = -uc.h;
2187 }
2188 ur.h = up.h + uc.h;
2189 } else {
2190 union_float32 ua_orig = ua;
2191 union_float32 uc_orig = uc;
2192
2193 if (flags & float_muladd_negate_product) {
2194 ua.h = -ua.h;
2195 }
2196 if (flags & float_muladd_negate_c) {
2197 uc.h = -uc.h;
2198 }
2199
2200 ur.h = fmaf(ua.h, ub.h, uc.h);
2201
2202 if (unlikely(f32_is_inf(ur))) {
2203 float_raise(float_flag_overflow, s);
2204 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2205 ua = ua_orig;
2206 uc = uc_orig;
2207 goto soft;
2208 }
2209 }
2210 if (flags & float_muladd_negate_result) {
2211 return float32_chs(ur.s);
2212 }
2213 return ur.s;
2214
2215 soft:
2216 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2217 }
2218
2219 float64 QEMU_FLATTEN
2220 float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2221 {
2222 union_float64 ua, ub, uc, ur;
2223
2224 ua.s = xa;
2225 ub.s = xb;
2226 uc.s = xc;
2227
2228 if (unlikely(!can_use_fpu(s))) {
2229 goto soft;
2230 }
2231 if (unlikely(flags & float_muladd_halve_result)) {
2232 goto soft;
2233 }
2234
2235 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2236 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2237 goto soft;
2238 }
2239
2240 if (unlikely(force_soft_fma)) {
2241 goto soft;
2242 }
2243
2244 /*
2245 * When either a or b is zero, the product is exactly zero: there is
2246 * no under/overflow to check, and the addend is known to be normal or 0.
2247 */
2248 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2249 union_float64 up;
2250 bool prod_sign;
2251
2252 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2253 prod_sign ^= !!(flags & float_muladd_negate_product);
2254 up.s = float64_set_sign(float64_zero, prod_sign);
2255
2256 if (flags & float_muladd_negate_c) {
2257 uc.h = -uc.h;
2258 }
2259 ur.h = up.h + uc.h;
2260 } else {
2261 union_float64 ua_orig = ua;
2262 union_float64 uc_orig = uc;
2263
2264 if (flags & float_muladd_negate_product) {
2265 ua.h = -ua.h;
2266 }
2267 if (flags & float_muladd_negate_c) {
2268 uc.h = -uc.h;
2269 }
2270
2271 ur.h = fma(ua.h, ub.h, uc.h);
2272
2273 if (unlikely(f64_is_inf(ur))) {
2274 float_raise(float_flag_overflow, s);
2275 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) { /* conservative: also covers <= DBL_MIN */
2276 ua = ua_orig;
2277 uc = uc_orig;
2278 goto soft;
2279 }
2280 }
2281 if (flags & float_muladd_negate_result) {
2282 return float64_chs(ur.s);
2283 }
2284 return ur.s;
2285
2286 soft:
2287 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2288 }
2289
2290 bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2291 int flags, float_status *status)
2292 {
2293 FloatParts64 pa, pb, pc, *pr;
2294
2295 bfloat16_unpack_canonical(&pa, a, status);
2296 bfloat16_unpack_canonical(&pb, b, status);
2297 bfloat16_unpack_canonical(&pc, c, status);
2298 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2299
2300 return bfloat16_round_pack_canonical(pr, status);
2301 }
2302
2303 float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2304 int flags, float_status *status)
2305 {
2306 FloatParts128 pa, pb, pc, *pr;
2307
2308 float128_unpack_canonical(&pa, a, status);
2309 float128_unpack_canonical(&pb, b, status);
2310 float128_unpack_canonical(&pc, c, status);
2311 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2312
2313 return float128_round_pack_canonical(pr, status);
2314 }
2315
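/*
 * Illustrative sketch (editorial addition; example_* is hypothetical):
 * the float_muladd_* flags compose negated forms of the fused operation
 * without rounding the intermediate product, e.g. round(-(a * b) + c)
 * in a single step.
 */
static float64 example_fused_nmadd(float64 a, float64 b, float64 c,
                                   float_status *st)
{
    return float64_muladd(a, b, c, float_muladd_negate_product, st);
}
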
2316 /*
2317 * Division
2318 */
2319
2320 float16 float16_div(float16 a, float16 b, float_status *status)
2321 {
2322 FloatParts64 pa, pb, *pr;
2323
2324 float16_unpack_canonical(&pa, a, status);
2325 float16_unpack_canonical(&pb, b, status);
2326 pr = parts_div(&pa, &pb, status);
2327
2328 return float16_round_pack_canonical(pr, status);
2329 }
2330
2331 static float32 QEMU_SOFTFLOAT_ATTR
2332 soft_f32_div(float32 a, float32 b, float_status *status)
2333 {
2334 FloatParts64 pa, pb, *pr;
2335
2336 float32_unpack_canonical(&pa, a, status);
2337 float32_unpack_canonical(&pb, b, status);
2338 pr = parts_div(&pa, &pb, status);
2339
2340 return float32_round_pack_canonical(pr, status);
2341 }
2342
2343 static float64 QEMU_SOFTFLOAT_ATTR
2344 soft_f64_div(float64 a, float64 b, float_status *status)
2345 {
2346 FloatParts64 pa, pb, *pr;
2347
2348 float64_unpack_canonical(&pa, a, status);
2349 float64_unpack_canonical(&pb, b, status);
2350 pr = parts_div(&pa, &pb, status);
2351
2352 return float64_round_pack_canonical(pr, status);
2353 }
2354
2355 static float hard_f32_div(float a, float b)
2356 {
2357 return a / b;
2358 }
2359
2360 static double hard_f64_div(double a, double b)
2361 {
2362 return a / b;
2363 }
2364
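/*
 * Editorial note: for division the fast path requires a normal divisor
 * (ruling out zero, subnormal, Inf and NaN up front) and a zero-or-normal
 * dividend; the *_div_post predicates then reject a tiny quotient unless
 * the dividend itself was zero, in which case the exact zero produced by
 * the hardware is safe to return.
 */
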
2365 static bool f32_div_pre(union_float32 a, union_float32 b)
2366 {
2367 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2368 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2369 fpclassify(b.h) == FP_NORMAL;
2370 }
2371 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2372 }
2373
2374 static bool f64_div_pre(union_float64 a, union_float64 b)
2375 {
2376 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2377 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2378 fpclassify(b.h) == FP_NORMAL;
2379 }
2380 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2381 }
2382
2383 static bool f32_div_post(union_float32 a, union_float32 b)
2384 {
2385 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2386 return fpclassify(a.h) != FP_ZERO;
2387 }
2388 return !float32_is_zero(a.s);
2389 }
2390
2391 static bool f64_div_post(union_float64 a, union_float64 b)
2392 {
2393 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2394 return fpclassify(a.h) != FP_ZERO;
2395 }
2396 return !float64_is_zero(a.s);
2397 }
2398
2399 float32 QEMU_FLATTEN
2400 float32_div(float32 a, float32 b, float_status *s)
2401 {
2402 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2403 f32_div_pre, f32_div_post);
2404 }
2405
2406 float64 QEMU_FLATTEN
2407 float64_div(float64 a, float64 b, float_status *s)
2408 {
2409 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2410 f64_div_pre, f64_div_post);
2411 }
2412
2413 bfloat16 QEMU_FLATTEN
2414 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2415 {
2416 FloatParts64 pa, pb, *pr;
2417
2418 bfloat16_unpack_canonical(&pa, a, status);
2419 bfloat16_unpack_canonical(&pb, b, status);
2420 pr = parts_div(&pa, &pb, status);
2421
2422 return bfloat16_round_pack_canonical(pr, status);
2423 }
2424
2425 float128 QEMU_FLATTEN
2426 float128_div(float128 a, float128 b, float_status *status)
2427 {
2428 FloatParts128 pa, pb, *pr;
2429
2430 float128_unpack_canonical(&pa, a, status);
2431 float128_unpack_canonical(&pb, b, status);
2432 pr = parts_div(&pa, &pb, status);
2433
2434 return float128_round_pack_canonical(pr, status);
2435 }
2436
2437 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2438 {
2439 FloatParts128 pa, pb, *pr;
2440
2441 if (!floatx80_unpack_canonical(&pa, a, status) ||
2442 !floatx80_unpack_canonical(&pb, b, status)) {
2443 return floatx80_default_nan(status);
2444 }
2445
2446 pr = parts_div(&pa, &pb, status);
2447 return floatx80_round_pack_canonical(pr, status);
2448 }
2449
2450 /*
2451 * Remainder
2452 */
2453
2454 float32 float32_rem(float32 a, float32 b, float_status *status)
2455 {
2456 FloatParts64 pa, pb, *pr;
2457
2458 float32_unpack_canonical(&pa, a, status);
2459 float32_unpack_canonical(&pb, b, status);
2460 pr = parts_modrem(&pa, &pb, NULL, status);
2461
2462 return float32_round_pack_canonical(pr, status);
2463 }
2464
2465 float64 float64_rem(float64 a, float64 b, float_status *status)
2466 {
2467 FloatParts64 pa, pb, *pr;
2468
2469 float64_unpack_canonical(&pa, a, status);
2470 float64_unpack_canonical(&pb, b, status);
2471 pr = parts_modrem(&pa, &pb, NULL, status);
2472
2473 return float64_round_pack_canonical(pr, status);
2474 }
2475
2476 float128 float128_rem(float128 a, float128 b, float_status *status)
2477 {
2478 FloatParts128 pa, pb, *pr;
2479
2480 float128_unpack_canonical(&pa, a, status);
2481 float128_unpack_canonical(&pb, b, status);
2482 pr = parts_modrem(&pa, &pb, NULL, status);
2483
2484 return float128_round_pack_canonical(pr, status);
2485 }
2486
2487 /*
2488 * Returns the remainder of the extended double-precision floating-point value
2489 * `a' with respect to the corresponding value `b'.
2490 * If 'mod' is false, the operation is performed according to the IEC/IEEE
2491 * Standard for Binary Floating-Point Arithmetic. If 'mod' is true, the
2492 * remainder is instead computed by truncating the quotient toward zero,
2493 * and *quotient is set to the low 64 bits of the absolute value of the
2494 * integer quotient.
2495 */
2496 floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2497 uint64_t *quotient, float_status *status)
2498 {
2499 FloatParts128 pa, pb, *pr;
2500
2501 *quotient = 0;
2502 if (!floatx80_unpack_canonical(&pa, a, status) ||
2503 !floatx80_unpack_canonical(&pb, b, status)) {
2504 return floatx80_default_nan(status);
2505 }
2506 pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2507
2508 return floatx80_round_pack_canonical(pr, status);
2509 }
2510
2511 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2512 {
2513 uint64_t quotient;
2514 return floatx80_modrem(a, b, false, &quotient, status);
2515 }
2516
2517 floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2518 {
2519 uint64_t quotient;
2520 return floatx80_modrem(a, b, true, &quotient, status);
2521 }
2522
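/*
 * Illustrative sketch (editorial addition; example_* is hypothetical):
 * with a = 7 and b = 2, the IEEE remainder rounds the quotient 3.5 to
 * the nearest integer 4 and yields 7 - 4*2 = -1, while the truncating
 * form keeps q = 3 and yields 7 - 3*2 = 1.
 */
static void example_rem_vs_mod(float_status *st)
{
    floatx80 a = int32_to_floatx80(7, st);
    floatx80 b = int32_to_floatx80(2, st);
    uint64_t q;

    floatx80 r = floatx80_rem(a, b, st);              /* -1.0: q rounded to 4 */
    floatx80 m = floatx80_modrem(a, b, true, &q, st); /* +1.0: q == 3 */
    (void)r; (void)m;
}
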
2523 /*
2524 * Float to Float conversions
2525 *
2526 * Returns the result of converting one float format to another. The
2527 * conversion is performed according to the IEC/IEEE Standard for
2528 * Binary Floating-Point Arithmetic.
2529 *
2530 * Usually this only needs to take care of raising the invalid
2531 * exception and of converting any NaN operands.
2532 */
2533
2534 static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2535 {
2536 switch (a->cls) {
2537 case float_class_qnan:
2538 case float_class_snan:
2539 /*
2540 * There is no NaN in the destination format. Raise Invalid
2541 * and return a zero with the sign of the input NaN.
2542 */
2543 float_raise(float_flag_invalid, s);
2544 a->cls = float_class_zero;
2545 break;
2546
2547 case float_class_inf:
2548 /*
2549 * There is no Inf in the destination format. Raise Invalid
2550 * and return the maximum normal with the correct sign.
2551 */
2552 float_raise(float_flag_invalid, s);
2553 a->cls = float_class_normal;
2554 a->exp = float16_params_ahp.exp_max;
2555 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2556 float16_params_ahp.frac_size + 1);
2557 break;
2558
2559 case float_class_normal:
2560 case float_class_zero:
2561 break;
2562
2563 default:
2564 g_assert_not_reached();
2565 }
2566 }
2567
2568 static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2569 {
2570 if (is_nan(a->cls)) {
2571 parts_return_nan(a, s);
2572 }
2573 }
2574
2575 static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2576 {
2577 if (is_nan(a->cls)) {
2578 parts_return_nan(a, s);
2579 }
2580 }
2581
2582 #define parts_float_to_float(P, S) \
2583 PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2584
2585 static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2586 float_status *s)
2587 {
2588 a->cls = b->cls;
2589 a->sign = b->sign;
2590 a->exp = b->exp;
2591
2592 if (a->cls == float_class_normal) {
2593 frac_truncjam(a, b);
2594 } else if (is_nan(a->cls)) {
2595 /* Discard the low bits of the NaN. */
2596 a->frac = b->frac_hi;
2597 parts_return_nan(a, s);
2598 }
2599 }
2600
2601 static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2602 float_status *s)
2603 {
2604 a->cls = b->cls;
2605 a->sign = b->sign;
2606 a->exp = b->exp;
2607 frac_widen(a, b);
2608
2609 if (is_nan(a->cls)) {
2610 parts_return_nan(a, s);
2611 }
2612 }
2613
2614 float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2615 {
2616 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2617 FloatParts64 p;
2618
2619 float16a_unpack_canonical(&p, a, s, fmt16);
2620 parts_float_to_float(&p, s);
2621 return float32_round_pack_canonical(&p, s);
2622 }
2623
2624 float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2625 {
2626 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2627 FloatParts64 p;
2628
2629 float16a_unpack_canonical(&p, a, s, fmt16);
2630 parts_float_to_float(&p, s);
2631 return float64_round_pack_canonical(&p, s);
2632 }
2633
2634 float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2635 {
2636 FloatParts64 p;
2637 const FloatFmt *fmt;
2638
2639 float32_unpack_canonical(&p, a, s);
2640 if (ieee) {
2641 parts_float_to_float(&p, s);
2642 fmt = &float16_params;
2643 } else {
2644 parts_float_to_ahp(&p, s);
2645 fmt = &float16_params_ahp;
2646 }
2647 return float16a_round_pack_canonical(&p, s, fmt);
2648 }
2649
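/*
 * Illustrative sketch (editorial addition; example_* is hypothetical):
 * ieee=false selects the Arm Alternative Half Precision layout, which
 * has no Inf or NaN encodings; per parts_float_to_ahp() above, a NaN
 * input collapses to a signed zero and an infinity saturates to the
 * largest AHP normal, raising Invalid in both cases.
 */
static float16 example_to_ahp(float32 f, float_status *st)
{
    return float32_to_float16(f, false, st);
}
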
2650 static float64 QEMU_SOFTFLOAT_ATTR
2651 soft_float32_to_float64(float32 a, float_status *s)
2652 {
2653 FloatParts64 p;
2654
2655 float32_unpack_canonical(&p, a, s);
2656 parts_float_to_float(&p, s);
2657 return float64_round_pack_canonical(&p, s);
2658 }
2659
2660 float64 float32_to_float64(float32 a, float_status *s)
2661 {
2662 if (likely(float32_is_normal(a))) {
2663 /* Widening conversion can never produce inexact results. */
2664 union_float32 uf;
2665 union_float64 ud;
2666 uf.s = a;
2667 ud.h = uf.h;
2668 return ud.s;
2669 } else if (float32_is_zero(a)) {
2670 return float64_set_sign(float64_zero, float32_is_neg(a));
2671 } else {
2672 return soft_float32_to_float64(a, s);
2673 }
2674 }
2675
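/*
 * Editorial note: the fast path above is sound because binary32 embeds
 * exactly into binary64 -- 24 significand bits fit in 53 and exponents
 * in [-126, 127] fit in [-1022, 1023] -- so the host widening conversion
 * of a normal float32 is always exact and raises no flags.  Zeros are
 * special-cased to propagate the sign without touching the host FPU;
 * everything else (subnormals, Inf, NaN) takes the softfloat path.
 */
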
2676 float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2677 {
2678 FloatParts64 p;
2679 const FloatFmt *fmt;
2680
2681 float64_unpack_canonical(&p, a, s);
2682 if (ieee) {
2683 parts_float_to_float(&p, s);
2684 fmt = &float16_params;
2685 } else {
2686 parts_float_to_ahp(&p, s);
2687 fmt = &float16_params_ahp;
2688 }
2689 return float16a_round_pack_canonical(&p, s, fmt);
2690 }
2691
2692 float32 float64_to_float32(float64 a, float_status *s)
2693 {
2694 FloatParts64 p;
2695
2696 float64_unpack_canonical(&p, a, s);
2697 parts_float_to_float(&p, s);
2698 return float32_round_pack_canonical(&p, s);
2699 }
2700
2701 float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2702 {
2703 FloatParts64 p;
2704
2705 bfloat16_unpack_canonical(&p, a, s);
2706 parts_float_to_float(&p, s);
2707 return float32_round_pack_canonical(&p, s);
2708 }
2709
2710 float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2711 {
2712 FloatParts64 p;
2713
2714 bfloat16_unpack_canonical(&p, a, s);
2715 parts_float_to_float(&p, s);
2716 return float64_round_pack_canonical(&p, s);
2717 }
2718
2719 bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2720 {
2721 FloatParts64 p;
2722
2723 float32_unpack_canonical(&p, a, s);
2724 parts_float_to_float(&p, s);
2725 return bfloat16_round_pack_canonical(&p, s);
2726 }
2727
2728 bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2729 {
2730 FloatParts64 p;
2731
2732 float64_unpack_canonical(&p, a, s);
2733 parts_float_to_float(&p, s);
2734 return bfloat16_round_pack_canonical(&p, s);
2735 }
2736
2737 float32 float128_to_float32(float128 a, float_status *s)
2738 {
2739 FloatParts64 p64;
2740 FloatParts128 p128;
2741
2742 float128_unpack_canonical(&p128, a, s);
2743 parts_float_to_float_narrow(&p64, &p128, s);
2744 return float32_round_pack_canonical(&p64, s);
2745 }
2746
2747 float64 float128_to_float64(float128 a, float_status *s)
2748 {
2749 FloatParts64 p64;
2750 FloatParts128 p128;
2751
2752 float128_unpack_canonical(&p128, a, s);
2753 parts_float_to_float_narrow(&p64, &p128, s);
2754 return float64_round_pack_canonical(&p64, s);
2755 }
2756
2757 float128 float32_to_float128(float32 a, float_status *s)
2758 {
2759 FloatParts64 p64;
2760 FloatParts128 p128;
2761
2762 float32_unpack_canonical(&p64, a, s);
2763 parts_float_to_float_widen(&p128, &p64, s);
2764 return float128_round_pack_canonical(&p128, s);
2765 }
2766
2767 float128 float64_to_float128(float64 a, float_status *s)
2768 {
2769 FloatParts64 p64;
2770 FloatParts128 p128;
2771
2772 float64_unpack_canonical(&p64, a, s);
2773 parts_float_to_float_widen(&p128, &p64, s);
2774 return float128_round_pack_canonical(&p128, s);
2775 }
2776
2777 float32 floatx80_to_float32(floatx80 a, float_status *s)
2778 {
2779 FloatParts64 p64;
2780 FloatParts128 p128;
2781
2782 if (floatx80_unpack_canonical(&p128, a, s)) {
2783 parts_float_to_float_narrow(&p64, &p128, s);
2784 } else {
2785 parts_default_nan(&p64, s);
2786 }
2787 return float32_round_pack_canonical(&p64, s);
2788 }
2789
2790 float64 floatx80_to_float64(floatx80 a, float_status *s)
2791 {
2792 FloatParts64 p64;
2793 FloatParts128 p128;
2794
2795 if (floatx80_unpack_canonical(&p128, a, s)) {
2796 parts_float_to_float_narrow(&p64, &p128, s);
2797 } else {
2798 parts_default_nan(&p64, s);
2799 }
2800 return float64_round_pack_canonical(&p64, s);
2801 }
2802
2803 float128 floatx80_to_float128(floatx80 a, float_status *s)
2804 {
2805 FloatParts128 p;
2806
2807 if (floatx80_unpack_canonical(&p, a, s)) {
2808 parts_float_to_float(&p, s);
2809 } else {
2810 parts_default_nan(&p, s);
2811 }
2812 return float128_round_pack_canonical(&p, s);
2813 }
2814
2815 floatx80 float32_to_floatx80(float32 a, float_status *s)
2816 {
2817 FloatParts64 p64;
2818 FloatParts128 p128;
2819
2820 float32_unpack_canonical(&p64, a, s);
2821 parts_float_to_float_widen(&p128, &p64, s);
2822 return floatx80_round_pack_canonical(&p128, s);
2823 }
2824
2825 floatx80 float64_to_floatx80(float64 a, float_status *s)
2826 {
2827 FloatParts64 p64;
2828 FloatParts128 p128;
2829
2830 float64_unpack_canonical(&p64, a, s);
2831 parts_float_to_float_widen(&p128, &p64, s);
2832 return floatx80_round_pack_canonical(&p128, s);
2833 }
2834
2835 floatx80 float128_to_floatx80(float128 a, float_status *s)
2836 {
2837 FloatParts128 p;
2838
2839 float128_unpack_canonical(&p, a, s);
2840 parts_float_to_float(&p, s);
2841 return floatx80_round_pack_canonical(&p, s);
2842 }
2843
2844 /*
2845 * Round to integral value
2846 */
2847
2848 float16 float16_round_to_int(float16 a, float_status *s)
2849 {
2850 FloatParts64 p;
2851
2852 float16_unpack_canonical(&p, a, s);
2853 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2854 return float16_round_pack_canonical(&p, s);
2855 }
2856
2857 float32 float32_round_to_int(float32 a, float_status *s)
2858 {
2859 FloatParts64 p;
2860
2861 float32_unpack_canonical(&p, a, s);
2862 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2863 return float32_round_pack_canonical(&p, s);
2864 }
2865
2866 float64 float64_round_to_int(float64 a, float_status *s)
2867 {
2868 FloatParts64 p;
2869
2870 float64_unpack_canonical(&p, a, s);
2871 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2872 return float64_round_pack_canonical(&p, s);
2873 }
2874
2875 bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2876 {
2877 FloatParts64 p;
2878
2879 bfloat16_unpack_canonical(&p, a, s);
2880 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2881 return bfloat16_round_pack_canonical(&p, s);
2882 }
2883
2884 float128 float128_round_to_int(float128 a, float_status *s)
2885 {
2886 FloatParts128 p;
2887
2888 float128_unpack_canonical(&p, a, s);
2889 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2890 return float128_round_pack_canonical(&p, s);
2891 }
2892
2893 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
2894 {
2895 FloatParts128 p;
2896
2897 if (!floatx80_unpack_canonical(&p, a, status)) {
2898 return floatx80_default_nan(status);
2899 }
2900
2901 parts_round_to_int(&p, status->float_rounding_mode, 0, status,
2902 &floatx80_params[status->floatx80_rounding_precision]);
2903 return floatx80_round_pack_canonical(&p, status);
2904 }
2905
2906 /*
2907 * Floating-point to signed integer conversions
2908 */
2909
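/*
 * Editorial note: the *_scalbn converters round a * 2^scale to an
 * integer under the given rounding mode, saturating and raising the
 * invalid flag on overflow; this folds fixed-point scaling into the
 * conversion, as in this sketch (editorial; example_* is hypothetical):
 */
static int32_t example_float_to_q16_16(float32 f, float_status *st)
{
    /* Q16.16 fixed point: multiply by 2^16, then truncate toward zero. */
    return float32_to_int32_scalbn(f, float_round_to_zero, 16, st);
}
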
2910 int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2911 float_status *s)
2912 {
2913 FloatParts64 p;
2914
2915 float16_unpack_canonical(&p, a, s);
2916 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
2917 }
2918
2919 int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2920 float_status *s)
2921 {
2922 FloatParts64 p;
2923
2924 float16_unpack_canonical(&p, a, s);
2925 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2926 }
2927
2928 int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2929 float_status *s)
2930 {
2931 FloatParts64 p;
2932
2933 float16_unpack_canonical(&p, a, s);
2934 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2935 }
2936
2937 int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2938 float_status *s)
2939 {
2940 FloatParts64 p;
2941
2942 float16_unpack_canonical(&p, a, s);
2943 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2944 }
2945
2946 int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2947 float_status *s)
2948 {
2949 FloatParts64 p;
2950
2951 float32_unpack_canonical(&p, a, s);
2952 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2953 }
2954
2955 int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2956 float_status *s)
2957 {
2958 FloatParts64 p;
2959
2960 float32_unpack_canonical(&p, a, s);
2961 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2962 }
2963
2964 int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2965 float_status *s)
2966 {
2967 FloatParts64 p;
2968
2969 float32_unpack_canonical(&p, a, s);
2970 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2971 }
2972
2973 int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2974 float_status *s)
2975 {
2976 FloatParts64 p;
2977
2978 float64_unpack_canonical(&p, a, s);
2979 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2980 }
2981
2982 int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2983 float_status *s)
2984 {
2985 FloatParts64 p;
2986
2987 float64_unpack_canonical(&p, a, s);
2988 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2989 }
2990
2991 int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2992 float_status *s)
2993 {
2994 FloatParts64 p;
2995
2996 float64_unpack_canonical(&p, a, s);
2997 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2998 }
2999
3000 int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3001 float_status *s)
3002 {
3003 FloatParts64 p;
3004
3005 bfloat16_unpack_canonical(&p, a, s);
3006 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3007 }
3008
3009 int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3010 float_status *s)
3011 {
3012 FloatParts64 p;
3013
3014 bfloat16_unpack_canonical(&p, a, s);
3015 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3016 }
3017
3018 int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3019 float_status *s)
3020 {
3021 FloatParts64 p;
3022
3023 bfloat16_unpack_canonical(&p, a, s);
3024 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3025 }
3026
3027 static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3028 int scale, float_status *s)
3029 {
3030 FloatParts128 p;
3031
3032 float128_unpack_canonical(&p, a, s);
3033 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3034 }
3035
3036 static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3037 int scale, float_status *s)
3038 {
3039 FloatParts128 p;
3040
3041 float128_unpack_canonical(&p, a, s);
3042 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3043 }
3044
3045 static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3046 int scale, float_status *s)
3047 {
3048 FloatParts128 p;
3049
3050 if (!floatx80_unpack_canonical(&p, a, s)) {
3051 parts_default_nan(&p, s);
3052 }
3053 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3054 }
3055
3056 static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3057 int scale, float_status *s)
3058 {
3059 FloatParts128 p;
3060
3061 if (!floatx80_unpack_canonical(&p, a, s)) {
3062 parts_default_nan(&p, s);
3063 }
3064 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3065 }
3066
3067 int8_t float16_to_int8(float16 a, float_status *s)
3068 {
3069 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3070 }
3071
3072 int16_t float16_to_int16(float16 a, float_status *s)
3073 {
3074 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3075 }
3076
3077 int32_t float16_to_int32(float16 a, float_status *s)
3078 {
3079 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3080 }
3081
3082 int64_t float16_to_int64(float16 a, float_status *s)
3083 {
3084 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3085 }
3086
3087 int16_t float32_to_int16(float32 a, float_status *s)
3088 {
3089 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3090 }
3091
3092 int32_t float32_to_int32(float32 a, float_status *s)
3093 {
3094 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3095 }
3096
3097 int64_t float32_to_int64(float32 a, float_status *s)
3098 {
3099 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3100 }
3101
3102 int16_t float64_to_int16(float64 a, float_status *s)
3103 {
3104 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3105 }
3106
3107 int32_t float64_to_int32(float64 a, float_status *s)
3108 {
3109 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3110 }
3111
3112 int64_t float64_to_int64(float64 a, float_status *s)
3113 {
3114 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3115 }
3116
3117 int32_t float128_to_int32(float128 a, float_status *s)
3118 {
3119 return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3120 }
3121
3122 int64_t float128_to_int64(float128 a, float_status *s)
3123 {
3124 return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3125 }
3126
3127 int32_t floatx80_to_int32(floatx80 a, float_status *s)
3128 {
3129 return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3130 }
3131
3132 int64_t floatx80_to_int64(floatx80 a, float_status *s)
3133 {
3134 return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3135 }
3136
3137 int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3138 {
3139 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3140 }
3141
3142 int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3143 {
3144 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3145 }
3146
3147 int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3148 {
3149 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3150 }
3151
3152 int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3153 {
3154 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3155 }
3156
3157 int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3158 {
3159 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3160 }
3161
3162 int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3163 {
3164 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3165 }
3166
3167 int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3168 {
3169 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3170 }
3171
3172 int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3173 {
3174 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3175 }
3176
3177 int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3178 {
3179 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3180 }
3181
3182 int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3183 {
3184 return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3185 }
3186
3187 int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3188 {
3189 return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3190 }
3191
3192 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3193 {
3194 return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3195 }
3196
3197 int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3198 {
3199 return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3200 }
3201
3202 int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3203 {
3204 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3205 }
3206
3207 int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3208 {
3209 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3210 }
3211
3212 int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3213 {
3214 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3215 }
3216
3217 int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3218 {
3219 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3220 }
3221
3222 int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3223 {
3224 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3225 }
3226
3227 int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3228 {
3229 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3230 }
3231
3232 /*
3233 * Floating-point to unsigned integer conversions
3234 */
3235
3236 uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3237 float_status *s)
3238 {
3239 FloatParts64 p;
3240
3241 float16_unpack_canonical(&p, a, s);
3242 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3243 }
3244
3245 uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3246 float_status *s)
3247 {
3248 FloatParts64 p;
3249
3250 float16_unpack_canonical(&p, a, s);
3251 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3252 }
3253
3254 uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3255 float_status *s)
3256 {
3257 FloatParts64 p;
3258
3259 float16_unpack_canonical(&p, a, s);
3260 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3261 }
3262
3263 uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3264 float_status *s)
3265 {
3266 FloatParts64 p;
3267
3268 float16_unpack_canonical(&p, a, s);
3269 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3270 }
3271
3272 uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3273 float_status *s)
3274 {
3275 FloatParts64 p;
3276
3277 float32_unpack_canonical(&p, a, s);
3278 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3279 }
3280
3281 uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3282 float_status *s)
3283 {
3284 FloatParts64 p;
3285
3286 float32_unpack_canonical(&p, a, s);
3287 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3288 }
3289
3290 uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3291 float_status *s)
3292 {
3293 FloatParts64 p;
3294
3295 float32_unpack_canonical(&p, a, s);
3296 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3297 }
3298
3299 uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3300 float_status *s)
3301 {
3302 FloatParts64 p;
3303
3304 float64_unpack_canonical(&p, a, s);
3305 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3306 }
3307
3308 uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3309 float_status *s)
3310 {
3311 FloatParts64 p;
3312
3313 float64_unpack_canonical(&p, a, s);
3314 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3315 }
3316
3317 uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3318 float_status *s)
3319 {
3320 FloatParts64 p;
3321
3322 float64_unpack_canonical(&p, a, s);
3323 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3324 }
3325
3326 uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3327 int scale, float_status *s)
3328 {
3329 FloatParts64 p;
3330
3331 bfloat16_unpack_canonical(&p, a, s);
3332 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3333 }
3334
3335 uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3336 int scale, float_status *s)
3337 {
3338 FloatParts64 p;
3339
3340 bfloat16_unpack_canonical(&p, a, s);
3341 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3342 }
3343
3344 uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3345 int scale, float_status *s)
3346 {
3347 FloatParts64 p;
3348
3349 bfloat16_unpack_canonical(&p, a, s);
3350 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3351 }
3352
3353 static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3354 int scale, float_status *s)
3355 {
3356 FloatParts128 p;
3357
3358 float128_unpack_canonical(&p, a, s);
3359 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3360 }
3361
3362 static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3363 int scale, float_status *s)
3364 {
3365 FloatParts128 p;
3366
3367 float128_unpack_canonical(&p, a, s);
3368 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3369 }
3370
3371 uint8_t float16_to_uint8(float16 a, float_status *s)
3372 {
3373 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3374 }
3375
3376 uint16_t float16_to_uint16(float16 a, float_status *s)
3377 {
3378 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3379 }
3380
3381 uint32_t float16_to_uint32(float16 a, float_status *s)
3382 {
3383 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3384 }
3385
3386 uint64_t float16_to_uint64(float16 a, float_status *s)
3387 {
3388 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3389 }
3390
3391 uint16_t float32_to_uint16(float32 a, float_status *s)
3392 {
3393 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3394 }
3395
3396 uint32_t float32_to_uint32(float32 a, float_status *s)
3397 {
3398 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3399 }
3400
3401 uint64_t float32_to_uint64(float32 a, float_status *s)
3402 {
3403 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3404 }
3405
3406 uint16_t float64_to_uint16(float64 a, float_status *s)
3407 {
3408 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3409 }
3410
3411 uint32_t float64_to_uint32(float64 a, float_status *s)
3412 {
3413 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3414 }
3415
3416 uint64_t float64_to_uint64(float64 a, float_status *s)
3417 {
3418 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3419 }
3420
3421 uint32_t float128_to_uint32(float128 a, float_status *s)
3422 {
3423 return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3424 }
3425
3426 uint64_t float128_to_uint64(float128 a, float_status *s)
3427 {
3428 return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3429 }
3430
3431 uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3432 {
3433 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3434 }
3435
3436 uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3437 {
3438 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3439 }
3440
3441 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3442 {
3443 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3444 }
3445
3446 uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3447 {
3448 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3449 }
3450
3451 uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3452 {
3453 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3454 }
3455
3456 uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3457 {
3458 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3459 }
3460
3461 uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3462 {
3463 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3464 }
3465
3466 uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3467 {
3468 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3469 }
3470
3471 uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3472 {
3473 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3474 }
3475
3476 uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3477 {
3478 return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3479 }
3480
3481 uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3482 {
3483 return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3484 }
3485
3486 uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3487 {
3488 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3489 }
3490
3491 uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3492 {
3493 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3494 }
3495
3496 uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3497 {
3498 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3499 }
3500
3501 uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3502 {
3503 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3504 }
3505
3506 uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3507 {
3508 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3509 }
3510
3511 uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3512 {
3513 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3514 }
3515
3516 /*
3517 * Signed integer to floating-point conversions
3518 */
3519
3520 float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3521 {
3522 FloatParts64 p;
3523
3524 parts_sint_to_float(&p, a, scale, status);
3525 return float16_round_pack_canonical(&p, status);
3526 }
3527
3528 float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3529 {
3530 return int64_to_float16_scalbn(a, scale, status);
3531 }
3532
3533 float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3534 {
3535 return int64_to_float16_scalbn(a, scale, status);
3536 }
3537
3538 float16 int64_to_float16(int64_t a, float_status *status)
3539 {
3540 return int64_to_float16_scalbn(a, 0, status);
3541 }
3542
3543 float16 int32_to_float16(int32_t a, float_status *status)
3544 {
3545 return int64_to_float16_scalbn(a, 0, status);
3546 }
3547
3548 float16 int16_to_float16(int16_t a, float_status *status)
3549 {
3550 return int64_to_float16_scalbn(a, 0, status);
3551 }
3552
3553 float16 int8_to_float16(int8_t a, float_status *status)
3554 {
3555 return int64_to_float16_scalbn(a, 0, status);
3556 }
3557
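/*
 * Editorial note: the scale == 0 fast paths below cannot overflow, since
 * every int64_t is far inside the float32/float64 range, and
 * can_use_fpu() only returns true when the rounding mode is nearest-even
 * and the inexact flag is already sticky, so the host conversion needs
 * no extra exception bookkeeping.
 */
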
3558 float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3559 {
3560 FloatParts64 p;
3561
3562 /* Without scaling, there are no overflow concerns. */
3563 if (likely(scale == 0) && can_use_fpu(status)) {
3564 union_float32 ur;
3565 ur.h = a;
3566 return ur.s;
3567 }
3568
3569 parts_sint_to_float(&p, a, scale, status);
3570 return float32_round_pack_canonical(&p, status);
3571 }
3572
3573 float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3574 {
3575 return int64_to_float32_scalbn(a, scale, status);
3576 }
3577
3578 float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3579 {
3580 return int64_to_float32_scalbn(a, scale, status);
3581 }
3582
3583 float32 int64_to_float32(int64_t a, float_status *status)
3584 {
3585 return int64_to_float32_scalbn(a, 0, status);
3586 }
3587
3588 float32 int32_to_float32(int32_t a, float_status *status)
3589 {
3590 return int64_to_float32_scalbn(a, 0, status);
3591 }
3592
3593 float32 int16_to_float32(int16_t a, float_status *status)
3594 {
3595 return int64_to_float32_scalbn(a, 0, status);
3596 }
3597
3598 float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3599 {
3600 FloatParts64 p;
3601
3602 /* Without scaling, there are no overflow concerns. */
3603 if (likely(scale == 0) && can_use_fpu(status)) {
3604 union_float64 ur;
3605 ur.h = a;
3606 return ur.s;
3607 }
3608
3609 parts_sint_to_float(&p, a, scale, status);
3610 return float64_round_pack_canonical(&p, status);
3611 }
3612
3613 float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3614 {
3615 return int64_to_float64_scalbn(a, scale, status);
3616 }
3617
3618 float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3619 {
3620 return int64_to_float64_scalbn(a, scale, status);
3621 }
3622
3623 float64 int64_to_float64(int64_t a, float_status *status)
3624 {
3625 return int64_to_float64_scalbn(a, 0, status);
3626 }
3627
3628 float64 int32_to_float64(int32_t a, float_status *status)
3629 {
3630 return int64_to_float64_scalbn(a, 0, status);
3631 }
3632
3633 float64 int16_to_float64(int16_t a, float_status *status)
3634 {
3635 return int64_to_float64_scalbn(a, 0, status);
3636 }
3637
3638 bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3639 {
3640 FloatParts64 p;
3641
3642 parts_sint_to_float(&p, a, scale, status);
3643 return bfloat16_round_pack_canonical(&p, status);
3644 }
3645
3646 bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3647 {
3648 return int64_to_bfloat16_scalbn(a, scale, status);
3649 }
3650
3651 bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3652 {
3653 return int64_to_bfloat16_scalbn(a, scale, status);
3654 }
3655
3656 bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3657 {
3658 return int64_to_bfloat16_scalbn(a, 0, status);
3659 }
3660
3661 bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3662 {
3663 return int64_to_bfloat16_scalbn(a, 0, status);
3664 }
3665
3666 bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3667 {
3668 return int64_to_bfloat16_scalbn(a, 0, status);
3669 }
3670
3671 float128 int64_to_float128(int64_t a, float_status *status)
3672 {
3673 FloatParts128 p;
3674
3675 parts_sint_to_float(&p, a, 0, status);
3676 return float128_round_pack_canonical(&p, status);
3677 }
3678
3679 float128 int32_to_float128(int32_t a, float_status *status)
3680 {
3681 return int64_to_float128(a, status);
3682 }
3683
3684 floatx80 int64_to_floatx80(int64_t a, float_status *status)
3685 {
3686 FloatParts128 p;
3687
3688 parts_sint_to_float(&p, a, 0, status);
3689 return floatx80_round_pack_canonical(&p, status);
3690 }
3691
3692 floatx80 int32_to_floatx80(int32_t a, float_status *status)
3693 {
3694 return int64_to_floatx80(a, status);
3695 }
3696
3697 /*
3698 * Unsigned Integer to floating-point conversions
3699 */
3700
3701 float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3702 {
3703 FloatParts64 p;
3704
3705 parts_uint_to_float(&p, a, scale, status);
3706 return float16_round_pack_canonical(&p, status);
3707 }
3708
3709 float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3710 {
3711 return uint64_to_float16_scalbn(a, scale, status);
3712 }
3713
3714 float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3715 {
3716 return uint64_to_float16_scalbn(a, scale, status);
3717 }
3718
3719 float16 uint64_to_float16(uint64_t a, float_status *status)
3720 {
3721 return uint64_to_float16_scalbn(a, 0, status);
3722 }
3723
3724 float16 uint32_to_float16(uint32_t a, float_status *status)
3725 {
3726 return uint64_to_float16_scalbn(a, 0, status);
3727 }
3728
3729 float16 uint16_to_float16(uint16_t a, float_status *status)
3730 {
3731 return uint64_to_float16_scalbn(a, 0, status);
3732 }
3733
3734 float16 uint8_to_float16(uint8_t a, float_status *status)
3735 {
3736 return uint64_to_float16_scalbn(a, 0, status);
3737 }
3738
3739 float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3740 {
3741 FloatParts64 p;
3742
3743 /* Without scaling, there are no overflow concerns. */
3744 if (likely(scale == 0) && can_use_fpu(status)) {
3745 union_float32 ur;
3746 ur.h = a;
3747 return ur.s;
3748 }
3749
3750 parts_uint_to_float(&p, a, scale, status);
3751 return float32_round_pack_canonical(&p, status);
3752 }
3753
3754 float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3755 {
3756 return uint64_to_float32_scalbn(a, scale, status);
3757 }
3758
3759 float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3760 {
3761 return uint64_to_float32_scalbn(a, scale, status);
3762 }
3763
3764 float32 uint64_to_float32(uint64_t a, float_status *status)
3765 {
3766 return uint64_to_float32_scalbn(a, 0, status);
3767 }
3768
3769 float32 uint32_to_float32(uint32_t a, float_status *status)
3770 {
3771 return uint64_to_float32_scalbn(a, 0, status);
3772 }
3773
3774 float32 uint16_to_float32(uint16_t a, float_status *status)
3775 {
3776 return uint64_to_float32_scalbn(a, 0, status);
3777 }
3778
3779 float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3780 {
3781 FloatParts64 p;
3782
3783 /* Without scaling, there are no overflow concerns. */
3784 if (likely(scale == 0) && can_use_fpu(status)) {
3785 union_float64 ur;
3786 ur.h = a;
3787 return ur.s;
3788 }
3789
3790 parts_uint_to_float(&p, a, scale, status);
3791 return float64_round_pack_canonical(&p, status);
3792 }
3793
3794 float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3795 {
3796 return uint64_to_float64_scalbn(a, scale, status);
3797 }
3798
3799 float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3800 {
3801 return uint64_to_float64_scalbn(a, scale, status);
3802 }
3803
3804 float64 uint64_to_float64(uint64_t a, float_status *status)
3805 {
3806 return uint64_to_float64_scalbn(a, 0, status);
3807 }
3808
3809 float64 uint32_to_float64(uint32_t a, float_status *status)
3810 {
3811 return uint64_to_float64_scalbn(a, 0, status);
3812 }
3813
3814 float64 uint16_to_float64(uint16_t a, float_status *status)
3815 {
3816 return uint64_to_float64_scalbn(a, 0, status);
3817 }
3818
3819 bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3820 {
3821 FloatParts64 p;
3822
3823 parts_uint_to_float(&p, a, scale, status);
3824 return bfloat16_round_pack_canonical(&p, status);
3825 }
3826
3827 bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3828 {
3829 return uint64_to_bfloat16_scalbn(a, scale, status);
3830 }
3831
3832 bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3833 {
3834 return uint64_to_bfloat16_scalbn(a, scale, status);
3835 }
3836
3837 bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3838 {
3839 return uint64_to_bfloat16_scalbn(a, 0, status);
3840 }
3841
3842 bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3843 {
3844 return uint64_to_bfloat16_scalbn(a, 0, status);
3845 }
3846
3847 bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3848 {
3849 return uint64_to_bfloat16_scalbn(a, 0, status);
3850 }
3851
3852 float128 uint64_to_float128(uint64_t a, float_status *status)
3853 {
3854 FloatParts128 p;
3855
3856 parts_uint_to_float(&p, a, 0, status);
3857 return float128_round_pack_canonical(&p, status);
3858 }
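/*
 * Usage sketch (illustrative values, not from the original file): the
 * _scalbn conversions return a * 2**scale with a single rounding, e.g.
 *
 *     float_status st = { };   /* zero-init: round-to-nearest-even */
 *     float32 f = uint32_to_float32_scalbn(5, 3, &st);
 *     /* f == 40.0f (5 * 2**3), exact, so no status flags are set */
 */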
3859
3860 /*
3861 * Minimum and maximum
3862 */
3863
3864 static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3865 {
3866 FloatParts64 pa, pb, *pr;
3867
3868 float16_unpack_canonical(&pa, a, s);
3869 float16_unpack_canonical(&pb, b, s);
3870 pr = parts_minmax(&pa, &pb, s, flags);
3871
3872 return float16_round_pack_canonical(pr, s);
3873 }
3874
3875 static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3876 float_status *s, int flags)
3877 {
3878 FloatParts64 pa, pb, *pr;
3879
3880 bfloat16_unpack_canonical(&pa, a, s);
3881 bfloat16_unpack_canonical(&pb, b, s);
3882 pr = parts_minmax(&pa, &pb, s, flags);
3883
3884 return bfloat16_round_pack_canonical(pr, s);
3885 }
3886
3887 static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
3888 {
3889 FloatParts64 pa, pb, *pr;
3890
3891 float32_unpack_canonical(&pa, a, s);
3892 float32_unpack_canonical(&pb, b, s);
3893 pr = parts_minmax(&pa, &pb, s, flags);
3894
3895 return float32_round_pack_canonical(pr, s);
3896 }
3897
3898 static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
3899 {
3900 FloatParts64 pa, pb, *pr;
3901
3902 float64_unpack_canonical(&pa, a, s);
3903 float64_unpack_canonical(&pb, b, s);
3904 pr = parts_minmax(&pa, &pb, s, flags);
3905
3906 return float64_round_pack_canonical(pr, s);
3907 }
3908
3909 static float128 float128_minmax(float128 a, float128 b,
3910 float_status *s, int flags)
3911 {
3912 FloatParts128 pa, pb, *pr;
3913
3914 float128_unpack_canonical(&pa, a, s);
3915 float128_unpack_canonical(&pb, b, s);
3916 pr = parts_minmax(&pa, &pb, s, flags);
3917
3918 return float128_round_pack_canonical(pr, s);
3919 }
3920
3921 #define MINMAX_1(type, name, flags) \
3922 type type##_##name(type a, type b, float_status *s) \
3923 { return type##_minmax(a, b, s, flags); }
3924
3925 #define MINMAX_2(type) \
3926 MINMAX_1(type, max, 0) \
3927 MINMAX_1(type, maxnum, minmax_isnum) \
3928 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
3929 MINMAX_1(type, min, minmax_ismin) \
3930 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
3931 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3932
3933 MINMAX_2(float16)
3934 MINMAX_2(bfloat16)
3935 MINMAX_2(float32)
3936 MINMAX_2(float64)
3937 MINMAX_2(float128)
3938
3939 #undef MINMAX_1
3940 #undef MINMAX_2
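/*
 * For reference, each MINMAX_1 instantiation expands to a one-line
 * public wrapper; e.g. MINMAX_1(float32, minnum, minmax_ismin | minmax_isnum)
 * becomes:
 *
 *     float32 float32_minnum(float32 a, float32 b, float_status *s)
 *     { return float32_minmax(a, b, s, minmax_ismin | minmax_isnum); }
 */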
3941
3942 /*
3943 * Floating point compare
3944 */
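/*
 * The plain *_compare variants signal float_flag_invalid for any NaN
 * operand; the *_compare_quiet variants (is_quiet == true) signal it
 * only when a signalling NaN is present, matching the IEEE 754
 * distinction between signalling and quiet comparison predicates.
 */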
3945
3946 static FloatRelation QEMU_FLATTEN
3947 float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
3948 {
3949 FloatParts64 pa, pb;
3950
3951 float16_unpack_canonical(&pa, a, s);
3952 float16_unpack_canonical(&pb, b, s);
3953 return parts_compare(&pa, &pb, s, is_quiet);
3954 }
3955
3956 FloatRelation float16_compare(float16 a, float16 b, float_status *s)
3957 {
3958 return float16_do_compare(a, b, s, false);
3959 }
3960
3961 FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
3962 {
3963 return float16_do_compare(a, b, s, true);
3964 }
3965
3966 static FloatRelation QEMU_SOFTFLOAT_ATTR
3967 float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
3968 {
3969 FloatParts64 pa, pb;
3970
3971 float32_unpack_canonical(&pa, a, s);
3972 float32_unpack_canonical(&pb, b, s);
3973 return parts_compare(&pa, &pb, s, is_quiet);
3974 }
3975
3976 static FloatRelation QEMU_FLATTEN
3977 float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
3978 {
3979 union_float32 ua, ub;
3980
3981 ua.s = xa;
3982 ub.s = xb;
3983
3984 if (QEMU_NO_HARDFLOAT) {
3985 goto soft;
3986 }
3987
3988 float32_input_flush2(&ua.s, &ub.s, s);
3989 if (isgreaterequal(ua.h, ub.h)) {
3990 if (isgreater(ua.h, ub.h)) {
3991 return float_relation_greater;
3992 }
3993 return float_relation_equal;
3994 }
3995 if (likely(isless(ua.h, ub.h))) {
3996 return float_relation_less;
3997 }
3998 /*
3999 * The only condition remaining is unordered.
4000 * Fall through to set flags.
4001 */
4002 soft:
4003 return float32_do_compare(ua.s, ub.s, s, is_quiet);
4004 }
4005
4006 FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4007 {
4008 return float32_hs_compare(a, b, s, false);
4009 }
4010
4011 FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4012 {
4013 return float32_hs_compare(a, b, s, true);
4014 }
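/*
 * A minimal caller sketch (hypothetical helper, not part of this file);
 * the result is one of float_relation_less, float_relation_equal,
 * float_relation_greater or float_relation_unordered:
 *
 *     static bool f32_le(float32 a, float32 b, float_status *s)
 *     {
 *         FloatRelation r = float32_compare(a, b, s);
 *         return r == float_relation_less || r == float_relation_equal;
 *     }
 */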
4015
4016 static FloatRelation QEMU_SOFTFLOAT_ATTR
4017 float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4018 {
4019 FloatParts64 pa, pb;
4020
4021 float64_unpack_canonical(&pa, a, s);
4022 float64_unpack_canonical(&pb, b, s);
4023 return parts_compare(&pa, &pb, s, is_quiet);
4024 }
4025
4026 static FloatRelation QEMU_FLATTEN
4027 float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4028 {
4029 union_float64 ua, ub;
4030
4031 ua.s = xa;
4032 ub.s = xb;
4033
4034 if (QEMU_NO_HARDFLOAT) {
4035 goto soft;
4036 }
4037
4038 float64_input_flush2(&ua.s, &ub.s, s);
4039 if (isgreaterequal(ua.h, ub.h)) {
4040 if (isgreater(ua.h, ub.h)) {
4041 return float_relation_greater;
4042 }
4043 return float_relation_equal;
4044 }
4045 if (likely(isless(ua.h, ub.h))) {
4046 return float_relation_less;
4047 }
4048 /*
4049 * The only condition remaining is unordered.
4050 * Fall through to set flags.
4051 */
4052 soft:
4053 return float64_do_compare(ua.s, ub.s, s, is_quiet);
4054 }
4055
4056 FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4057 {
4058 return float64_hs_compare(a, b, s, false);
4059 }
4060
4061 FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4062 {
4063 return float64_hs_compare(a, b, s, true);
4064 }
4065
4066 static FloatRelation QEMU_FLATTEN
4067 bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4068 {
4069 FloatParts64 pa, pb;
4070
4071 bfloat16_unpack_canonical(&pa, a, s);
4072 bfloat16_unpack_canonical(&pb, b, s);
4073 return parts_compare(&pa, &pb, s, is_quiet);
4074 }
4075
4076 FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4077 {
4078 return bfloat16_do_compare(a, b, s, false);
4079 }
4080
4081 FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4082 {
4083 return bfloat16_do_compare(a, b, s, true);
4084 }
4085
4086 static FloatRelation QEMU_FLATTEN
4087 float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4088 {
4089 FloatParts128 pa, pb;
4090
4091 float128_unpack_canonical(&pa, a, s);
4092 float128_unpack_canonical(&pb, b, s);
4093 return parts_compare(&pa, &pb, s, is_quiet);
4094 }
4095
4096 FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4097 {
4098 return float128_do_compare(a, b, s, false);
4099 }
4100
4101 FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4102 {
4103 return float128_do_compare(a, b, s, true);
4104 }
4105
4106 static FloatRelation QEMU_FLATTEN
4107 floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4108 {
4109 FloatParts128 pa, pb;
4110
4111 if (!floatx80_unpack_canonical(&pa, a, s) ||
4112 !floatx80_unpack_canonical(&pb, b, s)) {
4113 return float_relation_unordered;
4114 }
4115 return parts_compare(&pa, &pb, s, is_quiet);
4116 }
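/*
 * floatx80_unpack_canonical() fails (raising float_flag_invalid) for
 * invalid extended-precision encodings; such operands simply compare
 * as unordered here.
 */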
4117
4118 FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4119 {
4120 return floatx80_do_compare(a, b, s, false);
4121 }
4122
4123 FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4124 {
4125 return floatx80_do_compare(a, b, s, true);
4126 }
4127
4128 /*
4129 * Scale by 2**N
4130 */
4131
4132 float16 float16_scalbn(float16 a, int n, float_status *status)
4133 {
4134 FloatParts64 p;
4135
4136 float16_unpack_canonical(&p, a, status);
4137 parts_scalbn(&p, n, status);
4138 return float16_round_pack_canonical(&p, status);
4139 }
4140
4141 float32 float32_scalbn(float32 a, int n, float_status *status)
4142 {
4143 FloatParts64 p;
4144
4145 float32_unpack_canonical(&p, a, status);
4146 parts_scalbn(&p, n, status);
4147 return float32_round_pack_canonical(&p, status);
4148 }
4149
4150 float64 float64_scalbn(float64 a, int n, float_status *status)
4151 {
4152 FloatParts64 p;
4153
4154 float64_unpack_canonical(&p, a, status);
4155 parts_scalbn(&p, n, status);
4156 return float64_round_pack_canonical(&p, status);
4157 }
4158
4159 bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4160 {
4161 FloatParts64 p;
4162
4163 bfloat16_unpack_canonical(&p, a, status);
4164 parts_scalbn(&p, n, status);
4165 return bfloat16_round_pack_canonical(&p, status);
4166 }
4167
4168 float128 float128_scalbn(float128 a, int n, float_status *status)
4169 {
4170 FloatParts128 p;
4171
4172 float128_unpack_canonical(&p, a, status);
4173 parts_scalbn(&p, n, status);
4174 return float128_round_pack_canonical(&p, status);
4175 }
4176
4177 floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4178 {
4179 FloatParts128 p;
4180
4181 if (!floatx80_unpack_canonical(&p, a, status)) {
4182 return floatx80_default_nan(status);
4183 }
4184 parts_scalbn(&p, n, status);
4185 return floatx80_round_pack_canonical(&p, status);
4186 }
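/*
 * Usage sketch (illustrative values): scaling by 2**n only adjusts the
 * exponent, so the result is exact unless it overflows or underflows:
 *
 *     float_status st = { };
 *     float64 x = float64_scalbn(float64_one, 10, &st);
 *     /* x == 1024.0, with no flags raised */
 */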
4187
4188 /*
4189 * Square Root
4190 */
4191
4192 float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4193 {
4194 FloatParts64 p;
4195
4196 float16_unpack_canonical(&p, a, status);
4197 parts_sqrt(&p, status, &float16_params);
4198 return float16_round_pack_canonical(&p, status);
4199 }
4200
4201 static float32 QEMU_SOFTFLOAT_ATTR
4202 soft_f32_sqrt(float32 a, float_status *status)
4203 {
4204 FloatParts64 p;
4205
4206 float32_unpack_canonical(&p, a, status);
4207 parts_sqrt(&p, status, &float32_params);
4208 return float32_round_pack_canonical(&p, status);
4209 }
4210
4211 static float64 QEMU_SOFTFLOAT_ATTR
4212 soft_f64_sqrt(float64 a, float_status *status)
4213 {
4214 FloatParts64 p;
4215
4216 float64_unpack_canonical(&p, a, status);
4217 parts_sqrt(&p, status, &float64_params);
4218 return float64_round_pack_canonical(&p, status);
4219 }
4220
4221 float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4222 {
4223 union_float32 ua, ur;
4224
4225 ua.s = xa;
4226 if (unlikely(!can_use_fpu(s))) {
4227 goto soft;
4228 }
4229
4230 float32_input_flush1(&ua.s, s);
4231 if (QEMU_HARDFLOAT_1F32_USE_FP) {
4232 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4233 fpclassify(ua.h) == FP_ZERO) ||
4234 signbit(ua.h))) {
4235 goto soft;
4236 }
4237 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4238 float32_is_neg(ua.s))) {
4239 goto soft;
4240 }
4241 ur.h = sqrtf(ua.h);
4242 return ur.s;
4243
4244 soft:
4245 return soft_f32_sqrt(ua.s, s);
4246 }
4247
4248 float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4249 {
4250 union_float64 ua, ur;
4251
4252 ua.s = xa;
4253 if (unlikely(!can_use_fpu(s))) {
4254 goto soft;
4255 }
4256
4257 float64_input_flush1(&ua.s, s);
4258 if (QEMU_HARDFLOAT_1F64_USE_FP) {
4259 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4260 fpclassify(ua.h) == FP_ZERO) ||
4261 signbit(ua.h))) {
4262 goto soft;
4263 }
4264 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4265 float64_is_neg(ua.s))) {
4266 goto soft;
4267 }
4268 ur.h = sqrt(ua.h);
4269 return ur.s;
4270
4271 soft:
4272 return soft_f64_sqrt(ua.s, s);
4273 }
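/*
 * Both hardfloat wrappers above use the host sqrt only for non-negative
 * zero or normal operands; negative, infinite, NaN and unflushed
 * denormal inputs take the softfloat path so that QEMU's exception
 * flags and NaN propagation rules are honoured.
 */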
4274
4275 bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4276 {
4277 FloatParts64 p;
4278
4279 bfloat16_unpack_canonical(&p, a, status);
4280 parts_sqrt(&p, status, &bfloat16_params);
4281 return bfloat16_round_pack_canonical(&p, status);
4282 }
4283
4284 float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4285 {
4286 FloatParts128 p;
4287
4288 float128_unpack_canonical(&p, a, status);
4289 parts_sqrt(&p, status, &float128_params);
4290 return float128_round_pack_canonical(&p, status);
4291 }
4292
4293 floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4294 {
4295 FloatParts128 p;
4296
4297 if (!floatx80_unpack_canonical(&p, a, s)) {
4298 return floatx80_default_nan(s);
4299 }
4300 parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4301 return floatx80_round_pack_canonical(&p, s);
4302 }
4303
4304 /*
4305 * log2
4306 */
4307 float32 float32_log2(float32 a, float_status *status)
4308 {
4309 FloatParts64 p;
4310
4311 float32_unpack_canonical(&p, a, status);
4312 parts_log2(&p, status, &float32_params);
4313 return float32_round_pack_canonical(&p, status);
4314 }
4315
4316 float64 float64_log2(float64 a, float_status *status)
4317 {
4318 FloatParts64 p;
4319
4320 float64_unpack_canonical(&p, a, status);
4321 parts_log2(&p, status, &float64_params);
4322 return float64_round_pack_canonical(&p, status);
4323 }
4324
4325 /*----------------------------------------------------------------------------
4326 | The pattern for a default generated NaN.
4327 *----------------------------------------------------------------------------*/
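/*
 * parts_default_nan() fills in a canonical, left-justified fraction, so
 * each routine below shifts it back down into the format's fraction
 * field before packing the raw bits.  With default settings this yields
 * e.g. 0x7e00 for float16 and 0x7fc00000 for float32, though the exact
 * pattern (sign, quiet-bit polarity) is target-dependent.
 */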
4328
4329 float16 float16_default_nan(float_status *status)
4330 {
4331 FloatParts64 p;
4332
4333 parts_default_nan(&p, status);
4334 p.frac >>= float16_params.frac_shift;
4335 return float16_pack_raw(&p);
4336 }
4337
4338 float32 float32_default_nan(float_status *status)
4339 {
4340 FloatParts64 p;
4341
4342 parts_default_nan(&p, status);
4343 p.frac >>= float32_params.frac_shift;
4344 return float32_pack_raw(&p);
4345 }
4346
4347 float64 float64_default_nan(float_status *status)
4348 {
4349 FloatParts64 p;
4350
4351 parts_default_nan(&p, status);
4352 p.frac >>= float64_params.frac_shift;
4353 return float64_pack_raw(&p);
4354 }
4355
4356 float128 float128_default_nan(float_status *status)
4357 {
4358 FloatParts128 p;
4359
4360 parts_default_nan(&p, status);
4361 frac_shr(&p, float128_params.frac_shift);
4362 return float128_pack_raw(&p);
4363 }
4364
4365 bfloat16 bfloat16_default_nan(float_status *status)
4366 {
4367 FloatParts64 p;
4368
4369 parts_default_nan(&p, status);
4370 p.frac >>= bfloat16_params.frac_shift;
4371 return bfloat16_pack_raw(&p);
4372 }
4373
4374 /*----------------------------------------------------------------------------
4375 | Returns a quiet NaN obtained by silencing the signalling NaN value `a'.
4376 *----------------------------------------------------------------------------*/
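/*
 * parts_silence_nan() operates on the canonical (left-justified)
 * fraction, so each routine below shifts the raw fraction up before
 * silencing the NaN and back down before re-packing.
 */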
4377
4378 float16 float16_silence_nan(float16 a, float_status *status)
4379 {
4380 FloatParts64 p;
4381
4382 float16_unpack_raw(&p, a);
4383 p.frac <<= float16_params.frac_shift;
4384 parts_silence_nan(&p, status);
4385 p.frac >>= float16_params.frac_shift;
4386 return float16_pack_raw(&p);
4387 }
4388
4389 float32 float32_silence_nan(float32 a, float_status *status)
4390 {
4391 FloatParts64 p;
4392
4393 float32_unpack_raw(&p, a);
4394 p.frac <<= float32_params.frac_shift;
4395 parts_silence_nan(&p, status);
4396 p.frac >>= float32_params.frac_shift;
4397 return float32_pack_raw(&p);
4398 }
4399
4400 float64 float64_silence_nan(float64 a, float_status *status)
4401 {
4402 FloatParts64 p;
4403
4404 float64_unpack_raw(&p, a);
4405 p.frac <<= float64_params.frac_shift;
4406 parts_silence_nan(&p, status);
4407 p.frac >>= float64_params.frac_shift;
4408 return float64_pack_raw(&p);
4409 }
4410
4411 bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4412 {
4413 FloatParts64 p;
4414
4415 bfloat16_unpack_raw(&p, a);
4416 p.frac <<= bfloat16_params.frac_shift;
4417 parts_silence_nan(&p, status);
4418 p.frac >>= bfloat16_params.frac_shift;
4419 return bfloat16_pack_raw(&p);
4420 }
4421
4422 float128 float128_silence_nan(float128 a, float_status *status)
4423 {
4424 FloatParts128 p;
4425
4426 float128_unpack_raw(&p, a);
4427 frac_shl(&p, float128_params.frac_shift);
4428 parts_silence_nan(&p, status);
4429 frac_shr(&p, float128_params.frac_shift);
4430 return float128_pack_raw(&p);
4431 }
4432
4433 /*----------------------------------------------------------------------------
4434 | If `a' is denormal and we are in flush-to-zero mode then set the
4435 | input-denormal exception and return zero. Otherwise just return the value.
4436 *----------------------------------------------------------------------------*/
4437
4438 static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4439 {
4440 if (p.exp == 0 && p.frac != 0) {
4441 float_raise(float_flag_input_denormal, status);
4442 return true;
4443 }
4444
4445 return false;
4446 }
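/*
 * Example (illustrative): with flush_inputs_to_zero set, the smallest
 * positive float32 denormal 0x00000001 unpacks with exp == 0 and
 * frac != 0, so float32_squash_input_denormal() raises
 * float_flag_input_denormal and returns +0.0f, preserving the sign.
 */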
4447
4448 float16 float16_squash_input_denormal(float16 a, float_status *status)
4449 {
4450 if (status->flush_inputs_to_zero) {
4451 FloatParts64 p;
4452
4453 float16_unpack_raw(&p, a);
4454 if (parts_squash_denormal(p, status)) {
4455 return float16_set_sign(float16_zero, p.sign);
4456 }
4457 }
4458 return a;
4459 }
4460
4461 float32 float32_squash_input_denormal(float32 a, float_status *status)
4462 {
4463 if (status->flush_inputs_to_zero) {
4464 FloatParts64 p;
4465
4466 float32_unpack_raw(&p, a);
4467 if (parts_squash_denormal(p, status)) {
4468 return float32_set_sign(float32_zero, p.sign);
4469 }
4470 }
4471 return a;
4472 }
4473
4474 float64 float64_squash_input_denormal(float64 a, float_status *status)
4475 {
4476 if (status->flush_inputs_to_zero) {
4477 FloatParts64 p;
4478
4479 float64_unpack_raw(&p, a);
4480 if (parts_squash_denormal(p, status)) {
4481 return float64_set_sign(float64_zero, p.sign);
4482 }
4483 }
4484 return a;
4485 }
4486
4487 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4488 {
4489 if (status->flush_inputs_to_zero) {
4490 FloatParts64 p;
4491
4492 bfloat16_unpack_raw(&p, a);
4493 if (parts_squash_denormal(p, status)) {
4494 return bfloat16_set_sign(bfloat16_zero, p.sign);
4495 }
4496 }
4497 return a;
4498 }
4499
4500 /*----------------------------------------------------------------------------
4501 | Normalizes the subnormal extended double-precision floating-point value
4502 | represented by the denormalized significand `aSig'. The normalized exponent
4503 | and significand are stored at the locations pointed to by `zExpPtr' and
4504 | `zSigPtr', respectively.
4505 *----------------------------------------------------------------------------*/
4506
4507 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4508 uint64_t *zSigPtr)
4509 {
4510 int8_t shiftCount;
4511
4512 shiftCount = clz64(aSig);
4513 *zSigPtr = aSig<<shiftCount;
4514 *zExpPtr = 1 - shiftCount;
4515 }
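/*
 * Worked example: aSig == 1 gives shiftCount == 63, so the significand
 * becomes 0x8000000000000000 (normalized) and the exponent 1 - 63 = -62.
 */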
4516
4517 /*----------------------------------------------------------------------------
4518 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4519 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
4520 | and returns the proper extended double-precision floating-point value
4521 | corresponding to the abstract input. Ordinarily, the abstract value is
4522 | rounded and packed into the extended double-precision format, with the
4523 | inexact exception raised if the abstract input cannot be represented
4524 | exactly. However, if the abstract value is too large, the overflow and
4525 | inexact exceptions are raised and an infinity or maximal finite value is
4526 | returned. If the abstract value is too small, the input value is rounded to
4527 | a subnormal number, and the underflow and inexact exceptions are raised if
4528 | the abstract input cannot be represented exactly as a subnormal extended
4529 | double-precision floating-point number.
4530 | If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4531 | the result is rounded to the same number of bits as single or double
4532 | precision, respectively. Otherwise, the result is rounded to the full
4533 | precision of the extended double-precision format.
4534 | The input significand must be normalized or smaller. If the input
4535 | significand is not normalized, `zExp' must be 0; in that case, the result
4536 | returned is a subnormal number, and it must not require rounding. The
4537 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4538 | Floating-Point Arithmetic.
4539 *----------------------------------------------------------------------------*/
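/*
 * The rounding constants below follow from the 64-bit significand:
 * rounding to double precision keeps the top 53 bits, so the low
 * 64 - 53 = 11 bits are rounding bits (roundMask == 0x7FF) and half an
 * ULP at that precision is 0x400; rounding to single precision keeps
 * the top 24 bits, leaving 40 rounding bits (roundMask == 0xFFFFFFFFFF,
 * half ULP 0x8000000000).
 */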
4540
4541 floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4542 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4543 float_status *status)
4544 {
4545 FloatRoundMode roundingMode;
4546 bool roundNearestEven, increment, isTiny;
4547 int64_t roundIncrement, roundMask, roundBits;
4548
4549 roundingMode = status->float_rounding_mode;
4550 roundNearestEven = ( roundingMode == float_round_nearest_even );
4551 switch (roundingPrecision) {
4552 case floatx80_precision_x:
4553 goto precision80;
4554 case floatx80_precision_d:
4555 roundIncrement = UINT64_C(0x0000000000000400);
4556 roundMask = UINT64_C(0x00000000000007FF);
4557 break;
4558 case floatx80_precision_s:
4559 roundIncrement = UINT64_C(0x0000008000000000);
4560 roundMask = UINT64_C(0x000000FFFFFFFFFF);
4561 break;
4562 default:
4563 g_assert_not_reached();
4564 }
4565 zSig0 |= ( zSig1 != 0 );
4566 switch (roundingMode) {
4567 case float_round_nearest_even:
4568 case float_round_ties_away:
4569 break;
4570 case float_round_to_zero:
4571 roundIncrement = 0;
4572 break;
4573 case float_round_up:
4574 roundIncrement = zSign ? 0 : roundMask;
4575 break;
4576 case float_round_down:
4577 roundIncrement = zSign ? roundMask : 0;
4578 break;
4579 default:
4580 abort();
4581 }
4582 roundBits = zSig0 & roundMask;
4583 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4584 if ( ( 0x7FFE < zExp )
4585 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4586 ) {
4587 goto overflow;
4588 }
4589 if ( zExp <= 0 ) {
4590 if (status->flush_to_zero) {
4591 float_raise(float_flag_output_denormal, status);
4592 return packFloatx80(zSign, 0, 0);
4593 }
4594 isTiny = status->tininess_before_rounding
4595 || (zExp < 0 )
4596 || (zSig0 <= zSig0 + roundIncrement);
4597 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4598 zExp = 0;
4599 roundBits = zSig0 & roundMask;
4600 if (isTiny && roundBits) {
4601 float_raise(float_flag_underflow, status);
4602 }
4603 if (roundBits) {
4604 float_raise(float_flag_inexact, status);
4605 }
4606 zSig0 += roundIncrement;
4607 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4608 roundIncrement = roundMask + 1;
4609 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4610 roundMask |= roundIncrement;
4611 }
4612 zSig0 &= ~ roundMask;
4613 return packFloatx80( zSign, zExp, zSig0 );
4614 }
4615 }
4616 if (roundBits) {
4617 float_raise(float_flag_inexact, status);
4618 }
4619 zSig0 += roundIncrement;
4620 if ( zSig0 < roundIncrement ) {
4621 ++zExp;
4622 zSig0 = UINT64_C(0x8000000000000000);
4623 }
4624 roundIncrement = roundMask + 1;
4625 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4626 roundMask |= roundIncrement;
4627 }
4628 zSig0 &= ~ roundMask;
4629 if ( zSig0 == 0 ) zExp = 0;
4630 return packFloatx80( zSign, zExp, zSig0 );
4631 precision80:
4632 switch (roundingMode) {
4633 case float_round_nearest_even:
4634 case float_round_ties_away:
4635 increment = ((int64_t)zSig1 < 0);
4636 break;
4637 case float_round_to_zero:
4638 increment = 0;
4639 break;
4640 case float_round_up:
4641 increment = !zSign && zSig1;
4642 break;
4643 case float_round_down:
4644 increment = zSign && zSig1;
4645 break;
4646 default:
4647 abort();
4648 }
4649 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4650 if ( ( 0x7FFE < zExp )
4651 || ( ( zExp == 0x7FFE )
4652 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4653 && increment
4654 )
4655 ) {
4656 roundMask = 0;
4657 overflow:
4658 float_raise(float_flag_overflow | float_flag_inexact, status);
4659 if ( ( roundingMode == float_round_to_zero )
4660 || ( zSign && ( roundingMode == float_round_up ) )
4661 || ( ! zSign && ( roundingMode == float_round_down ) )
4662 ) {
4663 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4664 }
4665 return packFloatx80(zSign,
4666 floatx80_infinity_high,
4667 floatx80_infinity_low);
4668 }
4669 if ( zExp <= 0 ) {
4670 isTiny = status->tininess_before_rounding
4671 || (zExp < 0)
4672 || !increment
4673 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4674 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4675 zExp = 0;
4676 if (isTiny && zSig1) {
4677 float_raise(float_flag_underflow, status);
4678 }
4679 if (zSig1) {
4680 float_raise(float_flag_inexact, status);
4681 }
4682 switch (roundingMode) {
4683 case float_round_nearest_even:
4684 case float_round_ties_away:
4685 increment = ((int64_t)zSig1 < 0);
4686 break;
4687 case float_round_to_zero:
4688 increment = 0;
4689 break;
4690 case float_round_up:
4691 increment = !zSign && zSig1;
4692 break;
4693 case float_round_down:
4694 increment = zSign && zSig1;
4695 break;
4696 default:
4697 abort();
4698 }
4699 if ( increment ) {
4700 ++zSig0;
4701 if (!(zSig1 << 1) && roundNearestEven) {
4702 zSig0 &= ~1;
4703 }
4704 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4705 }
4706 return packFloatx80( zSign, zExp, zSig0 );
4707 }
4708 }
4709 if (zSig1) {
4710 float_raise(float_flag_inexact, status);
4711 }
4712 if ( increment ) {
4713 ++zSig0;
4714 if ( zSig0 == 0 ) {
4715 ++zExp;
4716 zSig0 = UINT64_C(0x8000000000000000);
4717 }
4718 else {
4719 if (!(zSig1 << 1) && roundNearestEven) {
4720 zSig0 &= ~1;
4721 }
4722 }
4723 }
4724 else {
4725 if ( zSig0 == 0 ) zExp = 0;
4726 }
4727 return packFloatx80( zSign, zExp, zSig0 );
4728
4729 }
4730
4731 /*----------------------------------------------------------------------------
4732 | Takes an abstract floating-point value having sign `zSign', exponent
4733 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4734 | and returns the proper extended double-precision floating-point value
4735 | corresponding to the abstract input. This routine is just like
4736 | `roundAndPackFloatx80' except that the input significand does not have to be
4737 | normalized.
4738 *----------------------------------------------------------------------------*/
4739
4740 floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
4741 bool zSign, int32_t zExp,
4742 uint64_t zSig0, uint64_t zSig1,
4743 float_status *status)
4744 {
4745 int8_t shiftCount;
4746
4747 if ( zSig0 == 0 ) {
4748 zSig0 = zSig1;
4749 zSig1 = 0;
4750 zExp -= 64;
4751 }
4752 shiftCount = clz64(zSig0);
4753 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4754 zExp -= shiftCount;
4755 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4756 zSig0, zSig1, status);
4757
4758 }
4759
4760 /*----------------------------------------------------------------------------
4761 | Returns the binary exponential of the single-precision floating-point value
4762 | `a'. The operation is performed according to the IEC/IEEE Standard for
4763 | Binary Floating-Point Arithmetic.
4764 |
4765 | Uses the following identities:
4766 |
4767 | 1.  2**x = e**(x*ln(2))
4768 |
4769 | 2.  e**x = 1 + x/1! + x**2/2! + x**3/3! + x**4/4! + x**5/5! + ... + x**n/n! + ...
4776 *----------------------------------------------------------------------------*/
4777
4778 static const float64 float32_exp2_coefficients[15] =
4779 {
4780 const_float64( 0x3ff0000000000000ll ), /* 1 */
4781 const_float64( 0x3fe0000000000000ll ), /* 2 */
4782 const_float64( 0x3fc5555555555555ll ), /* 3 */
4783 const_float64( 0x3fa5555555555555ll ), /* 4 */
4784 const_float64( 0x3f81111111111111ll ), /* 5 */
4785 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
4786 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
4787 const_float64( 0x3efa01a01a01a01all ), /* 8 */
4788 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
4789 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
4790 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
4791 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
4792 const_float64( 0x3de6124613a86d09ll ), /* 13 */
4793 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
4794 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
4795 };
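/*
 * Coefficient n is 1/n! rounded to float64: 0x3ff0000000000000 == 1/1!,
 * 0x3fe0000000000000 == 1/2! == 0.5, 0x3fc5555555555555 ~= 1/3! == 1/6,
 * and so on, matching the Taylor expansion of e**x quoted above.
 */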
4796
4797 float32 float32_exp2(float32 a, float_status *status)
4798 {
4799 FloatParts64 xp, xnp, tp, rp;
4800 int i;
4801
4802 float32_unpack_canonical(&xp, a, status);
4803 if (unlikely(xp.cls != float_class_normal)) {
4804 switch (xp.cls) {
4805 case float_class_snan:
4806 case float_class_qnan:
4807 parts_return_nan(&xp, status);
4808 return float32_round_pack_canonical(&xp, status);
4809 case float_class_inf:
4810 return xp.sign ? float32_zero : a;
4811 case float_class_zero:
4812 return float32_one;
4813 default:
4814 break;
4815 }
4816 g_assert_not_reached();
4817 }
4818
4819 float_raise(float_flag_inexact, status);
4820
4821 float64_unpack_canonical(&tp, float64_ln2, status);
4822 xp = *parts_mul(&xp, &tp, status);
4823 xnp = xp;
4824
4825 float64_unpack_canonical(&rp, float64_one, status);
4826 for (i = 0 ; i < 15 ; i++) {
4827 float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
4828 rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
4829 xnp = *parts_mul(&xnp, &xp, status);
4830 }
4831
4832 return float32_round_pack_canonical(&rp, status);
4833 }
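/*
 * In the loop above, xnp carries the running power (x*ln(2))**(i+1), so
 * each parts_muladd step accumulates coefficient[i] * xnp; the result is
 * the truncated series 1 + sum(n = 1..15) of (x*ln(2))**n / n!, i.e.
 * e**(x*ln(2)) == 2**x.
 */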
4834
4835 /*----------------------------------------------------------------------------
4836 | Rounds the extended double-precision floating-point value `a'
4837 | to the precision provided by floatx80_rounding_precision and returns the
4838 | result as an extended double-precision floating-point value.
4839 | The operation is performed according to the IEC/IEEE Standard for Binary
4840 | Floating-Point Arithmetic.
4841 *----------------------------------------------------------------------------*/
4842
4843 floatx80 floatx80_round(floatx80 a, float_status *status)
4844 {
4845 FloatParts128 p;
4846
4847 if (!floatx80_unpack_canonical(&p, a, status)) {
4848 return floatx80_default_nan(status);
4849 }
4850 return floatx80_round_pack_canonical(&p, status);
4851 }
4852
4853 static void __attribute__((constructor)) softfloat_init(void)
4854 {
4855 union_float64 ua, ub, uc, ur;
4856
4857 if (QEMU_NO_HARDFLOAT) {
4858 return;
4859 }
4860 /*
4861 * Test that the host's FMA is not obviously broken. For example,
4862 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
4863 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
4864 */
4865 ua.s = 0x0020000000000001ULL;
4866 ub.s = 0x3ca0000000000000ULL;
4867 uc.s = 0x0020000000000000ULL;
4868 ur.h = fma(ua.h, ub.h, uc.h);
4869 if (ur.s != 0x0020000000000001ULL) {
4870 force_soft_fma = true;
4871 }
4872 }
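/*
 * The expected result 0x0020000000000001 is the correctly rounded value
 * of a*b + c for the patterns above: the product contributes exactly
 * half an ULP of c plus a non-zero sticky bit, so a correct fused
 * multiply-add must round the sum up.  Any other answer (as produced by
 * double-rounding fma() implementations) forces all muladd operations
 * through the softfloat path.
 */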