]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into...
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
/*
 * GEN_INPUT_FLUSH__NOCHECK(name, soft_t) defines name(), which replaces a
 * denormal *a with a zero of the same sign and raises
 * float_flag_input_denormal.  The generated function does not consult
 * s->flush_inputs_to_zero itself.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (likely(!soft_t ## _is_denormal(*a))) {                      \
            return;                                                     \
        }                                                               \
        *a = soft_t ## _set_sign(soft_t ## _zero,                       \
                                 soft_t ## _is_neg(*a));                \
        float_raise(float_flag_input_denormal, s);                      \
    }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
/*
 * GEN_INPUT_FLUSH1(name, soft_t) defines name(), which flushes one denormal
 * input to zero, but only when s->flush_inputs_to_zero is set.
 */
#define GEN_INPUT_FLUSH1(name, soft_t)                                  \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(s->flush_inputs_to_zero)) {                        \
            soft_t ## _input_flush__nocheck(a, s);                      \
        }                                                               \
    }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
/*
 * GEN_INPUT_FLUSH2(name, soft_t) defines name(), which flushes two denormal
 * inputs to zero (a first, then b), but only when s->flush_inputs_to_zero
 * is set.
 */
#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (unlikely(s->flush_inputs_to_zero)) {                        \
            soft_t ## _input_flush__nocheck(a, s);                      \
            soft_t ## _input_flush__nocheck(b, s);                      \
        }                                                               \
    }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
/*
 * GEN_INPUT_FLUSH3(name, soft_t) defines name(), which flushes three
 * denormal inputs to zero (a, b, then c), but only when
 * s->flush_inputs_to_zero is set.
 */
#define GEN_INPUT_FLUSH3(name, soft_t)                                       \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                        \
        if (unlikely(s->flush_inputs_to_zero)) {                             \
            soft_t ## _input_flush__nocheck(a, s);                           \
            soft_t ## _input_flush__nocheck(b, s);                           \
            soft_t ## _input_flush__nocheck(c, s);                           \
        }                                                                    \
    }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
185/*
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
189 */
190#if defined(__x86_64__)
191# define QEMU_HARDFLOAT_1F32_USE_FP 0
192# define QEMU_HARDFLOAT_1F64_USE_FP 1
193# define QEMU_HARDFLOAT_2F32_USE_FP 0
194# define QEMU_HARDFLOAT_2F64_USE_FP 1
195# define QEMU_HARDFLOAT_3F32_USE_FP 0
196# define QEMU_HARDFLOAT_3F64_USE_FP 1
197#else
198# define QEMU_HARDFLOAT_1F32_USE_FP 0
199# define QEMU_HARDFLOAT_1F64_USE_FP 0
200# define QEMU_HARDFLOAT_2F32_USE_FP 0
201# define QEMU_HARDFLOAT_2F64_USE_FP 0
202# define QEMU_HARDFLOAT_3F32_USE_FP 0
203# define QEMU_HARDFLOAT_3F64_USE_FP 0
204#endif
205
206/*
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211 */
212#if defined(__x86_64__) || defined(__aarch64__)
213# define QEMU_HARDFLOAT_USE_ISINF 1
214#else
215# define QEMU_HARDFLOAT_USE_ISINF 0
216#endif
217
218/*
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
221 * already set.
222 */
223#if defined(TARGET_PPC) || defined(__FAST_MATH__)
224# if defined(__FAST_MATH__)
225# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226 IEEE implementation
227# endif
228# define QEMU_NO_HARDFLOAT 1
229# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230#else
231# define QEMU_NO_HARDFLOAT 0
232# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
a94b7839
EC
342static inline float32
343float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
b240c9c4 345 f32_check_fn pre, f32_check_fn post)
a94b7839
EC
346{
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
a94b7839
EC
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 363 float_raise(float_flag_overflow, s);
b240c9c4
RH
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
a94b7839
EC
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371}
372
373static inline float64
374float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
b240c9c4 376 f64_check_fn pre, f64_check_fn post)
a94b7839
EC
377{
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
a94b7839
EC
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 394 float_raise(float_flag_overflow, s);
b240c9c4
RH
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
a94b7839
EC
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402}
403
a90119b5
AB
404/*
405 * Classify a floating point number. Everything above float_class_qnan
406 * is a NaN so cls >= float_class_qnan is any NaN.
407 */
408
409typedef enum __attribute__ ((__packed__)) {
410 float_class_unclassified,
411 float_class_zero,
412 float_class_normal,
413 float_class_inf,
414 float_class_qnan, /* all NaNs from here */
415 float_class_snan,
a90119b5
AB
416} FloatClass;
417
134eda00
RH
418#define float_cmask(bit) (1u << (bit))
419
420enum {
421 float_cmask_zero = float_cmask(float_class_zero),
422 float_cmask_normal = float_cmask(float_class_normal),
423 float_cmask_inf = float_cmask(float_class_inf),
424 float_cmask_qnan = float_cmask(float_class_qnan),
425 float_cmask_snan = float_cmask(float_class_snan),
426
427 float_cmask_infzero = float_cmask_zero | float_cmask_inf,
428 float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
429};
430
e1c4667a
RH
431/* Flags for parts_minmax. */
432enum {
433 /* Set for minimum; clear for maximum. */
434 minmax_ismin = 1,
435 /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
436 minmax_isnum = 2,
437 /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
438 minmax_ismag = 4,
0e903037
CMC
439 /*
440 * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
441 * operations.
442 */
443 minmax_isnumber = 8,
e1c4667a 444};
134eda00 445
247d1f21
RH
446/* Simple helpers for checking if, or what kind of, NaN we have */
447static inline __attribute__((unused)) bool is_nan(FloatClass c)
448{
449 return unlikely(c >= float_class_qnan);
450}
451
452static inline __attribute__((unused)) bool is_snan(FloatClass c)
453{
454 return c == float_class_snan;
455}
456
457static inline __attribute__((unused)) bool is_qnan(FloatClass c)
458{
459 return c == float_class_qnan;
460}
461
a90119b5 462/*
0018b1f4
RH
463 * Structure holding all of the decomposed parts of a float.
464 * The exponent is unbiased and the fraction is normalized.
a90119b5 465 *
0018b1f4
RH
466 * The fraction words are stored in big-endian word ordering,
467 * so that truncation from a larger format to a smaller format
468 * can be done simply by ignoring subsequent elements.
a90119b5
AB
469 */
470
471typedef struct {
a90119b5
AB
472 FloatClass cls;
473 bool sign;
4109b9ea
RH
474 int32_t exp;
475 union {
476 /* Routines that know the structure may reference the singular name. */
477 uint64_t frac;
478 /*
479 * Routines expanded with multiple structures reference "hi" and "lo"
480 * depending on the operation. In FloatParts64, "hi" and "lo" are
481 * both the same word and aliased here.
482 */
483 uint64_t frac_hi;
484 uint64_t frac_lo;
485 };
f8155c1d 486} FloatParts64;
a90119b5 487
0018b1f4
RH
488typedef struct {
489 FloatClass cls;
490 bool sign;
491 int32_t exp;
492 uint64_t frac_hi;
493 uint64_t frac_lo;
494} FloatParts128;
495
aca84527
RH
496typedef struct {
497 FloatClass cls;
498 bool sign;
499 int32_t exp;
500 uint64_t frac_hi;
501 uint64_t frac_hm; /* high-middle */
502 uint64_t frac_lm; /* low-middle */
503 uint64_t frac_lo;
504} FloatParts256;
505
0018b1f4 506/* These apply to the most significant word of each FloatPartsN. */
e99c4373 507#define DECOMPOSED_BINARY_POINT 63
a90119b5 508#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
a90119b5
AB
509
510/* Structure holding all of the relevant parameters for a format.
511 * exp_size: the size of the exponent field
512 * exp_bias: the offset applied to the exponent field
513 * exp_max: the maximum normalised exponent
514 * frac_size: the size of the fraction field
515 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
516 * The following are computed based on the size of the fraction
d6e1f0cd 517 * round_mask: bits below lsb which must be rounded
ca3a3d5a
AB
518 * The following optional modifiers are available:
519 * arm_althp: handle ARM Alternative Half Precision
a90119b5
AB
520 */
521typedef struct {
522 int exp_size;
523 int exp_bias;
c40da5c6 524 int exp_re_bias;
a90119b5
AB
525 int exp_max;
526 int frac_size;
527 int frac_shift;
ca3a3d5a 528 bool arm_althp;
d6e1f0cd 529 uint64_t round_mask;
a90119b5
AB
530} FloatFmt;
531
/*
 * Expand the FloatFmt fields that depend only on the exponent width E.
 * Every use of a macro argument is parenthesized so that an expression
 * argument (e.g. "4 + 1") expands with the intended precedence.
 */
#define FLOAT_PARAMS_(E)                                        \
    .exp_size       = (E),                                      \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                    \
    .exp_re_bias    = (1 << ((E) - 1)) + (1 << ((E) - 2)),      \
    .exp_max        = (1 << (E)) - 1

/*
 * Expand the FloatFmt fields for a format with an E-bit exponent and an
 * F-bit fraction.  frac_shift is the shift that normalises the fraction
 * with DECOMPOSED_BINARY_POINT; round_mask covers the bits below the lsb
 * which must be rounded.
 */
#define FLOAT_PARAMS(E, F)                                      \
    FLOAT_PARAMS_(E),                                           \
    .frac_size      = (F),                                      \
    .frac_shift     = (-(F) - 1) & 63,                          \
    .round_mask     = (1ull << ((-(F) - 1) & 63)) - 1
a90119b5
AB
544
545static const FloatFmt float16_params = {
546 FLOAT_PARAMS(5, 10)
547};
548
6fed16b2
AB
549static const FloatFmt float16_params_ahp = {
550 FLOAT_PARAMS(5, 10),
551 .arm_althp = true
552};
553
8282310d
LZ
554static const FloatFmt bfloat16_params = {
555 FLOAT_PARAMS(8, 7)
556};
557
a90119b5
AB
558static const FloatFmt float32_params = {
559 FLOAT_PARAMS(8, 23)
560};
561
562static const FloatFmt float64_params = {
563 FLOAT_PARAMS(11, 52)
564};
565
0018b1f4
RH
566static const FloatFmt float128_params = {
567 FLOAT_PARAMS(15, 112)
568};
569
c1b6299b
RH
570#define FLOATX80_PARAMS(R) \
571 FLOAT_PARAMS_(15), \
572 .frac_size = R == 64 ? 63 : R, \
573 .frac_shift = 0, \
574 .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
575
576static const FloatFmt floatx80_params[3] = {
577 [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
578 [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
579 [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
580};
581
6fff2167 582/* Unpack a float to parts, but do not canonicalize. */
d8fdd172 583static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
6fff2167 584{
d8fdd172
RH
585 const int f_size = fmt->frac_size;
586 const int e_size = fmt->exp_size;
6fff2167 587
d8fdd172 588 *r = (FloatParts64) {
6fff2167 589 .cls = float_class_unclassified,
d8fdd172
RH
590 .sign = extract64(raw, f_size + e_size, 1),
591 .exp = extract64(raw, f_size, e_size),
592 .frac = extract64(raw, 0, f_size)
6fff2167
AB
593 };
594}
595
1d3daf95 596static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
6fff2167 597{
3dddb203 598 unpack_raw64(p, &float16_params, f);
6fff2167
AB
599}
600
1d3daf95 601static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
8282310d 602{
3dddb203 603 unpack_raw64(p, &bfloat16_params, f);
8282310d
LZ
604}
605
1d3daf95 606static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
6fff2167 607{
3dddb203 608 unpack_raw64(p, &float32_params, f);
6fff2167
AB
609}
610
1d3daf95 611static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
6fff2167 612{
3dddb203 613 unpack_raw64(p, &float64_params, f);
6fff2167
AB
614}
615
1d3daf95 616static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
c1b6299b
RH
617{
618 *p = (FloatParts128) {
619 .cls = float_class_unclassified,
620 .sign = extract32(f.high, 15, 1),
621 .exp = extract32(f.high, 0, 15),
622 .frac_hi = f.low
623 };
624}
625
1d3daf95 626static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
0018b1f4
RH
627{
628 const int f_size = float128_params.frac_size - 64;
629 const int e_size = float128_params.exp_size;
630
631 *p = (FloatParts128) {
632 .cls = float_class_unclassified,
633 .sign = extract64(f.high, f_size + e_size, 1),
634 .exp = extract64(f.high, f_size, e_size),
635 .frac_hi = extract64(f.high, 0, f_size),
636 .frac_lo = f.low,
637 };
638}
639
6fff2167 640/* Pack a float from parts, but do not canonicalize. */
9e4af58c 641static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
6fff2167 642{
9e4af58c
RH
643 const int f_size = fmt->frac_size;
644 const int e_size = fmt->exp_size;
645 uint64_t ret;
646
647 ret = (uint64_t)p->sign << (f_size + e_size);
648 ret = deposit64(ret, f_size, e_size, p->exp);
649 ret = deposit64(ret, 0, f_size, p->frac);
650 return ret;
6fff2167
AB
651}
652
1d3daf95 653static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
6fff2167 654{
71fd178e 655 return make_float16(pack_raw64(p, &float16_params));
6fff2167
AB
656}
657
1d3daf95 658static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
8282310d 659{
71fd178e 660 return pack_raw64(p, &bfloat16_params);
8282310d
LZ
661}
662
1d3daf95 663static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
6fff2167 664{
71fd178e 665 return make_float32(pack_raw64(p, &float32_params));
6fff2167
AB
666}
667
1d3daf95 668static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
6fff2167 669{
71fd178e 670 return make_float64(pack_raw64(p, &float64_params));
6fff2167
AB
671}
672
1d3daf95 673static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
0018b1f4
RH
674{
675 const int f_size = float128_params.frac_size - 64;
676 const int e_size = float128_params.exp_size;
677 uint64_t hi;
678
679 hi = (uint64_t)p->sign << (f_size + e_size);
680 hi = deposit64(hi, f_size, e_size, p->exp);
681 hi = deposit64(hi, 0, f_size, p->frac_hi);
682 return make_float128(hi, p->frac_lo);
683}
684
0664335a
RH
685/*----------------------------------------------------------------------------
686| Functions and definitions to determine: (1) whether tininess for underflow
687| is detected before or after rounding by default, (2) what (if anything)
688| happens when exceptions are raised, (3) how signaling NaNs are distinguished
689| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
690| are propagated from function inputs to output. These details are target-
691| specific.
692*----------------------------------------------------------------------------*/
139c1837 693#include "softfloat-specialize.c.inc"
0664335a 694
0018b1f4 695#define PARTS_GENERIC_64_128(NAME, P) \
6c06aca0
RH
696 _Generic((P), FloatParts64 *: parts64_##NAME, \
697 FloatParts128 *: parts128_##NAME)
0018b1f4 698
dedd123c 699#define PARTS_GENERIC_64_128_256(NAME, P) \
6c06aca0
RH
700 _Generic((P), FloatParts64 *: parts64_##NAME, \
701 FloatParts128 *: parts128_##NAME, \
702 FloatParts256 *: parts256_##NAME)
dedd123c 703
e9034ea8 704#define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
0018b1f4
RH
705#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
706
7c45bad8
RH
707static void parts64_return_nan(FloatParts64 *a, float_status *s);
708static void parts128_return_nan(FloatParts128 *a, float_status *s);
709
710#define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
0018b1f4 711
22c355f4
RH
712static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
713 float_status *s);
714static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
715 float_status *s);
716
717#define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
718
979582d0
RH
719static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
720 FloatParts64 *c, float_status *s,
721 int ab_mask, int abc_mask);
722static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
723 FloatParts128 *b,
724 FloatParts128 *c,
725 float_status *s,
726 int ab_mask, int abc_mask);
727
728#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
729 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
730
d46975bc
RH
731static void parts64_canonicalize(FloatParts64 *p, float_status *status,
732 const FloatFmt *fmt);
733static void parts128_canonicalize(FloatParts128 *p, float_status *status,
734 const FloatFmt *fmt);
735
736#define parts_canonicalize(A, S, F) \
737 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
738
25fdedf0
RH
739static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
740 const FloatFmt *fmt);
741static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
742 const FloatFmt *fmt);
743
744#define parts_uncanon_normal(A, S, F) \
745 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
746
ee6959f2
RH
747static void parts64_uncanon(FloatParts64 *p, float_status *status,
748 const FloatFmt *fmt);
749static void parts128_uncanon(FloatParts128 *p, float_status *status,
750 const FloatFmt *fmt);
751
752#define parts_uncanon(A, S, F) \
753 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
754
da10a907
RH
755static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
756static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 757static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
758
759#define parts_add_normal(A, B) \
dedd123c 760 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
da10a907
RH
761
762static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
763static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 764static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
765
766#define parts_sub_normal(A, B) \
dedd123c 767 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
da10a907
RH
768
769static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
770 float_status *s, bool subtract);
771static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
772 float_status *s, bool subtract);
773
774#define parts_addsub(A, B, S, Z) \
775 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
776
aca84527
RH
777static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
778 float_status *s);
779static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
780 float_status *s);
781
782#define parts_mul(A, B, S) \
783 PARTS_GENERIC_64_128(mul, A)(A, B, S)
784
dedd123c
RH
785static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
786 FloatParts64 *c, int flags,
787 float_status *s);
788static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
789 FloatParts128 *c, int flags,
790 float_status *s);
791
792#define parts_muladd(A, B, C, Z, S) \
793 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
794
ec961b81
RH
795static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
796 float_status *s);
797static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
798 float_status *s);
799
800#define parts_div(A, B, S) \
801 PARTS_GENERIC_64_128(div, A)(A, B, S)
802
feaf2e9c
RH
803static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
804 uint64_t *mod_quot, float_status *s);
805static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
806 uint64_t *mod_quot, float_status *s);
807
808#define parts_modrem(A, B, Q, S) \
809 PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
810
9261b245
RH
811static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
812static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
813
814#define parts_sqrt(A, S, F) \
815 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
816
afc34931
RH
817static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
818 int scale, int frac_size);
819static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
820 int scale, int frac_size);
821
822#define parts_round_to_int_normal(A, R, C, F) \
823 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
824
825static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
826 int scale, float_status *s,
827 const FloatFmt *fmt);
828static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
829 int scale, float_status *s,
830 const FloatFmt *fmt);
831
832#define parts_round_to_int(A, R, C, S, F) \
833 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
834
463b3f0d
RH
835static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
836 int scale, int64_t min, int64_t max,
837 float_status *s);
838static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
839 int scale, int64_t min, int64_t max,
840 float_status *s);
841
842#define parts_float_to_sint(P, R, Z, MN, MX, S) \
843 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
844
4ab4aef0
RH
845static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
846 int scale, uint64_t max,
847 float_status *s);
848static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
849 int scale, uint64_t max,
850 float_status *s);
851
852#define parts_float_to_uint(P, R, Z, M, S) \
853 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
854
e3689519
RH
855static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
856 int scale, float_status *s);
857static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
858 int scale, float_status *s);
859
860#define parts_sint_to_float(P, I, Z, S) \
861 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
862
37c954a1
RH
863static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
864 int scale, float_status *s);
865static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
866 int scale, float_status *s);
867
868#define parts_uint_to_float(P, I, Z, S) \
869 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
870
e1c4667a
RH
871static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
872 float_status *s, int flags);
873static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
874 float_status *s, int flags);
875
876#define parts_minmax(A, B, S, F) \
877 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
878
b880867f
RH
879static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
880 float_status *s, bool q);
881static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
882 float_status *s, bool q);
6eb169b8
RH
883
884#define parts_compare(A, B, S, Q) \
885 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
886
39626b0c
RH
887static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
888static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
889
890#define parts_scalbn(A, N, S) \
891 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
892
2fa3546c
RH
893static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
894static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
895
896#define parts_log2(A, S, F) \
897 PARTS_GENERIC_64_128(log2, A)(A, S, F)
898
0018b1f4
RH
899/*
900 * Helper functions for softfloat-parts.c.inc, per-size operations.
901 */
902
22c355f4 903#define FRAC_GENERIC_64_128(NAME, P) \
6c06aca0
RH
904 _Generic((P), FloatParts64 *: frac64_##NAME, \
905 FloatParts128 *: frac128_##NAME)
22c355f4 906
dedd123c 907#define FRAC_GENERIC_64_128_256(NAME, P) \
6c06aca0
RH
908 _Generic((P), FloatParts64 *: frac64_##NAME, \
909 FloatParts128 *: frac128_##NAME, \
910 FloatParts256 *: frac256_##NAME)
dedd123c 911
da10a907
RH
912static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
913{
914 return uadd64_overflow(a->frac, b->frac, &r->frac);
915}
916
917static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
918{
919 bool c = 0;
920 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
921 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
922 return c;
923}
924
dedd123c
RH
925static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
926{
927 bool c = 0;
928 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
929 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
930 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
931 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
932 return c;
933}
934
935#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
da10a907 936
ee6959f2
RH
937static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
938{
939 return uadd64_overflow(a->frac, c, &r->frac);
940}
941
942static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
943{
944 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
945 return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
946}
947
948#define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
949
950static void frac64_allones(FloatParts64 *a)
951{
952 a->frac = -1;
953}
954
955static void frac128_allones(FloatParts128 *a)
956{
957 a->frac_hi = a->frac_lo = -1;
958}
959
960#define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
961
dee3fcfb 962static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
22c355f4 963{
dee3fcfb
RH
964 return (a->frac == b->frac ? float_relation_equal
965 : a->frac < b->frac ? float_relation_less
966 : float_relation_greater);
22c355f4
RH
967}
968
dee3fcfb 969static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
22c355f4
RH
970{
971 uint64_t ta = a->frac_hi, tb = b->frac_hi;
972 if (ta == tb) {
973 ta = a->frac_lo, tb = b->frac_lo;
974 if (ta == tb) {
dee3fcfb 975 return float_relation_equal;
22c355f4
RH
976 }
977 }
dee3fcfb 978 return ta < tb ? float_relation_less : float_relation_greater;
22c355f4
RH
979}
980
981#define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
982
d46975bc 983static void frac64_clear(FloatParts64 *a)
0018b1f4 984{
d46975bc
RH
985 a->frac = 0;
986}
987
988static void frac128_clear(FloatParts128 *a)
989{
990 a->frac_hi = a->frac_lo = 0;
0018b1f4
RH
991}
992
d46975bc 993#define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
0018b1f4 994
ec961b81
RH
995static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
996{
997 uint64_t n1, n0, r, q;
998 bool ret;
999
1000 /*
1001 * We want a 2*N / N-bit division to produce exactly an N-bit
1002 * result, so that we do not lose any precision and so that we
1003 * do not have to renormalize afterward. If A.frac < B.frac,
1004 * then division would produce an (N-1)-bit result; shift A left
1005 * by one to produce the an N-bit result, and return true to
1006 * decrement the exponent to match.
1007 *
1008 * The udiv_qrnnd algorithm that we're using requires normalization,
1009 * i.e. the msb of the denominator must be set, which is already true.
1010 */
1011 ret = a->frac < b->frac;
1012 if (ret) {
1013 n0 = a->frac;
1014 n1 = 0;
1015 } else {
1016 n0 = a->frac >> 1;
1017 n1 = a->frac << 63;
1018 }
1019 q = udiv_qrnnd(&r, n0, n1, b->frac);
1020
1021 /* Set lsb if there is a remainder, to set inexact. */
1022 a->frac = q | (r != 0);
1023
1024 return ret;
1025}
1026
1027static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1028{
1029 uint64_t q0, q1, a0, a1, b0, b1;
1030 uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1031 bool ret = false;
1032
1033 a0 = a->frac_hi, a1 = a->frac_lo;
1034 b0 = b->frac_hi, b1 = b->frac_lo;
1035
1036 ret = lt128(a0, a1, b0, b1);
1037 if (!ret) {
1038 a1 = shr_double(a0, a1, 1);
1039 a0 = a0 >> 1;
1040 }
1041
1042 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1043 q0 = estimateDiv128To64(a0, a1, b0);
1044
1045 /*
1046 * Estimate is high because B1 was not included (unless B1 == 0).
1047 * Reduce quotient and increase remainder until remainder is non-negative.
1048 * This loop will execute 0 to 2 times.
1049 */
1050 mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1051 sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1052 while (r0 != 0) {
1053 q0--;
1054 add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1055 }
1056
1057 /* Repeat using the remainder, producing a second word of quotient. */
1058 q1 = estimateDiv128To64(r1, r2, b0);
1059 mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1060 sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1061 while (r1 != 0) {
1062 q1--;
1063 add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1064 }
1065
1066 /* Any remainder indicates inexact; set sticky bit. */
1067 q1 |= (r2 | r3) != 0;
1068
1069 a->frac_hi = q0;
1070 a->frac_lo = q1;
1071 return ret;
1072}
1073
1074#define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1075
d46975bc 1076static bool frac64_eqz(FloatParts64 *a)
0018b1f4 1077{
d46975bc
RH
1078 return a->frac == 0;
1079}
1080
1081static bool frac128_eqz(FloatParts128 *a)
1082{
1083 return (a->frac_hi | a->frac_lo) == 0;
0018b1f4
RH
1084}
1085
d46975bc 1086#define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
0fc07cad 1087
aca84527
RH
1088static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1089{
1090 mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1091}
1092
1093static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1094{
1095 mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1096 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1097}
1098
1099#define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1100
da10a907
RH
1101static void frac64_neg(FloatParts64 *a)
1102{
1103 a->frac = -a->frac;
1104}
1105
1106static void frac128_neg(FloatParts128 *a)
1107{
1108 bool c = 0;
1109 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1110 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1111}
1112
dedd123c
RH
1113static void frac256_neg(FloatParts256 *a)
1114{
1115 bool c = 0;
1116 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1117 a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1118 a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1119 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1120}
1121
1122#define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
da10a907 1123
d46975bc 1124static int frac64_normalize(FloatParts64 *a)
6fff2167 1125{
d46975bc
RH
1126 if (a->frac) {
1127 int shift = clz64(a->frac);
1128 a->frac <<= shift;
1129 return shift;
1130 }
1131 return 64;
1132}
1133
1134static int frac128_normalize(FloatParts128 *a)
1135{
1136 if (a->frac_hi) {
1137 int shl = clz64(a->frac_hi);
463e45dc
RH
1138 a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1139 a->frac_lo <<= shl;
d46975bc
RH
1140 return shl;
1141 } else if (a->frac_lo) {
1142 int shl = clz64(a->frac_lo);
463e45dc 1143 a->frac_hi = a->frac_lo << shl;
d46975bc
RH
1144 a->frac_lo = 0;
1145 return shl + 64;
6fff2167 1146 }
d46975bc 1147 return 128;
6fff2167
AB
1148}
1149
dedd123c
RH
1150static int frac256_normalize(FloatParts256 *a)
1151{
1152 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1153 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
463e45dc 1154 int ret, shl;
dedd123c
RH
1155
1156 if (likely(a0)) {
1157 shl = clz64(a0);
1158 if (shl == 0) {
1159 return 0;
1160 }
1161 ret = shl;
1162 } else {
1163 if (a1) {
1164 ret = 64;
1165 a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1166 } else if (a2) {
1167 ret = 128;
1168 a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1169 } else if (a3) {
1170 ret = 192;
1171 a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1172 } else {
1173 ret = 256;
1174 a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1175 goto done;
1176 }
1177 shl = clz64(a0);
1178 if (shl == 0) {
1179 goto done;
1180 }
1181 ret += shl;
1182 }
1183
463e45dc
RH
1184 a0 = shl_double(a0, a1, shl);
1185 a1 = shl_double(a1, a2, shl);
1186 a2 = shl_double(a2, a3, shl);
1187 a3 <<= shl;
dedd123c
RH
1188
1189 done:
1190 a->frac_hi = a0;
1191 a->frac_hm = a1;
1192 a->frac_lm = a2;
1193 a->frac_lo = a3;
1194 return ret;
1195}
1196
1197#define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
d46975bc 1198
feaf2e9c
RH
/*
 * Compute a remainder of A by B in place, working on the fractions and
 * the exponent difference a->exp - b->exp.
 *
 * @a:        dividend; on return holds the remainder (frac, exp, and
 *            possibly sign and cls updated).
 * @b:        divisor; only read.
 * @mod_quot: if non-NULL, receives the low 64 bits of the integer
 *            quotient and the remainder is left as produced by the
 *            long division.  If NULL, the remainder is additionally
 *            compared with (divisor - remainder) and replaced by the
 *            smaller of the two, flipping a->sign; a tie is resolved
 *            by the low bit of the last quotient digit.
 *
 * When the exponent difference is below -1, A is returned unchanged
 * (and *mod_quot, if provided, is set to 0).
 */
1199static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
1200{
1201 uint64_t a0, a1, b0, t0, t1, q, quot;
1202 int exp_diff = a->exp - b->exp;
1203 int shift;
1204
/* The running remainder is kept as the 128-bit value a0:a1. */
1205 a0 = a->frac;
1206 a1 = 0;
1207
1208 if (exp_diff < -1) {
1209 if (mod_quot) {
1210 *mod_quot = 0;
1211 }
1212 return;
1213 }
1214 if (exp_diff == -1) {
1215 a0 >>= 1;
1216 exp_diff = 0;
1217 }
1218
/* First quotient bit: does the divisor fit into the aligned dividend? */
1219 b0 = b->frac;
1220 quot = q = b0 <= a0;
1221 if (q) {
1222 a0 -= b0;
1223 }
1224
/*
 * Long division, consuming 62 bits of exponent difference per pass.
 * The estimate is lowered by 2 before use -- presumably so that it can
 * never exceed the true quotient digit (TODO confirm against
 * estimateDiv128To64).  quot keeps only the low 64 bits.
 */
1225 exp_diff -= 64;
1226 while (exp_diff > 0) {
1227 q = estimateDiv128To64(a0, a1, b0);
1228 q = q > 2 ? q - 2 : 0;
1229 mul64To128(b0, q, &t0, &t1);
1230 sub128(a0, a1, t0, t1, &a0, &a1);
1231 shortShift128Left(a0, a1, 62, &a0, &a1);
1232 exp_diff -= 62;
1233 quot = (quot << 62) + q;
1234 }
1235
/*
 * Final partial step for the remaining exp_diff bits; the inner loop
 * corrects the deliberately low estimate by repeated subtraction of the
 * shifted divisor t0:t1.
 */
1236 exp_diff += 64;
1237 if (exp_diff > 0) {
1238 q = estimateDiv128To64(a0, a1, b0);
1239 q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
1240 mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
1241 sub128(a0, a1, t0, t1, &a0, &a1);
1242 shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
1243 while (le128(t0, t1, a0, a1)) {
1244 ++q;
1245 sub128(a0, a1, t0, t1, &a0, &a1);
1246 }
1247 quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1248 } else {
/* No bits left: t0:t1 is simply the divisor, for the comparison below. */
1249 t0 = b0;
1250 t1 = 0;
1251 }
1252
1253 if (mod_quot) {
1254 *mod_quot = quot;
1255 } else {
/* t = divisor - remainder; take it (with opposite sign) if it is smaller. */
1256 sub128(t0, t1, a0, a1, &t0, &t1);
1257 if (lt128(t0, t1, a0, a1) ||
1258 (eq128(t0, t1, a0, a1) && (q & 1))) {
1259 a0 = t0;
1260 a1 = t1;
1261 a->sign = !a->sign;
1262 }
1263 }
1264
/* Renormalize the remainder; an all-zero remainder becomes a zero class. */
1265 if (likely(a0)) {
1266 shift = clz64(a0);
1267 shortShift128Left(a0, a1, shift, &a0, &a1);
1268 } else if (likely(a1)) {
1269 shift = clz64(a1);
1270 a0 = a1 << shift;
1271 a1 = 0;
1272 shift += 64;
1273 } else {
1274 a->cls = float_class_zero;
1275 return;
1276 }
1277
/* Any bits left in the low word are folded into a sticky lsb. */
1278 a->exp = b->exp + exp_diff - shift;
1279 a->frac = a0 | (a1 != 0);
1280}
1281
/*
 * 128-bit counterpart of the 64-bit modrem helper: compute a remainder
 * of A by B in place, working on the fractions and the exponent
 * difference a->exp - b->exp.
 *
 * @a:        dividend; on return holds the remainder (frac_hi/frac_lo,
 *            exp, and possibly sign and cls updated).
 * @b:        divisor; only read.
 * @mod_quot: if non-NULL, receives the low 64 bits of the integer
 *            quotient and the remainder is left as produced by the
 *            long division.  If NULL, the remainder is additionally
 *            compared with (divisor - remainder) and replaced by the
 *            smaller of the two, flipping a->sign; a tie is resolved
 *            by the low bit of the last quotient digit.
 *
 * When the exponent difference is below -1, A is returned unchanged
 * (and *mod_quot, if provided, is set to 0).
 */
1282static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
1283 uint64_t *mod_quot)
1284{
1285 uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
1286 int exp_diff = a->exp - b->exp;
1287 int shift;
1288
/* The running remainder is kept as the 192-bit value a0:a1:a2. */
1289 a0 = a->frac_hi;
1290 a1 = a->frac_lo;
1291 a2 = 0;
1292
1293 if (exp_diff < -1) {
1294 if (mod_quot) {
1295 *mod_quot = 0;
1296 }
1297 return;
1298 }
1299 if (exp_diff == -1) {
1300 shift128Right(a0, a1, 1, &a0, &a1);
1301 exp_diff = 0;
1302 }
1303
1304 b0 = b->frac_hi;
1305 b1 = b->frac_lo;
1306
/* First quotient bit: does the divisor fit into the aligned dividend? */
1307 quot = q = le128(b0, b1, a0, a1);
1308 if (q) {
1309 sub128(a0, a1, b0, b1, &a0, &a1);
1310 }
1311
/*
 * Long division, consuming 61 bits of exponent difference per pass.
 * The estimate uses only the high divisor word and is lowered by 4
 * before use -- presumably so that it can never exceed the true
 * quotient digit (TODO confirm against estimateDiv128To64).
 * quot keeps only the low 64 bits.
 */
1312 exp_diff -= 64;
1313 while (exp_diff > 0) {
1314 q = estimateDiv128To64(a0, a1, b0);
1315 q = q > 4 ? q - 4 : 0;
1316 mul128By64To192(b0, b1, q, &t0, &t1, &t2);
1317 sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1318 shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
1319 exp_diff -= 61;
1320 quot = (quot << 61) + q;
1321 }
1322
/*
 * Final partial step for the remaining exp_diff bits; the inner loop
 * corrects the deliberately low estimate by repeated subtraction of the
 * shifted divisor t0:t1:t2.
 */
1323 exp_diff += 64;
1324 if (exp_diff > 0) {
1325 q = estimateDiv128To64(a0, a1, b0);
1326 q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
1327 mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
1328 sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1329 shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
1330 while (le192(t0, t1, t2, a0, a1, a2)) {
1331 ++q;
1332 sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1333 }
1334 quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1335 } else {
/* No bits left: t0:t1:t2 is simply the divisor, for the comparison below. */
1336 t0 = b0;
1337 t1 = b1;
1338 t2 = 0;
1339 }
1340
1341 if (mod_quot) {
1342 *mod_quot = quot;
1343 } else {
/* t = divisor - remainder; take it (with opposite sign) if it is smaller. */
1344 sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
1345 if (lt192(t0, t1, t2, a0, a1, a2) ||
1346 (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
1347 a0 = t0;
1348 a1 = t1;
1349 a2 = t2;
1350 a->sign = !a->sign;
1351 }
1352 }
1353
/* Renormalize the remainder; an all-zero remainder becomes a zero class. */
1354 if (likely(a0)) {
1355 shift = clz64(a0);
1356 shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
1357 } else if (likely(a1)) {
1358 shift = clz64(a1);
1359 shortShift128Left(a1, a2, shift, &a0, &a1);
1360 a2 = 0;
1361 shift += 64;
1362 } else if (likely(a2)) {
1363 shift = clz64(a2);
1364 a0 = a2 << shift;
1365 a1 = a2 = 0;
1366 shift += 128;
1367 } else {
1368 a->cls = float_class_zero;
1369 return;
1370 }
1371
/* Any bits left in the lowest word are folded into a sticky lsb. */
1372 a->exp = b->exp + exp_diff - shift;
1373 a->frac_hi = a0;
1374 a->frac_lo = a1 | (a2 != 0);
1375}
1376
1377#define frac_modrem(A, B, Q) FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1378
d46975bc
RH
1379static void frac64_shl(FloatParts64 *a, int c)
1380{
1381 a->frac <<= c;
1382}
1383
1384static void frac128_shl(FloatParts128 *a, int c)
1385{
463e45dc
RH
1386 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1387
1388 if (c & 64) {
1389 a0 = a1, a1 = 0;
1390 }
1391
1392 c &= 63;
1393 if (c) {
1394 a0 = shl_double(a0, a1, c);
1395 a1 = a1 << c;
1396 }
1397
1398 a->frac_hi = a0;
1399 a->frac_lo = a1;
d46975bc
RH
1400}
1401
1402#define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1403
1404static void frac64_shr(FloatParts64 *a, int c)
1405{
1406 a->frac >>= c;
1407}
1408
1409static void frac128_shr(FloatParts128 *a, int c)
1410{
463e45dc
RH
1411 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1412
1413 if (c & 64) {
1414 a1 = a0, a0 = 0;
1415 }
1416
1417 c &= 63;
1418 if (c) {
1419 a1 = shr_double(a0, a1, c);
1420 a0 = a0 >> c;
1421 }
1422
1423 a->frac_hi = a0;
1424 a->frac_lo = a1;
d46975bc
RH
1425}
1426
1427#define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1428
ee6959f2 1429static void frac64_shrjam(FloatParts64 *a, int c)
6fff2167 1430{
463e45dc
RH
1431 uint64_t a0 = a->frac;
1432
1433 if (likely(c != 0)) {
1434 if (likely(c < 64)) {
1435 a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1436 } else {
1437 a0 = a0 != 0;
1438 }
1439 a->frac = a0;
1440 }
ee6959f2 1441}
6fff2167 1442
ee6959f2
RH
1443static void frac128_shrjam(FloatParts128 *a, int c)
1444{
463e45dc
RH
1445 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1446 uint64_t sticky = 0;
1447
1448 if (unlikely(c == 0)) {
1449 return;
1450 } else if (likely(c < 64)) {
1451 /* nothing */
1452 } else if (likely(c < 128)) {
1453 sticky = a1;
1454 a1 = a0;
1455 a0 = 0;
1456 c &= 63;
1457 if (c == 0) {
1458 goto done;
1459 }
1460 } else {
1461 sticky = a0 | a1;
1462 a0 = a1 = 0;
1463 goto done;
1464 }
1465
1466 sticky |= shr_double(a1, 0, c);
1467 a1 = shr_double(a0, a1, c);
1468 a0 = a0 >> c;
1469
1470 done:
1471 a->frac_lo = a1 | (sticky != 0);
1472 a->frac_hi = a0;
6fff2167
AB
1473}
1474
dedd123c
RH
1475static void frac256_shrjam(FloatParts256 *a, int c)
1476{
1477 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1478 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1479 uint64_t sticky = 0;
dedd123c
RH
1480
1481 if (unlikely(c == 0)) {
1482 return;
1483 } else if (likely(c < 64)) {
1484 /* nothing */
1485 } else if (likely(c < 256)) {
1486 if (unlikely(c & 128)) {
1487 sticky |= a2 | a3;
1488 a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1489 }
1490 if (unlikely(c & 64)) {
1491 sticky |= a3;
1492 a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1493 }
1494 c &= 63;
1495 if (c == 0) {
1496 goto done;
1497 }
1498 } else {
1499 sticky = a0 | a1 | a2 | a3;
1500 a0 = a1 = a2 = a3 = 0;
1501 goto done;
1502 }
1503
463e45dc
RH
1504 sticky |= shr_double(a3, 0, c);
1505 a3 = shr_double(a2, a3, c);
1506 a2 = shr_double(a1, a2, c);
1507 a1 = shr_double(a0, a1, c);
1508 a0 = a0 >> c;
dedd123c
RH
1509
1510 done:
1511 a->frac_lo = a3 | (sticky != 0);
1512 a->frac_lm = a2;
1513 a->frac_hm = a1;
1514 a->frac_hi = a0;
1515}
1516
1517#define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
d446830a 1518
da10a907
RH
1519static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1520{
1521 return usub64_overflow(a->frac, b->frac, &r->frac);
1522}
7c45bad8 1523
da10a907
RH
1524static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1525{
1526 bool c = 0;
1527 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1528 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1529 return c;
1530}
1531
dedd123c
RH
1532static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1533{
1534 bool c = 0;
1535 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1536 r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1537 r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1538 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1539 return c;
1540}
1541
1542#define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
da10a907 1543
aca84527
RH
1544static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1545{
1546 r->frac = a->frac_hi | (a->frac_lo != 0);
1547}
1548
1549static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1550{
1551 r->frac_hi = a->frac_hi;
1552 r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1553}
1554
1555#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1556
dedd123c
RH
1557static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1558{
1559 r->frac_hi = a->frac;
1560 r->frac_lo = 0;
1561}
1562
1563static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1564{
1565 r->frac_hi = a->frac_hi;
1566 r->frac_hm = a->frac_lo;
1567 r->frac_lm = 0;
1568 r->frac_lo = 0;
1569}
1570
1571#define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1572
9261b245
RH
1573/*
1574 * Reciprocal sqrt table. 1 bit of exponent, 6 bits of mantissa.
1575 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1576 * and thus MIT licensed.
1577 */
1578static const uint16_t rsqrt_tab[128] = {
1579 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1580 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1581 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1582 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1583 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1584 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1585 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1586 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1587 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1588 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1589 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1590 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1591 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1592 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1593 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1594 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1595};
1596
da10a907
RH
1597#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1598#define FloatPartsN glue(FloatParts,N)
aca84527 1599#define FloatPartsW glue(FloatParts,W)
da10a907
RH
1600
1601#define N 64
aca84527 1602#define W 128
da10a907
RH
1603
1604#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1605#include "softfloat-parts.c.inc"
1606
da10a907 1607#undef N
aca84527 1608#undef W
da10a907 1609#define N 128
aca84527 1610#define W 256
7c45bad8 1611
da10a907 1612#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1613#include "softfloat-parts.c.inc"
1614
dedd123c
RH
1615#undef N
1616#undef W
1617#define N 256
1618
1619#include "softfloat-parts-addsub.c.inc"
1620
da10a907 1621#undef N
aca84527 1622#undef W
7c45bad8
RH
1623#undef partsN
1624#undef FloatPartsN
aca84527 1625#undef FloatPartsW
7c45bad8 1626
aaffb7bf
RH
1627/*
1628 * Pack/unpack routines with a specific FloatFmt.
1629 */
1630
98e256fc
RH
1631static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1632 float_status *s, const FloatFmt *params)
aaffb7bf 1633{
98e256fc 1634 float16_unpack_raw(p, f);
d46975bc 1635 parts_canonicalize(p, s, params);
aaffb7bf
RH
1636}
1637
98e256fc
RH
1638static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1639 float_status *s)
aaffb7bf 1640{
98e256fc 1641 float16a_unpack_canonical(p, f, s, &float16_params);
aaffb7bf
RH
1642}
1643
98e256fc
RH
1644static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1645 float_status *s)
aaffb7bf 1646{
98e256fc 1647 bfloat16_unpack_raw(p, f);
d46975bc 1648 parts_canonicalize(p, s, &bfloat16_params);
aaffb7bf
RH
1649}
1650
e293e927
RH
1651static float16 float16a_round_pack_canonical(FloatParts64 *p,
1652 float_status *s,
aaffb7bf
RH
1653 const FloatFmt *params)
1654{
ee6959f2 1655 parts_uncanon(p, s, params);
e293e927 1656 return float16_pack_raw(p);
aaffb7bf
RH
1657}
1658
e293e927
RH
1659static float16 float16_round_pack_canonical(FloatParts64 *p,
1660 float_status *s)
aaffb7bf
RH
1661{
1662 return float16a_round_pack_canonical(p, s, &float16_params);
1663}
1664
e293e927
RH
1665static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1666 float_status *s)
aaffb7bf 1667{
ee6959f2 1668 parts_uncanon(p, s, &bfloat16_params);
e293e927 1669 return bfloat16_pack_raw(p);
aaffb7bf
RH
1670}
1671
98e256fc
RH
1672static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1673 float_status *s)
aaffb7bf 1674{
98e256fc 1675 float32_unpack_raw(p, f);
d46975bc 1676 parts_canonicalize(p, s, &float32_params);
aaffb7bf
RH
1677}
1678
e293e927
RH
1679static float32 float32_round_pack_canonical(FloatParts64 *p,
1680 float_status *s)
aaffb7bf 1681{
ee6959f2 1682 parts_uncanon(p, s, &float32_params);
e293e927 1683 return float32_pack_raw(p);
aaffb7bf
RH
1684}
1685
98e256fc
RH
1686static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1687 float_status *s)
aaffb7bf 1688{
98e256fc 1689 float64_unpack_raw(p, f);
d46975bc 1690 parts_canonicalize(p, s, &float64_params);
aaffb7bf
RH
1691}
1692
e293e927
RH
1693static float64 float64_round_pack_canonical(FloatParts64 *p,
1694 float_status *s)
aaffb7bf 1695{
ee6959f2 1696 parts_uncanon(p, s, &float64_params);
e293e927 1697 return float64_pack_raw(p);
aaffb7bf
RH
1698}
1699
42636fb9
RH
1700static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1701 float_status *s)
1702{
1703 parts_uncanon(p, s, &float32_params);
1704
1705 /*
1706 * In parts_uncanon, we placed the fraction for float32 at the lsb.
1707 * We need to adjust the fraction higher so that the least N bits are
1708 * zero, and the fraction is adjacent to the float64 implicit bit.
1709 */
1710 switch (p->cls) {
1711 case float_class_normal:
1712 if (unlikely(p->exp == 0)) {
1713 /*
1714 * The result is denormal for float32, but can be represented
1715 * in normalized form for float64. Adjust, per canonicalize.
1716 */
1717 int shift = frac_normalize(p);
1718 p->exp = (float32_params.frac_shift -
1719 float32_params.exp_bias - shift + 1 +
1720 float64_params.exp_bias);
1721 frac_shr(p, float64_params.frac_shift);
1722 } else {
1723 frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1724 p->exp += float64_params.exp_bias - float32_params.exp_bias;
1725 }
1726 break;
1727 case float_class_snan:
1728 case float_class_qnan:
1729 frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1730 p->exp = float64_params.exp_max;
1731 break;
1732 case float_class_inf:
1733 p->exp = float64_params.exp_max;
1734 break;
1735 case float_class_zero:
1736 break;
1737 default:
1738 g_assert_not_reached();
1739 }
1740
1741 return float64_pack_raw(p);
1742}
1743
3ff49e56
RH
1744static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1745 float_status *s)
1746{
1747 float128_unpack_raw(p, f);
1748 parts_canonicalize(p, s, &float128_params);
1749}
1750
1751static float128 float128_round_pack_canonical(FloatParts128 *p,
1752 float_status *s)
1753{
1754 parts_uncanon(p, s, &float128_params);
1755 return float128_pack_raw(p);
1756}
1757
c1b6299b
RH
1758/* Returns false if the encoding is invalid. */
1759static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1760 float_status *s)
1761{
1762 /* Ensure rounding precision is set before beginning. */
1763 switch (s->floatx80_rounding_precision) {
1764 case floatx80_precision_x:
1765 case floatx80_precision_d:
1766 case floatx80_precision_s:
1767 break;
1768 default:
1769 g_assert_not_reached();
1770 }
1771
1772 if (unlikely(floatx80_invalid_encoding(f))) {
1773 float_raise(float_flag_invalid, s);
1774 return false;
1775 }
1776
1777 floatx80_unpack_raw(p, f);
1778
1779 if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1780 parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1781 } else {
1782 /* The explicit integer bit is ignored, after invalid checks. */
1783 p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1784 p->cls = (p->frac_hi == 0 ? float_class_inf
1785 : parts_is_snan_frac(p->frac_hi, s)
1786 ? float_class_snan : float_class_qnan);
1787 }
1788 return true;
1789}
1790
1791static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1792 float_status *s)
1793{
1794 const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1795 uint64_t frac;
1796 int exp;
1797
1798 switch (p->cls) {
1799 case float_class_normal:
1800 if (s->floatx80_rounding_precision == floatx80_precision_x) {
1801 parts_uncanon_normal(p, s, fmt);
1802 frac = p->frac_hi;
1803 exp = p->exp;
1804 } else {
1805 FloatParts64 p64;
1806
1807 p64.sign = p->sign;
1808 p64.exp = p->exp;
1809 frac_truncjam(&p64, p);
1810 parts_uncanon_normal(&p64, s, fmt);
1811 frac = p64.frac;
1812 exp = p64.exp;
1813 }
1814 if (exp != fmt->exp_max) {
1815 break;
1816 }
1817 /* rounded to inf -- fall through to set frac correctly */
1818
1819 case float_class_inf:
1820 /* x86 and m68k differ in the setting of the integer bit. */
1821 frac = floatx80_infinity_low;
1822 exp = fmt->exp_max;
1823 break;
1824
1825 case float_class_zero:
1826 frac = 0;
1827 exp = 0;
1828 break;
1829
1830 case float_class_snan:
1831 case float_class_qnan:
1832 /* NaNs have the integer bit set. */
1833 frac = p->frac_hi | (1ull << 63);
1834 exp = fmt->exp_max;
1835 break;
1836
1837 default:
1838 g_assert_not_reached();
1839 }
1840
1841 return packFloatx80(p->sign, exp, frac);
1842}
1843
6fff2167 1844/*
da10a907 1845 * Addition and subtraction
6fff2167
AB
1846 */
1847
da10a907
RH
1848static float16 QEMU_FLATTEN
1849float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
6fff2167 1850{
da10a907 1851 FloatParts64 pa, pb, *pr;
98e256fc
RH
1852
1853 float16_unpack_canonical(&pa, a, status);
1854 float16_unpack_canonical(&pb, b, status);
da10a907 1855 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1856
da10a907 1857 return float16_round_pack_canonical(pr, status);
6fff2167
AB
1858}
1859
da10a907 1860float16 float16_add(float16 a, float16 b, float_status *status)
1b615d48 1861{
da10a907
RH
1862 return float16_addsub(a, b, status, false);
1863}
1b615d48 1864
da10a907
RH
1865float16 float16_sub(float16 a, float16 b, float_status *status)
1866{
1867 return float16_addsub(a, b, status, true);
1b615d48
EC
1868}
1869
1870static float32 QEMU_SOFTFLOAT_ATTR
da10a907 1871soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
6fff2167 1872{
da10a907 1873 FloatParts64 pa, pb, *pr;
98e256fc
RH
1874
1875 float32_unpack_canonical(&pa, a, status);
1876 float32_unpack_canonical(&pb, b, status);
da10a907 1877 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1878
da10a907 1879 return float32_round_pack_canonical(pr, status);
6fff2167
AB
1880}
1881
da10a907 1882static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1b615d48 1883{
da10a907 1884 return soft_f32_addsub(a, b, status, false);
1b615d48
EC
1885}
1886
da10a907 1887static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1b615d48 1888{
da10a907 1889 return soft_f32_addsub(a, b, status, true);
1b615d48
EC
1890}
1891
1892static float64 QEMU_SOFTFLOAT_ATTR
da10a907 1893soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
6fff2167 1894{
da10a907 1895 FloatParts64 pa, pb, *pr;
98e256fc
RH
1896
1897 float64_unpack_canonical(&pa, a, status);
1898 float64_unpack_canonical(&pb, b, status);
da10a907 1899 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1900
da10a907 1901 return float64_round_pack_canonical(pr, status);
6fff2167
AB
1902}
1903
da10a907 1904static float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1905{
da10a907 1906 return soft_f64_addsub(a, b, status, false);
1b615d48 1907}
6fff2167 1908
da10a907 1909static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1b615d48 1910{
da10a907 1911 return soft_f64_addsub(a, b, status, true);
6fff2167
AB
1912}
1913
/* Host-FPU single-precision addition. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;

    return sum;
}
6fff2167 1918
1b615d48
EC
/* Host-FPU single-precision subtraction. */
static float hard_f32_sub(float a, float b)
{
    float diff = a - b;

    return diff;
}
1923
/* Host-FPU double-precision addition. */
static double hard_f64_add(double a, double b)
{
    double sum = a + b;

    return sum;
}
6fff2167 1928
1b615d48
EC
/* Host-FPU double-precision subtraction. */
static double hard_f64_sub(double a, double b)
{
    double diff = a - b;

    return diff;
}
1933
b240c9c4 1934static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1b615d48
EC
1935{
1936 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1937 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1938 }
1939 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1940}
1941
b240c9c4 1942static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1b615d48
EC
1943{
1944 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1945 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1946 } else {
1947 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1948 }
1949}
1950
1951static float32 float32_addsub(float32 a, float32 b, float_status *s,
1952 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1953{
1954 return float32_gen2(a, b, s, hard, soft,
b240c9c4 1955 f32_is_zon2, f32_addsubmul_post);
1b615d48
EC
1956}
1957
1958static float64 float64_addsub(float64 a, float64 b, float_status *s,
1959 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1960{
1961 return float64_gen2(a, b, s, hard, soft,
b240c9c4 1962 f64_is_zon2, f64_addsubmul_post);
1b615d48
EC
1963}
1964
1965float32 QEMU_FLATTEN
1966float32_add(float32 a, float32 b, float_status *s)
1967{
1968 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1969}
1970
1971float32 QEMU_FLATTEN
1972float32_sub(float32 a, float32 b, float_status *s)
1973{
1974 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1975}
1976
1977float64 QEMU_FLATTEN
1978float64_add(float64 a, float64 b, float_status *s)
1979{
1980 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1981}
1982
1983float64 QEMU_FLATTEN
1984float64_sub(float64 a, float64 b, float_status *s)
1985{
1986 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1987}
1988
42636fb9
RH
1989static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
1990 bool subtract)
1991{
1992 FloatParts64 pa, pb, *pr;
1993
1994 float64_unpack_canonical(&pa, a, status);
1995 float64_unpack_canonical(&pb, b, status);
1996 pr = parts_addsub(&pa, &pb, status, subtract);
1997
1998 return float64r32_round_pack_canonical(pr, status);
1999}
2000
2001float64 float64r32_add(float64 a, float64 b, float_status *status)
2002{
2003 return float64r32_addsub(a, b, status, false);
2004}
2005
2006float64 float64r32_sub(float64 a, float64 b, float_status *status)
2007{
2008 return float64r32_addsub(a, b, status, true);
2009}
2010
da10a907
RH
2011static bfloat16 QEMU_FLATTEN
2012bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
8282310d 2013{
da10a907 2014 FloatParts64 pa, pb, *pr;
98e256fc
RH
2015
2016 bfloat16_unpack_canonical(&pa, a, status);
2017 bfloat16_unpack_canonical(&pb, b, status);
da10a907 2018 pr = parts_addsub(&pa, &pb, status, subtract);
8282310d 2019
da10a907 2020 return bfloat16_round_pack_canonical(pr, status);
8282310d
LZ
2021}
2022
da10a907 2023bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
8282310d 2024{
da10a907
RH
2025 return bfloat16_addsub(a, b, status, false);
2026}
8282310d 2027
da10a907
RH
2028bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2029{
2030 return bfloat16_addsub(a, b, status, true);
8282310d
LZ
2031}
2032
3ff49e56
RH
2033static float128 QEMU_FLATTEN
2034float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2035{
2036 FloatParts128 pa, pb, *pr;
2037
2038 float128_unpack_canonical(&pa, a, status);
2039 float128_unpack_canonical(&pb, b, status);
2040 pr = parts_addsub(&pa, &pb, status, subtract);
2041
2042 return float128_round_pack_canonical(pr, status);
2043}
2044
2045float128 float128_add(float128 a, float128 b, float_status *status)
2046{
2047 return float128_addsub(a, b, status, false);
2048}
2049
2050float128 float128_sub(float128 a, float128 b, float_status *status)
2051{
2052 return float128_addsub(a, b, status, true);
2053}
2054
c1b6299b
RH
2055static floatx80 QEMU_FLATTEN
2056floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2057{
2058 FloatParts128 pa, pb, *pr;
2059
2060 if (!floatx80_unpack_canonical(&pa, a, status) ||
2061 !floatx80_unpack_canonical(&pb, b, status)) {
2062 return floatx80_default_nan(status);
2063 }
2064
2065 pr = parts_addsub(&pa, &pb, status, subtract);
2066 return floatx80_round_pack_canonical(pr, status);
2067}
2068
2069floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2070{
2071 return floatx80_addsub(a, b, status, false);
2072}
2073
2074floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2075{
2076 return floatx80_addsub(a, b, status, true);
2077}
2078
74d707e2 2079/*
aca84527 2080 * Multiplication
74d707e2
AB
2081 */
2082
97ff87c0 2083float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2 2084{
aca84527 2085 FloatParts64 pa, pb, *pr;
98e256fc
RH
2086
2087 float16_unpack_canonical(&pa, a, status);
2088 float16_unpack_canonical(&pb, b, status);
aca84527 2089 pr = parts_mul(&pa, &pb, status);
74d707e2 2090
aca84527 2091 return float16_round_pack_canonical(pr, status);
74d707e2
AB
2092}
2093
2dfabc86
EC
2094static float32 QEMU_SOFTFLOAT_ATTR
2095soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2 2096{
aca84527 2097 FloatParts64 pa, pb, *pr;
98e256fc
RH
2098
2099 float32_unpack_canonical(&pa, a, status);
2100 float32_unpack_canonical(&pb, b, status);
aca84527 2101 pr = parts_mul(&pa, &pb, status);
74d707e2 2102
aca84527 2103 return float32_round_pack_canonical(pr, status);
74d707e2
AB
2104}
2105
2dfabc86
EC
2106static float64 QEMU_SOFTFLOAT_ATTR
2107soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2 2108{
aca84527 2109 FloatParts64 pa, pb, *pr;
98e256fc
RH
2110
2111 float64_unpack_canonical(&pa, a, status);
2112 float64_unpack_canonical(&pb, b, status);
aca84527 2113 pr = parts_mul(&pa, &pb, status);
74d707e2 2114
aca84527 2115 return float64_round_pack_canonical(pr, status);
74d707e2
AB
2116}
2117
2dfabc86
EC
/* Host single-precision a * b; the 'hard' callback used by float32_mul(). */
static float hard_f32_mul(float a, float b)
{
    return (a * b);
}
2122
/* Host double-precision a * b; the 'hard' callback used by float64_mul(). */
static double hard_f64_mul(double a, double b)
{
    return (a * b);
}
2127
2dfabc86
EC
2128float32 QEMU_FLATTEN
2129float32_mul(float32 a, float32 b, float_status *s)
2130{
2131 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
b240c9c4 2132 f32_is_zon2, f32_addsubmul_post);
2dfabc86
EC
2133}
2134
2135float64 QEMU_FLATTEN
2136float64_mul(float64 a, float64 b, float_status *s)
2137{
2138 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
b240c9c4 2139 f64_is_zon2, f64_addsubmul_post);
2dfabc86
EC
2140}
2141
42636fb9
RH
2142float64 float64r32_mul(float64 a, float64 b, float_status *status)
2143{
2144 FloatParts64 pa, pb, *pr;
2145
2146 float64_unpack_canonical(&pa, a, status);
2147 float64_unpack_canonical(&pb, b, status);
2148 pr = parts_mul(&pa, &pb, status);
2149
2150 return float64r32_round_pack_canonical(pr, status);
2151}
2152
aca84527
RH
2153bfloat16 QEMU_FLATTEN
2154bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
8282310d 2155{
aca84527 2156 FloatParts64 pa, pb, *pr;
98e256fc
RH
2157
2158 bfloat16_unpack_canonical(&pa, a, status);
2159 bfloat16_unpack_canonical(&pb, b, status);
aca84527 2160 pr = parts_mul(&pa, &pb, status);
8282310d 2161
aca84527
RH
2162 return bfloat16_round_pack_canonical(pr, status);
2163}
2164
2165float128 QEMU_FLATTEN
2166float128_mul(float128 a, float128 b, float_status *status)
2167{
2168 FloatParts128 pa, pb, *pr;
2169
2170 float128_unpack_canonical(&pa, a, status);
2171 float128_unpack_canonical(&pb, b, status);
2172 pr = parts_mul(&pa, &pb, status);
2173
2174 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2175}
2176
153f664a
RH
2177floatx80 QEMU_FLATTEN
2178floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2179{
2180 FloatParts128 pa, pb, *pr;
2181
2182 if (!floatx80_unpack_canonical(&pa, a, status) ||
2183 !floatx80_unpack_canonical(&pb, b, status)) {
2184 return floatx80_default_nan(status);
2185 }
2186
2187 pr = parts_mul(&pa, &pb, status);
2188 return floatx80_round_pack_canonical(pr, status);
2189}
2190
d446830a 2191/*
dedd123c 2192 * Fused multiply-add
d446830a
AB
2193 */
2194
97ff87c0 2195float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
dedd123c 2196 int flags, float_status *status)
d446830a 2197{
dedd123c 2198 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
2199
2200 float16_unpack_canonical(&pa, a, status);
2201 float16_unpack_canonical(&pb, b, status);
2202 float16_unpack_canonical(&pc, c, status);
dedd123c 2203 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 2204
dedd123c 2205 return float16_round_pack_canonical(pr, status);
d446830a
AB
2206}
2207
ccf770ba
EC
2208static float32 QEMU_SOFTFLOAT_ATTR
2209soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2210 float_status *status)
d446830a 2211{
dedd123c 2212 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
2213
2214 float32_unpack_canonical(&pa, a, status);
2215 float32_unpack_canonical(&pb, b, status);
2216 float32_unpack_canonical(&pc, c, status);
dedd123c 2217 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 2218
dedd123c 2219 return float32_round_pack_canonical(pr, status);
d446830a
AB
2220}
2221
ccf770ba
EC
2222static float64 QEMU_SOFTFLOAT_ATTR
2223soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2224 float_status *status)
d446830a 2225{
dedd123c 2226 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
2227
2228 float64_unpack_canonical(&pa, a, status);
2229 float64_unpack_canonical(&pb, b, status);
2230 float64_unpack_canonical(&pc, c, status);
dedd123c 2231 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 2232
dedd123c 2233 return float64_round_pack_canonical(pr, status);
d446830a
AB
2234}
2235
f6b3b108
EC
/*
 * When true, float32_muladd() and float64_muladd() skip the host fma call
 * and take their softfloat path.  Where it gets set is not visible in this
 * part of the file.
 */
static bool force_soft_fma;
2237
ccf770ba
EC
2238float32 QEMU_FLATTEN
2239float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2240{
2241 union_float32 ua, ub, uc, ur;
2242
2243 ua.s = xa;
2244 ub.s = xb;
2245 uc.s = xc;
2246
2247 if (unlikely(!can_use_fpu(s))) {
2248 goto soft;
2249 }
2250 if (unlikely(flags & float_muladd_halve_result)) {
2251 goto soft;
2252 }
2253
2254 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2255 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2256 goto soft;
2257 }
f6b3b108
EC
2258
2259 if (unlikely(force_soft_fma)) {
2260 goto soft;
2261 }
2262
ccf770ba
EC
2263 /*
2264 * When (a || b) == 0, there's no need to check for under/over flow,
2265 * since we know the addend is (normal || 0) and the product is 0.
2266 */
2267 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2268 union_float32 up;
2269 bool prod_sign;
2270
2271 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2272 prod_sign ^= !!(flags & float_muladd_negate_product);
2273 up.s = float32_set_sign(float32_zero, prod_sign);
2274
2275 if (flags & float_muladd_negate_c) {
2276 uc.h = -uc.h;
2277 }
2278 ur.h = up.h + uc.h;
2279 } else {
896f51fb
KC
2280 union_float32 ua_orig = ua;
2281 union_float32 uc_orig = uc;
2282
ccf770ba
EC
2283 if (flags & float_muladd_negate_product) {
2284 ua.h = -ua.h;
2285 }
2286 if (flags & float_muladd_negate_c) {
2287 uc.h = -uc.h;
2288 }
2289
2290 ur.h = fmaf(ua.h, ub.h, uc.h);
2291
2292 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 2293 float_raise(float_flag_overflow, s);
ccf770ba 2294 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
2295 ua = ua_orig;
2296 uc = uc_orig;
ccf770ba
EC
2297 goto soft;
2298 }
2299 }
2300 if (flags & float_muladd_negate_result) {
2301 return float32_chs(ur.s);
2302 }
2303 return ur.s;
2304
2305 soft:
2306 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2307}
2308
2309float64 QEMU_FLATTEN
2310float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2311{
2312 union_float64 ua, ub, uc, ur;
2313
2314 ua.s = xa;
2315 ub.s = xb;
2316 uc.s = xc;
2317
2318 if (unlikely(!can_use_fpu(s))) {
2319 goto soft;
2320 }
2321 if (unlikely(flags & float_muladd_halve_result)) {
2322 goto soft;
2323 }
2324
2325 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2326 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2327 goto soft;
2328 }
f6b3b108
EC
2329
2330 if (unlikely(force_soft_fma)) {
2331 goto soft;
2332 }
2333
ccf770ba
EC
2334 /*
2335 * When (a || b) == 0, there's no need to check for under/over flow,
2336 * since we know the addend is (normal || 0) and the product is 0.
2337 */
2338 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2339 union_float64 up;
2340 bool prod_sign;
2341
2342 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2343 prod_sign ^= !!(flags & float_muladd_negate_product);
2344 up.s = float64_set_sign(float64_zero, prod_sign);
2345
2346 if (flags & float_muladd_negate_c) {
2347 uc.h = -uc.h;
2348 }
2349 ur.h = up.h + uc.h;
2350 } else {
896f51fb
KC
2351 union_float64 ua_orig = ua;
2352 union_float64 uc_orig = uc;
2353
ccf770ba
EC
2354 if (flags & float_muladd_negate_product) {
2355 ua.h = -ua.h;
2356 }
2357 if (flags & float_muladd_negate_c) {
2358 uc.h = -uc.h;
2359 }
2360
2361 ur.h = fma(ua.h, ub.h, uc.h);
2362
2363 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 2364 float_raise(float_flag_overflow, s);
ccf770ba 2365 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
2366 ua = ua_orig;
2367 uc = uc_orig;
ccf770ba
EC
2368 goto soft;
2369 }
2370 }
2371 if (flags & float_muladd_negate_result) {
2372 return float64_chs(ur.s);
2373 }
2374 return ur.s;
2375
2376 soft:
2377 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2378}
2379
42636fb9
RH
2380float64 float64r32_muladd(float64 a, float64 b, float64 c,
2381 int flags, float_status *status)
2382{
2383 FloatParts64 pa, pb, pc, *pr;
2384
2385 float64_unpack_canonical(&pa, a, status);
2386 float64_unpack_canonical(&pb, b, status);
2387 float64_unpack_canonical(&pc, c, status);
2388 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2389
2390 return float64r32_round_pack_canonical(pr, status);
2391}
2392
8282310d
LZ
2393bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2394 int flags, float_status *status)
2395{
dedd123c 2396 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
2397
2398 bfloat16_unpack_canonical(&pa, a, status);
2399 bfloat16_unpack_canonical(&pb, b, status);
2400 bfloat16_unpack_canonical(&pc, c, status);
dedd123c
RH
2401 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2402
2403 return bfloat16_round_pack_canonical(pr, status);
2404}
8282310d 2405
dedd123c
RH
2406float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2407 int flags, float_status *status)
2408{
2409 FloatParts128 pa, pb, pc, *pr;
2410
2411 float128_unpack_canonical(&pa, a, status);
2412 float128_unpack_canonical(&pb, b, status);
2413 float128_unpack_canonical(&pc, c, status);
2414 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2415
2416 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2417}
2418
cf07323d 2419/*
ec961b81 2420 * Division
cf07323d
AB
2421 */
2422
cf07323d
AB
2423float16 float16_div(float16 a, float16 b, float_status *status)
2424{
ec961b81 2425 FloatParts64 pa, pb, *pr;
98e256fc
RH
2426
2427 float16_unpack_canonical(&pa, a, status);
2428 float16_unpack_canonical(&pb, b, status);
ec961b81 2429 pr = parts_div(&pa, &pb, status);
cf07323d 2430
ec961b81 2431 return float16_round_pack_canonical(pr, status);
cf07323d
AB
2432}
2433
4a629561
EC
2434static float32 QEMU_SOFTFLOAT_ATTR
2435soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d 2436{
ec961b81 2437 FloatParts64 pa, pb, *pr;
98e256fc
RH
2438
2439 float32_unpack_canonical(&pa, a, status);
2440 float32_unpack_canonical(&pb, b, status);
ec961b81 2441 pr = parts_div(&pa, &pb, status);
cf07323d 2442
ec961b81 2443 return float32_round_pack_canonical(pr, status);
cf07323d
AB
2444}
2445
4a629561
EC
2446static float64 QEMU_SOFTFLOAT_ATTR
2447soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d 2448{
ec961b81 2449 FloatParts64 pa, pb, *pr;
98e256fc
RH
2450
2451 float64_unpack_canonical(&pa, a, status);
2452 float64_unpack_canonical(&pb, b, status);
ec961b81 2453 pr = parts_div(&pa, &pb, status);
cf07323d 2454
ec961b81 2455 return float64_round_pack_canonical(pr, status);
cf07323d
AB
2456}
2457
4a629561
EC
/* Host single-precision a / b; the 'hard' callback used by float32_div(). */
static float hard_f32_div(float a, float b)
{
    return (a / b);
}
2462
/* Host double-precision a / b; the 'hard' callback used by float64_div(). */
static double hard_f64_div(double a, double b)
{
    return (a / b);
}
2467
2468static bool f32_div_pre(union_float32 a, union_float32 b)
2469{
2470 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2471 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2472 fpclassify(b.h) == FP_NORMAL;
2473 }
2474 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2475}
2476
2477static bool f64_div_pre(union_float64 a, union_float64 b)
2478{
2479 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2480 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2481 fpclassify(b.h) == FP_NORMAL;
2482 }
2483 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2484}
2485
2486static bool f32_div_post(union_float32 a, union_float32 b)
2487{
2488 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2489 return fpclassify(a.h) != FP_ZERO;
2490 }
2491 return !float32_is_zero(a.s);
2492}
2493
2494static bool f64_div_post(union_float64 a, union_float64 b)
2495{
2496 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2497 return fpclassify(a.h) != FP_ZERO;
2498 }
2499 return !float64_is_zero(a.s);
2500}
2501
2502float32 QEMU_FLATTEN
2503float32_div(float32 a, float32 b, float_status *s)
2504{
2505 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
b240c9c4 2506 f32_div_pre, f32_div_post);
4a629561
EC
2507}
2508
2509float64 QEMU_FLATTEN
2510float64_div(float64 a, float64 b, float_status *s)
2511{
2512 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
b240c9c4 2513 f64_div_pre, f64_div_post);
4a629561
EC
2514}
2515
42636fb9
RH
2516float64 float64r32_div(float64 a, float64 b, float_status *status)
2517{
2518 FloatParts64 pa, pb, *pr;
2519
2520 float64_unpack_canonical(&pa, a, status);
2521 float64_unpack_canonical(&pb, b, status);
2522 pr = parts_div(&pa, &pb, status);
2523
2524 return float64r32_round_pack_canonical(pr, status);
2525}
2526
ec961b81
RH
2527bfloat16 QEMU_FLATTEN
2528bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
8282310d 2529{
ec961b81 2530 FloatParts64 pa, pb, *pr;
98e256fc
RH
2531
2532 bfloat16_unpack_canonical(&pa, a, status);
2533 bfloat16_unpack_canonical(&pb, b, status);
ec961b81 2534 pr = parts_div(&pa, &pb, status);
8282310d 2535
ec961b81
RH
2536 return bfloat16_round_pack_canonical(pr, status);
2537}
2538
2539float128 QEMU_FLATTEN
2540float128_div(float128 a, float128 b, float_status *status)
2541{
2542 FloatParts128 pa, pb, *pr;
2543
2544 float128_unpack_canonical(&pa, a, status);
2545 float128_unpack_canonical(&pb, b, status);
2546 pr = parts_div(&pa, &pb, status);
2547
2548 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2549}
2550
38db99e2
RH
2551floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2552{
2553 FloatParts128 pa, pb, *pr;
2554
2555 if (!floatx80_unpack_canonical(&pa, a, status) ||
2556 !floatx80_unpack_canonical(&pb, b, status)) {
2557 return floatx80_default_nan(status);
2558 }
2559
2560 pr = parts_div(&pa, &pb, status);
2561 return floatx80_round_pack_canonical(pr, status);
2562}
2563
feaf2e9c
RH
2564/*
2565 * Remainder
2566 */
2567
2568float32 float32_rem(float32 a, float32 b, float_status *status)
2569{
2570 FloatParts64 pa, pb, *pr;
2571
2572 float32_unpack_canonical(&pa, a, status);
2573 float32_unpack_canonical(&pb, b, status);
2574 pr = parts_modrem(&pa, &pb, NULL, status);
2575
2576 return float32_round_pack_canonical(pr, status);
2577}
2578
2579float64 float64_rem(float64 a, float64 b, float_status *status)
2580{
2581 FloatParts64 pa, pb, *pr;
2582
2583 float64_unpack_canonical(&pa, a, status);
2584 float64_unpack_canonical(&pb, b, status);
2585 pr = parts_modrem(&pa, &pb, NULL, status);
2586
2587 return float64_round_pack_canonical(pr, status);
2588}
2589
2590float128 float128_rem(float128 a, float128 b, float_status *status)
2591{
2592 FloatParts128 pa, pb, *pr;
2593
2594 float128_unpack_canonical(&pa, a, status);
2595 float128_unpack_canonical(&pb, b, status);
2596 pr = parts_modrem(&pa, &pb, NULL, status);
2597
2598 return float128_round_pack_canonical(pr, status);
2599}
2600
2601/*
2602 * Returns the remainder of the extended double-precision floating-point value
2603 * `a' with respect to the corresponding value `b'.
2604 * If 'mod' is false, the operation is performed according to the IEC/IEEE
2605 * Standard for Binary Floating-Point Arithmetic. If 'mod' is true, return
2606 * the remainder based on truncating the quotient toward zero instead and
2607 * *quotient is set to the low 64 bits of the absolute value of the integer
2608 * quotient.
2609 */
2610floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2611 uint64_t *quotient, float_status *status)
2612{
2613 FloatParts128 pa, pb, *pr;
2614
2615 *quotient = 0;
2616 if (!floatx80_unpack_canonical(&pa, a, status) ||
2617 !floatx80_unpack_canonical(&pb, b, status)) {
2618 return floatx80_default_nan(status);
2619 }
2620 pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2621
2622 return floatx80_round_pack_canonical(pr, status);
2623}
2624
2625floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2626{
2627 uint64_t quotient;
2628 return floatx80_modrem(a, b, false, &quotient, status);
2629}
2630
2631floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2632{
2633 uint64_t quotient;
2634 return floatx80_modrem(a, b, true, &quotient, status);
2635}
2636
6fed16b2
AB
2637/*
2638 * Float to Float conversions
2639 *
2640 * Returns the result of converting one float format to another. The
2641 * conversion is performed according to the IEC/IEEE Standard for
2642 * Binary Floating-Point Arithmetic.
2643 *
c3f1875e
RH
2644 * Usually this only needs to take care of raising invalid exceptions
2645 * and handling the conversion on NaNs.
6fed16b2
AB
2646 */
2647
c3f1875e
RH
2648static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2649{
2650 switch (a->cls) {
c3f1875e 2651 case float_class_snan:
e706d445
RH
2652 float_raise(float_flag_invalid_snan, s);
2653 /* fall through */
2654 case float_class_qnan:
c3f1875e
RH
2655 /*
2656 * There is no NaN in the destination format. Raise Invalid
2657 * and return a zero with the sign of the input NaN.
2658 */
2659 float_raise(float_flag_invalid, s);
2660 a->cls = float_class_zero;
2661 break;
2662
2663 case float_class_inf:
2664 /*
2665 * There is no Inf in the destination format. Raise Invalid
2666 * and return the maximum normal with the correct sign.
2667 */
2668 float_raise(float_flag_invalid, s);
2669 a->cls = float_class_normal;
2670 a->exp = float16_params_ahp.exp_max;
2671 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2672 float16_params_ahp.frac_size + 1);
2673 break;
2674
2675 case float_class_normal:
2676 case float_class_zero:
2677 break;
2678
2679 default:
2680 g_assert_not_reached();
2681 }
2682}
2683
2684static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2685{
2686 if (is_nan(a->cls)) {
2687 parts_return_nan(a, s);
6fed16b2 2688 }
6fed16b2
AB
2689}
2690
c3f1875e
RH
2691static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2692{
2693 if (is_nan(a->cls)) {
2694 parts_return_nan(a, s);
2695 }
2696}
2697
/*
 * Width-generic front end for parts64_float_to_float() and
 * parts128_float_to_float(); presumably PARTS_GENERIC_64_128 picks between
 * them from the type of P (that macro is defined outside this section).
 */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2700
9882ccaf
RH
2701static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2702 float_status *s)
2703{
2704 a->cls = b->cls;
2705 a->sign = b->sign;
2706 a->exp = b->exp;
2707
2708 if (a->cls == float_class_normal) {
2709 frac_truncjam(a, b);
2710 } else if (is_nan(a->cls)) {
2711 /* Discard the low bits of the NaN. */
2712 a->frac = b->frac_hi;
2713 parts_return_nan(a, s);
2714 }
2715}
2716
2717static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2718 float_status *s)
2719{
2720 a->cls = b->cls;
2721 a->sign = b->sign;
2722 a->exp = b->exp;
2723 frac_widen(a, b);
2724
2725 if (is_nan(a->cls)) {
2726 parts_return_nan(a, s);
2727 }
2728}
2729
6fed16b2
AB
2730float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2731{
2732 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2733 FloatParts64 p;
98e256fc 2734
c3f1875e
RH
2735 float16a_unpack_canonical(&p, a, s, fmt16);
2736 parts_float_to_float(&p, s);
2737 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2738}
2739
2740float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2741{
2742 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2743 FloatParts64 p;
98e256fc 2744
c3f1875e
RH
2745 float16a_unpack_canonical(&p, a, s, fmt16);
2746 parts_float_to_float(&p, s);
2747 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2748}
2749
2750float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2751{
c3f1875e
RH
2752 FloatParts64 p;
2753 const FloatFmt *fmt;
98e256fc 2754
c3f1875e
RH
2755 float32_unpack_canonical(&p, a, s);
2756 if (ieee) {
2757 parts_float_to_float(&p, s);
2758 fmt = &float16_params;
2759 } else {
2760 parts_float_to_ahp(&p, s);
2761 fmt = &float16_params_ahp;
2762 }
2763 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2764}
2765
21381dcf
MK
2766static float64 QEMU_SOFTFLOAT_ATTR
2767soft_float32_to_float64(float32 a, float_status *s)
6fed16b2 2768{
c3f1875e 2769 FloatParts64 p;
98e256fc 2770
c3f1875e
RH
2771 float32_unpack_canonical(&p, a, s);
2772 parts_float_to_float(&p, s);
2773 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2774}
2775
21381dcf
MK
2776float64 float32_to_float64(float32 a, float_status *s)
2777{
2778 if (likely(float32_is_normal(a))) {
2779 /* Widening conversion can never produce inexact results. */
2780 union_float32 uf;
2781 union_float64 ud;
2782 uf.s = a;
2783 ud.h = uf.h;
2784 return ud.s;
2785 } else if (float32_is_zero(a)) {
2786 return float64_set_sign(float64_zero, float32_is_neg(a));
2787 } else {
2788 return soft_float32_to_float64(a, s);
2789 }
2790}
2791
6fed16b2
AB
2792float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2793{
c3f1875e
RH
2794 FloatParts64 p;
2795 const FloatFmt *fmt;
98e256fc 2796
c3f1875e
RH
2797 float64_unpack_canonical(&p, a, s);
2798 if (ieee) {
2799 parts_float_to_float(&p, s);
2800 fmt = &float16_params;
2801 } else {
2802 parts_float_to_ahp(&p, s);
2803 fmt = &float16_params_ahp;
2804 }
2805 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2806}
2807
2808float32 float64_to_float32(float64 a, float_status *s)
2809{
c3f1875e 2810 FloatParts64 p;
98e256fc 2811
c3f1875e
RH
2812 float64_unpack_canonical(&p, a, s);
2813 parts_float_to_float(&p, s);
2814 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2815}
2816
34f0c0a9
LZ
2817float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2818{
c3f1875e 2819 FloatParts64 p;
98e256fc 2820
c3f1875e
RH
2821 bfloat16_unpack_canonical(&p, a, s);
2822 parts_float_to_float(&p, s);
2823 return float32_round_pack_canonical(&p, s);
34f0c0a9
LZ
2824}
2825
2826float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2827{
c3f1875e 2828 FloatParts64 p;
98e256fc 2829
c3f1875e
RH
2830 bfloat16_unpack_canonical(&p, a, s);
2831 parts_float_to_float(&p, s);
2832 return float64_round_pack_canonical(&p, s);
34f0c0a9
LZ
2833}
2834
2835bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2836{
c3f1875e 2837 FloatParts64 p;
98e256fc 2838
c3f1875e
RH
2839 float32_unpack_canonical(&p, a, s);
2840 parts_float_to_float(&p, s);
2841 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2842}
2843
2844bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2845{
c3f1875e 2846 FloatParts64 p;
98e256fc 2847
c3f1875e
RH
2848 float64_unpack_canonical(&p, a, s);
2849 parts_float_to_float(&p, s);
2850 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2851}
2852
9882ccaf
RH
2853float32 float128_to_float32(float128 a, float_status *s)
2854{
2855 FloatParts64 p64;
2856 FloatParts128 p128;
2857
2858 float128_unpack_canonical(&p128, a, s);
2859 parts_float_to_float_narrow(&p64, &p128, s);
2860 return float32_round_pack_canonical(&p64, s);
2861}
2862
2863float64 float128_to_float64(float128 a, float_status *s)
2864{
2865 FloatParts64 p64;
2866 FloatParts128 p128;
2867
2868 float128_unpack_canonical(&p128, a, s);
2869 parts_float_to_float_narrow(&p64, &p128, s);
2870 return float64_round_pack_canonical(&p64, s);
2871}
2872
2873float128 float32_to_float128(float32 a, float_status *s)
2874{
2875 FloatParts64 p64;
2876 FloatParts128 p128;
2877
2878 float32_unpack_canonical(&p64, a, s);
2879 parts_float_to_float_widen(&p128, &p64, s);
2880 return float128_round_pack_canonical(&p128, s);
2881}
2882
2883float128 float64_to_float128(float64 a, float_status *s)
2884{
2885 FloatParts64 p64;
2886 FloatParts128 p128;
2887
2888 float64_unpack_canonical(&p64, a, s);
2889 parts_float_to_float_widen(&p128, &p64, s);
2890 return float128_round_pack_canonical(&p128, s);
2891}
2892
8ae5719c
RH
2893float32 floatx80_to_float32(floatx80 a, float_status *s)
2894{
2895 FloatParts64 p64;
2896 FloatParts128 p128;
2897
2898 if (floatx80_unpack_canonical(&p128, a, s)) {
2899 parts_float_to_float_narrow(&p64, &p128, s);
2900 } else {
2901 parts_default_nan(&p64, s);
2902 }
2903 return float32_round_pack_canonical(&p64, s);
2904}
2905
2906float64 floatx80_to_float64(floatx80 a, float_status *s)
2907{
2908 FloatParts64 p64;
2909 FloatParts128 p128;
2910
2911 if (floatx80_unpack_canonical(&p128, a, s)) {
2912 parts_float_to_float_narrow(&p64, &p128, s);
2913 } else {
2914 parts_default_nan(&p64, s);
2915 }
2916 return float64_round_pack_canonical(&p64, s);
2917}
2918
2919float128 floatx80_to_float128(floatx80 a, float_status *s)
2920{
2921 FloatParts128 p;
2922
2923 if (floatx80_unpack_canonical(&p, a, s)) {
2924 parts_float_to_float(&p, s);
2925 } else {
2926 parts_default_nan(&p, s);
2927 }
2928 return float128_round_pack_canonical(&p, s);
2929}
2930
2931floatx80 float32_to_floatx80(float32 a, float_status *s)
2932{
2933 FloatParts64 p64;
2934 FloatParts128 p128;
2935
2936 float32_unpack_canonical(&p64, a, s);
2937 parts_float_to_float_widen(&p128, &p64, s);
2938 return floatx80_round_pack_canonical(&p128, s);
2939}
2940
2941floatx80 float64_to_floatx80(float64 a, float_status *s)
2942{
2943 FloatParts64 p64;
2944 FloatParts128 p128;
2945
2946 float64_unpack_canonical(&p64, a, s);
2947 parts_float_to_float_widen(&p128, &p64, s);
2948 return floatx80_round_pack_canonical(&p128, s);
2949}
2950
2951floatx80 float128_to_floatx80(float128 a, float_status *s)
2952{
2953 FloatParts128 p;
2954
2955 float128_unpack_canonical(&p, a, s);
2956 parts_float_to_float(&p, s);
2957 return floatx80_round_pack_canonical(&p, s);
2958}
2959
dbe4d53a 2960/*
afc34931 2961 * Round to integral value
dbe4d53a
AB
2962 */
2963
dbe4d53a
AB
2964float16 float16_round_to_int(float16 a, float_status *s)
2965{
afc34931 2966 FloatParts64 p;
98e256fc 2967
afc34931
RH
2968 float16_unpack_canonical(&p, a, s);
2969 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2970 return float16_round_pack_canonical(&p, s);
dbe4d53a
AB
2971}
2972
2973float32 float32_round_to_int(float32 a, float_status *s)
2974{
afc34931 2975 FloatParts64 p;
98e256fc 2976
afc34931
RH
2977 float32_unpack_canonical(&p, a, s);
2978 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2979 return float32_round_pack_canonical(&p, s);
dbe4d53a
AB
2980}
2981
2982float64 float64_round_to_int(float64 a, float_status *s)
2983{
afc34931 2984 FloatParts64 p;
98e256fc 2985
afc34931
RH
2986 float64_unpack_canonical(&p, a, s);
2987 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2988 return float64_round_pack_canonical(&p, s);
dbe4d53a
AB
2989}
2990
34f0c0a9
LZ
2991bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2992{
afc34931 2993 FloatParts64 p;
98e256fc 2994
afc34931
RH
2995 bfloat16_unpack_canonical(&p, a, s);
2996 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2997 return bfloat16_round_pack_canonical(&p, s);
2998}
2999
3000float128 float128_round_to_int(float128 a, float_status *s)
3001{
3002 FloatParts128 p;
3003
3004 float128_unpack_canonical(&p, a, s);
3005 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3006 return float128_round_pack_canonical(&p, s);
34f0c0a9
LZ
3007}
3008
f9a95a78
RH
3009floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3010{
3011 FloatParts128 p;
3012
3013 if (!floatx80_unpack_canonical(&p, a, status)) {
3014 return floatx80_default_nan(status);
3015 }
3016
3017 parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3018 &floatx80_params[status->floatx80_rounding_precision]);
3019 return floatx80_round_pack_canonical(&p, status);
3020}
3021
ab52f973 3022/*
463b3f0d
RH
3023 * Floating-point to signed integer conversions
3024 */
ab52f973 3025
0d93d8ec
FC
3026int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3027 float_status *s)
3028{
98e256fc
RH
3029 FloatParts64 p;
3030
3031 float16_unpack_canonical(&p, a, s);
463b3f0d 3032 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
0d93d8ec
FC
3033}
3034
3dede407 3035int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3036 float_status *s)
3037{
98e256fc
RH
3038 FloatParts64 p;
3039
3040 float16_unpack_canonical(&p, a, s);
463b3f0d 3041 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
3042}
3043
3dede407 3044int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3045 float_status *s)
3046{
98e256fc
RH
3047 FloatParts64 p;
3048
3049 float16_unpack_canonical(&p, a, s);
463b3f0d 3050 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
3051}
3052
3dede407 3053int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3054 float_status *s)
3055{
98e256fc
RH
3056 FloatParts64 p;
3057
3058 float16_unpack_canonical(&p, a, s);
463b3f0d 3059 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
3060}
3061
3dede407 3062int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3063 float_status *s)
3064{
98e256fc
RH
3065 FloatParts64 p;
3066
3067 float32_unpack_canonical(&p, a, s);
463b3f0d 3068 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
3069}
3070
3dede407 3071int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3072 float_status *s)
3073{
98e256fc
RH
3074 FloatParts64 p;
3075
3076 float32_unpack_canonical(&p, a, s);
463b3f0d 3077 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
3078}
3079
3dede407 3080int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3081 float_status *s)
3082{
98e256fc
RH
3083 FloatParts64 p;
3084
3085 float32_unpack_canonical(&p, a, s);
463b3f0d 3086 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
3087}
3088
3dede407 3089int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3090 float_status *s)
3091{
98e256fc
RH
3092 FloatParts64 p;
3093
3094 float64_unpack_canonical(&p, a, s);
463b3f0d 3095 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
3096}
3097
3dede407 3098int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3099 float_status *s)
3100{
98e256fc
RH
3101 FloatParts64 p;
3102
3103 float64_unpack_canonical(&p, a, s);
463b3f0d 3104 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
3105}
3106
3dede407 3107int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3108 float_status *s)
3109{
98e256fc
RH
3110 FloatParts64 p;
3111
3112 float64_unpack_canonical(&p, a, s);
463b3f0d
RH
3113 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3114}
3115
3116int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3117 float_status *s)
3118{
3119 FloatParts64 p;
3120
3121 bfloat16_unpack_canonical(&p, a, s);
3122 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3123}
3124
3125int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3126 float_status *s)
3127{
3128 FloatParts64 p;
3129
3130 bfloat16_unpack_canonical(&p, a, s);
3131 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3132}
3133
3134int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3135 float_status *s)
3136{
3137 FloatParts64 p;
3138
3139 bfloat16_unpack_canonical(&p, a, s);
3140 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3141}
3142
3143static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3144 int scale, float_status *s)
3145{
3146 FloatParts128 p;
3147
3148 float128_unpack_canonical(&p, a, s);
3149 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3150}
3151
3152static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3153 int scale, float_status *s)
3154{
3155 FloatParts128 p;
3156
3157 float128_unpack_canonical(&p, a, s);
3158 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
3159}
3160
bea59230
MF
3161static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3162 int scale, float_status *s)
3163{
3164 int flags = 0;
3165 Int128 r;
3166 FloatParts128 p;
3167
3168 float128_unpack_canonical(&p, a, s);
3169
3170 switch (p.cls) {
3171 case float_class_snan:
3172 flags |= float_flag_invalid_snan;
3173 /* fall through */
3174 case float_class_qnan:
3175 flags |= float_flag_invalid;
3176 r = UINT128_MAX;
3177 break;
3178
3179 case float_class_inf:
3180 flags = float_flag_invalid | float_flag_invalid_cvti;
3181 r = p.sign ? INT128_MIN : INT128_MAX;
3182 break;
3183
3184 case float_class_zero:
3185 return int128_zero();
3186
3187 case float_class_normal:
3188 if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3189 flags = float_flag_inexact;
3190 }
3191
3192 if (p.exp < 127) {
3193 int shift = 127 - p.exp;
3194 r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3195 if (p.sign) {
3196 r = int128_neg(r);
3197 }
3198 } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3199 p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3200 r = INT128_MIN;
3201 } else {
3202 flags = float_flag_invalid | float_flag_invalid_cvti;
3203 r = p.sign ? INT128_MIN : INT128_MAX;
3204 }
3205 break;
3206
3207 default:
3208 g_assert_not_reached();
3209 }
3210
3211 float_raise(flags, s);
3212 return r;
3213}
3214
a1fc527b
RH
3215static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3216 int scale, float_status *s)
3217{
3218 FloatParts128 p;
3219
3220 if (!floatx80_unpack_canonical(&p, a, s)) {
3221 parts_default_nan(&p, s);
3222 }
3223 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3224}
3225
3226static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3227 int scale, float_status *s)
3228{
3229 FloatParts128 p;
3230
3231 if (!floatx80_unpack_canonical(&p, a, s)) {
3232 parts_default_nan(&p, s);
3233 }
3234 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3235}
3236
0d93d8ec
FC
3237int8_t float16_to_int8(float16 a, float_status *s)
3238{
3239 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3240}
3241
2f6c74be
RH
3242int16_t float16_to_int16(float16 a, float_status *s)
3243{
3244 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3245}
3246
3247int32_t float16_to_int32(float16 a, float_status *s)
3248{
3249 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3250}
3251
3252int64_t float16_to_int64(float16 a, float_status *s)
3253{
3254 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3255}
3256
3257int16_t float32_to_int16(float32 a, float_status *s)
3258{
3259 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3260}
3261
3262int32_t float32_to_int32(float32 a, float_status *s)
3263{
3264 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3265}
3266
3267int64_t float32_to_int64(float32 a, float_status *s)
3268{
3269 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3270}
3271
3272int16_t float64_to_int16(float64 a, float_status *s)
3273{
3274 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3275}
3276
3277int32_t float64_to_int32(float64 a, float_status *s)
3278{
3279 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3280}
3281
3282int64_t float64_to_int64(float64 a, float_status *s)
3283{
3284 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3285}
3286
463b3f0d
RH
3287int32_t float128_to_int32(float128 a, float_status *s)
3288{
3289 return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3290}
3291
3292int64_t float128_to_int64(float128 a, float_status *s)
3293{
3294 return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3295}
3296
bea59230
MF
3297Int128 float128_to_int128(float128 a, float_status *s)
3298{
3299 return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3300}
3301
a1fc527b
RH
3302int32_t floatx80_to_int32(floatx80 a, float_status *s)
3303{
3304 return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3305}
3306
3307int64_t floatx80_to_int64(floatx80 a, float_status *s)
3308{
3309 return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3310}
3311
2f6c74be
RH
3312int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3313{
3314 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3315}
3316
3317int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3318{
3319 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3320}
3321
3322int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3323{
3324 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
3325}
3326
2f6c74be
RH
3327int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3328{
3329 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3330}
ab52f973 3331
2f6c74be
RH
3332int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3333{
3334 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3335}
3336
3337int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3338{
3339 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3340}
3341
3342int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3343{
3344 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3345}
ab52f973 3346
2f6c74be
RH
3347int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3348{
3349 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3350}
ab52f973 3351
2f6c74be
RH
3352int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3353{
3354 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3355}
ab52f973 3356
463b3f0d 3357int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
34f0c0a9 3358{
463b3f0d 3359 return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
3360}
3361
463b3f0d 3362int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
34f0c0a9 3363{
463b3f0d 3364 return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
3365}
3366
bea59230
MF
3367Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3368{
3369 return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3370}
3371
a1fc527b
RH
3372int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3373{
3374 return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3375}
3376
3377int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3378{
3379 return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3380}
3381
34f0c0a9
LZ
3382int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3383{
3384 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3385}
3386
3387int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3388{
3389 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3390}
3391
3392int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3393{
3394 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3395}
3396
3397int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3398{
3399 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3400}
3401
3402int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3403{
3404 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3405}
3406
3407int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3408{
3409 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3410}
3411
ab52f973 3412/*
4ab4aef0 3413 * Floating-point to unsigned integer conversions
ab52f973
AB
3414 */
3415
0d93d8ec
FC
3416uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3417 float_status *s)
3418{
98e256fc
RH
3419 FloatParts64 p;
3420
3421 float16_unpack_canonical(&p, a, s);
4ab4aef0 3422 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
0d93d8ec
FC
3423}
3424
3dede407 3425uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3426 float_status *s)
3427{
98e256fc
RH
3428 FloatParts64 p;
3429
3430 float16_unpack_canonical(&p, a, s);
4ab4aef0 3431 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
3432}
3433
3dede407 3434uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3435 float_status *s)
3436{
98e256fc
RH
3437 FloatParts64 p;
3438
3439 float16_unpack_canonical(&p, a, s);
4ab4aef0 3440 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
3441}
3442
3dede407 3443uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3444 float_status *s)
3445{
98e256fc
RH
3446 FloatParts64 p;
3447
3448 float16_unpack_canonical(&p, a, s);
4ab4aef0 3449 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
3450}
3451
3dede407 3452uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3453 float_status *s)
3454{
98e256fc
RH
3455 FloatParts64 p;
3456
3457 float32_unpack_canonical(&p, a, s);
4ab4aef0 3458 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
3459}
3460
3dede407 3461uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3462 float_status *s)
3463{
98e256fc
RH
3464 FloatParts64 p;
3465
3466 float32_unpack_canonical(&p, a, s);
4ab4aef0 3467 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
3468}
3469
3dede407 3470uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3471 float_status *s)
3472{
98e256fc
RH
3473 FloatParts64 p;
3474
3475 float32_unpack_canonical(&p, a, s);
4ab4aef0 3476 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
3477}
3478
3dede407 3479uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3480 float_status *s)
3481{
98e256fc
RH
3482 FloatParts64 p;
3483
3484 float64_unpack_canonical(&p, a, s);
4ab4aef0 3485 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
3486}
3487
3dede407 3488uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3489 float_status *s)
3490{
98e256fc
RH
3491 FloatParts64 p;
3492
3493 float64_unpack_canonical(&p, a, s);
4ab4aef0 3494 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
3495}
3496
3dede407 3497uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3498 float_status *s)
3499{
98e256fc
RH
3500 FloatParts64 p;
3501
3502 float64_unpack_canonical(&p, a, s);
4ab4aef0
RH
3503 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3504}
3505
3506uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3507 int scale, float_status *s)
3508{
3509 FloatParts64 p;
3510
3511 bfloat16_unpack_canonical(&p, a, s);
3512 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3513}
3514
3515uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3516 int scale, float_status *s)
3517{
3518 FloatParts64 p;
3519
3520 bfloat16_unpack_canonical(&p, a, s);
3521 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3522}
3523
3524uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3525 int scale, float_status *s)
3526{
3527 FloatParts64 p;
3528
3529 bfloat16_unpack_canonical(&p, a, s);
3530 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3531}
3532
3533static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3534 int scale, float_status *s)
3535{
3536 FloatParts128 p;
3537
3538 float128_unpack_canonical(&p, a, s);
3539 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3540}
3541
3542static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3543 int scale, float_status *s)
3544{
3545 FloatParts128 p;
3546
3547 float128_unpack_canonical(&p, a, s);
3548 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
3549}
3550
4de49ddf
MF
3551static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3552 int scale, float_status *s)
3553{
3554 int flags = 0;
3555 Int128 r;
3556 FloatParts128 p;
3557
3558 float128_unpack_canonical(&p, a, s);
3559
3560 switch (p.cls) {
3561 case float_class_snan:
3562 flags |= float_flag_invalid_snan;
3563 /* fall through */
3564 case float_class_qnan:
3565 flags |= float_flag_invalid;
3566 r = UINT128_MAX;
3567 break;
3568
3569 case float_class_inf:
3570 flags = float_flag_invalid | float_flag_invalid_cvti;
3571 r = p.sign ? int128_zero() : UINT128_MAX;
3572 break;
3573
3574 case float_class_zero:
3575 return int128_zero();
3576
3577 case float_class_normal:
3578 if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3579 flags = float_flag_inexact;
3580 if (p.cls == float_class_zero) {
3581 r = int128_zero();
3582 break;
3583 }
3584 }
3585
3586 if (p.sign) {
3587 flags = float_flag_invalid | float_flag_invalid_cvti;
3588 r = int128_zero();
3589 } else if (p.exp <= 127) {
3590 int shift = 127 - p.exp;
3591 r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3592 } else {
3593 flags = float_flag_invalid | float_flag_invalid_cvti;
3594 r = UINT128_MAX;
3595 }
3596 break;
3597
3598 default:
3599 g_assert_not_reached();
3600 }
3601
3602 float_raise(flags, s);
3603 return r;
3604}
3605
0d93d8ec
FC
3606uint8_t float16_to_uint8(float16 a, float_status *s)
3607{
3608 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3609}
3610
2f6c74be
RH
3611uint16_t float16_to_uint16(float16 a, float_status *s)
3612{
3613 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3614}
3615
3616uint32_t float16_to_uint32(float16 a, float_status *s)
3617{
3618 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3619}
3620
3621uint64_t float16_to_uint64(float16 a, float_status *s)
3622{
3623 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3624}
3625
3626uint16_t float32_to_uint16(float32 a, float_status *s)
3627{
3628 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3629}
3630
3631uint32_t float32_to_uint32(float32 a, float_status *s)
3632{
3633 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3634}
3635
3636uint64_t float32_to_uint64(float32 a, float_status *s)
3637{
3638 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3639}
3640
3641uint16_t float64_to_uint16(float64 a, float_status *s)
3642{
3643 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3644}
3645
3646uint32_t float64_to_uint32(float64 a, float_status *s)
3647{
3648 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3649}
3650
3651uint64_t float64_to_uint64(float64 a, float_status *s)
3652{
3653 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3654}
3655
4ab4aef0
RH
3656uint32_t float128_to_uint32(float128 a, float_status *s)
3657{
3658 return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3659}
3660
3661uint64_t float128_to_uint64(float128 a, float_status *s)
3662{
3663 return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3664}
3665
4de49ddf
MF
3666Int128 float128_to_uint128(float128 a, float_status *s)
3667{
3668 return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3669}
3670
2f6c74be
RH
3671uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3672{
3673 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3674}
3675
3676uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3677{
3678 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3679}
3680
3681uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3682{
3683 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3684}
3685
3686uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3687{
3688 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3689}
3690
3691uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3692{
3693 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3694}
3695
3696uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3697{
3698 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3699}
3700
3701uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3702{
3703 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3704}
3705
3706uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3707{
3708 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3709}
3710
3711uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3712{
3713 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3714}
ab52f973 3715
4ab4aef0 3716uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
34f0c0a9 3717{
4ab4aef0 3718 return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
3719}
3720
4ab4aef0 3721uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
34f0c0a9 3722{
4ab4aef0 3723 return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
3724}
3725
4de49ddf
MF
3726Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3727{
3728 return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3729}
3730
34f0c0a9
LZ
3731uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3732{
3733 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3734}
3735
3736uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3737{
3738 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3739}
3740
3741uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3742{
3743 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3744}
3745
3746uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3747{
3748 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3749}
3750
3751uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3752{
3753 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3754}
3755
3756uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3757{
3758 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3759}
3760
c02e1fb8 3761/*
e3689519 3762 * Signed integer to floating-point conversions
c02e1fb8
AB
3763 */
3764
2abdfe24 3765float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3766{
e3689519
RH
3767 FloatParts64 p;
3768
3769 parts_sint_to_float(&p, a, scale, status);
3770 return float16_round_pack_canonical(&p, status);
c02e1fb8
AB
3771}
3772
2abdfe24
RH
3773float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3774{
3775 return int64_to_float16_scalbn(a, scale, status);
3776}
3777
3778float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3779{
3780 return int64_to_float16_scalbn(a, scale, status);
3781}
3782
3783float16 int64_to_float16(int64_t a, float_status *status)
3784{
3785 return int64_to_float16_scalbn(a, 0, status);
3786}
3787
c02e1fb8
AB
3788float16 int32_to_float16(int32_t a, float_status *status)
3789{
2abdfe24 3790 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3791}
3792
3793float16 int16_to_float16(int16_t a, float_status *status)
3794{
2abdfe24 3795 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3796}
3797
0d93d8ec
FC
3798float16 int8_to_float16(int8_t a, float_status *status)
3799{
3800 return int64_to_float16_scalbn(a, 0, status);
3801}
3802
2abdfe24 3803float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3804{
e3689519
RH
3805 FloatParts64 p;
3806
5d0204b8
RH
3807 /* Without scaling, there are no overflow concerns. */
3808 if (likely(scale == 0) && can_use_fpu(status)) {
3809 union_float32 ur;
3810 ur.h = a;
3811 return ur.s;
3812 }
3813
e3689519
RH
3814 parts64_sint_to_float(&p, a, scale, status);
3815 return float32_round_pack_canonical(&p, status);
c02e1fb8
AB
3816}
3817
2abdfe24
RH
3818float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3819{
3820 return int64_to_float32_scalbn(a, scale, status);
3821}
3822
3823float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3824{
3825 return int64_to_float32_scalbn(a, scale, status);
3826}
3827
3828float32 int64_to_float32(int64_t a, float_status *status)
3829{
3830 return int64_to_float32_scalbn(a, 0, status);
3831}
3832
c02e1fb8
AB
3833float32 int32_to_float32(int32_t a, float_status *status)
3834{
2abdfe24 3835 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3836}
3837
3838float32 int16_to_float32(int16_t a, float_status *status)
3839{
2abdfe24 3840 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3841}
3842
2abdfe24 3843float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3844{
e3689519
RH
3845 FloatParts64 p;
3846
5d0204b8
RH
3847 /* Without scaling, there are no overflow concerns. */
3848 if (likely(scale == 0) && can_use_fpu(status)) {
3849 union_float64 ur;
3850 ur.h = a;
3851 return ur.s;
3852 }
3853
e3689519
RH
3854 parts_sint_to_float(&p, a, scale, status);
3855 return float64_round_pack_canonical(&p, status);
c02e1fb8
AB
3856}
3857
2abdfe24
RH
3858float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3859{
3860 return int64_to_float64_scalbn(a, scale, status);
3861}
3862
3863float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3864{
3865 return int64_to_float64_scalbn(a, scale, status);
3866}
3867
3868float64 int64_to_float64(int64_t a, float_status *status)
3869{
3870 return int64_to_float64_scalbn(a, 0, status);
3871}
3872
c02e1fb8
AB
3873float64 int32_to_float64(int32_t a, float_status *status)
3874{
2abdfe24 3875 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3876}
3877
3878float64 int16_to_float64(int16_t a, float_status *status)
3879{
2abdfe24 3880 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3881}
3882
34f0c0a9
LZ
3883bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3884{
e3689519
RH
3885 FloatParts64 p;
3886
3887 parts_sint_to_float(&p, a, scale, status);
3888 return bfloat16_round_pack_canonical(&p, status);
34f0c0a9
LZ
3889}
3890
3891bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3892{
3893 return int64_to_bfloat16_scalbn(a, scale, status);
3894}
3895
3896bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3897{
3898 return int64_to_bfloat16_scalbn(a, scale, status);
3899}
3900
3901bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3902{
3903 return int64_to_bfloat16_scalbn(a, 0, status);
3904}
3905
3906bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3907{
3908 return int64_to_bfloat16_scalbn(a, 0, status);
3909}
3910
3911bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3912{
3913 return int64_to_bfloat16_scalbn(a, 0, status);
3914}
c02e1fb8 3915
95c1b71e
MF
3916float128 int128_to_float128(Int128 a, float_status *status)
3917{
3918 FloatParts128 p = { };
3919 int shift;
3920
3921 if (int128_nz(a)) {
3922 p.cls = float_class_normal;
3923 if (!int128_nonneg(a)) {
3924 p.sign = true;
3925 a = int128_neg(a);
3926 }
3927
3928 shift = clz64(int128_gethi(a));
3929 if (shift == 64) {
3930 shift += clz64(int128_getlo(a));
3931 }
3932
3933 p.exp = 127 - shift;
3934 a = int128_lshift(a, shift);
3935
3936 p.frac_hi = int128_gethi(a);
3937 p.frac_lo = int128_getlo(a);
3938 } else {
3939 p.cls = float_class_zero;
3940 }
3941
3942 return float128_round_pack_canonical(&p, status);
3943}
3944
e3689519
RH
3945float128 int64_to_float128(int64_t a, float_status *status)
3946{
3947 FloatParts128 p;
3948
3949 parts_sint_to_float(&p, a, 0, status);
3950 return float128_round_pack_canonical(&p, status);
3951}
3952
3953float128 int32_to_float128(int32_t a, float_status *status)
3954{
3955 return int64_to_float128(a, status);
3956}
3957
5f952900
RH
3958floatx80 int64_to_floatx80(int64_t a, float_status *status)
3959{
3960 FloatParts128 p;
3961
3962 parts_sint_to_float(&p, a, 0, status);
3963 return floatx80_round_pack_canonical(&p, status);
3964}
3965
3966floatx80 int32_to_floatx80(int32_t a, float_status *status)
3967{
3968 return int64_to_floatx80(a, status);
3969}
3970
c02e1fb8 3971/*
37c954a1 3972 * Unsigned Integer to floating-point conversions
c02e1fb8
AB
3973 */
3974
2abdfe24 3975float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3976{
37c954a1
RH
3977 FloatParts64 p;
3978
3979 parts_uint_to_float(&p, a, scale, status);
3980 return float16_round_pack_canonical(&p, status);
c02e1fb8
AB
3981}
3982
2abdfe24
RH
3983float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3984{
3985 return uint64_to_float16_scalbn(a, scale, status);
3986}
3987
3988float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3989{
3990 return uint64_to_float16_scalbn(a, scale, status);
3991}
3992
3993float16 uint64_to_float16(uint64_t a, float_status *status)
3994{
3995 return uint64_to_float16_scalbn(a, 0, status);
3996}
3997
c02e1fb8
AB
3998float16 uint32_to_float16(uint32_t a, float_status *status)
3999{
2abdfe24 4000 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
4001}
4002
4003float16 uint16_to_float16(uint16_t a, float_status *status)
4004{
2abdfe24 4005 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
4006}
4007
0d93d8ec
FC
4008float16 uint8_to_float16(uint8_t a, float_status *status)
4009{
4010 return uint64_to_float16_scalbn(a, 0, status);
4011}
4012
2abdfe24 4013float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 4014{
37c954a1
RH
4015 FloatParts64 p;
4016
5d0204b8
RH
4017 /* Without scaling, there are no overflow concerns. */
4018 if (likely(scale == 0) && can_use_fpu(status)) {
4019 union_float32 ur;
4020 ur.h = a;
4021 return ur.s;
4022 }
4023
37c954a1
RH
4024 parts_uint_to_float(&p, a, scale, status);
4025 return float32_round_pack_canonical(&p, status);
c02e1fb8
AB
4026}
4027
2abdfe24
RH
4028float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4029{
4030 return uint64_to_float32_scalbn(a, scale, status);
4031}
4032
4033float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4034{
4035 return uint64_to_float32_scalbn(a, scale, status);
4036}
4037
4038float32 uint64_to_float32(uint64_t a, float_status *status)
4039{
4040 return uint64_to_float32_scalbn(a, 0, status);
4041}
4042
c02e1fb8
AB
4043float32 uint32_to_float32(uint32_t a, float_status *status)
4044{
2abdfe24 4045 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
4046}
4047
4048float32 uint16_to_float32(uint16_t a, float_status *status)
4049{
2abdfe24 4050 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
4051}
4052
2abdfe24 4053float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 4054{
37c954a1
RH
4055 FloatParts64 p;
4056
5d0204b8
RH
4057 /* Without scaling, there are no overflow concerns. */
4058 if (likely(scale == 0) && can_use_fpu(status)) {
4059 union_float64 ur;
4060 ur.h = a;
4061 return ur.s;
4062 }
4063
37c954a1
RH
4064 parts_uint_to_float(&p, a, scale, status);
4065 return float64_round_pack_canonical(&p, status);
c02e1fb8
AB
4066}
4067
2abdfe24
RH
4068float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4069{
4070 return uint64_to_float64_scalbn(a, scale, status);
4071}
4072
4073float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4074{
4075 return uint64_to_float64_scalbn(a, scale, status);
4076}
4077
4078float64 uint64_to_float64(uint64_t a, float_status *status)
4079{
4080 return uint64_to_float64_scalbn(a, 0, status);
4081}
4082
c02e1fb8
AB
4083float64 uint32_to_float64(uint32_t a, float_status *status)
4084{
2abdfe24 4085 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
4086}
4087
4088float64 uint16_to_float64(uint16_t a, float_status *status)
4089{
2abdfe24 4090 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
4091}
4092
34f0c0a9
LZ
4093bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4094{
37c954a1
RH
4095 FloatParts64 p;
4096
4097 parts_uint_to_float(&p, a, scale, status);
4098 return bfloat16_round_pack_canonical(&p, status);
34f0c0a9
LZ
4099}
4100
4101bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4102{
4103 return uint64_to_bfloat16_scalbn(a, scale, status);
4104}
4105
4106bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4107{
4108 return uint64_to_bfloat16_scalbn(a, scale, status);
4109}
4110
4111bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4112{
4113 return uint64_to_bfloat16_scalbn(a, 0, status);
4114}
4115
4116bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4117{
4118 return uint64_to_bfloat16_scalbn(a, 0, status);
4119}
4120
4121bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4122{
4123 return uint64_to_bfloat16_scalbn(a, 0, status);
4124}
4125
37c954a1
RH
4126float128 uint64_to_float128(uint64_t a, float_status *status)
4127{
4128 FloatParts128 p;
4129
4130 parts_uint_to_float(&p, a, 0, status);
4131 return float128_round_pack_canonical(&p, status);
4132}
4133
f279852b
MF
4134float128 uint128_to_float128(Int128 a, float_status *status)
4135{
4136 FloatParts128 p = { };
4137 int shift;
4138
4139 if (int128_nz(a)) {
4140 p.cls = float_class_normal;
4141
4142 shift = clz64(int128_gethi(a));
4143 if (shift == 64) {
4144 shift += clz64(int128_getlo(a));
4145 }
4146
4147 p.exp = 127 - shift;
4148 a = int128_lshift(a, shift);
4149
4150 p.frac_hi = int128_gethi(a);
4151 p.frac_lo = int128_getlo(a);
4152 } else {
4153 p.cls = float_class_zero;
4154 }
4155
4156 return float128_round_pack_canonical(&p, status);
4157}
4158
e1c4667a
RH
4159/*
4160 * Minimum and maximum
89360067 4161 */
89360067 4162
e1c4667a
RH
4163static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4164{
4165 FloatParts64 pa, pb, *pr;
89360067 4166
e1c4667a
RH
4167 float16_unpack_canonical(&pa, a, s);
4168 float16_unpack_canonical(&pb, b, s);
4169 pr = parts_minmax(&pa, &pb, s, flags);
4170
4171 return float16_round_pack_canonical(pr, s);
89360067
AB
4172}
4173
e1c4667a
RH
4174static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4175 float_status *s, int flags)
4176{
4177 FloatParts64 pa, pb, *pr;
4178
4179 bfloat16_unpack_canonical(&pa, a, s);
4180 bfloat16_unpack_canonical(&pb, b, s);
4181 pr = parts_minmax(&pa, &pb, s, flags);
4182
4183 return bfloat16_round_pack_canonical(pr, s);
4184}
4185
4186static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4187{
4188 FloatParts64 pa, pb, *pr;
4189
4190 float32_unpack_canonical(&pa, a, s);
4191 float32_unpack_canonical(&pb, b, s);
4192 pr = parts_minmax(&pa, &pb, s, flags);
4193
4194 return float32_round_pack_canonical(pr, s);
4195}
4196
4197static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4198{
4199 FloatParts64 pa, pb, *pr;
4200
4201 float64_unpack_canonical(&pa, a, s);
4202 float64_unpack_canonical(&pb, b, s);
4203 pr = parts_minmax(&pa, &pb, s, flags);
4204
4205 return float64_round_pack_canonical(pr, s);
4206}
4207
ceebc129
DH
4208static float128 float128_minmax(float128 a, float128 b,
4209 float_status *s, int flags)
4210{
4211 FloatParts128 pa, pb, *pr;
4212
4213 float128_unpack_canonical(&pa, a, s);
4214 float128_unpack_canonical(&pb, b, s);
4215 pr = parts_minmax(&pa, &pb, s, flags);
4216
4217 return float128_round_pack_canonical(pr, s);
4218}
4219
e1c4667a
RH
4220#define MINMAX_1(type, name, flags) \
4221 type type##_##name(type a, type b, float_status *s) \
4222 { return type##_minmax(a, b, s, flags); }
4223
4224#define MINMAX_2(type) \
0e903037
CMC
4225 MINMAX_1(type, max, 0) \
4226 MINMAX_1(type, maxnum, minmax_isnum) \
4227 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
4228 MINMAX_1(type, maximum_number, minmax_isnumber) \
4229 MINMAX_1(type, min, minmax_ismin) \
4230 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
4231 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4232 MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber) \
e1c4667a
RH
4233
4234MINMAX_2(float16)
4235MINMAX_2(bfloat16)
4236MINMAX_2(float32)
4237MINMAX_2(float64)
ceebc129 4238MINMAX_2(float128)
e1c4667a
RH
4239
4240#undef MINMAX_1
4241#undef MINMAX_2
8282310d 4242
6eb169b8
RH
4243/*
4244 * Floating point compare
4245 */
0c4c9092 4246
6eb169b8
RH
4247static FloatRelation QEMU_FLATTEN
4248float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4249{
4250 FloatParts64 pa, pb;
0c4c9092 4251
6eb169b8
RH
4252 float16_unpack_canonical(&pa, a, s);
4253 float16_unpack_canonical(&pb, b, s);
4254 return parts_compare(&pa, &pb, s, is_quiet);
0c4c9092
AB
4255}
4256
71bfd65c 4257FloatRelation float16_compare(float16 a, float16 b, float_status *s)
d9fe9db9 4258{
6eb169b8 4259 return float16_do_compare(a, b, s, false);
d9fe9db9
EC
4260}
4261
71bfd65c 4262FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
d9fe9db9 4263{
6eb169b8
RH
4264 return float16_do_compare(a, b, s, true);
4265}
4266
4267static FloatRelation QEMU_SOFTFLOAT_ATTR
4268float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4269{
4270 FloatParts64 pa, pb;
4271
4272 float32_unpack_canonical(&pa, a, s);
4273 float32_unpack_canonical(&pb, b, s);
4274 return parts_compare(&pa, &pb, s, is_quiet);
d9fe9db9
EC
4275}
4276
71bfd65c 4277static FloatRelation QEMU_FLATTEN
6eb169b8 4278float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
d9fe9db9
EC
4279{
4280 union_float32 ua, ub;
4281
4282 ua.s = xa;
4283 ub.s = xb;
4284
4285 if (QEMU_NO_HARDFLOAT) {
4286 goto soft;
4287 }
4288
4289 float32_input_flush2(&ua.s, &ub.s, s);
4290 if (isgreaterequal(ua.h, ub.h)) {
4291 if (isgreater(ua.h, ub.h)) {
4292 return float_relation_greater;
4293 }
4294 return float_relation_equal;
4295 }
4296 if (likely(isless(ua.h, ub.h))) {
4297 return float_relation_less;
4298 }
6eb169b8
RH
4299 /*
4300 * The only condition remaining is unordered.
d9fe9db9
EC
4301 * Fall through to set flags.
4302 */
4303 soft:
6eb169b8 4304 return float32_do_compare(ua.s, ub.s, s, is_quiet);
d9fe9db9
EC
4305}
4306
71bfd65c 4307FloatRelation float32_compare(float32 a, float32 b, float_status *s)
d9fe9db9 4308{
6eb169b8 4309 return float32_hs_compare(a, b, s, false);
d9fe9db9
EC
4310}
4311
71bfd65c 4312FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
d9fe9db9 4313{
6eb169b8
RH
4314 return float32_hs_compare(a, b, s, true);
4315}
4316
4317static FloatRelation QEMU_SOFTFLOAT_ATTR
4318float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4319{
4320 FloatParts64 pa, pb;
4321
4322 float64_unpack_canonical(&pa, a, s);
4323 float64_unpack_canonical(&pb, b, s);
4324 return parts_compare(&pa, &pb, s, is_quiet);
d9fe9db9
EC
4325}
4326
71bfd65c 4327static FloatRelation QEMU_FLATTEN
6eb169b8 4328float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
d9fe9db9
EC
4329{
4330 union_float64 ua, ub;
4331
4332 ua.s = xa;
4333 ub.s = xb;
4334
4335 if (QEMU_NO_HARDFLOAT) {
4336 goto soft;
4337 }
4338
4339 float64_input_flush2(&ua.s, &ub.s, s);
4340 if (isgreaterequal(ua.h, ub.h)) {
4341 if (isgreater(ua.h, ub.h)) {
4342 return float_relation_greater;
4343 }
4344 return float_relation_equal;
4345 }
4346 if (likely(isless(ua.h, ub.h))) {
4347 return float_relation_less;
4348 }
6eb169b8
RH
4349 /*
4350 * The only condition remaining is unordered.
d9fe9db9
EC
4351 * Fall through to set flags.
4352 */
4353 soft:
6eb169b8 4354 return float64_do_compare(ua.s, ub.s, s, is_quiet);
d9fe9db9
EC
4355}
4356
71bfd65c 4357FloatRelation float64_compare(float64 a, float64 b, float_status *s)
d9fe9db9 4358{
6eb169b8 4359 return float64_hs_compare(a, b, s, false);
d9fe9db9
EC
4360}
4361
71bfd65c 4362FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
d9fe9db9 4363{
6eb169b8 4364 return float64_hs_compare(a, b, s, true);
d9fe9db9
EC
4365}
4366
8282310d 4367static FloatRelation QEMU_FLATTEN
6eb169b8 4368bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
8282310d 4369{
98e256fc
RH
4370 FloatParts64 pa, pb;
4371
4372 bfloat16_unpack_canonical(&pa, a, s);
4373 bfloat16_unpack_canonical(&pb, b, s);
6eb169b8 4374 return parts_compare(&pa, &pb, s, is_quiet);
8282310d
LZ
4375}
4376
4377FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4378{
6eb169b8 4379 return bfloat16_do_compare(a, b, s, false);
8282310d
LZ
4380}
4381
4382FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4383{
6eb169b8
RH
4384 return bfloat16_do_compare(a, b, s, true);
4385}
4386
4387static FloatRelation QEMU_FLATTEN
4388float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4389{
4390 FloatParts128 pa, pb;
4391
4392 float128_unpack_canonical(&pa, a, s);
4393 float128_unpack_canonical(&pb, b, s);
4394 return parts_compare(&pa, &pb, s, is_quiet);
4395}
4396
4397FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4398{
4399 return float128_do_compare(a, b, s, false);
4400}
4401
4402FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4403{
4404 return float128_do_compare(a, b, s, true);
8282310d
LZ
4405}
4406
1b96b006
RH
4407static FloatRelation QEMU_FLATTEN
4408floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4409{
4410 FloatParts128 pa, pb;
4411
4412 if (!floatx80_unpack_canonical(&pa, a, s) ||
4413 !floatx80_unpack_canonical(&pb, b, s)) {
4414 return float_relation_unordered;
4415 }
4416 return parts_compare(&pa, &pb, s, is_quiet);
4417}
4418
4419FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4420{
4421 return floatx80_do_compare(a, b, s, false);
4422}
4423
4424FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4425{
4426 return floatx80_do_compare(a, b, s, true);
4427}
4428
39626b0c
RH
4429/*
4430 * Scale by 2**N
4431 */
0bfc9f19
AB
4432
4433float16 float16_scalbn(float16 a, int n, float_status *status)
4434{
39626b0c 4435 FloatParts64 p;
98e256fc 4436
39626b0c
RH
4437 float16_unpack_canonical(&p, a, status);
4438 parts_scalbn(&p, n, status);
4439 return float16_round_pack_canonical(&p, status);
0bfc9f19
AB
4440}
4441
4442float32 float32_scalbn(float32 a, int n, float_status *status)
4443{
39626b0c 4444 FloatParts64 p;
98e256fc 4445
39626b0c
RH
4446 float32_unpack_canonical(&p, a, status);
4447 parts_scalbn(&p, n, status);
4448 return float32_round_pack_canonical(&p, status);
0bfc9f19
AB
4449}
4450
4451float64 float64_scalbn(float64 a, int n, float_status *status)
4452{
39626b0c 4453 FloatParts64 p;
98e256fc 4454
39626b0c
RH
4455 float64_unpack_canonical(&p, a, status);
4456 parts_scalbn(&p, n, status);
4457 return float64_round_pack_canonical(&p, status);
0bfc9f19
AB
4458}
4459
8282310d
LZ
4460bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4461{
39626b0c 4462 FloatParts64 p;
98e256fc 4463
39626b0c
RH
4464 bfloat16_unpack_canonical(&p, a, status);
4465 parts_scalbn(&p, n, status);
4466 return bfloat16_round_pack_canonical(&p, status);
4467}
4468
4469float128 float128_scalbn(float128 a, int n, float_status *status)
4470{
4471 FloatParts128 p;
4472
4473 float128_unpack_canonical(&p, a, status);
4474 parts_scalbn(&p, n, status);
4475 return float128_round_pack_canonical(&p, status);
8282310d
LZ
4476}
4477
872e6991
RH
4478floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4479{
4480 FloatParts128 p;
4481
4482 if (!floatx80_unpack_canonical(&p, a, status)) {
4483 return floatx80_default_nan(status);
4484 }
4485 parts_scalbn(&p, n, status);
4486 return floatx80_round_pack_canonical(&p, status);
4487}
4488
c13bb2da
AB
4489/*
4490 * Square Root
c13bb2da
AB
4491 */
4492
97ff87c0 4493float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da 4494{
9261b245 4495 FloatParts64 p;
98e256fc 4496
9261b245
RH
4497 float16_unpack_canonical(&p, a, status);
4498 parts_sqrt(&p, status, &float16_params);
4499 return float16_round_pack_canonical(&p, status);
c13bb2da
AB
4500}
4501
f131bae8
EC
4502static float32 QEMU_SOFTFLOAT_ATTR
4503soft_f32_sqrt(float32 a, float_status *status)
c13bb2da 4504{
9261b245 4505 FloatParts64 p;
98e256fc 4506
9261b245
RH
4507 float32_unpack_canonical(&p, a, status);
4508 parts_sqrt(&p, status, &float32_params);
4509 return float32_round_pack_canonical(&p, status);
c13bb2da
AB
4510}
4511
f131bae8
EC
4512static float64 QEMU_SOFTFLOAT_ATTR
4513soft_f64_sqrt(float64 a, float_status *status)
c13bb2da 4514{
9261b245 4515 FloatParts64 p;
98e256fc 4516
9261b245
RH
4517 float64_unpack_canonical(&p, a, status);
4518 parts_sqrt(&p, status, &float64_params);
4519 return float64_round_pack_canonical(&p, status);
c13bb2da
AB
4520}
4521
f131bae8
EC
4522float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4523{
4524 union_float32 ua, ur;
4525
4526 ua.s = xa;
4527 if (unlikely(!can_use_fpu(s))) {
4528 goto soft;
4529 }
4530
4531 float32_input_flush1(&ua.s, s);
4532 if (QEMU_HARDFLOAT_1F32_USE_FP) {
4533 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4534 fpclassify(ua.h) == FP_ZERO) ||
4535 signbit(ua.h))) {
4536 goto soft;
4537 }
4538 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4539 float32_is_neg(ua.s))) {
4540 goto soft;
4541 }
4542 ur.h = sqrtf(ua.h);
4543 return ur.s;
4544
4545 soft:
4546 return soft_f32_sqrt(ua.s, s);
4547}
4548
4549float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4550{
4551 union_float64 ua, ur;
4552
4553 ua.s = xa;
4554 if (unlikely(!can_use_fpu(s))) {
4555 goto soft;
4556 }
4557
4558 float64_input_flush1(&ua.s, s);
4559 if (QEMU_HARDFLOAT_1F64_USE_FP) {
4560 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4561 fpclassify(ua.h) == FP_ZERO) ||
4562 signbit(ua.h))) {
4563 goto soft;
4564 }
4565 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4566 float64_is_neg(ua.s))) {
4567 goto soft;
4568 }
4569 ur.h = sqrt(ua.h);
4570 return ur.s;
4571
4572 soft:
4573 return soft_f64_sqrt(ua.s, s);
4574}
4575
42636fb9
RH
4576float64 float64r32_sqrt(float64 a, float_status *status)
4577{
4578 FloatParts64 p;
4579
4580 float64_unpack_canonical(&p, a, status);
4581 parts_sqrt(&p, status, &float64_params);
4582 return float64r32_round_pack_canonical(&p, status);
4583}
4584
8282310d
LZ
4585bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4586{
9261b245 4587 FloatParts64 p;
98e256fc 4588
9261b245
RH
4589 bfloat16_unpack_canonical(&p, a, status);
4590 parts_sqrt(&p, status, &bfloat16_params);
4591 return bfloat16_round_pack_canonical(&p, status);
4592}
4593
4594float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4595{
4596 FloatParts128 p;
4597
4598 float128_unpack_canonical(&p, a, status);
4599 parts_sqrt(&p, status, &float128_params);
4600 return float128_round_pack_canonical(&p, status);
8282310d
LZ
4601}
4602
aa5e19cc
RH
4603floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4604{
4605 FloatParts128 p;
4606
4607 if (!floatx80_unpack_canonical(&p, a, s)) {
4608 return floatx80_default_nan(s);
4609 }
4610 parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4611 return floatx80_round_pack_canonical(&p, s);
4612}
4613
2fa3546c
RH
4614/*
4615 * log2
4616 */
4617float32 float32_log2(float32 a, float_status *status)
4618{
4619 FloatParts64 p;
4620
4621 float32_unpack_canonical(&p, a, status);
4622 parts_log2(&p, status, &float32_params);
4623 return float32_round_pack_canonical(&p, status);
4624}
4625
4626float64 float64_log2(float64 a, float_status *status)
4627{
4628 FloatParts64 p;
4629
4630 float64_unpack_canonical(&p, a, status);
4631 parts_log2(&p, status, &float64_params);
4632 return float64_round_pack_canonical(&p, status);
4633}
4634
0218a16e
RH
4635/*----------------------------------------------------------------------------
4636| The pattern for a default generated NaN.
4637*----------------------------------------------------------------------------*/
4638
4639float16 float16_default_nan(float_status *status)
4640{
0fc07cad
RH
4641 FloatParts64 p;
4642
4643 parts_default_nan(&p, status);
0218a16e 4644 p.frac >>= float16_params.frac_shift;
71fd178e 4645 return float16_pack_raw(&p);
0218a16e
RH
4646}
4647
4648float32 float32_default_nan(float_status *status)
4649{
0fc07cad
RH
4650 FloatParts64 p;
4651
4652 parts_default_nan(&p, status);
0218a16e 4653 p.frac >>= float32_params.frac_shift;
71fd178e 4654 return float32_pack_raw(&p);
0218a16e
RH
4655}
4656
4657float64 float64_default_nan(float_status *status)
4658{
0fc07cad
RH
4659 FloatParts64 p;
4660
4661 parts_default_nan(&p, status);
0218a16e 4662 p.frac >>= float64_params.frac_shift;
71fd178e 4663 return float64_pack_raw(&p);
0218a16e
RH
4664}
4665
4666float128 float128_default_nan(float_status *status)
4667{
e9034ea8 4668 FloatParts128 p;
0218a16e 4669
0fc07cad 4670 parts_default_nan(&p, status);
e9034ea8
RH
4671 frac_shr(&p, float128_params.frac_shift);
4672 return float128_pack_raw(&p);
0218a16e 4673}
c13bb2da 4674
8282310d
LZ
4675bfloat16 bfloat16_default_nan(float_status *status)
4676{
0fc07cad
RH
4677 FloatParts64 p;
4678
4679 parts_default_nan(&p, status);
8282310d 4680 p.frac >>= bfloat16_params.frac_shift;
71fd178e 4681 return bfloat16_pack_raw(&p);
8282310d
LZ
4682}
4683
158142c2 4684/*----------------------------------------------------------------------------
377ed926
RH
4685| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4686*----------------------------------------------------------------------------*/
4687
4688float16 float16_silence_nan(float16 a, float_status *status)
4689{
3dddb203
RH
4690 FloatParts64 p;
4691
4692 float16_unpack_raw(&p, a);
377ed926 4693 p.frac <<= float16_params.frac_shift;
92ff426d 4694 parts_silence_nan(&p, status);
377ed926 4695 p.frac >>= float16_params.frac_shift;
71fd178e 4696 return float16_pack_raw(&p);
377ed926
RH
4697}
4698
4699float32 float32_silence_nan(float32 a, float_status *status)
4700{
3dddb203
RH
4701 FloatParts64 p;
4702
4703 float32_unpack_raw(&p, a);
377ed926 4704 p.frac <<= float32_params.frac_shift;
92ff426d 4705 parts_silence_nan(&p, status);
377ed926 4706 p.frac >>= float32_params.frac_shift;
71fd178e 4707 return float32_pack_raw(&p);
377ed926
RH
4708}
4709
4710float64 float64_silence_nan(float64 a, float_status *status)
4711{
3dddb203
RH
4712 FloatParts64 p;
4713
4714 float64_unpack_raw(&p, a);
377ed926 4715 p.frac <<= float64_params.frac_shift;
92ff426d 4716 parts_silence_nan(&p, status);
377ed926 4717 p.frac >>= float64_params.frac_shift;
71fd178e 4718 return float64_pack_raw(&p);
377ed926
RH
4719}
4720
8282310d
LZ
4721bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4722{
3dddb203
RH
4723 FloatParts64 p;
4724
4725 bfloat16_unpack_raw(&p, a);
8282310d 4726 p.frac <<= bfloat16_params.frac_shift;
92ff426d 4727 parts_silence_nan(&p, status);
8282310d 4728 p.frac >>= bfloat16_params.frac_shift;
71fd178e 4729 return bfloat16_pack_raw(&p);
8282310d 4730}
e6b405fe 4731
0018b1f4
RH
4732float128 float128_silence_nan(float128 a, float_status *status)
4733{
4734 FloatParts128 p;
4735
4736 float128_unpack_raw(&p, a);
4737 frac_shl(&p, float128_params.frac_shift);
4738 parts_silence_nan(&p, status);
4739 frac_shr(&p, float128_params.frac_shift);
4740 return float128_pack_raw(&p);
4741}
4742
e6b405fe
AB
4743/*----------------------------------------------------------------------------
4744| If `a' is denormal and we are in flush-to-zero mode then set the
4745| input-denormal exception and return zero. Otherwise just return the value.
4746*----------------------------------------------------------------------------*/
4747
f8155c1d 4748static bool parts_squash_denormal(FloatParts64 p, float_status *status)
e6b405fe
AB
4749{
4750 if (p.exp == 0 && p.frac != 0) {
4751 float_raise(float_flag_input_denormal, status);
4752 return true;
4753 }
4754
4755 return false;
4756}
4757
4758float16 float16_squash_input_denormal(float16 a, float_status *status)
4759{
4760 if (status->flush_inputs_to_zero) {
3dddb203
RH
4761 FloatParts64 p;
4762
4763 float16_unpack_raw(&p, a);
e6b405fe
AB
4764 if (parts_squash_denormal(p, status)) {
4765 return float16_set_sign(float16_zero, p.sign);
4766 }
4767 }
4768 return a;
4769}
4770
4771float32 float32_squash_input_denormal(float32 a, float_status *status)
4772{
4773 if (status->flush_inputs_to_zero) {
3dddb203
RH
4774 FloatParts64 p;
4775
4776 float32_unpack_raw(&p, a);
e6b405fe
AB
4777 if (parts_squash_denormal(p, status)) {
4778 return float32_set_sign(float32_zero, p.sign);
4779 }
4780 }
4781 return a;
4782}
4783
4784float64 float64_squash_input_denormal(float64 a, float_status *status)
4785{
4786 if (status->flush_inputs_to_zero) {
3dddb203
RH
4787 FloatParts64 p;
4788
4789 float64_unpack_raw(&p, a);
e6b405fe
AB
4790 if (parts_squash_denormal(p, status)) {
4791 return float64_set_sign(float64_zero, p.sign);
4792 }
4793 }
4794 return a;
4795}
4796
8282310d
LZ
4797bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4798{
4799 if (status->flush_inputs_to_zero) {
3dddb203
RH
4800 FloatParts64 p;
4801
4802 bfloat16_unpack_raw(&p, a);
8282310d
LZ
4803 if (parts_squash_denormal(p, status)) {
4804 return bfloat16_set_sign(bfloat16_zero, p.sign);
4805 }
4806 }
4807 return a;
4808}
4809
158142c2 4810/*----------------------------------------------------------------------------
feaf2e9c
RH
4811| Normalizes the subnormal extended double-precision floating-point value
4812| represented by the denormalized significand `aSig'. The normalized exponent
4813| and significand are stored at the locations pointed to by `zExpPtr' and
158142c2
FB
4814| `zSigPtr', respectively.
4815*----------------------------------------------------------------------------*/
4816
feaf2e9c
RH
4817void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4818 uint64_t *zSigPtr)
158142c2 4819{
8f506c70 4820 int8_t shiftCount;
158142c2 4821
feaf2e9c 4822 shiftCount = clz64(aSig);
158142c2
FB
4823 *zSigPtr = aSig<<shiftCount;
4824 *zExpPtr = 1 - shiftCount;
158142c2
FB
4825}
4826
158142c2
FB
4827/*----------------------------------------------------------------------------
4828| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
feaf2e9c
RH
4829| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4830| and returns the proper extended double-precision floating-point value
4831| corresponding to the abstract input. Ordinarily, the abstract value is
4832| rounded and packed into the extended double-precision format, with the
4833| inexact exception raised if the abstract input cannot be represented
158142c2
FB
4834| exactly. However, if the abstract value is too large, the overflow and
4835| inexact exceptions are raised and an infinity or maximal finite value is
4836| returned. If the abstract value is too small, the input value is rounded to
4837| a subnormal number, and the underflow and inexact exceptions are raised if
feaf2e9c
RH
4838| the abstract input cannot be represented exactly as a subnormal extended
4839| double-precision floating-point number.
4840| If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4841| the result is rounded to the same number of bits as single or double
4842| precision, respectively. Otherwise, the result is rounded to the full
4843| precision of the extended double-precision format.
4844| The input significand must be normalized or smaller. If the input
4845| significand is not normalized, `zExp' must be 0; in that case, the result
4846| returned is a subnormal number, and it must not require rounding. The
4847| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4848| Floating-Point Arithmetic.
158142c2
FB
4849*----------------------------------------------------------------------------*/
4850
feaf2e9c
RH
4851floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4852 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4853 float_status *status)
158142c2 4854{
feaf2e9c
RH
4855 FloatRoundMode roundingMode;
4856 bool roundNearestEven, increment, isTiny;
4857 int64_t roundIncrement, roundMask, roundBits;
158142c2 4858
a2f2d288 4859 roundingMode = status->float_rounding_mode;
158142c2 4860 roundNearestEven = ( roundingMode == float_round_nearest_even );
feaf2e9c
RH
4861 switch (roundingPrecision) {
4862 case floatx80_precision_x:
4863 goto precision80;
4864 case floatx80_precision_d:
4865 roundIncrement = UINT64_C(0x0000000000000400);
4866 roundMask = UINT64_C(0x00000000000007FF);
4867 break;
4868 case floatx80_precision_s:
4869 roundIncrement = UINT64_C(0x0000008000000000);
4870 roundMask = UINT64_C(0x000000FFFFFFFFFF);
4871 break;
4872 default:
4873 g_assert_not_reached();
4874 }
4875 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
4876 switch (roundingMode) {
4877 case float_round_nearest_even:
f9288a76 4878 case float_round_ties_away:
dc355b76
PM
4879 break;
4880 case float_round_to_zero:
4881 roundIncrement = 0;
4882 break;
4883 case float_round_up:
feaf2e9c 4884 roundIncrement = zSign ? 0 : roundMask;
dc355b76
PM
4885 break;
4886 case float_round_down:
feaf2e9c 4887 roundIncrement = zSign ? roundMask : 0;
5d64abb3 4888 break;
dc355b76
PM
4889 default:
4890 abort();
158142c2 4891 }
feaf2e9c
RH
4892 roundBits = zSig0 & roundMask;
4893 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4894 if ( ( 0x7FFE < zExp )
4895 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
158142c2 4896 ) {
feaf2e9c 4897 goto overflow;
158142c2 4898 }
feaf2e9c 4899 if ( zExp <= 0 ) {
a2f2d288 4900 if (status->flush_to_zero) {
ff32e16e 4901 float_raise(float_flag_output_denormal, status);
feaf2e9c 4902 return packFloatx80(zSign, 0, 0);
e6afc87f 4903 }
a828b373 4904 isTiny = status->tininess_before_rounding
feaf2e9c
RH
4905 || (zExp < 0 )
4906 || (zSig0 <= zSig0 + roundIncrement);
4907 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
158142c2 4908 zExp = 0;
feaf2e9c 4909 roundBits = zSig0 & roundMask;
ff32e16e
PM
4910 if (isTiny && roundBits) {
4911 float_raise(float_flag_underflow, status);
4912 }
feaf2e9c
RH
4913 if (roundBits) {
4914 float_raise(float_flag_inexact, status);
4915 }
4916 zSig0 += roundIncrement;
4917 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4918 roundIncrement = roundMask + 1;
4919 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4920 roundMask |= roundIncrement;
5d64abb3 4921 }
feaf2e9c
RH
4922 zSig0 &= ~ roundMask;
4923 return packFloatx80( zSign, zExp, zSig0 );
158142c2
FB
4924 }
4925 }
a2f2d288 4926 if (roundBits) {
d82f3b2d 4927 float_raise(float_flag_inexact, status);
a2f2d288 4928 }
feaf2e9c
RH
4929 zSig0 += roundIncrement;
4930 if ( zSig0 < roundIncrement ) {
4931 ++zExp;
4932 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4933 }
4934 roundIncrement = roundMask + 1;
4935 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4936 roundMask |= roundIncrement;
4937 }
4938 zSig0 &= ~ roundMask;
4939 if ( zSig0 == 0 ) zExp = 0;
4940 return packFloatx80( zSign, zExp, zSig0 );
4941 precision80:
dc355b76
PM
4942 switch (roundingMode) {
4943 case float_round_nearest_even:
f9288a76 4944 case float_round_ties_away:
dc355b76
PM
4945 increment = ((int64_t)zSig1 < 0);
4946 break;
4947 case float_round_to_zero:
4948 increment = 0;
4949 break;
4950 case float_round_up:
4951 increment = !zSign && zSig1;
4952 break;
4953 case float_round_down:
4954 increment = zSign && zSig1;
4955 break;
4956 default:
4957 abort();
158142c2 4958 }
bb98fe42 4959 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4960 if ( ( 0x7FFE < zExp )
4961 || ( ( zExp == 0x7FFE )
e9321124 4962 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
4963 && increment
4964 )
4965 ) {
4966 roundMask = 0;
4967 overflow:
ff32e16e 4968 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4969 if ( ( roundingMode == float_round_to_zero )
4970 || ( zSign && ( roundingMode == float_round_up ) )
4971 || ( ! zSign && ( roundingMode == float_round_down ) )
4972 ) {
4973 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4974 }
0f605c88
LV
4975 return packFloatx80(zSign,
4976 floatx80_infinity_high,
4977 floatx80_infinity_low);
158142c2
FB
4978 }
4979 if ( zExp <= 0 ) {
a828b373
RH
4980 isTiny = status->tininess_before_rounding
4981 || (zExp < 0)
4982 || !increment
4983 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4984 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4985 zExp = 0;
ff32e16e
PM
4986 if (isTiny && zSig1) {
4987 float_raise(float_flag_underflow, status);
4988 }
a2f2d288 4989 if (zSig1) {
d82f3b2d 4990 float_raise(float_flag_inexact, status);
a2f2d288 4991 }
dc355b76
PM
4992 switch (roundingMode) {
4993 case float_round_nearest_even:
f9288a76 4994 case float_round_ties_away:
dc355b76
PM
4995 increment = ((int64_t)zSig1 < 0);
4996 break;
4997 case float_round_to_zero:
4998 increment = 0;
4999 break;
5000 case float_round_up:
5001 increment = !zSign && zSig1;
5002 break;
5003 case float_round_down:
5004 increment = zSign && zSig1;
5005 break;
5006 default:
5007 abort();
158142c2
FB
5008 }
5009 if ( increment ) {
5010 ++zSig0;
40662886
PMD
5011 if (!(zSig1 << 1) && roundNearestEven) {
5012 zSig0 &= ~1;
5013 }
bb98fe42 5014 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
5015 }
5016 return packFloatx80( zSign, zExp, zSig0 );
5017 }
5018 }
a2f2d288 5019 if (zSig1) {
d82f3b2d 5020 float_raise(float_flag_inexact, status);
a2f2d288 5021 }
158142c2
FB
5022 if ( increment ) {
5023 ++zSig0;
5024 if ( zSig0 == 0 ) {
5025 ++zExp;
e9321124 5026 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
5027 }
5028 else {
40662886
PMD
5029 if (!(zSig1 << 1) && roundNearestEven) {
5030 zSig0 &= ~1;
5031 }
158142c2
FB
5032 }
5033 }
5034 else {
5035 if ( zSig0 == 0 ) zExp = 0;
5036 }
5037 return packFloatx80( zSign, zExp, zSig0 );
5038
5039}
5040
5041/*----------------------------------------------------------------------------
5042| Takes an abstract floating-point value having sign `zSign', exponent
5043| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5044| and returns the proper extended double-precision floating-point value
5045| corresponding to the abstract input. This routine is just like
5046| `roundAndPackFloatx80' except that the input significand does not have to be
5047| normalized.
5048*----------------------------------------------------------------------------*/
5049
8da5f1db 5050floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
c120391c 5051 bool zSign, int32_t zExp,
88857aca
LV
5052 uint64_t zSig0, uint64_t zSig1,
5053 float_status *status)
158142c2 5054{
8f506c70 5055 int8_t shiftCount;
158142c2
FB
5056
5057 if ( zSig0 == 0 ) {
5058 zSig0 = zSig1;
5059 zSig1 = 0;
5060 zExp -= 64;
5061 }
0019d5c3 5062 shiftCount = clz64(zSig0);
158142c2
FB
5063 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5064 zExp -= shiftCount;
ff32e16e
PM
5065 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5066 zSig0, zSig1, status);
158142c2
FB
5067
5068}
5069
8229c991
AJ
5070/*----------------------------------------------------------------------------
5071| Returns the binary exponential of the single-precision floating-point value
5072| `a'. The operation is performed according to the IEC/IEEE Standard for
5073| Binary Floating-Point Arithmetic.
5074|
5075| Uses the following identities:
5076|
| 1. 2**x = e**(x * ln(2))
|
| 2. e**x = 1 + x/1! + x**2/2! + x**3/3! + x**4/4! + x**5/5! + ...
|            + x**n/n! + ...
5086*----------------------------------------------------------------------------*/
5087
5088static const float64 float32_exp2_coefficients[15] =
5089{
d5138cf4
PM
5090 const_float64( 0x3ff0000000000000ll ), /* 1 */
5091 const_float64( 0x3fe0000000000000ll ), /* 2 */
5092 const_float64( 0x3fc5555555555555ll ), /* 3 */
5093 const_float64( 0x3fa5555555555555ll ), /* 4 */
5094 const_float64( 0x3f81111111111111ll ), /* 5 */
5095 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5096 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5097 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5098 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5099 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5100 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5101 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5102 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5103 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5104 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
5105};
5106
e5a41ffa 5107float32 float32_exp2(float32 a, float_status *status)
8229c991 5108{
572c4d86 5109 FloatParts64 xp, xnp, tp, rp;
8229c991 5110 int i;
8229c991 5111
572c4d86
RH
5112 float32_unpack_canonical(&xp, a, status);
5113 if (unlikely(xp.cls != float_class_normal)) {
5114 switch (xp.cls) {
5115 case float_class_snan:
5116 case float_class_qnan:
5117 parts_return_nan(&xp, status);
5118 return float32_round_pack_canonical(&xp, status);
5119 case float_class_inf:
5120 return xp.sign ? float32_zero : a;
5121 case float_class_zero:
5122 return float32_one;
5123 default:
5124 break;
ff32e16e 5125 }
572c4d86 5126 g_assert_not_reached();
8229c991
AJ
5127 }
5128
ff32e16e 5129 float_raise(float_flag_inexact, status);
8229c991 5130
f291f45f 5131 float64_unpack_canonical(&tp, float64_ln2, status);
572c4d86
RH
5132 xp = *parts_mul(&xp, &tp, status);
5133 xnp = xp;
8229c991 5134
572c4d86 5135 float64_unpack_canonical(&rp, float64_one, status);
8229c991 5136 for (i = 0 ; i < 15 ; i++) {
572c4d86 5137 float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
1098cc3f 5138 rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
572c4d86 5139 xnp = *parts_mul(&xnp, &xp, status);
8229c991
AJ
5140 }
5141
572c4d86 5142 return float32_round_pack_canonical(&rp, status);
8229c991
AJ
5143}
5144
0f721292
LV
5145/*----------------------------------------------------------------------------
5146| Rounds the extended double-precision floating-point value `a'
5147| to the precision provided by floatx80_rounding_precision and returns the
5148| result as an extended double-precision floating-point value.
5149| The operation is performed according to the IEC/IEEE Standard for Binary
5150| Floating-Point Arithmetic.
5151*----------------------------------------------------------------------------*/
5152
5153floatx80 floatx80_round(floatx80 a, float_status *status)
5154{
45a76b71
RH
5155 FloatParts128 p;
5156
5157 if (!floatx80_unpack_canonical(&p, a, status)) {
5158 return floatx80_default_nan(status);
5159 }
5160 return floatx80_round_pack_canonical(&p, status);
0f721292
LV
5161}
5162
f6b3b108
EC
5163static void __attribute__((constructor)) softfloat_init(void)
5164{
5165 union_float64 ua, ub, uc, ur;
5166
5167 if (QEMU_NO_HARDFLOAT) {
5168 return;
5169 }
5170 /*
5171 * Test that the host's FMA is not obviously broken. For example,
5172 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5173 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5174 */
5175 ua.s = 0x0020000000000001ULL;
5176 ub.s = 0x3ca0000000000000ULL;
5177 uc.s = 0x0020000000000000ULL;
5178 ur.h = fma(ua.h, ub.h, uc.h);
5179 if (ur.s != 0x0020000000000001ULL) {
5180 force_soft_fma = true;
5181 }
5182}