/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

/*
===============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

/* BSD licensing:
 * Copyright (c) 2006, Fabrice Bellard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

/* softfloat (and in particular the code in softfloat-specialize.h) is
 * target-dependent and needs the TARGET_* macros.
 */
#include "qemu/osdep.h"
#include <math.h>
#include "qemu/bitops.h"
#include "fpu/softfloat.h"

/* We only need stdlib for abort() */

/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations. (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
#include "fpu/softfloat-macros.h"

/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, FP instruction semantics are similar but not identical, particularly
 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 * exception flags is not trivial: reading the host's flags register with a
 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is not fast nor pleasant to work with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is < the minimum normal.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK

#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1

#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2

#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3

/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF 1
#else
# define QEMU_HARDFLOAT_USE_ISINF 0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif

static inline bool can_use_fpu(const float_status *s)
{
    if (QEMU_NO_HARDFLOAT) {
        return false;
    }
    return likely(s->float_exception_flags & float_flag_inexact &&
                  s->float_rounding_mode == float_round_nearest_even);
}

/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float (*hard_f32_op2_fn)(float a, float b);
typedef double (*hard_f64_op2_fn)(double a, double b);

/* 2-input is-zero-or-normal */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}

static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}

/* 3-input is-zero-or-normal */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}

static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}

static inline bool f32_is_inf(union_float32 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float32_is_infinity(a.s);
}

static inline bool f64_is_inf(union_float64 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float64_is_infinity(a.s);
}

static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

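/*
 * Example (added for exposition, not part of the original QEMU code): the
 * shape of a hardfloat-accelerated 2-operand wrapper built on top of
 * float32_gen2(). The names below are hypothetical; the real wrappers and
 * their softfloat fallbacks are defined later in this file. The "pre" check
 * restricts the fast path to zero/normal inputs, and the "post" check decides
 * whether a tiny result must be recomputed in softfloat so that underflow is
 * flagged correctly.
 */
static inline float hard_f32_add_example(float a, float b)
{
    return a + b;                   /* host FPU performs the arithmetic */
}

static inline bool f32_post_example(union_float32 a, union_float32 b)
{
    /* if either input is zero the result is exact; no soft recompute needed */
    return !(float32_is_zero(a.s) || float32_is_zero(b.s));
}

static inline float32 float32_add_example(float32 a, float32 b,
                                          float_status *s,
                                          soft_f32_op2_fn soft_fallback)
{
    return float32_gen2(a, b, s, hard_f32_add_example, soft_fallback,
                        f32_is_zon2, f32_post_example);
}
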
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN, so cls >= float_class_qnan tests for any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

#define float_cmask(bit) (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};

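/*
 * Example (added for exposition, not part of the original QEMU code): the
 * class masks above are typically ORed together so that one comparison can
 * test a combination of operand classes, e.g. "infinity times zero".
 */
static inline bool example_is_inf_times_zero(FloatClass a, FloatClass b)
{
    int ab_mask = float_cmask(a) | float_cmask(b);

    /* true exactly when one operand is an infinity and the other a zero */
    return ab_mask == float_cmask_infzero;
}
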
/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 8,
};

/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}

/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation. In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT 63
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction:
 *   round_mask: bits below lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    uint64_t round_mask;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size   = E,                                    \
    .exp_bias   = ((1 << E) - 1) >> 1,                  \
    .exp_max    = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size  = F,                                    \
    .frac_shift = (-F - 1) & 63,                        \
    .round_mask = (1ull << ((-F - 1) & 63)) - 1
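
/*
 * Worked example (added for exposition, not part of the original QEMU code):
 * for float32, FLOAT_PARAMS(8, 23) yields exp_bias = 127, exp_max = 255,
 * frac_shift = (-23 - 1) & 63 = 40, and a 40-bit round_mask; i.e. the
 * fraction is kept left-justified just below DECOMPOSED_BINARY_POINT, and
 * round_mask covers the bits discarded when repacking.
 */
QEMU_BUILD_BUG_ON((((1 << 8) - 1) >> 1) != 127);          /* exp_bias   */
QEMU_BUILD_BUG_ON(((1 << 8) - 1) != 255);                 /* exp_max    */
QEMU_BUILD_BUG_ON(((-23 - 1) & 63) != 40);                /* frac_shift */
QEMU_BUILD_BUG_ON(((1ull << 40) - 1) != 0xffffffffffull); /* round_mask */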

static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

#define FLOATX80_PARAMS(R)                              \
    FLOAT_PARAMS_(15),                                  \
    .frac_size = R == 64 ? 63 : R,                      \
    .frac_shift = 0,                                    \
    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1

static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
};

/* Unpack a float to parts, but do not canonicalize. */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;

    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, f_size + e_size, 1),
        .exp = extract64(raw, f_size, e_size),
        .frac = extract64(raw, 0, f_size)
    };
}
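
/*
 * Worked example (added for exposition, not part of the original QEMU code):
 * unpacking the float32 encoding of 1.0f with the routine above.
 */
static inline void unpack_raw64_example(FloatParts64 *p)
{
    unpack_raw64(p, &float32_params, 0x3f800000);
    /*
     * Now p->sign == 0, p->exp == 0x7f (still biased) and p->frac == 0
     * (implicit bit not yet inserted); canonicalization below is what
     * removes the bias and restores the implicit bit.
     */
}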

static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}

static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
{
    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract32(f.high, 15, 1),
        .exp = extract32(f.high, 0, 15),
        .frac_hi = f.low
    };
}

static void float128_unpack_raw(FloatParts128 *p, float128 f)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}

/* Pack a float from parts, but do not canonicalize. */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;
    uint64_t ret;

    ret = (uint64_t)p->sign << (f_size + e_size);
    ret = deposit64(ret, f_size, e_size, p->exp);
    ret = deposit64(ret, 0, f_size, p->frac);
    return ret;
}

static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}

static float128 float128_pack_raw(const FloatParts128 *p)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}

/*----------------------------------------------------------------------------
| Functions and definitions to determine: (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output. These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.c.inc"

#define PARTS_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME, \
                  FloatParts256 *: parts256_##NAME)

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
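
/*
 * Example (added for exposition, not part of the original QEMU code): the
 * _Generic-based macros above dispatch on the pointer type of the first
 * parts argument, so the same spelling works for every width.
 */
static inline void parts_dispatch_example(FloatParts64 *p64,
                                          FloatParts128 *p128,
                                          float_status *s)
{
    parts_default_nan(p64, s);      /* resolves to parts64_default_nan() */
    parts_default_nan(p128, s);     /* resolves to parts128_default_nan() */
}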

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
                                   const FloatFmt *fmt);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
                                    const FloatFmt *fmt);

#define parts_uncanon_normal(A, S, F) \
    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
                                    uint64_t *mod_quot, float_status *s);
static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
                                      uint64_t *mod_quot, float_status *s);

#define parts_modrem(A, B, Q, S) \
    PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)

static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                      int scale, int64_t min, int64_t max,
                                      float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
                           float_status *s, bool q);
static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
                            float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)

static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_log2(A, S, F) \
    PARTS_GENERIC_64_128(log2, A)(A, S, F)

/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

#define FRAC_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME, \
                  FloatParts256 *: frac256_##NAME)

static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
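
/*
 * Worked example (added for exposition, not part of the original QEMU code):
 * the carry chain in frac128_add(). Adding the 128-bit fractions
 * {hi = 1, lo = UINT64_MAX} and {hi = 2, lo = 1} overflows the low word;
 * uadd64_carry() feeds that carry into the high word.
 */
static inline void frac_add_example(void)
{
    FloatParts128 a = { .frac_hi = 1, .frac_lo = UINT64_MAX };
    FloatParts128 b = { .frac_hi = 2, .frac_lo = 1 };
    FloatParts128 r;
    bool carry_out = frac_add(&r, &a, &b);

    /* now r.frac_lo == 0, r.frac_hi == 4 and carry_out == false */
    (void)carry_out;
}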

static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)

static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;
}

static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}

#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)

static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
{
    return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
}

static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t ta = a->frac_hi, tb = b->frac_hi;
    if (ta == tb) {
        ta = a->frac_lo, tb = b->frac_lo;
        if (ta == tb) {
            return 0;
        }
    }
    return ta < tb ? -1 : 1;
}

#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)

static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)

static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward. If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}

static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)

static bool frac64_eqz(FloatParts64 *a)
{
    return a->frac == 0;
}

static bool frac128_eqz(FloatParts128 *a)
{
    return (a->frac_hi | a->frac_lo) == 0;
}

#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)

static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)

static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)

static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}

static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}

static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)

static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
{
    uint64_t a0, a1, b0, t0, t1, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac;
    a1 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        a0 >>= 1;
        exp_diff = 0;
    }

    b0 = b->frac;
    quot = q = b0 <= a0;
    if (q) {
        a0 -= b0;
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? q - 2 : 0;
        mul64To128(b0, q, &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(a0, a1, 62, &a0, &a1);
        exp_diff -= 62;
        quot = (quot << 62) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
        while (le128(t0, t1, a0, a1)) {
            ++q;
            sub128(a0, a1, t0, t1, &a0, &a1);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub128(t0, t1, a0, a1, &t0, &t1);
        if (lt128(t0, t1, a0, a1) ||
            (eq128(t0, t1, a0, a1) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift128Left(a0, a1, shift, &a0, &a1);
    } else if (likely(a1)) {
        shift = clz64(a1);
        a0 = a1 << shift;
        a1 = 0;
        shift += 64;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac = a0 | (a1 != 0);
}

static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
                           uint64_t *mod_quot)
{
    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac_hi;
    a1 = a->frac_lo;
    a2 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        shift128Right(a0, a1, 1, &a0, &a1);
        exp_diff = 0;
    }

    b0 = b->frac_hi;
    b1 = b->frac_lo;

    quot = q = le128(b0, b1, a0, a1);
    if (q) {
        sub128(a0, a1, b0, b1, &a0, &a1);
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? q - 4 : 0;
        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
        exp_diff -= 61;
        quot = (quot << 61) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
        while (le192(t0, t1, t2, a0, a1, a2)) {
            ++q;
            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = b1;
        t2 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
        if (lt192(t0, t1, t2, a0, a1, a2) ||
            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a2 = t2;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
    } else if (likely(a1)) {
        shift = clz64(a1);
        shortShift128Left(a1, a2, shift, &a0, &a1);
        a2 = 0;
        shift += 64;
    } else if (likely(a2)) {
        shift = clz64(a2);
        a0 = a2 << shift;
        a1 = a2 = 0;
        shift += 128;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac_hi = a0;
    a->frac_lo = a1 | (a2 != 0);
}

#define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)

static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)

static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}

static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)

static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}

static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}

static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
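
/*
 * Worked example (added for exposition, not part of the original QEMU code):
 * shift-right-jam keeps the fact that non-zero bits were shifted out by
 * ORing them into the least significant bit (the "sticky" bit), so a later
 * rounding step still sees that the value was inexact.
 */
static inline void frac_shrjam_example(void)
{
    FloatParts64 p = { .frac = 0x21 };  /* binary 100001 */

    frac_shrjam(&p, 4);
    /*
     * A plain shift would give 0x2 and silently drop the set bit that was
     * shifted out; with jamming, p.frac == 0x3.
     */
    (void)p;
}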

static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)

static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)

static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)

/*
 * Reciprocal sqrt table. 1 bit of exponent, 6 bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licensed.
 */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};

#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
#define FloatPartsN    glue(FloatParts,N)
#define FloatPartsW    glue(FloatParts,W)

#define N 64
#define W 128

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef N
#undef W
#define N 128
#define W 256

#include "softfloat-parts-addsub.c.inc"
#include "softfloat-parts.c.inc"

#undef N
#undef W
#define N 256

#include "softfloat-parts-addsub.c.inc"

#undef N
#undef W
#undef partsN
#undef FloatPartsN
#undef FloatPartsW
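
/*
 * Illustration (added for exposition, not part of the original QEMU code):
 * the macros above act as a simple template mechanism. A definition in the
 * included .c.inc files written against partsN()/FloatPartsN, for instance
 *
 *     static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
 *                                        float_status *s, bool subtract)
 *
 * expands on the first inclusion (N = 64, W = 128) to
 *
 *     static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
 *                                         float_status *s, bool subtract)
 *
 * and on the second inclusion (N = 128, W = 256) to parts128_addsub(),
 * which is exactly what the _Generic dispatch macros earlier in this file
 * select between.
 */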

/*
 * Pack/unpack routines with a specific FloatFmt.
 */

static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
                                      float_status *s, const FloatFmt *params)
{
    float16_unpack_raw(p, f);
    parts_canonicalize(p, s, params);
}

static void float16_unpack_canonical(FloatParts64 *p, float16 f,
                                     float_status *s)
{
    float16a_unpack_canonical(p, f, s, &float16_params);
}

static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
                                      float_status *s)
{
    bfloat16_unpack_raw(p, f);
    parts_canonicalize(p, s, &bfloat16_params);
}

static float16 float16a_round_pack_canonical(FloatParts64 *p,
                                             float_status *s,
                                             const FloatFmt *params)
{
    parts_uncanon(p, s, params);
    return float16_pack_raw(p);
}

static float16 float16_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    return float16a_round_pack_canonical(p, s, &float16_params);
}

static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &bfloat16_params);
    return bfloat16_pack_raw(p);
}

static void float32_unpack_canonical(FloatParts64 *p, float32 f,
                                     float_status *s)
{
    float32_unpack_raw(p, f);
    parts_canonicalize(p, s, &float32_params);
}

static float32 float32_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float32_params);
    return float32_pack_raw(p);
}

static void float64_unpack_canonical(FloatParts64 *p, float64 f,
                                     float_status *s)
{
    float64_unpack_raw(p, f);
    parts_canonicalize(p, s, &float64_params);
}

static float64 float64_round_pack_canonical(FloatParts64 *p,
                                            float_status *s)
{
    parts_uncanon(p, s, &float64_params);
    return float64_pack_raw(p);
}

static float64 float64r32_round_pack_canonical(FloatParts64 *p,
                                               float_status *s)
{
    parts_uncanon(p, s, &float32_params);

    /*
     * In parts_uncanon, we placed the fraction for float32 at the lsb.
     * We need to adjust the fraction higher so that the least N bits are
     * zero, and the fraction is adjacent to the float64 implicit bit.
     */
    switch (p->cls) {
    case float_class_normal:
        if (unlikely(p->exp == 0)) {
            /*
             * The result is denormal for float32, but can be represented
             * in normalized form for float64. Adjust, per canonicalize.
             */
            int shift = frac_normalize(p);
            p->exp = (float32_params.frac_shift -
                      float32_params.exp_bias - shift + 1 +
                      float64_params.exp_bias);
            frac_shr(p, float64_params.frac_shift);
        } else {
            frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
            p->exp += float64_params.exp_bias - float32_params.exp_bias;
        }
        break;
    case float_class_snan:
    case float_class_qnan:
        frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
        p->exp = float64_params.exp_max;
        break;
    case float_class_inf:
        p->exp = float64_params.exp_max;
        break;
    case float_class_zero:
        break;
    default:
        g_assert_not_reached();
    }

    return float64_pack_raw(p);
}

static void float128_unpack_canonical(FloatParts128 *p, float128 f,
                                      float_status *s)
{
    float128_unpack_raw(p, f);
    parts_canonicalize(p, s, &float128_params);
}

static float128 float128_round_pack_canonical(FloatParts128 *p,
                                              float_status *s)
{
    parts_uncanon(p, s, &float128_params);
    return float128_pack_raw(p);
}
c1b6299b
RH
1754/* Returns false if the encoding is invalid. */
1755static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1756 float_status *s)
1757{
1758 /* Ensure rounding precision is set before beginning. */
1759 switch (s->floatx80_rounding_precision) {
1760 case floatx80_precision_x:
1761 case floatx80_precision_d:
1762 case floatx80_precision_s:
1763 break;
1764 default:
1765 g_assert_not_reached();
1766 }
1767
1768 if (unlikely(floatx80_invalid_encoding(f))) {
1769 float_raise(float_flag_invalid, s);
1770 return false;
1771 }
1772
1773 floatx80_unpack_raw(p, f);
1774
1775 if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1776 parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1777 } else {
1778 /* The explicit integer bit is ignored, after invalid checks. */
1779 p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1780 p->cls = (p->frac_hi == 0 ? float_class_inf
1781 : parts_is_snan_frac(p->frac_hi, s)
1782 ? float_class_snan : float_class_qnan);
1783 }
1784 return true;
1785}
1786
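/*
 * Minimal usage sketch (hypothetical helper, not part of the QEMU API):
 * every floatx80 operation below follows this shape -- check the bool
 * result of floatx80_unpack_canonical and substitute the default NaN
 * when the encoding was invalid.
 */
#if 0
static floatx80 example_floatx80_round_nearest(floatx80 a, float_status *s)
{
    FloatParts128 p;

    if (!floatx80_unpack_canonical(&p, a, s)) {
        return floatx80_default_nan(s);
    }
    parts_round_to_int(&p, float_round_nearest_even, 0, s,
                       &floatx80_params[s->floatx80_rounding_precision]);
    return floatx80_round_pack_canonical(&p, s);
}
#endif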
1787static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1788 float_status *s)
1789{
1790 const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1791 uint64_t frac;
1792 int exp;
1793
1794 switch (p->cls) {
1795 case float_class_normal:
1796 if (s->floatx80_rounding_precision == floatx80_precision_x) {
1797 parts_uncanon_normal(p, s, fmt);
1798 frac = p->frac_hi;
1799 exp = p->exp;
1800 } else {
1801 FloatParts64 p64;
1802
1803 p64.sign = p->sign;
1804 p64.exp = p->exp;
1805 frac_truncjam(&p64, p);
1806 parts_uncanon_normal(&p64, s, fmt);
1807 frac = p64.frac;
1808 exp = p64.exp;
1809 }
1810 if (exp != fmt->exp_max) {
1811 break;
1812 }
1813 /* rounded to inf -- fall through to set frac correctly */
1814
1815 case float_class_inf:
1816 /* x86 and m68k differ in the setting of the integer bit. */
1817 frac = floatx80_infinity_low;
1818 exp = fmt->exp_max;
1819 break;
1820
1821 case float_class_zero:
1822 frac = 0;
1823 exp = 0;
1824 break;
1825
1826 case float_class_snan:
1827 case float_class_qnan:
1828 /* NaNs have the integer bit set. */
1829 frac = p->frac_hi | (1ull << 63);
1830 exp = fmt->exp_max;
1831 break;
1832
1833 default:
1834 g_assert_not_reached();
1835 }
1836
1837 return packFloatx80(p->sign, exp, frac);
1838}
1839
6fff2167 1840/*
da10a907 1841 * Addition and subtraction
6fff2167
AB
1842 */
1843
da10a907
RH
1844static float16 QEMU_FLATTEN
1845float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
6fff2167 1846{
da10a907 1847 FloatParts64 pa, pb, *pr;
98e256fc
RH
1848
1849 float16_unpack_canonical(&pa, a, status);
1850 float16_unpack_canonical(&pb, b, status);
da10a907 1851 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1852
da10a907 1853 return float16_round_pack_canonical(pr, status);
6fff2167
AB
1854}
1855
da10a907 1856float16 float16_add(float16 a, float16 b, float_status *status)
1b615d48 1857{
da10a907
RH
1858 return float16_addsub(a, b, status, false);
1859}
1b615d48 1860
da10a907
RH
1861float16 float16_sub(float16 a, float16 b, float_status *status)
1862{
1863 return float16_addsub(a, b, status, true);
1b615d48
EC
1864}
1865
1866static float32 QEMU_SOFTFLOAT_ATTR
da10a907 1867soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
6fff2167 1868{
da10a907 1869 FloatParts64 pa, pb, *pr;
98e256fc
RH
1870
1871 float32_unpack_canonical(&pa, a, status);
1872 float32_unpack_canonical(&pb, b, status);
da10a907 1873 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1874
da10a907 1875 return float32_round_pack_canonical(pr, status);
6fff2167
AB
1876}
1877
da10a907 1878static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1b615d48 1879{
da10a907 1880 return soft_f32_addsub(a, b, status, false);
1b615d48
EC
1881}
1882
da10a907 1883static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1b615d48 1884{
da10a907 1885 return soft_f32_addsub(a, b, status, true);
1b615d48
EC
1886}
1887
1888static float64 QEMU_SOFTFLOAT_ATTR
da10a907 1889soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
6fff2167 1890{
da10a907 1891 FloatParts64 pa, pb, *pr;
98e256fc
RH
1892
1893 float64_unpack_canonical(&pa, a, status);
1894 float64_unpack_canonical(&pb, b, status);
da10a907 1895 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1896
da10a907 1897 return float64_round_pack_canonical(pr, status);
6fff2167
AB
1898}
1899
da10a907 1900static float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1901{
da10a907 1902 return soft_f64_addsub(a, b, status, false);
1b615d48 1903}
6fff2167 1904
da10a907 1905static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1b615d48 1906{
da10a907 1907 return soft_f64_addsub(a, b, status, true);
6fff2167
AB
1908}
1909
1b615d48 1910static float hard_f32_add(float a, float b)
6fff2167 1911{
1b615d48
EC
1912 return a + b;
1913}
6fff2167 1914
1b615d48
EC
1915static float hard_f32_sub(float a, float b)
1916{
1917 return a - b;
6fff2167
AB
1918}
1919
1b615d48 1920static double hard_f64_add(double a, double b)
6fff2167 1921{
1b615d48
EC
1922 return a + b;
1923}
6fff2167 1924
1b615d48
EC
1925static double hard_f64_sub(double a, double b)
1926{
1927 return a - b;
1928}
1929
b240c9c4 1930static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1b615d48
EC
1931{
1932 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1933 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1934 }
1935 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1936}
1937
b240c9c4 1938static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1b615d48
EC
1939{
1940 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1941 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1942 } else {
1943 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1944 }
1945}
1946
1947static float32 float32_addsub(float32 a, float32 b, float_status *s,
1948 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1949{
1950 return float32_gen2(a, b, s, hard, soft,
b240c9c4 1951 f32_is_zon2, f32_addsubmul_post);
1b615d48
EC
1952}
1953
1954static float64 float64_addsub(float64 a, float64 b, float_status *s,
1955 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1956{
1957 return float64_gen2(a, b, s, hard, soft,
b240c9c4 1958 f64_is_zon2, f64_addsubmul_post);
1b615d48
EC
1959}
1960
1961float32 QEMU_FLATTEN
1962float32_add(float32 a, float32 b, float_status *s)
1963{
1964 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1965}
1966
1967float32 QEMU_FLATTEN
1968float32_sub(float32 a, float32 b, float_status *s)
1969{
1970 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1971}
1972
1973float64 QEMU_FLATTEN
1974float64_add(float64 a, float64 b, float_status *s)
1975{
1976 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1977}
1978
1979float64 QEMU_FLATTEN
1980float64_sub(float64 a, float64 b, float_status *s)
1981{
1982 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1983}
1984
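/*
 * Usage sketch (illustrative only; get_float_exception_flags() is
 * declared in the softfloat headers rather than in this file, which is
 * an assumption of this sketch): whichever path is taken -- host FPU
 * fast path or softfloat fallback -- the cumulative exception flags end
 * up in the float_status, not in the returned value.
 */
#if 0
static float32 example_add_and_check_inexact(float32 a, float32 b,
                                              float_status *s)
{
    float32 r = float32_add(a, b, s);

    if (get_float_exception_flags(s) & float_flag_inexact) {
        /* the addition rounded; a guest FPSR emulation would latch this */
    }
    return r;
}
#endif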
42636fb9
RH
1985static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
1986 bool subtract)
1987{
1988 FloatParts64 pa, pb, *pr;
1989
1990 float64_unpack_canonical(&pa, a, status);
1991 float64_unpack_canonical(&pb, b, status);
1992 pr = parts_addsub(&pa, &pb, status, subtract);
1993
1994 return float64r32_round_pack_canonical(pr, status);
1995}
1996
1997float64 float64r32_add(float64 a, float64 b, float_status *status)
1998{
1999 return float64r32_addsub(a, b, status, false);
2000}
2001
2002float64 float64r32_sub(float64 a, float64 b, float_status *status)
2003{
2004 return float64r32_addsub(a, b, status, true);
2005}
2006
da10a907
RH
2007static bfloat16 QEMU_FLATTEN
2008bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
8282310d 2009{
da10a907 2010 FloatParts64 pa, pb, *pr;
98e256fc
RH
2011
2012 bfloat16_unpack_canonical(&pa, a, status);
2013 bfloat16_unpack_canonical(&pb, b, status);
da10a907 2014 pr = parts_addsub(&pa, &pb, status, subtract);
8282310d 2015
da10a907 2016 return bfloat16_round_pack_canonical(pr, status);
8282310d
LZ
2017}
2018
da10a907 2019bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
8282310d 2020{
da10a907
RH
2021 return bfloat16_addsub(a, b, status, false);
2022}
8282310d 2023
da10a907
RH
2024bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2025{
2026 return bfloat16_addsub(a, b, status, true);
8282310d
LZ
2027}
2028
3ff49e56
RH
2029static float128 QEMU_FLATTEN
2030float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2031{
2032 FloatParts128 pa, pb, *pr;
2033
2034 float128_unpack_canonical(&pa, a, status);
2035 float128_unpack_canonical(&pb, b, status);
2036 pr = parts_addsub(&pa, &pb, status, subtract);
2037
2038 return float128_round_pack_canonical(pr, status);
2039}
2040
2041float128 float128_add(float128 a, float128 b, float_status *status)
2042{
2043 return float128_addsub(a, b, status, false);
2044}
2045
2046float128 float128_sub(float128 a, float128 b, float_status *status)
2047{
2048 return float128_addsub(a, b, status, true);
2049}
2050
c1b6299b
RH
2051static floatx80 QEMU_FLATTEN
2052floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2053{
2054 FloatParts128 pa, pb, *pr;
2055
2056 if (!floatx80_unpack_canonical(&pa, a, status) ||
2057 !floatx80_unpack_canonical(&pb, b, status)) {
2058 return floatx80_default_nan(status);
2059 }
2060
2061 pr = parts_addsub(&pa, &pb, status, subtract);
2062 return floatx80_round_pack_canonical(pr, status);
2063}
2064
2065floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2066{
2067 return floatx80_addsub(a, b, status, false);
2068}
2069
2070floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2071{
2072 return floatx80_addsub(a, b, status, true);
2073}
2074
74d707e2 2075/*
aca84527 2076 * Multiplication
74d707e2
AB
2077 */
2078
97ff87c0 2079float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2 2080{
aca84527 2081 FloatParts64 pa, pb, *pr;
98e256fc
RH
2082
2083 float16_unpack_canonical(&pa, a, status);
2084 float16_unpack_canonical(&pb, b, status);
aca84527 2085 pr = parts_mul(&pa, &pb, status);
74d707e2 2086
aca84527 2087 return float16_round_pack_canonical(pr, status);
74d707e2
AB
2088}
2089
2dfabc86
EC
2090static float32 QEMU_SOFTFLOAT_ATTR
2091soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2 2092{
aca84527 2093 FloatParts64 pa, pb, *pr;
98e256fc
RH
2094
2095 float32_unpack_canonical(&pa, a, status);
2096 float32_unpack_canonical(&pb, b, status);
aca84527 2097 pr = parts_mul(&pa, &pb, status);
74d707e2 2098
aca84527 2099 return float32_round_pack_canonical(pr, status);
74d707e2
AB
2100}
2101
2dfabc86
EC
2102static float64 QEMU_SOFTFLOAT_ATTR
2103soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2 2104{
aca84527 2105 FloatParts64 pa, pb, *pr;
98e256fc
RH
2106
2107 float64_unpack_canonical(&pa, a, status);
2108 float64_unpack_canonical(&pb, b, status);
aca84527 2109 pr = parts_mul(&pa, &pb, status);
74d707e2 2110
aca84527 2111 return float64_round_pack_canonical(pr, status);
74d707e2
AB
2112}
2113
2dfabc86
EC
2114static float hard_f32_mul(float a, float b)
2115{
2116 return a * b;
2117}
2118
2119static double hard_f64_mul(double a, double b)
2120{
2121 return a * b;
2122}
2123
2dfabc86
EC
2124float32 QEMU_FLATTEN
2125float32_mul(float32 a, float32 b, float_status *s)
2126{
2127 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
b240c9c4 2128 f32_is_zon2, f32_addsubmul_post);
2dfabc86
EC
2129}
2130
2131float64 QEMU_FLATTEN
2132float64_mul(float64 a, float64 b, float_status *s)
2133{
2134 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
b240c9c4 2135 f64_is_zon2, f64_addsubmul_post);
2dfabc86
EC
2136}
2137
42636fb9
RH
2138float64 float64r32_mul(float64 a, float64 b, float_status *status)
2139{
2140 FloatParts64 pa, pb, *pr;
2141
2142 float64_unpack_canonical(&pa, a, status);
2143 float64_unpack_canonical(&pb, b, status);
2144 pr = parts_mul(&pa, &pb, status);
2145
2146 return float64r32_round_pack_canonical(pr, status);
2147}
2148
aca84527
RH
2149bfloat16 QEMU_FLATTEN
2150bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
8282310d 2151{
aca84527 2152 FloatParts64 pa, pb, *pr;
98e256fc
RH
2153
2154 bfloat16_unpack_canonical(&pa, a, status);
2155 bfloat16_unpack_canonical(&pb, b, status);
aca84527 2156 pr = parts_mul(&pa, &pb, status);
8282310d 2157
aca84527
RH
2158 return bfloat16_round_pack_canonical(pr, status);
2159}
2160
2161float128 QEMU_FLATTEN
2162float128_mul(float128 a, float128 b, float_status *status)
2163{
2164 FloatParts128 pa, pb, *pr;
2165
2166 float128_unpack_canonical(&pa, a, status);
2167 float128_unpack_canonical(&pb, b, status);
2168 pr = parts_mul(&pa, &pb, status);
2169
2170 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2171}
2172
153f664a
RH
2173floatx80 QEMU_FLATTEN
2174floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2175{
2176 FloatParts128 pa, pb, *pr;
2177
2178 if (!floatx80_unpack_canonical(&pa, a, status) ||
2179 !floatx80_unpack_canonical(&pb, b, status)) {
2180 return floatx80_default_nan(status);
2181 }
2182
2183 pr = parts_mul(&pa, &pb, status);
2184 return floatx80_round_pack_canonical(pr, status);
2185}
2186
d446830a 2187/*
dedd123c 2188 * Fused multiply-add
d446830a
AB
2189 */
2190
97ff87c0 2191float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
dedd123c 2192 int flags, float_status *status)
d446830a 2193{
dedd123c 2194 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
2195
2196 float16_unpack_canonical(&pa, a, status);
2197 float16_unpack_canonical(&pb, b, status);
2198 float16_unpack_canonical(&pc, c, status);
dedd123c 2199 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 2200
dedd123c 2201 return float16_round_pack_canonical(pr, status);
d446830a
AB
2202}
2203
ccf770ba
EC
2204static float32 QEMU_SOFTFLOAT_ATTR
2205soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2206 float_status *status)
d446830a 2207{
dedd123c 2208 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
2209
2210 float32_unpack_canonical(&pa, a, status);
2211 float32_unpack_canonical(&pb, b, status);
2212 float32_unpack_canonical(&pc, c, status);
dedd123c 2213 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 2214
dedd123c 2215 return float32_round_pack_canonical(pr, status);
d446830a
AB
2216}
2217
ccf770ba
EC
2218static float64 QEMU_SOFTFLOAT_ATTR
2219soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2220 float_status *status)
d446830a 2221{
dedd123c 2222 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
2223
2224 float64_unpack_canonical(&pa, a, status);
2225 float64_unpack_canonical(&pb, b, status);
2226 float64_unpack_canonical(&pc, c, status);
dedd123c 2227 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 2228
dedd123c 2229 return float64_round_pack_canonical(pr, status);
d446830a
AB
2230}
2231
f6b3b108
EC
2232static bool force_soft_fma;
2233
ccf770ba
EC
2234float32 QEMU_FLATTEN
2235float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2236{
2237 union_float32 ua, ub, uc, ur;
2238
2239 ua.s = xa;
2240 ub.s = xb;
2241 uc.s = xc;
2242
2243 if (unlikely(!can_use_fpu(s))) {
2244 goto soft;
2245 }
2246 if (unlikely(flags & float_muladd_halve_result)) {
2247 goto soft;
2248 }
2249
2250 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2251 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2252 goto soft;
2253 }
f6b3b108
EC
2254
2255 if (unlikely(force_soft_fma)) {
2256 goto soft;
2257 }
2258
ccf770ba
EC
2259 /*
2260 * When either a or b is zero, there's no need to check for under/overflow,
2261 * since we know the addend is (normal || 0) and the product is exactly 0.
2262 */
2263 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2264 union_float32 up;
2265 bool prod_sign;
2266
2267 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2268 prod_sign ^= !!(flags & float_muladd_negate_product);
2269 up.s = float32_set_sign(float32_zero, prod_sign);
2270
2271 if (flags & float_muladd_negate_c) {
2272 uc.h = -uc.h;
2273 }
2274 ur.h = up.h + uc.h;
2275 } else {
896f51fb
KC
2276 union_float32 ua_orig = ua;
2277 union_float32 uc_orig = uc;
2278
ccf770ba
EC
2279 if (flags & float_muladd_negate_product) {
2280 ua.h = -ua.h;
2281 }
2282 if (flags & float_muladd_negate_c) {
2283 uc.h = -uc.h;
2284 }
2285
2286 ur.h = fmaf(ua.h, ub.h, uc.h);
2287
2288 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 2289 float_raise(float_flag_overflow, s);
ccf770ba 2290 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
2291 ua = ua_orig;
2292 uc = uc_orig;
ccf770ba
EC
2293 goto soft;
2294 }
2295 }
2296 if (flags & float_muladd_negate_result) {
2297 return float32_chs(ur.s);
2298 }
2299 return ur.s;
2300
2301 soft:
2302 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2303}
2304
2305float64 QEMU_FLATTEN
2306float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2307{
2308 union_float64 ua, ub, uc, ur;
2309
2310 ua.s = xa;
2311 ub.s = xb;
2312 uc.s = xc;
2313
2314 if (unlikely(!can_use_fpu(s))) {
2315 goto soft;
2316 }
2317 if (unlikely(flags & float_muladd_halve_result)) {
2318 goto soft;
2319 }
2320
2321 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2322 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2323 goto soft;
2324 }
f6b3b108
EC
2325
2326 if (unlikely(force_soft_fma)) {
2327 goto soft;
2328 }
2329
ccf770ba
EC
2330 /*
2331 * When either a or b is zero, there's no need to check for under/overflow,
2332 * since we know the addend is (normal || 0) and the product is exactly 0.
2333 */
2334 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2335 union_float64 up;
2336 bool prod_sign;
2337
2338 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2339 prod_sign ^= !!(flags & float_muladd_negate_product);
2340 up.s = float64_set_sign(float64_zero, prod_sign);
2341
2342 if (flags & float_muladd_negate_c) {
2343 uc.h = -uc.h;
2344 }
2345 ur.h = up.h + uc.h;
2346 } else {
896f51fb
KC
2347 union_float64 ua_orig = ua;
2348 union_float64 uc_orig = uc;
2349
ccf770ba
EC
2350 if (flags & float_muladd_negate_product) {
2351 ua.h = -ua.h;
2352 }
2353 if (flags & float_muladd_negate_c) {
2354 uc.h = -uc.h;
2355 }
2356
2357 ur.h = fma(ua.h, ub.h, uc.h);
2358
2359 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 2360 float_raise(float_flag_overflow, s);
ccf770ba 2361 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
2362 ua = ua_orig;
2363 uc = uc_orig;
ccf770ba
EC
2364 goto soft;
2365 }
2366 }
2367 if (flags & float_muladd_negate_result) {
2368 return float64_chs(ur.s);
2369 }
2370 return ur.s;
2371
2372 soft:
2373 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2374}
2375
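/*
 * Flag composition sketch (hypothetical wrapper, for illustration only):
 * float_muladd_negate_product negates a*b, float_muladd_negate_c negates
 * the addend, and float_muladd_negate_result negates the final sum.
 * Negation is exact, so the operation still rounds only once.  A
 * "negative multiply-subtract" -(a * b - c) can therefore be written as:
 */
#if 0
static float64 example_fnmsub(float64 a, float64 b, float64 c,
                              float_status *s)
{
    return float64_muladd(a, b, c,
                          float_muladd_negate_c | float_muladd_negate_result,
                          s);
}
#endif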
42636fb9
RH
2376float64 float64r32_muladd(float64 a, float64 b, float64 c,
2377 int flags, float_status *status)
2378{
2379 FloatParts64 pa, pb, pc, *pr;
2380
2381 float64_unpack_canonical(&pa, a, status);
2382 float64_unpack_canonical(&pb, b, status);
2383 float64_unpack_canonical(&pc, c, status);
2384 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2385
2386 return float64r32_round_pack_canonical(pr, status);
2387}
2388
8282310d
LZ
2389bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2390 int flags, float_status *status)
2391{
dedd123c 2392 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
2393
2394 bfloat16_unpack_canonical(&pa, a, status);
2395 bfloat16_unpack_canonical(&pb, b, status);
2396 bfloat16_unpack_canonical(&pc, c, status);
dedd123c
RH
2397 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2398
2399 return bfloat16_round_pack_canonical(pr, status);
2400}
8282310d 2401
dedd123c
RH
2402float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2403 int flags, float_status *status)
2404{
2405 FloatParts128 pa, pb, pc, *pr;
2406
2407 float128_unpack_canonical(&pa, a, status);
2408 float128_unpack_canonical(&pb, b, status);
2409 float128_unpack_canonical(&pc, c, status);
2410 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2411
2412 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2413}
2414
cf07323d 2415/*
ec961b81 2416 * Division
cf07323d
AB
2417 */
2418
cf07323d
AB
2419float16 float16_div(float16 a, float16 b, float_status *status)
2420{
ec961b81 2421 FloatParts64 pa, pb, *pr;
98e256fc
RH
2422
2423 float16_unpack_canonical(&pa, a, status);
2424 float16_unpack_canonical(&pb, b, status);
ec961b81 2425 pr = parts_div(&pa, &pb, status);
cf07323d 2426
ec961b81 2427 return float16_round_pack_canonical(pr, status);
cf07323d
AB
2428}
2429
4a629561
EC
2430static float32 QEMU_SOFTFLOAT_ATTR
2431soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d 2432{
ec961b81 2433 FloatParts64 pa, pb, *pr;
98e256fc
RH
2434
2435 float32_unpack_canonical(&pa, a, status);
2436 float32_unpack_canonical(&pb, b, status);
ec961b81 2437 pr = parts_div(&pa, &pb, status);
cf07323d 2438
ec961b81 2439 return float32_round_pack_canonical(pr, status);
cf07323d
AB
2440}
2441
4a629561
EC
2442static float64 QEMU_SOFTFLOAT_ATTR
2443soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d 2444{
ec961b81 2445 FloatParts64 pa, pb, *pr;
98e256fc
RH
2446
2447 float64_unpack_canonical(&pa, a, status);
2448 float64_unpack_canonical(&pb, b, status);
ec961b81 2449 pr = parts_div(&pa, &pb, status);
cf07323d 2450
ec961b81 2451 return float64_round_pack_canonical(pr, status);
cf07323d
AB
2452}
2453
4a629561
EC
2454static float hard_f32_div(float a, float b)
2455{
2456 return a / b;
2457}
2458
2459static double hard_f64_div(double a, double b)
2460{
2461 return a / b;
2462}
2463
2464static bool f32_div_pre(union_float32 a, union_float32 b)
2465{
2466 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2467 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2468 fpclassify(b.h) == FP_NORMAL;
2469 }
2470 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2471}
2472
2473static bool f64_div_pre(union_float64 a, union_float64 b)
2474{
2475 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2476 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2477 fpclassify(b.h) == FP_NORMAL;
2478 }
2479 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2480}
2481
2482static bool f32_div_post(union_float32 a, union_float32 b)
2483{
2484 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2485 return fpclassify(a.h) != FP_ZERO;
2486 }
2487 return !float32_is_zero(a.s);
2488}
2489
2490static bool f64_div_post(union_float64 a, union_float64 b)
2491{
2492 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2493 return fpclassify(a.h) != FP_ZERO;
2494 }
2495 return !float64_is_zero(a.s);
2496}
2497
2498float32 QEMU_FLATTEN
2499float32_div(float32 a, float32 b, float_status *s)
2500{
2501 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
b240c9c4 2502 f32_div_pre, f32_div_post);
4a629561
EC
2503}
2504
2505float64 QEMU_FLATTEN
2506float64_div(float64 a, float64 b, float_status *s)
2507{
2508 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
b240c9c4 2509 f64_div_pre, f64_div_post);
4a629561
EC
2510}
2511
42636fb9
RH
2512float64 float64r32_div(float64 a, float64 b, float_status *status)
2513{
2514 FloatParts64 pa, pb, *pr;
2515
2516 float64_unpack_canonical(&pa, a, status);
2517 float64_unpack_canonical(&pb, b, status);
2518 pr = parts_div(&pa, &pb, status);
2519
2520 return float64r32_round_pack_canonical(pr, status);
2521}
2522
ec961b81
RH
2523bfloat16 QEMU_FLATTEN
2524bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
8282310d 2525{
ec961b81 2526 FloatParts64 pa, pb, *pr;
98e256fc
RH
2527
2528 bfloat16_unpack_canonical(&pa, a, status);
2529 bfloat16_unpack_canonical(&pb, b, status);
ec961b81 2530 pr = parts_div(&pa, &pb, status);
8282310d 2531
ec961b81
RH
2532 return bfloat16_round_pack_canonical(pr, status);
2533}
2534
2535float128 QEMU_FLATTEN
2536float128_div(float128 a, float128 b, float_status *status)
2537{
2538 FloatParts128 pa, pb, *pr;
2539
2540 float128_unpack_canonical(&pa, a, status);
2541 float128_unpack_canonical(&pb, b, status);
2542 pr = parts_div(&pa, &pb, status);
2543
2544 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2545}
2546
38db99e2
RH
2547floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2548{
2549 FloatParts128 pa, pb, *pr;
2550
2551 if (!floatx80_unpack_canonical(&pa, a, status) ||
2552 !floatx80_unpack_canonical(&pb, b, status)) {
2553 return floatx80_default_nan(status);
2554 }
2555
2556 pr = parts_div(&pa, &pb, status);
2557 return floatx80_round_pack_canonical(pr, status);
2558}
2559
feaf2e9c
RH
2560/*
2561 * Remainder
2562 */
2563
2564float32 float32_rem(float32 a, float32 b, float_status *status)
2565{
2566 FloatParts64 pa, pb, *pr;
2567
2568 float32_unpack_canonical(&pa, a, status);
2569 float32_unpack_canonical(&pb, b, status);
2570 pr = parts_modrem(&pa, &pb, NULL, status);
2571
2572 return float32_round_pack_canonical(pr, status);
2573}
2574
2575float64 float64_rem(float64 a, float64 b, float_status *status)
2576{
2577 FloatParts64 pa, pb, *pr;
2578
2579 float64_unpack_canonical(&pa, a, status);
2580 float64_unpack_canonical(&pb, b, status);
2581 pr = parts_modrem(&pa, &pb, NULL, status);
2582
2583 return float64_round_pack_canonical(pr, status);
2584}
2585
2586float128 float128_rem(float128 a, float128 b, float_status *status)
2587{
2588 FloatParts128 pa, pb, *pr;
2589
2590 float128_unpack_canonical(&pa, a, status);
2591 float128_unpack_canonical(&pb, b, status);
2592 pr = parts_modrem(&pa, &pb, NULL, status);
2593
2594 return float128_round_pack_canonical(pr, status);
2595}
2596
2597/*
2598 * Returns the remainder of the extended double-precision floating-point value
2599 * `a' with respect to the corresponding value `b'.
2600 * If 'mod' is false, the remainder is computed as defined by the IEC/IEEE
2601 * Standard for Binary Floating-Point Arithmetic, i.e. with the quotient
2602 * rounded to the nearest integer.  If 'mod' is true, the quotient is
2603 * truncated toward zero instead, and *quotient is set to the low 64 bits
2604 * of the absolute value of the integer quotient.
2605 */
2606floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2607 uint64_t *quotient, float_status *status)
2608{
2609 FloatParts128 pa, pb, *pr;
2610
2611 *quotient = 0;
2612 if (!floatx80_unpack_canonical(&pa, a, status) ||
2613 !floatx80_unpack_canonical(&pb, b, status)) {
2614 return floatx80_default_nan(status);
2615 }
2616 pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2617
2618 return floatx80_round_pack_canonical(pr, status);
2619}
2620
2621floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2622{
2623 uint64_t quotient;
2624 return floatx80_modrem(a, b, false, &quotient, status);
2625}
2626
2627floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2628{
2629 uint64_t quotient;
2630 return floatx80_modrem(a, b, true, &quotient, status);
2631}
2632
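/*
 * Worked example of the two quotient conventions (values chosen purely
 * for illustration): for a = 5.5 and b = 2.0 the exact quotient is 2.75.
 *   floatx80_rem rounds the quotient to the nearest integer, 3,
 *     giving the IEEE remainder 5.5 - 3 * 2.0 = -0.5;
 *   floatx80_mod truncates the quotient toward zero, 2,
 *     giving 5.5 - 2 * 2.0 = 1.5 (and *quotient = 2 via floatx80_modrem).
 */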
6fed16b2
AB
2633/*
2634 * Float to Float conversions
2635 *
2636 * Returns the result of converting one float format to another. The
2637 * conversion is performed according to the IEC/IEEE Standard for
2638 * Binary Floating-Point Arithmetic.
2639 *
c3f1875e
RH
2640 * Usually this only needs to take care of raising invalid exceptions
2641 * and handling the conversion of NaNs.
6fed16b2
AB
2642 */
2643
c3f1875e
RH
2644static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2645{
2646 switch (a->cls) {
c3f1875e 2647 case float_class_snan:
e706d445
RH
2648 float_raise(float_flag_invalid_snan, s);
2649 /* fall through */
2650 case float_class_qnan:
c3f1875e
RH
2651 /*
2652 * There is no NaN in the destination format. Raise Invalid
2653 * and return a zero with the sign of the input NaN.
2654 */
2655 float_raise(float_flag_invalid, s);
2656 a->cls = float_class_zero;
2657 break;
2658
2659 case float_class_inf:
2660 /*
2661 * There is no Inf in the destination format. Raise Invalid
2662 * and return the maximum normal with the correct sign.
2663 */
2664 float_raise(float_flag_invalid, s);
2665 a->cls = float_class_normal;
2666 a->exp = float16_params_ahp.exp_max;
2667 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2668 float16_params_ahp.frac_size + 1);
2669 break;
2670
2671 case float_class_normal:
2672 case float_class_zero:
2673 break;
2674
2675 default:
2676 g_assert_not_reached();
2677 }
2678}
2679
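/*
 * Worked example for the infinity case above: ARM "alternative half
 * precision" has no Inf or NaN encodings, so the all-ones exponent is an
 * ordinary normal exponent and the largest representable magnitude is
 * (2 - 2**-10) * 2**16 = 131008.  That is the value produced by setting
 * exp_max together with the all-ones (frac_size + 1)-bit fraction here.
 */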
2680static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2681{
2682 if (is_nan(a->cls)) {
2683 parts_return_nan(a, s);
6fed16b2 2684 }
6fed16b2
AB
2685}
2686
c3f1875e
RH
2687static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2688{
2689 if (is_nan(a->cls)) {
2690 parts_return_nan(a, s);
2691 }
2692}
2693
2694#define parts_float_to_float(P, S) \
2695 PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2696
9882ccaf
RH
2697static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2698 float_status *s)
2699{
2700 a->cls = b->cls;
2701 a->sign = b->sign;
2702 a->exp = b->exp;
2703
2704 if (a->cls == float_class_normal) {
2705 frac_truncjam(a, b);
2706 } else if (is_nan(a->cls)) {
2707 /* Discard the low bits of the NaN. */
2708 a->frac = b->frac_hi;
2709 parts_return_nan(a, s);
2710 }
2711}
2712
2713static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2714 float_status *s)
2715{
2716 a->cls = b->cls;
2717 a->sign = b->sign;
2718 a->exp = b->exp;
2719 frac_widen(a, b);
2720
2721 if (is_nan(a->cls)) {
2722 parts_return_nan(a, s);
2723 }
2724}
2725
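/*
 * Hypothetical sketch (this conversion is not provided here; it only
 * illustrates how the narrow/widen helpers compose): any 128-bit to
 * 64-bit-parts conversion funnels through parts_float_to_float_narrow,
 * which truncates the fraction while jamming the discarded bits into the
 * sticky position so that the final rounding is still correct.
 */
#if 0
static bfloat16 example_float128_to_bfloat16(float128 a, float_status *s)
{
    FloatParts64 p64;
    FloatParts128 p128;

    float128_unpack_canonical(&p128, a, s);
    parts_float_to_float_narrow(&p64, &p128, s);
    return bfloat16_round_pack_canonical(&p64, s);
}
#endif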
6fed16b2
AB
2726float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2727{
2728 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2729 FloatParts64 p;
98e256fc 2730
c3f1875e
RH
2731 float16a_unpack_canonical(&p, a, s, fmt16);
2732 parts_float_to_float(&p, s);
2733 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2734}
2735
2736float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2737{
2738 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2739 FloatParts64 p;
98e256fc 2740
c3f1875e
RH
2741 float16a_unpack_canonical(&p, a, s, fmt16);
2742 parts_float_to_float(&p, s);
2743 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2744}
2745
2746float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2747{
c3f1875e
RH
2748 FloatParts64 p;
2749 const FloatFmt *fmt;
98e256fc 2750
c3f1875e
RH
2751 float32_unpack_canonical(&p, a, s);
2752 if (ieee) {
2753 parts_float_to_float(&p, s);
2754 fmt = &float16_params;
2755 } else {
2756 parts_float_to_ahp(&p, s);
2757 fmt = &float16_params_ahp;
2758 }
2759 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2760}
2761
21381dcf
MK
2762static float64 QEMU_SOFTFLOAT_ATTR
2763soft_float32_to_float64(float32 a, float_status *s)
6fed16b2 2764{
c3f1875e 2765 FloatParts64 p;
98e256fc 2766
c3f1875e
RH
2767 float32_unpack_canonical(&p, a, s);
2768 parts_float_to_float(&p, s);
2769 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2770}
2771
21381dcf
MK
2772float64 float32_to_float64(float32 a, float_status *s)
2773{
2774 if (likely(float32_is_normal(a))) {
2775 /* Widening conversion can never produce inexact results. */
2776 union_float32 uf;
2777 union_float64 ud;
2778 uf.s = a;
2779 ud.h = uf.h;
2780 return ud.s;
2781 } else if (float32_is_zero(a)) {
2782 return float64_set_sign(float64_zero, float32_is_neg(a));
2783 } else {
2784 return soft_float32_to_float64(a, s);
2785 }
2786}
2787
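/*
 * Why the fast path above is exact: float64 has 53 significand bits and
 * normals down to 2**-1022, versus 24 bits and 2**-126 for float32, so
 * every finite float32 value converts without rounding.  Zeros are
 * handled by the separate branch, while NaNs, infinities and denormal
 * inputs (which may need flushing) take the softfloat path.
 */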
6fed16b2
AB
2788float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2789{
c3f1875e
RH
2790 FloatParts64 p;
2791 const FloatFmt *fmt;
98e256fc 2792
c3f1875e
RH
2793 float64_unpack_canonical(&p, a, s);
2794 if (ieee) {
2795 parts_float_to_float(&p, s);
2796 fmt = &float16_params;
2797 } else {
2798 parts_float_to_ahp(&p, s);
2799 fmt = &float16_params_ahp;
2800 }
2801 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2802}
2803
2804float32 float64_to_float32(float64 a, float_status *s)
2805{
c3f1875e 2806 FloatParts64 p;
98e256fc 2807
c3f1875e
RH
2808 float64_unpack_canonical(&p, a, s);
2809 parts_float_to_float(&p, s);
2810 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2811}
2812
34f0c0a9
LZ
2813float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2814{
c3f1875e 2815 FloatParts64 p;
98e256fc 2816
c3f1875e
RH
2817 bfloat16_unpack_canonical(&p, a, s);
2818 parts_float_to_float(&p, s);
2819 return float32_round_pack_canonical(&p, s);
34f0c0a9
LZ
2820}
2821
2822float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2823{
c3f1875e 2824 FloatParts64 p;
98e256fc 2825
c3f1875e
RH
2826 bfloat16_unpack_canonical(&p, a, s);
2827 parts_float_to_float(&p, s);
2828 return float64_round_pack_canonical(&p, s);
34f0c0a9
LZ
2829}
2830
2831bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2832{
c3f1875e 2833 FloatParts64 p;
98e256fc 2834
c3f1875e
RH
2835 float32_unpack_canonical(&p, a, s);
2836 parts_float_to_float(&p, s);
2837 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2838}
2839
2840bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2841{
c3f1875e 2842 FloatParts64 p;
98e256fc 2843
c3f1875e
RH
2844 float64_unpack_canonical(&p, a, s);
2845 parts_float_to_float(&p, s);
2846 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2847}
2848
9882ccaf
RH
2849float32 float128_to_float32(float128 a, float_status *s)
2850{
2851 FloatParts64 p64;
2852 FloatParts128 p128;
2853
2854 float128_unpack_canonical(&p128, a, s);
2855 parts_float_to_float_narrow(&p64, &p128, s);
2856 return float32_round_pack_canonical(&p64, s);
2857}
2858
2859float64 float128_to_float64(float128 a, float_status *s)
2860{
2861 FloatParts64 p64;
2862 FloatParts128 p128;
2863
2864 float128_unpack_canonical(&p128, a, s);
2865 parts_float_to_float_narrow(&p64, &p128, s);
2866 return float64_round_pack_canonical(&p64, s);
2867}
2868
2869float128 float32_to_float128(float32 a, float_status *s)
2870{
2871 FloatParts64 p64;
2872 FloatParts128 p128;
2873
2874 float32_unpack_canonical(&p64, a, s);
2875 parts_float_to_float_widen(&p128, &p64, s);
2876 return float128_round_pack_canonical(&p128, s);
2877}
2878
2879float128 float64_to_float128(float64 a, float_status *s)
2880{
2881 FloatParts64 p64;
2882 FloatParts128 p128;
2883
2884 float64_unpack_canonical(&p64, a, s);
2885 parts_float_to_float_widen(&p128, &p64, s);
2886 return float128_round_pack_canonical(&p128, s);
2887}
2888
8ae5719c
RH
2889float32 floatx80_to_float32(floatx80 a, float_status *s)
2890{
2891 FloatParts64 p64;
2892 FloatParts128 p128;
2893
2894 if (floatx80_unpack_canonical(&p128, a, s)) {
2895 parts_float_to_float_narrow(&p64, &p128, s);
2896 } else {
2897 parts_default_nan(&p64, s);
2898 }
2899 return float32_round_pack_canonical(&p64, s);
2900}
2901
2902float64 floatx80_to_float64(floatx80 a, float_status *s)
2903{
2904 FloatParts64 p64;
2905 FloatParts128 p128;
2906
2907 if (floatx80_unpack_canonical(&p128, a, s)) {
2908 parts_float_to_float_narrow(&p64, &p128, s);
2909 } else {
2910 parts_default_nan(&p64, s);
2911 }
2912 return float64_round_pack_canonical(&p64, s);
2913}
2914
2915float128 floatx80_to_float128(floatx80 a, float_status *s)
2916{
2917 FloatParts128 p;
2918
2919 if (floatx80_unpack_canonical(&p, a, s)) {
2920 parts_float_to_float(&p, s);
2921 } else {
2922 parts_default_nan(&p, s);
2923 }
2924 return float128_round_pack_canonical(&p, s);
2925}
2926
2927floatx80 float32_to_floatx80(float32 a, float_status *s)
2928{
2929 FloatParts64 p64;
2930 FloatParts128 p128;
2931
2932 float32_unpack_canonical(&p64, a, s);
2933 parts_float_to_float_widen(&p128, &p64, s);
2934 return floatx80_round_pack_canonical(&p128, s);
2935}
2936
2937floatx80 float64_to_floatx80(float64 a, float_status *s)
2938{
2939 FloatParts64 p64;
2940 FloatParts128 p128;
2941
2942 float64_unpack_canonical(&p64, a, s);
2943 parts_float_to_float_widen(&p128, &p64, s);
2944 return floatx80_round_pack_canonical(&p128, s);
2945}
2946
2947floatx80 float128_to_floatx80(float128 a, float_status *s)
2948{
2949 FloatParts128 p;
2950
2951 float128_unpack_canonical(&p, a, s);
2952 parts_float_to_float(&p, s);
2953 return floatx80_round_pack_canonical(&p, s);
2954}
2955
dbe4d53a 2956/*
afc34931 2957 * Round to integral value
dbe4d53a
AB
2958 */
2959
dbe4d53a
AB
2960float16 float16_round_to_int(float16 a, float_status *s)
2961{
afc34931 2962 FloatParts64 p;
98e256fc 2963
afc34931
RH
2964 float16_unpack_canonical(&p, a, s);
2965 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2966 return float16_round_pack_canonical(&p, s);
dbe4d53a
AB
2967}
2968
2969float32 float32_round_to_int(float32 a, float_status *s)
2970{
afc34931 2971 FloatParts64 p;
98e256fc 2972
afc34931
RH
2973 float32_unpack_canonical(&p, a, s);
2974 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2975 return float32_round_pack_canonical(&p, s);
dbe4d53a
AB
2976}
2977
2978float64 float64_round_to_int(float64 a, float_status *s)
2979{
afc34931 2980 FloatParts64 p;
98e256fc 2981
afc34931
RH
2982 float64_unpack_canonical(&p, a, s);
2983 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2984 return float64_round_pack_canonical(&p, s);
dbe4d53a
AB
2985}
2986
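/*
 * Sketch of a variant with an explicit rounding mode (hypothetical; the
 * public helpers above always use s->float_rounding_mode): only the
 * second argument of parts_round_to_int changes.
 */
#if 0
static float64 example_float64_round_to_int_mode(float64 a,
                                                 FloatRoundMode rmode,
                                                 float_status *s)
{
    FloatParts64 p;

    float64_unpack_canonical(&p, a, s);
    parts_round_to_int(&p, rmode, 0, s, &float64_params);
    return float64_round_pack_canonical(&p, s);
}
#endif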
34f0c0a9
LZ
2987bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2988{
afc34931 2989 FloatParts64 p;
98e256fc 2990
afc34931
RH
2991 bfloat16_unpack_canonical(&p, a, s);
2992 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2993 return bfloat16_round_pack_canonical(&p, s);
2994}
2995
2996float128 float128_round_to_int(float128 a, float_status *s)
2997{
2998 FloatParts128 p;
2999
3000 float128_unpack_canonical(&p, a, s);
3001 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3002 return float128_round_pack_canonical(&p, s);
34f0c0a9
LZ
3003}
3004
f9a95a78
RH
3005floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3006{
3007 FloatParts128 p;
3008
3009 if (!floatx80_unpack_canonical(&p, a, status)) {
3010 return floatx80_default_nan(status);
3011 }
3012
3013 parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3014 &floatx80_params[status->floatx80_rounding_precision]);
3015 return floatx80_round_pack_canonical(&p, status);
3016}
3017
ab52f973 3018/*
463b3f0d
RH
3019 * Floating-point to signed integer conversions
3020 */
ab52f973 3021
0d93d8ec
FC
3022int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3023 float_status *s)
3024{
98e256fc
RH
3025 FloatParts64 p;
3026
3027 float16_unpack_canonical(&p, a, s);
463b3f0d 3028 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
0d93d8ec
FC
3029}
3030
3dede407 3031int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3032 float_status *s)
3033{
98e256fc
RH
3034 FloatParts64 p;
3035
3036 float16_unpack_canonical(&p, a, s);
463b3f0d 3037 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
3038}
3039
3dede407 3040int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3041 float_status *s)
3042{
98e256fc
RH
3043 FloatParts64 p;
3044
3045 float16_unpack_canonical(&p, a, s);
463b3f0d 3046 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
3047}
3048
3dede407 3049int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3050 float_status *s)
3051{
98e256fc
RH
3052 FloatParts64 p;
3053
3054 float16_unpack_canonical(&p, a, s);
463b3f0d 3055 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
3056}
3057
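/*
 * Usage sketch (hypothetical helper; it assumes, as the fixed-point
 * callers of these entry points do, that 'scale' multiplies the input by
 * 2**scale before rounding): converting a float32 to a Q16.16
 * fixed-point int32 with truncation would then be:
 */
#if 0
static int32_t example_float32_to_q16_16(float32 a, float_status *s)
{
    return float32_to_int32_scalbn(a, float_round_to_zero, 16, s);
}
#endif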
3dede407 3058int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3059 float_status *s)
3060{
98e256fc
RH
3061 FloatParts64 p;
3062
3063 float32_unpack_canonical(&p, a, s);
463b3f0d 3064 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
3065}
3066
3dede407 3067int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3068 float_status *s)
3069{
98e256fc
RH
3070 FloatParts64 p;
3071
3072 float32_unpack_canonical(&p, a, s);
463b3f0d 3073 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
3074}
3075
3dede407 3076int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3077 float_status *s)
3078{
98e256fc
RH
3079 FloatParts64 p;
3080
3081 float32_unpack_canonical(&p, a, s);
463b3f0d 3082 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
3083}
3084
3dede407 3085int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3086 float_status *s)
3087{
98e256fc
RH
3088 FloatParts64 p;
3089
3090 float64_unpack_canonical(&p, a, s);
463b3f0d 3091 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
3092}
3093
3dede407 3094int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3095 float_status *s)
3096{
98e256fc
RH
3097 FloatParts64 p;
3098
3099 float64_unpack_canonical(&p, a, s);
463b3f0d 3100 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
3101}
3102
3dede407 3103int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3104 float_status *s)
3105{
98e256fc
RH
3106 FloatParts64 p;
3107
3108 float64_unpack_canonical(&p, a, s);
463b3f0d
RH
3109 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3110}
3111
3112int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3113 float_status *s)
3114{
3115 FloatParts64 p;
3116
3117 bfloat16_unpack_canonical(&p, a, s);
3118 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3119}
3120
3121int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3122 float_status *s)
3123{
3124 FloatParts64 p;
3125
3126 bfloat16_unpack_canonical(&p, a, s);
3127 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3128}
3129
3130int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3131 float_status *s)
3132{
3133 FloatParts64 p;
3134
3135 bfloat16_unpack_canonical(&p, a, s);
3136 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3137}
3138
3139static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3140 int scale, float_status *s)
3141{
3142 FloatParts128 p;
3143
3144 float128_unpack_canonical(&p, a, s);
3145 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3146}
3147
3148static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3149 int scale, float_status *s)
3150{
3151 FloatParts128 p;
3152
3153 float128_unpack_canonical(&p, a, s);
3154 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
3155}
3156
a1fc527b
RH
3157static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3158 int scale, float_status *s)
3159{
3160 FloatParts128 p;
3161
3162 if (!floatx80_unpack_canonical(&p, a, s)) {
3163 parts_default_nan(&p, s);
3164 }
3165 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3166}
3167
3168static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3169 int scale, float_status *s)
3170{
3171 FloatParts128 p;
3172
3173 if (!floatx80_unpack_canonical(&p, a, s)) {
3174 parts_default_nan(&p, s);
3175 }
3176 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3177}
3178
0d93d8ec
FC
3179int8_t float16_to_int8(float16 a, float_status *s)
3180{
3181 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3182}
3183
2f6c74be
RH
3184int16_t float16_to_int16(float16 a, float_status *s)
3185{
3186 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3187}
3188
3189int32_t float16_to_int32(float16 a, float_status *s)
3190{
3191 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3192}
3193
3194int64_t float16_to_int64(float16 a, float_status *s)
3195{
3196 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3197}
3198
3199int16_t float32_to_int16(float32 a, float_status *s)
3200{
3201 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3202}
3203
3204int32_t float32_to_int32(float32 a, float_status *s)
3205{
3206 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3207}
3208
3209int64_t float32_to_int64(float32 a, float_status *s)
3210{
3211 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3212}
3213
3214int16_t float64_to_int16(float64 a, float_status *s)
3215{
3216 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3217}
3218
3219int32_t float64_to_int32(float64 a, float_status *s)
3220{
3221 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3222}
3223
3224int64_t float64_to_int64(float64 a, float_status *s)
3225{
3226 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3227}
3228
463b3f0d
RH
3229int32_t float128_to_int32(float128 a, float_status *s)
3230{
3231 return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3232}
3233
3234int64_t float128_to_int64(float128 a, float_status *s)
3235{
3236 return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3237}
3238
a1fc527b
RH
3239int32_t floatx80_to_int32(floatx80 a, float_status *s)
3240{
3241 return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3242}
3243
3244int64_t floatx80_to_int64(floatx80 a, float_status *s)
3245{
3246 return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3247}
3248
2f6c74be
RH
3249int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3250{
3251 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3252}
3253
3254int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3255{
3256 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3257}
3258
3259int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3260{
3261 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
3262}
3263
2f6c74be
RH
3264int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3265{
3266 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3267}
ab52f973 3268
2f6c74be
RH
3269int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3270{
3271 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3272}
3273
3274int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3275{
3276 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3277}
3278
3279int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3280{
3281 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3282}
ab52f973 3283
2f6c74be
RH
3284int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3285{
3286 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3287}
ab52f973 3288
2f6c74be
RH
3289int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3290{
3291 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3292}
ab52f973 3293
463b3f0d 3294int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
34f0c0a9 3295{
463b3f0d 3296 return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
3297}
3298
463b3f0d 3299int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
34f0c0a9 3300{
463b3f0d 3301 return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
3302}
3303
a1fc527b
RH
3304int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3305{
3306 return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3307}
3308
3309int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3310{
3311 return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3312}
3313
34f0c0a9
LZ
3314int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3315{
3316 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3317}
3318
3319int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3320{
3321 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3322}
3323
3324int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3325{
3326 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3327}
3328
3329int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3330{
3331 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3332}
3333
3334int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3335{
3336 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3337}
3338
3339int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3340{
3341 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3342}
3343
ab52f973 3344/*
4ab4aef0 3345 * Floating-point to unsigned integer conversions
ab52f973
AB
3346 */
3347
0d93d8ec
FC
3348uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3349 float_status *s)
3350{
98e256fc
RH
3351 FloatParts64 p;
3352
3353 float16_unpack_canonical(&p, a, s);
4ab4aef0 3354 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
0d93d8ec
FC
3355}
3356
3dede407 3357uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3358 float_status *s)
3359{
98e256fc
RH
3360 FloatParts64 p;
3361
3362 float16_unpack_canonical(&p, a, s);
4ab4aef0 3363 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
3364}
3365
3dede407 3366uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3367 float_status *s)
3368{
98e256fc
RH
3369 FloatParts64 p;
3370
3371 float16_unpack_canonical(&p, a, s);
4ab4aef0 3372 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
3373}
3374
3dede407 3375uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3376 float_status *s)
3377{
98e256fc
RH
3378 FloatParts64 p;
3379
3380 float16_unpack_canonical(&p, a, s);
4ab4aef0 3381 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
3382}
3383
3dede407 3384uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3385 float_status *s)
3386{
98e256fc
RH
3387 FloatParts64 p;
3388
3389 float32_unpack_canonical(&p, a, s);
4ab4aef0 3390 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
3391}
3392
3dede407 3393uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3394 float_status *s)
3395{
98e256fc
RH
3396 FloatParts64 p;
3397
3398 float32_unpack_canonical(&p, a, s);
4ab4aef0 3399 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
3400}
3401
3dede407 3402uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3403 float_status *s)
3404{
98e256fc
RH
3405 FloatParts64 p;
3406
3407 float32_unpack_canonical(&p, a, s);
4ab4aef0 3408 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
3409}
3410
3dede407 3411uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3412 float_status *s)
3413{
98e256fc
RH
3414 FloatParts64 p;
3415
3416 float64_unpack_canonical(&p, a, s);
4ab4aef0 3417 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
3418}
3419
3dede407 3420uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3421 float_status *s)
3422{
98e256fc
RH
3423 FloatParts64 p;
3424
3425 float64_unpack_canonical(&p, a, s);
4ab4aef0 3426 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
3427}
3428
3dede407 3429uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3430 float_status *s)
3431{
98e256fc
RH
3432 FloatParts64 p;
3433
3434 float64_unpack_canonical(&p, a, s);
4ab4aef0
RH
3435 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3436}
3437
3438uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3439 int scale, float_status *s)
3440{
3441 FloatParts64 p;
3442
3443 bfloat16_unpack_canonical(&p, a, s);
3444 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3445}
3446
3447uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3448 int scale, float_status *s)
3449{
3450 FloatParts64 p;
3451
3452 bfloat16_unpack_canonical(&p, a, s);
3453 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3454}
3455
3456uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3457 int scale, float_status *s)
3458{
3459 FloatParts64 p;
3460
3461 bfloat16_unpack_canonical(&p, a, s);
3462 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3463}
3464
3465static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3466 int scale, float_status *s)
3467{
3468 FloatParts128 p;
3469
3470 float128_unpack_canonical(&p, a, s);
3471 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3472}
3473
3474static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3475 int scale, float_status *s)
3476{
3477 FloatParts128 p;
3478
3479 float128_unpack_canonical(&p, a, s);
3480 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
3481}
3482
4de49ddf
MF
3483static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3484 int scale, float_status *s)
3485{
3486 int flags = 0;
3487 Int128 r;
3488 FloatParts128 p;
3489
3490 float128_unpack_canonical(&p, a, s);
3491
3492 switch (p.cls) {
3493 case float_class_snan:
3494 flags |= float_flag_invalid_snan;
3495 /* fall through */
3496 case float_class_qnan:
3497 flags |= float_flag_invalid;
3498 r = UINT128_MAX;
3499 break;
3500
3501 case float_class_inf:
3502 flags = float_flag_invalid | float_flag_invalid_cvti;
3503 r = p.sign ? int128_zero() : UINT128_MAX;
3504 break;
3505
3506 case float_class_zero:
3507 return int128_zero();
3508
3509 case float_class_normal:
3510 if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3511 flags = float_flag_inexact;
3512 if (p.cls == float_class_zero) {
3513 r = int128_zero();
3514 break;
3515 }
3516 }
3517
3518 if (p.sign) {
3519 flags = float_flag_invalid | float_flag_invalid_cvti;
3520 r = int128_zero();
3521 } else if (p.exp <= 127) {
3522 int shift = 127 - p.exp;
3523 r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3524 } else {
3525 flags = float_flag_invalid | float_flag_invalid_cvti;
3526 r = UINT128_MAX;
3527 }
3528 break;
3529
3530 default:
3531 g_assert_not_reached();
3532 }
3533
3534 float_raise(flags, s);
3535 return r;
3536}
3537
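/*
 * Worked example for the normal case above (illustration only): after
 * parts_round_to_int_normal, the significand sits in frac_hi:frac_lo with
 * the integer bit at position 127, so a value of 3.5 rounded toward zero
 * becomes 3.0 with p.exp == 1, and int128_urshift by 127 - 1 = 126 leaves
 * exactly the integer 3.  Negative inputs and values of 2**128 or more
 * fall into the float_flag_invalid_cvti branches instead.
 */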
0d93d8ec
FC
3538uint8_t float16_to_uint8(float16 a, float_status *s)
3539{
3540 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3541}
3542
2f6c74be
RH
3543uint16_t float16_to_uint16(float16 a, float_status *s)
3544{
3545 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3546}
3547
3548uint32_t float16_to_uint32(float16 a, float_status *s)
3549{
3550 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3551}
3552
3553uint64_t float16_to_uint64(float16 a, float_status *s)
3554{
3555 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3556}
3557
3558uint16_t float32_to_uint16(float32 a, float_status *s)
3559{
3560 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3561}
3562
3563uint32_t float32_to_uint32(float32 a, float_status *s)
3564{
3565 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3566}
3567
3568uint64_t float32_to_uint64(float32 a, float_status *s)
3569{
3570 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3571}
3572
3573uint16_t float64_to_uint16(float64 a, float_status *s)
3574{
3575 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3576}
3577
3578uint32_t float64_to_uint32(float64 a, float_status *s)
3579{
3580 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3581}
3582
3583uint64_t float64_to_uint64(float64 a, float_status *s)
3584{
3585 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3586}
3587
4ab4aef0
RH
3588uint32_t float128_to_uint32(float128 a, float_status *s)
3589{
3590 return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3591}
3592
3593uint64_t float128_to_uint64(float128 a, float_status *s)
3594{
3595 return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3596}
3597
4de49ddf
MF
3598Int128 float128_to_uint128(float128 a, float_status *s)
3599{
3600 return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3601}
3602
2f6c74be
RH
3603uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3604{
3605 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3606}
3607
3608uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3609{
3610 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3611}
3612
3613uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3614{
3615 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3616}
3617
3618uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3619{
3620 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3621}
3622
3623uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3624{
3625 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3626}
3627
3628uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3629{
3630 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3631}
3632
3633uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3634{
3635 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3636}
3637
3638uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3639{
3640 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3641}
3642
3643uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3644{
3645 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3646}
ab52f973 3647
4ab4aef0 3648uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
34f0c0a9 3649{
4ab4aef0 3650 return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
3651}
3652
4ab4aef0 3653uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
34f0c0a9 3654{
4ab4aef0 3655 return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
3656}
3657
4de49ddf
MF
3658Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3659{
3660 return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3661}
3662
34f0c0a9
LZ
3663uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3664{
3665 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3666}
3667
3668uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3669{
3670 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3671}
3672
3673uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3674{
3675 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3676}
3677
3678uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3679{
3680 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3681}
3682
3683uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3684{
3685 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3686}
3687
3688uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3689{
3690 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3691}
3692
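/*
 * The fixed-width helpers above are thin wrappers: the rounding mode is
 * either taken from the float_status or pinned to float_round_to_zero, and
 * the scale argument of the *_scalbn variants applies a power-of-two scaling
 * before the conversion.  A sketch of the equivalence (illustrative helper
 * name, exact values so no flags are raised, g_assert() from glib):
 */
static inline void float_to_uint_wrappers_sketch(void)
{
    float_status st = { .float_rounding_mode = float_round_up };
    float64 ten = int64_to_float64(10, &st);

    /* The same operation spelled three ways. */
    g_assert(float64_to_uint32_round_to_zero(ten, &st) == 10);
    g_assert(float64_to_uint32_scalbn(ten, float_round_to_zero, 0, &st) == 10);
    g_assert(float64_to_uint32(ten, &st) == 10);   /* uses st's rounding mode */
}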
c02e1fb8 3693/*
e3689519 3694 * Signed integer to floating-point conversions
c02e1fb8
AB
3695 */
3696
2abdfe24 3697float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3698{
e3689519
RH
3699 FloatParts64 p;
3700
3701 parts_sint_to_float(&p, a, scale, status);
3702 return float16_round_pack_canonical(&p, status);
c02e1fb8
AB
3703}
3704
2abdfe24
RH
3705float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3706{
3707 return int64_to_float16_scalbn(a, scale, status);
3708}
3709
3710float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3711{
3712 return int64_to_float16_scalbn(a, scale, status);
3713}
3714
3715float16 int64_to_float16(int64_t a, float_status *status)
3716{
3717 return int64_to_float16_scalbn(a, 0, status);
3718}
3719
c02e1fb8
AB
3720float16 int32_to_float16(int32_t a, float_status *status)
3721{
2abdfe24 3722 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3723}
3724
3725float16 int16_to_float16(int16_t a, float_status *status)
3726{
2abdfe24 3727 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3728}
3729
0d93d8ec
FC
3730float16 int8_to_float16(int8_t a, float_status *status)
3731{
3732 return int64_to_float16_scalbn(a, 0, status);
3733}
3734
2abdfe24 3735float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3736{
e3689519
RH
3737 FloatParts64 p;
3738
5d0204b8
RH
3739 /* Without scaling, there are no overflow concerns. */
3740 if (likely(scale == 0) && can_use_fpu(status)) {
3741 union_float32 ur;
3742 ur.h = a;
3743 return ur.s;
3744 }
3745
e3689519
RH
3746 parts64_sint_to_float(&p, a, scale, status);
3747 return float32_round_pack_canonical(&p, status);
c02e1fb8
AB
3748}
3749
2abdfe24
RH
3750float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3751{
3752 return int64_to_float32_scalbn(a, scale, status);
3753}
3754
3755float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3756{
3757 return int64_to_float32_scalbn(a, scale, status);
3758}
3759
3760float32 int64_to_float32(int64_t a, float_status *status)
3761{
3762 return int64_to_float32_scalbn(a, 0, status);
3763}
3764
c02e1fb8
AB
3765float32 int32_to_float32(int32_t a, float_status *status)
3766{
2abdfe24 3767 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3768}
3769
3770float32 int16_to_float32(int16_t a, float_status *status)
3771{
2abdfe24 3772 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3773}
3774
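/*
 * A sketch of the scalbn semantics for the integer-to-float routines above,
 * assuming glib's g_assert(); the helper name is illustrative.  The scale
 * argument biases the exponent, i.e. the result is a * 2**scale.
 */
static inline void int_to_float_scalbn_sketch(void)
{
    float_status st = { .float_rounding_mode = float_round_nearest_even };

    /* 3 * 2**4 == 48, exactly representable in float32. */
    float32 a = int64_to_float32_scalbn(3, 4, &st);
    float32 b = int64_to_float32(48, &st);

    g_assert(float32_compare(a, b, &st) == float_relation_equal);
}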
2abdfe24 3775float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3776{
e3689519
RH
3777 FloatParts64 p;
3778
5d0204b8
RH
3779 /* Without scaling, there are no overflow concerns. */
3780 if (likely(scale == 0) && can_use_fpu(status)) {
3781 union_float64 ur;
3782 ur.h = a;
3783 return ur.s;
3784 }
3785
e3689519
RH
3786 parts_sint_to_float(&p, a, scale, status);
3787 return float64_round_pack_canonical(&p, status);
c02e1fb8
AB
3788}
3789
2abdfe24
RH
3790float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3791{
3792 return int64_to_float64_scalbn(a, scale, status);
3793}
3794
3795float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3796{
3797 return int64_to_float64_scalbn(a, scale, status);
3798}
3799
3800float64 int64_to_float64(int64_t a, float_status *status)
3801{
3802 return int64_to_float64_scalbn(a, 0, status);
3803}
3804
c02e1fb8
AB
3805float64 int32_to_float64(int32_t a, float_status *status)
3806{
2abdfe24 3807 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3808}
3809
3810float64 int16_to_float64(int16_t a, float_status *status)
3811{
2abdfe24 3812 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3813}
3814
34f0c0a9
LZ
3815bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3816{
e3689519
RH
3817 FloatParts64 p;
3818
3819 parts_sint_to_float(&p, a, scale, status);
3820 return bfloat16_round_pack_canonical(&p, status);
34f0c0a9
LZ
3821}
3822
3823bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3824{
3825 return int64_to_bfloat16_scalbn(a, scale, status);
3826}
3827
3828bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3829{
3830 return int64_to_bfloat16_scalbn(a, scale, status);
3831}
3832
3833bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3834{
3835 return int64_to_bfloat16_scalbn(a, 0, status);
3836}
3837
3838bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3839{
3840 return int64_to_bfloat16_scalbn(a, 0, status);
3841}
3842
3843bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3844{
3845 return int64_to_bfloat16_scalbn(a, 0, status);
3846}
c02e1fb8 3847
95c1b71e
MF
3848float128 int128_to_float128(Int128 a, float_status *status)
3849{
3850 FloatParts128 p = { };
3851 int shift;
3852
3853 if (int128_nz(a)) {
3854 p.cls = float_class_normal;
3855 if (!int128_nonneg(a)) {
3856 p.sign = true;
3857 a = int128_neg(a);
3858 }
3859
3860 shift = clz64(int128_gethi(a));
3861 if (shift == 64) {
3862 shift += clz64(int128_getlo(a));
3863 }
3864
3865 p.exp = 127 - shift;
3866 a = int128_lshift(a, shift);
3867
3868 p.frac_hi = int128_gethi(a);
3869 p.frac_lo = int128_getlo(a);
3870 } else {
3871 p.cls = float_class_zero;
3872 }
3873
3874 return float128_round_pack_canonical(&p, status);
3875}
3876
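/*
 * int128_to_float128() above normalizes by counting leading zeros across the
 * full 128-bit magnitude and setting exp = 127 - shift.  A round-trip sketch
 * with an exactly representable value (2**100 fits easily in the 113-bit
 * significand); helper name illustrative, g_assert() from glib:
 */
static inline void int128_to_float128_sketch(void)
{
    float_status st = { .float_rounding_mode = float_round_nearest_even };
    Int128 a = int128_lshift(int128_make128(1, 0), 100);   /* 2**100 */
    float128 x = int128_to_float128(a, &st);
    Int128 r = float128_to_uint128_round_to_zero(x, &st);

    g_assert(int128_gethi(r) == int128_gethi(a));
    g_assert(int128_getlo(r) == int128_getlo(a));
}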
e3689519
RH
3877float128 int64_to_float128(int64_t a, float_status *status)
3878{
3879 FloatParts128 p;
3880
3881 parts_sint_to_float(&p, a, 0, status);
3882 return float128_round_pack_canonical(&p, status);
3883}
3884
3885float128 int32_to_float128(int32_t a, float_status *status)
3886{
3887 return int64_to_float128(a, status);
3888}
3889
5f952900
RH
3890floatx80 int64_to_floatx80(int64_t a, float_status *status)
3891{
3892 FloatParts128 p;
3893
3894 parts_sint_to_float(&p, a, 0, status);
3895 return floatx80_round_pack_canonical(&p, status);
3896}
3897
3898floatx80 int32_to_floatx80(int32_t a, float_status *status)
3899{
3900 return int64_to_floatx80(a, status);
3901}
3902
c02e1fb8 3903/*
37c954a1 3904 * Unsigned integer to floating-point conversions
c02e1fb8
AB
3905 */
3906
2abdfe24 3907float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3908{
37c954a1
RH
3909 FloatParts64 p;
3910
3911 parts_uint_to_float(&p, a, scale, status);
3912 return float16_round_pack_canonical(&p, status);
c02e1fb8
AB
3913}
3914
2abdfe24
RH
3915float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3916{
3917 return uint64_to_float16_scalbn(a, scale, status);
3918}
3919
3920float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3921{
3922 return uint64_to_float16_scalbn(a, scale, status);
3923}
3924
3925float16 uint64_to_float16(uint64_t a, float_status *status)
3926{
3927 return uint64_to_float16_scalbn(a, 0, status);
3928}
3929
c02e1fb8
AB
3930float16 uint32_to_float16(uint32_t a, float_status *status)
3931{
2abdfe24 3932 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3933}
3934
3935float16 uint16_to_float16(uint16_t a, float_status *status)
3936{
2abdfe24 3937 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3938}
3939
0d93d8ec
FC
3940float16 uint8_to_float16(uint8_t a, float_status *status)
3941{
3942 return uint64_to_float16_scalbn(a, 0, status);
3943}
3944
2abdfe24 3945float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3946{
37c954a1
RH
3947 FloatParts64 p;
3948
5d0204b8
RH
3949 /* Without scaling, there are no overflow concerns. */
3950 if (likely(scale == 0) && can_use_fpu(status)) {
3951 union_float32 ur;
3952 ur.h = a;
3953 return ur.s;
3954 }
3955
37c954a1
RH
3956 parts_uint_to_float(&p, a, scale, status);
3957 return float32_round_pack_canonical(&p, status);
c02e1fb8
AB
3958}
3959
2abdfe24
RH
3960float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3961{
3962 return uint64_to_float32_scalbn(a, scale, status);
3963}
3964
3965float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3966{
3967 return uint64_to_float32_scalbn(a, scale, status);
3968}
3969
3970float32 uint64_to_float32(uint64_t a, float_status *status)
3971{
3972 return uint64_to_float32_scalbn(a, 0, status);
3973}
3974
c02e1fb8
AB
3975float32 uint32_to_float32(uint32_t a, float_status *status)
3976{
2abdfe24 3977 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3978}
3979
3980float32 uint16_to_float32(uint16_t a, float_status *status)
3981{
2abdfe24 3982 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3983}
3984
2abdfe24 3985float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3986{
37c954a1
RH
3987 FloatParts64 p;
3988
5d0204b8
RH
3989 /* Without scaling, there are no overflow concerns. */
3990 if (likely(scale == 0) && can_use_fpu(status)) {
3991 union_float64 ur;
3992 ur.h = a;
3993 return ur.s;
3994 }
3995
37c954a1
RH
3996 parts_uint_to_float(&p, a, scale, status);
3997 return float64_round_pack_canonical(&p, status);
c02e1fb8
AB
3998}
3999
2abdfe24
RH
4000float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4001{
4002 return uint64_to_float64_scalbn(a, scale, status);
4003}
4004
4005float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4006{
4007 return uint64_to_float64_scalbn(a, scale, status);
4008}
4009
4010float64 uint64_to_float64(uint64_t a, float_status *status)
4011{
4012 return uint64_to_float64_scalbn(a, 0, status);
4013}
4014
c02e1fb8
AB
4015float64 uint32_to_float64(uint32_t a, float_status *status)
4016{
2abdfe24 4017 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
4018}
4019
4020float64 uint16_to_float64(uint16_t a, float_status *status)
4021{
2abdfe24 4022 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
4023}
4024
34f0c0a9
LZ
4025bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4026{
37c954a1
RH
4027 FloatParts64 p;
4028
4029 parts_uint_to_float(&p, a, scale, status);
4030 return bfloat16_round_pack_canonical(&p, status);
34f0c0a9
LZ
4031}
4032
4033bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4034{
4035 return uint64_to_bfloat16_scalbn(a, scale, status);
4036}
4037
4038bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4039{
4040 return uint64_to_bfloat16_scalbn(a, scale, status);
4041}
4042
4043bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4044{
4045 return uint64_to_bfloat16_scalbn(a, 0, status);
4046}
4047
4048bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4049{
4050 return uint64_to_bfloat16_scalbn(a, 0, status);
4051}
4052
4053bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4054{
4055 return uint64_to_bfloat16_scalbn(a, 0, status);
4056}
4057
37c954a1
RH
4058float128 uint64_to_float128(uint64_t a, float_status *status)
4059{
4060 FloatParts128 p;
4061
4062 parts_uint_to_float(&p, a, 0, status);
4063 return float128_round_pack_canonical(&p, status);
4064}
4065
f279852b
MF
4066float128 uint128_to_float128(Int128 a, float_status *status)
4067{
4068 FloatParts128 p = { };
4069 int shift;
4070
4071 if (int128_nz(a)) {
4072 p.cls = float_class_normal;
4073
4074 shift = clz64(int128_gethi(a));
4075 if (shift == 64) {
4076 shift += clz64(int128_getlo(a));
4077 }
4078
4079 p.exp = 127 - shift;
4080 a = int128_lshift(a, shift);
4081
4082 p.frac_hi = int128_gethi(a);
4083 p.frac_lo = int128_getlo(a);
4084 } else {
4085 p.cls = float_class_zero;
4086 }
4087
4088 return float128_round_pack_canonical(&p, status);
4089}
4090
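/*
 * Unsigned 128-bit values wider than float128's 113-bit significand are
 * rounded by the pack step above.  A sketch, assuming the float_exception_flags
 * field of float_status accumulates the flags raised via float_raise()
 * (helper name illustrative):
 */
static inline void uint128_to_float128_rounding_sketch(void)
{
    float_status st = { .float_rounding_mode = float_round_nearest_even };
    Int128 a = int128_make128(1, 1ULL << 49);   /* 2**113 + 1: needs 114 bits */

    (void)uint128_to_float128(a, &st);
    g_assert(st.float_exception_flags & float_flag_inexact);
}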
e1c4667a
RH
4091/*
4092 * Minimum and maximum
89360067 4093 */
89360067 4094
e1c4667a
RH
4095static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4096{
4097 FloatParts64 pa, pb, *pr;
89360067 4098
e1c4667a
RH
4099 float16_unpack_canonical(&pa, a, s);
4100 float16_unpack_canonical(&pb, b, s);
4101 pr = parts_minmax(&pa, &pb, s, flags);
4102
4103 return float16_round_pack_canonical(pr, s);
89360067
AB
4104}
4105
e1c4667a
RH
4106static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4107 float_status *s, int flags)
4108{
4109 FloatParts64 pa, pb, *pr;
4110
4111 bfloat16_unpack_canonical(&pa, a, s);
4112 bfloat16_unpack_canonical(&pb, b, s);
4113 pr = parts_minmax(&pa, &pb, s, flags);
4114
4115 return bfloat16_round_pack_canonical(pr, s);
4116}
4117
4118static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4119{
4120 FloatParts64 pa, pb, *pr;
4121
4122 float32_unpack_canonical(&pa, a, s);
4123 float32_unpack_canonical(&pb, b, s);
4124 pr = parts_minmax(&pa, &pb, s, flags);
4125
4126 return float32_round_pack_canonical(pr, s);
4127}
4128
4129static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4130{
4131 FloatParts64 pa, pb, *pr;
4132
4133 float64_unpack_canonical(&pa, a, s);
4134 float64_unpack_canonical(&pb, b, s);
4135 pr = parts_minmax(&pa, &pb, s, flags);
4136
4137 return float64_round_pack_canonical(pr, s);
4138}
4139
ceebc129
DH
4140static float128 float128_minmax(float128 a, float128 b,
4141 float_status *s, int flags)
4142{
4143 FloatParts128 pa, pb, *pr;
4144
4145 float128_unpack_canonical(&pa, a, s);
4146 float128_unpack_canonical(&pb, b, s);
4147 pr = parts_minmax(&pa, &pb, s, flags);
4148
4149 return float128_round_pack_canonical(pr, s);
4150}
4151
e1c4667a
RH
4152#define MINMAX_1(type, name, flags) \
4153 type type##_##name(type a, type b, float_status *s) \
4154 { return type##_minmax(a, b, s, flags); }
4155
4156#define MINMAX_2(type) \
0e903037
CMC
4157 MINMAX_1(type, max, 0) \
4158 MINMAX_1(type, maxnum, minmax_isnum) \
4159 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
4160 MINMAX_1(type, maximum_number, minmax_isnumber) \
4161 MINMAX_1(type, min, minmax_ismin) \
4162 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
4163 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4164 MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber) \
e1c4667a
RH
4165
4166MINMAX_2(float16)
4167MINMAX_2(bfloat16)
4168MINMAX_2(float32)
4169MINMAX_2(float64)
ceebc129 4170MINMAX_2(float128)
e1c4667a
RH
4171
4172#undef MINMAX_1
4173#undef MINMAX_2
8282310d 4174
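/*
 * MINMAX_2(float32) above expands to eight wrappers, for instance:
 *
 *     float32 float32_maxnum(float32 a, float32 b, float_status *s)
 *     { return float32_minmax(a, b, s, minmax_isnum); }
 *
 * A sketch of what the flag bits select (minmax_isnum follows IEEE 754-2008
 * maxNum, which returns the numeric operand when the other is a quiet NaN,
 * while plain max propagates the NaN); helper name illustrative, g_assert()
 * from glib:
 */
static inline void float32_minmax_sketch(void)
{
    float_status st = { .float_rounding_mode = float_round_nearest_even };
    float32 one = float32_one;
    float32 qnan = float32_default_nan(&st);

    g_assert(float32_compare(float32_maxnum(one, qnan, &st), one, &st)
             == float_relation_equal);
    g_assert(float32_compare_quiet(float32_max(one, qnan, &st), one, &st)
             == float_relation_unordered);
}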
6eb169b8
RH
4175/*
4176 * Floating point compare
4177 */
0c4c9092 4178
6eb169b8
RH
4179static FloatRelation QEMU_FLATTEN
4180float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4181{
4182 FloatParts64 pa, pb;
0c4c9092 4183
6eb169b8
RH
4184 float16_unpack_canonical(&pa, a, s);
4185 float16_unpack_canonical(&pb, b, s);
4186 return parts_compare(&pa, &pb, s, is_quiet);
0c4c9092
AB
4187}
4188
71bfd65c 4189FloatRelation float16_compare(float16 a, float16 b, float_status *s)
d9fe9db9 4190{
6eb169b8 4191 return float16_do_compare(a, b, s, false);
d9fe9db9
EC
4192}
4193
71bfd65c 4194FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
d9fe9db9 4195{
6eb169b8
RH
4196 return float16_do_compare(a, b, s, true);
4197}
4198
4199static FloatRelation QEMU_SOFTFLOAT_ATTR
4200float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4201{
4202 FloatParts64 pa, pb;
4203
4204 float32_unpack_canonical(&pa, a, s);
4205 float32_unpack_canonical(&pb, b, s);
4206 return parts_compare(&pa, &pb, s, is_quiet);
d9fe9db9
EC
4207}
4208
71bfd65c 4209static FloatRelation QEMU_FLATTEN
6eb169b8 4210float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
d9fe9db9
EC
4211{
4212 union_float32 ua, ub;
4213
4214 ua.s = xa;
4215 ub.s = xb;
4216
4217 if (QEMU_NO_HARDFLOAT) {
4218 goto soft;
4219 }
4220
4221 float32_input_flush2(&ua.s, &ub.s, s);
4222 if (isgreaterequal(ua.h, ub.h)) {
4223 if (isgreater(ua.h, ub.h)) {
4224 return float_relation_greater;
4225 }
4226 return float_relation_equal;
4227 }
4228 if (likely(isless(ua.h, ub.h))) {
4229 return float_relation_less;
4230 }
6eb169b8
RH
4231 /*
4232 * The only condition remaining is unordered.
d9fe9db9
EC
4233 * Fall through to set flags.
4234 */
4235 soft:
6eb169b8 4236 return float32_do_compare(ua.s, ub.s, s, is_quiet);
d9fe9db9
EC
4237}
4238
71bfd65c 4239FloatRelation float32_compare(float32 a, float32 b, float_status *s)
d9fe9db9 4240{
6eb169b8 4241 return float32_hs_compare(a, b, s, false);
d9fe9db9
EC
4242}
4243
71bfd65c 4244FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
d9fe9db9 4245{
6eb169b8
RH
4246 return float32_hs_compare(a, b, s, true);
4247}
4248
4249static FloatRelation QEMU_SOFTFLOAT_ATTR
4250float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4251{
4252 FloatParts64 pa, pb;
4253
4254 float64_unpack_canonical(&pa, a, s);
4255 float64_unpack_canonical(&pb, b, s);
4256 return parts_compare(&pa, &pb, s, is_quiet);
d9fe9db9
EC
4257}
4258
71bfd65c 4259static FloatRelation QEMU_FLATTEN
6eb169b8 4260float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
d9fe9db9
EC
4261{
4262 union_float64 ua, ub;
4263
4264 ua.s = xa;
4265 ub.s = xb;
4266
4267 if (QEMU_NO_HARDFLOAT) {
4268 goto soft;
4269 }
4270
4271 float64_input_flush2(&ua.s, &ub.s, s);
4272 if (isgreaterequal(ua.h, ub.h)) {
4273 if (isgreater(ua.h, ub.h)) {
4274 return float_relation_greater;
4275 }
4276 return float_relation_equal;
4277 }
4278 if (likely(isless(ua.h, ub.h))) {
4279 return float_relation_less;
4280 }
6eb169b8
RH
4281 /*
4282 * The only condition remaining is unordered.
d9fe9db9
EC
4283 * Fall through to set flags.
4284 */
4285 soft:
6eb169b8 4286 return float64_do_compare(ua.s, ub.s, s, is_quiet);
d9fe9db9
EC
4287}
4288
71bfd65c 4289FloatRelation float64_compare(float64 a, float64 b, float_status *s)
d9fe9db9 4290{
6eb169b8 4291 return float64_hs_compare(a, b, s, false);
d9fe9db9
EC
4292}
4293
71bfd65c 4294FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
d9fe9db9 4295{
6eb169b8 4296 return float64_hs_compare(a, b, s, true);
d9fe9db9
EC
4297}
4298
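/*
 * A sketch of how FloatRelation results read, and of the quiet/signalling
 * distinction; assumes the float_exception_flags field of float_status holds
 * the accumulated flags (helper name illustrative, g_assert() from glib):
 */
static inline void float64_compare_sketch(void)
{
    float_status st = { .float_rounding_mode = float_round_nearest_even };
    float64 two = int64_to_float64(2, &st);
    float64 three = int64_to_float64(3, &st);

    g_assert(float64_compare(two, three, &st) == float_relation_less);
    g_assert(float64_compare(three, two, &st) == float_relation_greater);
    g_assert(float64_compare(two, two, &st) == float_relation_equal);

    /* A quiet NaN compares unordered; the _quiet variant does not raise
     * float_flag_invalid for it, whereas float64_compare() would. */
    st.float_exception_flags = 0;
    g_assert(float64_compare_quiet(float64_default_nan(&st), two, &st)
             == float_relation_unordered);
    g_assert(!(st.float_exception_flags & float_flag_invalid));
}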
8282310d 4299static FloatRelation QEMU_FLATTEN
6eb169b8 4300bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
8282310d 4301{
98e256fc
RH
4302 FloatParts64 pa, pb;
4303
4304 bfloat16_unpack_canonical(&pa, a, s);
4305 bfloat16_unpack_canonical(&pb, b, s);
6eb169b8 4306 return parts_compare(&pa, &pb, s, is_quiet);
8282310d
LZ
4307}
4308
4309FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4310{
6eb169b8 4311 return bfloat16_do_compare(a, b, s, false);
8282310d
LZ
4312}
4313
4314FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4315{
6eb169b8
RH
4316 return bfloat16_do_compare(a, b, s, true);
4317}
4318
4319static FloatRelation QEMU_FLATTEN
4320float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4321{
4322 FloatParts128 pa, pb;
4323
4324 float128_unpack_canonical(&pa, a, s);
4325 float128_unpack_canonical(&pb, b, s);
4326 return parts_compare(&pa, &pb, s, is_quiet);
4327}
4328
4329FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4330{
4331 return float128_do_compare(a, b, s, false);
4332}
4333
4334FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4335{
4336 return float128_do_compare(a, b, s, true);
8282310d
LZ
4337}
4338
1b96b006
RH
4339static FloatRelation QEMU_FLATTEN
4340floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4341{
4342 FloatParts128 pa, pb;
4343
4344 if (!floatx80_unpack_canonical(&pa, a, s) ||
4345 !floatx80_unpack_canonical(&pb, b, s)) {
4346 return float_relation_unordered;
4347 }
4348 return parts_compare(&pa, &pb, s, is_quiet);
4349}
4350
4351FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4352{
4353 return floatx80_do_compare(a, b, s, false);
4354}
4355
4356FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4357{
4358 return floatx80_do_compare(a, b, s, true);
4359}
4360
39626b0c
RH
4361/*
4362 * Scale by 2**N
4363 */
0bfc9f19
AB
4364
4365float16 float16_scalbn(float16 a, int n, float_status *status)
4366{
39626b0c 4367 FloatParts64 p;
98e256fc 4368
39626b0c
RH
4369 float16_unpack_canonical(&p, a, status);
4370 parts_scalbn(&p, n, status);
4371 return float16_round_pack_canonical(&p, status);
0bfc9f19
AB
4372}
4373
4374float32 float32_scalbn(float32 a, int n, float_status *status)
4375{
39626b0c 4376 FloatParts64 p;
98e256fc 4377
39626b0c
RH
4378 float32_unpack_canonical(&p, a, status);
4379 parts_scalbn(&p, n, status);
4380 return float32_round_pack_canonical(&p, status);
0bfc9f19
AB
4381}
4382
4383float64 float64_scalbn(float64 a, int n, float_status *status)
4384{
39626b0c 4385 FloatParts64 p;
98e256fc 4386
39626b0c
RH
4387 float64_unpack_canonical(&p, a, status);
4388 parts_scalbn(&p, n, status);
4389 return float64_round_pack_canonical(&p, status);
0bfc9f19
AB
4390}
4391
8282310d
LZ
4392bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4393{
39626b0c 4394 FloatParts64 p;
98e256fc 4395
39626b0c
RH
4396 bfloat16_unpack_canonical(&p, a, status);
4397 parts_scalbn(&p, n, status);
4398 return bfloat16_round_pack_canonical(&p, status);
4399}
4400
4401float128 float128_scalbn(float128 a, int n, float_status *status)
4402{
4403 FloatParts128 p;
4404
4405 float128_unpack_canonical(&p, a, status);
4406 parts_scalbn(&p, n, status);
4407 return float128_round_pack_canonical(&p, status);
8282310d
LZ
4408}
4409
872e6991
RH
4410floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4411{
4412 FloatParts128 p;
4413
4414 if (!floatx80_unpack_canonical(&p, a, status)) {
4415 return floatx80_default_nan(status);
4416 }
4417 parts_scalbn(&p, n, status);
4418 return floatx80_round_pack_canonical(&p, status);
4419}
4420
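/*
 * A usage sketch for the scalbn routines above: the result is a * 2**n with a
 * single rounding (exact in this case).  Helper name illustrative, g_assert()
 * from glib:
 */
static inline void float64_scalbn_sketch(void)
{
    float_status st = { .float_rounding_mode = float_round_nearest_even };
    float64 three = int64_to_float64(3, &st);

    g_assert(float64_compare(float64_scalbn(three, 4, &st),     /* 3 * 2**4 */
                             int64_to_float64(48, &st), &st)
             == float_relation_equal);
}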
c13bb2da
AB
4421/*
4422 * Square Root
c13bb2da
AB
4423 */
4424
97ff87c0 4425float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da 4426{
9261b245 4427 FloatParts64 p;
98e256fc 4428
9261b245
RH
4429 float16_unpack_canonical(&p, a, status);
4430 parts_sqrt(&p, status, &float16_params);
4431 return float16_round_pack_canonical(&p, status);
c13bb2da
AB
4432}
4433
f131bae8
EC
4434static float32 QEMU_SOFTFLOAT_ATTR
4435soft_f32_sqrt(float32 a, float_status *status)
c13bb2da 4436{
9261b245 4437 FloatParts64 p;
98e256fc 4438
9261b245
RH
4439 float32_unpack_canonical(&p, a, status);
4440 parts_sqrt(&p, status, &float32_params);
4441 return float32_round_pack_canonical(&p, status);
c13bb2da
AB
4442}
4443
f131bae8
EC
4444static float64 QEMU_SOFTFLOAT_ATTR
4445soft_f64_sqrt(float64 a, float_status *status)
c13bb2da 4446{
9261b245 4447 FloatParts64 p;
98e256fc 4448
9261b245
RH
4449 float64_unpack_canonical(&p, a, status);
4450 parts_sqrt(&p, status, &float64_params);
4451 return float64_round_pack_canonical(&p, status);
c13bb2da
AB
4452}
4453
f131bae8
EC
4454float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4455{
4456 union_float32 ua, ur;
4457
4458 ua.s = xa;
4459 if (unlikely(!can_use_fpu(s))) {
4460 goto soft;
4461 }
4462
4463 float32_input_flush1(&ua.s, s);
4464 if (QEMU_HARDFLOAT_1F32_USE_FP) {
4465 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4466 fpclassify(ua.h) == FP_ZERO) ||
4467 signbit(ua.h))) {
4468 goto soft;
4469 }
4470 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4471 float32_is_neg(ua.s))) {
4472 goto soft;
4473 }
4474 ur.h = sqrtf(ua.h);
4475 return ur.s;
4476
4477 soft:
4478 return soft_f32_sqrt(ua.s, s);
4479}
4480
4481float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4482{
4483 union_float64 ua, ur;
4484
4485 ua.s = xa;
4486 if (unlikely(!can_use_fpu(s))) {
4487 goto soft;
4488 }
4489
4490 float64_input_flush1(&ua.s, s);
4491 if (QEMU_HARDFLOAT_1F64_USE_FP) {
4492 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4493 fpclassify(ua.h) == FP_ZERO) ||
4494 signbit(ua.h))) {
4495 goto soft;
4496 }
4497 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4498 float64_is_neg(ua.s))) {
4499 goto soft;
4500 }
4501 ur.h = sqrt(ua.h);
4502 return ur.s;
4503
4504 soft:
4505 return soft_f64_sqrt(ua.s, s);
4506}
4507
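/*
 * The wrappers above use the host sqrtf()/sqrt() only for non-negative zero
 * or normal inputs when can_use_fpu() allows it; every other case falls back
 * to the soft implementation.  A usage sketch (illustrative helper name,
 * g_assert() from glib):
 */
static inline void float64_sqrt_sketch(void)
{
    float_status st = { .float_rounding_mode = float_round_nearest_even };
    float64 nine = int64_to_float64(9, &st);

    g_assert(float64_compare(float64_sqrt(nine, &st),
                             int64_to_float64(3, &st), &st)
             == float_relation_equal);
}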
42636fb9
RH
4508float64 float64r32_sqrt(float64 a, float_status *status)
4509{
4510 FloatParts64 p;
4511
4512 float64_unpack_canonical(&p, a, status);
4513 parts_sqrt(&p, status, &float64_params);
4514 return float64r32_round_pack_canonical(&p, status);
4515}
4516
8282310d
LZ
4517bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4518{
9261b245 4519 FloatParts64 p;
98e256fc 4520
9261b245
RH
4521 bfloat16_unpack_canonical(&p, a, status);
4522 parts_sqrt(&p, status, &bfloat16_params);
4523 return bfloat16_round_pack_canonical(&p, status);
4524}
4525
4526float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4527{
4528 FloatParts128 p;
4529
4530 float128_unpack_canonical(&p, a, status);
4531 parts_sqrt(&p, status, &float128_params);
4532 return float128_round_pack_canonical(&p, status);
8282310d
LZ
4533}
4534
aa5e19cc
RH
4535floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4536{
4537 FloatParts128 p;
4538
4539 if (!floatx80_unpack_canonical(&p, a, s)) {
4540 return floatx80_default_nan(s);
4541 }
4542 parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4543 return floatx80_round_pack_canonical(&p, s);
4544}
4545
2fa3546c
RH
4546/*
4547 * log2
4548 */
4549float32 float32_log2(float32 a, float_status *status)
4550{
4551 FloatParts64 p;
4552
4553 float32_unpack_canonical(&p, a, status);
4554 parts_log2(&p, status, &float32_params);
4555 return float32_round_pack_canonical(&p, status);
4556}
4557
4558float64 float64_log2(float64 a, float_status *status)
4559{
4560 FloatParts64 p;
4561
4562 float64_unpack_canonical(&p, a, status);
4563 parts_log2(&p, status, &float64_params);
4564 return float64_round_pack_canonical(&p, status);
4565}
4566
0218a16e
RH
4567/*----------------------------------------------------------------------------
4568| The pattern for a default generated NaN.
4569*----------------------------------------------------------------------------*/
4570
4571float16 float16_default_nan(float_status *status)
4572{
0fc07cad
RH
4573 FloatParts64 p;
4574
4575 parts_default_nan(&p, status);
0218a16e 4576 p.frac >>= float16_params.frac_shift;
71fd178e 4577 return float16_pack_raw(&p);
0218a16e
RH
4578}
4579
4580float32 float32_default_nan(float_status *status)
4581{
0fc07cad
RH
4582 FloatParts64 p;
4583
4584 parts_default_nan(&p, status);
0218a16e 4585 p.frac >>= float32_params.frac_shift;
71fd178e 4586 return float32_pack_raw(&p);
0218a16e
RH
4587}
4588
4589float64 float64_default_nan(float_status *status)
4590{
0fc07cad
RH
4591 FloatParts64 p;
4592
4593 parts_default_nan(&p, status);
0218a16e 4594 p.frac >>= float64_params.frac_shift;
71fd178e 4595 return float64_pack_raw(&p);
0218a16e
RH
4596}
4597
4598float128 float128_default_nan(float_status *status)
4599{
e9034ea8 4600 FloatParts128 p;
0218a16e 4601
0fc07cad 4602 parts_default_nan(&p, status);
e9034ea8
RH
4603 frac_shr(&p, float128_params.frac_shift);
4604 return float128_pack_raw(&p);
0218a16e 4605}
c13bb2da 4606
8282310d
LZ
4607bfloat16 bfloat16_default_nan(float_status *status)
4608{
0fc07cad
RH
4609 FloatParts64 p;
4610
4611 parts_default_nan(&p, status);
8282310d 4612 p.frac >>= bfloat16_params.frac_shift;
71fd178e 4613 return bfloat16_pack_raw(&p);
8282310d
LZ
4614}
4615
158142c2 4616/*----------------------------------------------------------------------------
377ed926
RH
4617| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4618*----------------------------------------------------------------------------*/
4619
4620float16 float16_silence_nan(float16 a, float_status *status)
4621{
3dddb203
RH
4622 FloatParts64 p;
4623
4624 float16_unpack_raw(&p, a);
377ed926 4625 p.frac <<= float16_params.frac_shift;
92ff426d 4626 parts_silence_nan(&p, status);
377ed926 4627 p.frac >>= float16_params.frac_shift;
71fd178e 4628 return float16_pack_raw(&p);
377ed926
RH
4629}
4630
4631float32 float32_silence_nan(float32 a, float_status *status)
4632{
3dddb203
RH
4633 FloatParts64 p;
4634
4635 float32_unpack_raw(&p, a);
377ed926 4636 p.frac <<= float32_params.frac_shift;
92ff426d 4637 parts_silence_nan(&p, status);
377ed926 4638 p.frac >>= float32_params.frac_shift;
71fd178e 4639 return float32_pack_raw(&p);
377ed926
RH
4640}
4641
4642float64 float64_silence_nan(float64 a, float_status *status)
4643{
3dddb203
RH
4644 FloatParts64 p;
4645
4646 float64_unpack_raw(&p, a);
377ed926 4647 p.frac <<= float64_params.frac_shift;
92ff426d 4648 parts_silence_nan(&p, status);
377ed926 4649 p.frac >>= float64_params.frac_shift;
71fd178e 4650 return float64_pack_raw(&p);
377ed926
RH
4651}
4652
8282310d
LZ
4653bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4654{
3dddb203
RH
4655 FloatParts64 p;
4656
4657 bfloat16_unpack_raw(&p, a);
8282310d 4658 p.frac <<= bfloat16_params.frac_shift;
92ff426d 4659 parts_silence_nan(&p, status);
8282310d 4660 p.frac >>= bfloat16_params.frac_shift;
71fd178e 4661 return bfloat16_pack_raw(&p);
8282310d 4662}
e6b405fe 4663
0018b1f4
RH
4664float128 float128_silence_nan(float128 a, float_status *status)
4665{
4666 FloatParts128 p;
4667
4668 float128_unpack_raw(&p, a);
4669 frac_shl(&p, float128_params.frac_shift);
4670 parts_silence_nan(&p, status);
4671 frac_shr(&p, float128_params.frac_shift);
4672 return float128_pack_raw(&p);
4673}
4674
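/*
 * A sketch of NaN silencing, assuming make_float32(), float32_is_signaling_nan()
 * and float32_is_quiet_nan() from "fpu/softfloat.h" and the usual IEEE encoding
 * (snan_bit_is_one clear, so 0x7f800001 is a signalling NaN):
 */
static inline void float32_silence_nan_sketch(void)
{
    float_status st = { .float_rounding_mode = float_round_nearest_even };
    float32 snan = make_float32(0x7f800001);

    g_assert(float32_is_signaling_nan(snan, &st));
    g_assert(float32_is_quiet_nan(float32_silence_nan(snan, &st), &st));
}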
e6b405fe
AB
4675/*----------------------------------------------------------------------------
4676| If `a' is denormal and we are in flush-to-zero mode then set the
4677| input-denormal exception and return zero. Otherwise just return the value.
4678*----------------------------------------------------------------------------*/
4679
f8155c1d 4680static bool parts_squash_denormal(FloatParts64 p, float_status *status)
e6b405fe
AB
4681{
4682 if (p.exp == 0 && p.frac != 0) {
4683 float_raise(float_flag_input_denormal, status);
4684 return true;
4685 }
4686
4687 return false;
4688}
4689
4690float16 float16_squash_input_denormal(float16 a, float_status *status)
4691{
4692 if (status->flush_inputs_to_zero) {
3dddb203
RH
4693 FloatParts64 p;
4694
4695 float16_unpack_raw(&p, a);
e6b405fe
AB
4696 if (parts_squash_denormal(p, status)) {
4697 return float16_set_sign(float16_zero, p.sign);
4698 }
4699 }
4700 return a;
4701}
4702
4703float32 float32_squash_input_denormal(float32 a, float_status *status)
4704{
4705 if (status->flush_inputs_to_zero) {
3dddb203
RH
4706 FloatParts64 p;
4707
4708 float32_unpack_raw(&p, a);
e6b405fe
AB
4709 if (parts_squash_denormal(p, status)) {
4710 return float32_set_sign(float32_zero, p.sign);
4711 }
4712 }
4713 return a;
4714}
4715
4716float64 float64_squash_input_denormal(float64 a, float_status *status)
4717{
4718 if (status->flush_inputs_to_zero) {
3dddb203
RH
4719 FloatParts64 p;
4720
4721 float64_unpack_raw(&p, a);
e6b405fe
AB
4722 if (parts_squash_denormal(p, status)) {
4723 return float64_set_sign(float64_zero, p.sign);
4724 }
4725 }
4726 return a;
4727}
4728
8282310d
LZ
4729bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4730{
4731 if (status->flush_inputs_to_zero) {
3dddb203
RH
4732 FloatParts64 p;
4733
4734 bfloat16_unpack_raw(&p, a);
8282310d
LZ
4735 if (parts_squash_denormal(p, status)) {
4736 return bfloat16_set_sign(bfloat16_zero, p.sign);
4737 }
4738 }
4739 return a;
4740}
4741
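/*
 * A sketch of input-denormal squashing, assuming make_float32(),
 * float32_is_zero() and the float_exception_flags field from the softfloat
 * headers; 0x00000001 is the smallest positive float32 denormal:
 */
static inline void float32_squash_denormal_sketch(void)
{
    float_status st = { .flush_inputs_to_zero = true };
    float32 r = float32_squash_input_denormal(make_float32(0x00000001), &st);

    g_assert(float32_is_zero(r));
    g_assert(st.float_exception_flags & float_flag_input_denormal);
}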
158142c2 4742/*----------------------------------------------------------------------------
feaf2e9c
RH
4743| Normalizes the subnormal extended double-precision floating-point value
4744| represented by the denormalized significand `aSig'. The normalized exponent
4745| and significand are stored at the locations pointed to by `zExpPtr' and
158142c2
FB
4746| `zSigPtr', respectively.
4747*----------------------------------------------------------------------------*/
4748
feaf2e9c
RH
4749void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4750 uint64_t *zSigPtr)
158142c2 4751{
8f506c70 4752 int8_t shiftCount;
158142c2 4753
feaf2e9c 4754 shiftCount = clz64(aSig);
158142c2
FB
4755 *zSigPtr = aSig<<shiftCount;
4756 *zExpPtr = 1 - shiftCount;
158142c2
FB
4757}
4758
158142c2
FB
4759/*----------------------------------------------------------------------------
4760| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
feaf2e9c
RH
4761| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4762| and returns the proper extended double-precision floating-point value
4763| corresponding to the abstract input. Ordinarily, the abstract value is
4764| rounded and packed into the extended double-precision format, with the
4765| inexact exception raised if the abstract input cannot be represented
158142c2
FB
4766| exactly. However, if the abstract value is too large, the overflow and
4767| inexact exceptions are raised and an infinity or maximal finite value is
4768| returned. If the abstract value is too small, the input value is rounded to
4769| a subnormal number, and the underflow and inexact exceptions are raised if
feaf2e9c
RH
4770| the abstract input cannot be represented exactly as a subnormal extended
4771| double-precision floating-point number.
4772| If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4773| the result is rounded to the same number of bits as single or double
4774| precision, respectively. Otherwise, the result is rounded to the full
4775| precision of the extended double-precision format.
4776| The input significand must be normalized or smaller. If the input
4777| significand is not normalized, `zExp' must be 0; in that case, the result
4778| returned is a subnormal number, and it must not require rounding. The
4779| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4780| Floating-Point Arithmetic.
158142c2
FB
4781*----------------------------------------------------------------------------*/
4782
feaf2e9c
RH
4783floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4784 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4785 float_status *status)
158142c2 4786{
feaf2e9c
RH
4787 FloatRoundMode roundingMode;
4788 bool roundNearestEven, increment, isTiny;
4789 int64_t roundIncrement, roundMask, roundBits;
158142c2 4790
a2f2d288 4791 roundingMode = status->float_rounding_mode;
158142c2 4792 roundNearestEven = ( roundingMode == float_round_nearest_even );
feaf2e9c
RH
4793 switch (roundingPrecision) {
4794 case floatx80_precision_x:
4795 goto precision80;
4796 case floatx80_precision_d:
4797 roundIncrement = UINT64_C(0x0000000000000400);
4798 roundMask = UINT64_C(0x00000000000007FF);
4799 break;
4800 case floatx80_precision_s:
4801 roundIncrement = UINT64_C(0x0000008000000000);
4802 roundMask = UINT64_C(0x000000FFFFFFFFFF);
4803 break;
4804 default:
4805 g_assert_not_reached();
4806 }
4807 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
4808 switch (roundingMode) {
4809 case float_round_nearest_even:
f9288a76 4810 case float_round_ties_away:
dc355b76
PM
4811 break;
4812 case float_round_to_zero:
4813 roundIncrement = 0;
4814 break;
4815 case float_round_up:
feaf2e9c 4816 roundIncrement = zSign ? 0 : roundMask;
dc355b76
PM
4817 break;
4818 case float_round_down:
feaf2e9c 4819 roundIncrement = zSign ? roundMask : 0;
5d64abb3 4820 break;
dc355b76
PM
4821 default:
4822 abort();
158142c2 4823 }
feaf2e9c
RH
4824 roundBits = zSig0 & roundMask;
4825 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4826 if ( ( 0x7FFE < zExp )
4827 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
158142c2 4828 ) {
feaf2e9c 4829 goto overflow;
158142c2 4830 }
feaf2e9c 4831 if ( zExp <= 0 ) {
a2f2d288 4832 if (status->flush_to_zero) {
ff32e16e 4833 float_raise(float_flag_output_denormal, status);
feaf2e9c 4834 return packFloatx80(zSign, 0, 0);
e6afc87f 4835 }
a828b373 4836 isTiny = status->tininess_before_rounding
feaf2e9c
RH
4837 || (zExp < 0 )
4838 || (zSig0 <= zSig0 + roundIncrement);
4839 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
158142c2 4840 zExp = 0;
feaf2e9c 4841 roundBits = zSig0 & roundMask;
ff32e16e
PM
4842 if (isTiny && roundBits) {
4843 float_raise(float_flag_underflow, status);
4844 }
feaf2e9c
RH
4845 if (roundBits) {
4846 float_raise(float_flag_inexact, status);
4847 }
4848 zSig0 += roundIncrement;
4849 if ( (int64_t) zSig0 < 0 ) zExp = 1;
4850 roundIncrement = roundMask + 1;
4851 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4852 roundMask |= roundIncrement;
5d64abb3 4853 }
feaf2e9c
RH
4854 zSig0 &= ~ roundMask;
4855 return packFloatx80( zSign, zExp, zSig0 );
158142c2
FB
4856 }
4857 }
a2f2d288 4858 if (roundBits) {
d82f3b2d 4859 float_raise(float_flag_inexact, status);
a2f2d288 4860 }
feaf2e9c
RH
4861 zSig0 += roundIncrement;
4862 if ( zSig0 < roundIncrement ) {
4863 ++zExp;
4864 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4865 }
4866 roundIncrement = roundMask + 1;
4867 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4868 roundMask |= roundIncrement;
4869 }
4870 zSig0 &= ~ roundMask;
4871 if ( zSig0 == 0 ) zExp = 0;
4872 return packFloatx80( zSign, zExp, zSig0 );
4873 precision80:
dc355b76
PM
4874 switch (roundingMode) {
4875 case float_round_nearest_even:
f9288a76 4876 case float_round_ties_away:
dc355b76
PM
4877 increment = ((int64_t)zSig1 < 0);
4878 break;
4879 case float_round_to_zero:
4880 increment = 0;
4881 break;
4882 case float_round_up:
4883 increment = !zSign && zSig1;
4884 break;
4885 case float_round_down:
4886 increment = zSign && zSig1;
4887 break;
4888 default:
4889 abort();
158142c2 4890 }
bb98fe42 4891 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4892 if ( ( 0x7FFE < zExp )
4893 || ( ( zExp == 0x7FFE )
e9321124 4894 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
4895 && increment
4896 )
4897 ) {
4898 roundMask = 0;
4899 overflow:
ff32e16e 4900 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4901 if ( ( roundingMode == float_round_to_zero )
4902 || ( zSign && ( roundingMode == float_round_up ) )
4903 || ( ! zSign && ( roundingMode == float_round_down ) )
4904 ) {
4905 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4906 }
0f605c88
LV
4907 return packFloatx80(zSign,
4908 floatx80_infinity_high,
4909 floatx80_infinity_low);
158142c2
FB
4910 }
4911 if ( zExp <= 0 ) {
a828b373
RH
4912 isTiny = status->tininess_before_rounding
4913 || (zExp < 0)
4914 || !increment
4915 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4916 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4917 zExp = 0;
ff32e16e
PM
4918 if (isTiny && zSig1) {
4919 float_raise(float_flag_underflow, status);
4920 }
a2f2d288 4921 if (zSig1) {
d82f3b2d 4922 float_raise(float_flag_inexact, status);
a2f2d288 4923 }
dc355b76
PM
4924 switch (roundingMode) {
4925 case float_round_nearest_even:
f9288a76 4926 case float_round_ties_away:
dc355b76
PM
4927 increment = ((int64_t)zSig1 < 0);
4928 break;
4929 case float_round_to_zero:
4930 increment = 0;
4931 break;
4932 case float_round_up:
4933 increment = !zSign && zSig1;
4934 break;
4935 case float_round_down:
4936 increment = zSign && zSig1;
4937 break;
4938 default:
4939 abort();
158142c2
FB
4940 }
4941 if ( increment ) {
4942 ++zSig0;
40662886
PMD
4943 if (!(zSig1 << 1) && roundNearestEven) {
4944 zSig0 &= ~1;
4945 }
bb98fe42 4946 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4947 }
4948 return packFloatx80( zSign, zExp, zSig0 );
4949 }
4950 }
a2f2d288 4951 if (zSig1) {
d82f3b2d 4952 float_raise(float_flag_inexact, status);
a2f2d288 4953 }
158142c2
FB
4954 if ( increment ) {
4955 ++zSig0;
4956 if ( zSig0 == 0 ) {
4957 ++zExp;
e9321124 4958 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4959 }
4960 else {
40662886
PMD
4961 if (!(zSig1 << 1) && roundNearestEven) {
4962 zSig0 &= ~1;
4963 }
158142c2
FB
4964 }
4965 }
4966 else {
4967 if ( zSig0 == 0 ) zExp = 0;
4968 }
4969 return packFloatx80( zSign, zExp, zSig0 );
4970
4971}
4972
4973/*----------------------------------------------------------------------------
4974| Takes an abstract floating-point value having sign `zSign', exponent
4975| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4976| and returns the proper extended double-precision floating-point value
4977| corresponding to the abstract input. This routine is just like
4978| `roundAndPackFloatx80' except that the input significand does not have to be
4979| normalized.
4980*----------------------------------------------------------------------------*/
4981
8da5f1db 4982floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
c120391c 4983 bool zSign, int32_t zExp,
88857aca
LV
4984 uint64_t zSig0, uint64_t zSig1,
4985 float_status *status)
158142c2 4986{
8f506c70 4987 int8_t shiftCount;
158142c2
FB
4988
4989 if ( zSig0 == 0 ) {
4990 zSig0 = zSig1;
4991 zSig1 = 0;
4992 zExp -= 64;
4993 }
0019d5c3 4994 shiftCount = clz64(zSig0);
158142c2
FB
4995 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4996 zExp -= shiftCount;
ff32e16e
PM
4997 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4998 zSig0, zSig1, status);
158142c2
FB
4999
5000}
5001
8229c991
AJ
5002/*----------------------------------------------------------------------------
5003| Returns the binary exponential of the single-precision floating-point value
5004| `a'. The operation is performed according to the IEC/IEEE Standard for
5005| Binary Floating-Point Arithmetic.
5006|
5007| Uses the following identities:
5008|
5009| 1. -------------------------------------------------------------------------
5010|      2^x = e^(x*ln(2))
5011|
5012|
5013| 2. -------------------------------------------------------------------------
5014|      e^x = 1 + x/1! + x^2/2! + x^3/3! + x^4/4! + x^5/5!
5015|                + ... + x^n/n! + ...
5016|
5017|
5018*----------------------------------------------------------------------------*/
5019
5020static const float64 float32_exp2_coefficients[15] =
5021{
d5138cf4
PM
5022 const_float64( 0x3ff0000000000000ll ), /* 1 */
5023 const_float64( 0x3fe0000000000000ll ), /* 2 */
5024 const_float64( 0x3fc5555555555555ll ), /* 3 */
5025 const_float64( 0x3fa5555555555555ll ), /* 4 */
5026 const_float64( 0x3f81111111111111ll ), /* 5 */
5027 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5028 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5029 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5030 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5031 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5032 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5033 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5034 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5035 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5036 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
5037};
5038
e5a41ffa 5039float32 float32_exp2(float32 a, float_status *status)
8229c991 5040{
572c4d86 5041 FloatParts64 xp, xnp, tp, rp;
8229c991 5042 int i;
8229c991 5043
572c4d86
RH
5044 float32_unpack_canonical(&xp, a, status);
5045 if (unlikely(xp.cls != float_class_normal)) {
5046 switch (xp.cls) {
5047 case float_class_snan:
5048 case float_class_qnan:
5049 parts_return_nan(&xp, status);
5050 return float32_round_pack_canonical(&xp, status);
5051 case float_class_inf:
5052 return xp.sign ? float32_zero : a;
5053 case float_class_zero:
5054 return float32_one;
5055 default:
5056 break;
ff32e16e 5057 }
572c4d86 5058 g_assert_not_reached();
8229c991
AJ
5059 }
5060
ff32e16e 5061 float_raise(float_flag_inexact, status);
8229c991 5062
f291f45f 5063 float64_unpack_canonical(&tp, float64_ln2, status);
572c4d86
RH
5064 xp = *parts_mul(&xp, &tp, status);
5065 xnp = xp;
8229c991 5066
572c4d86 5067 float64_unpack_canonical(&rp, float64_one, status);
8229c991 5068 for (i = 0 ; i < 15 ; i++) {
572c4d86
RH
5069 float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
5070 rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
5071 xnp = *parts_mul(&xnp, &xp, status);
8229c991
AJ
5072 }
5073
572c4d86 5074 return float32_round_pack_canonical(&rp, status);
8229c991
AJ
5075}
5076
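/*
 * The loop above evaluates r = 1 + sum_{n=1..15} x**n / n! with x = a*ln(2),
 * using xnp to carry the running power of x.  A scalar sketch of the same
 * recurrence in host doubles (illustrative only, not the code path used here):
 */
static inline double exp2_series_sketch(double a)
{
    double x = a * 0.6931471805599453;   /* x = a * ln(2)        */
    double xn = x;                       /* running power x**n   */
    double fact = 1.0;                   /* running factorial n! */
    double r = 1.0;
    int n;

    for (n = 1; n <= 15; n++) {
        fact *= n;
        r += xn / fact;                  /* add term x**n / n! */
        xn *= x;
    }
    return r;                            /* ~= 2**a for modest |a| */
}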
0f721292
LV
5077/*----------------------------------------------------------------------------
5078| Rounds the extended double-precision floating-point value `a'
5079| to the precision provided by floatx80_rounding_precision and returns the
5080| result as an extended double-precision floating-point value.
5081| The operation is performed according to the IEC/IEEE Standard for Binary
5082| Floating-Point Arithmetic.
5083*----------------------------------------------------------------------------*/
5084
5085floatx80 floatx80_round(floatx80 a, float_status *status)
5086{
45a76b71
RH
5087 FloatParts128 p;
5088
5089 if (!floatx80_unpack_canonical(&p, a, status)) {
5090 return floatx80_default_nan(status);
5091 }
5092 return floatx80_round_pack_canonical(&p, status);
0f721292
LV
5093}
5094
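/*
 * A sketch of precision-controlled rounding with floatx80_round(), using only
 * routines defined in this file plus glib's g_assert(); 2**30 + 1 is exact in
 * the 64-bit extended significand but is rounded away at single precision
 * (24-bit significand).  Helper name illustrative:
 */
static inline void floatx80_round_sketch(void)
{
    float_status st = { .float_rounding_mode = float_round_nearest_even,
                        .floatx80_rounding_precision = floatx80_precision_x };
    floatx80 a = int64_to_floatx80((1LL << 30) + 1, &st);

    /* Full extended precision: the value is returned unchanged. */
    g_assert(floatx80_compare(floatx80_round(a, &st), a, &st)
             == float_relation_equal);

    /* Single precision: the low-order bit is rounded off. */
    st.floatx80_rounding_precision = floatx80_precision_s;
    g_assert(floatx80_compare(floatx80_round(a, &st),
                              int64_to_floatx80(1LL << 30, &st), &st)
             == float_relation_equal);
}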
f6b3b108
EC
5095static void __attribute__((constructor)) softfloat_init(void)
5096{
5097 union_float64 ua, ub, uc, ur;
5098
5099 if (QEMU_NO_HARDFLOAT) {
5100 return;
5101 }
5102 /*
5103 * Test that the host's FMA is not obviously broken. For example,
5104 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5105 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5106 */
5107 ua.s = 0x0020000000000001ULL;
5108 ub.s = 0x3ca0000000000000ULL;
5109 uc.s = 0x0020000000000000ULL;
5110 ur.h = fma(ua.h, ub.h, uc.h);
5111 if (ur.s != 0x0020000000000001ULL) {
5112 force_soft_fma = true;
5113 }
5114}