]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
Merge tag 'hppa-btlb-pull-request' of https://github.com/hdeller/qemu-hppa into staging
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
/* softfloat (and in particular the code in softfloat-specialize.c.inc) is
 * target-dependent and needs the TARGET_* macros.
 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
d82f3b2d 135 float_raise(float_flag_input_denormal, s); \
a94b7839
EC
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value (1 selects fpclassify, 0 selects the softfloat
 * primitives).
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205
/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif
217
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 *
 * QEMU_NO_HARDFLOAT is 1 when hardfloat is disabled (TARGET_PPC, or a
 * -ffast-math build) and 0 otherwise.  QEMU_SOFTFLOAT_ATTR additionally
 * carries noinline only when hardfloat is enabled.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
a94b7839
EC
342static inline float32
343float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
b240c9c4 345 f32_check_fn pre, f32_check_fn post)
a94b7839
EC
346{
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
a94b7839
EC
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 363 float_raise(float_flag_overflow, s);
b240c9c4
RH
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
a94b7839
EC
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371}
372
373static inline float64
374float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
b240c9c4 376 f64_check_fn pre, f64_check_fn post)
a94b7839
EC
377{
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
a94b7839
EC
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 394 float_raise(float_flag_overflow, s);
b240c9c4
RH
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
a94b7839
EC
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402}
403
a90119b5
AB
/*
 * Classify a floating point number.  The NaN classes are ordered last, so
 * float_class_qnan and everything after it is a NaN: cls >= float_class_qnan
 * is true for any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
417
134eda00
RH
418#define float_cmask(bit) (1u << (bit))
419
420enum {
421 float_cmask_zero = float_cmask(float_class_zero),
422 float_cmask_normal = float_cmask(float_class_normal),
423 float_cmask_inf = float_cmask(float_class_inf),
424 float_cmask_qnan = float_cmask(float_class_qnan),
425 float_cmask_snan = float_cmask(float_class_snan),
426
427 float_cmask_infzero = float_cmask_zero | float_cmask_inf,
428 float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
429};
430
e1c4667a
RH
/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 8,
};
134eda00 445
247d1f21
RH
446/* Simple helpers for checking if, or what kind of, NaN we have */
447static inline __attribute__((unused)) bool is_nan(FloatClass c)
448{
449 return unlikely(c >= float_class_qnan);
450}
451
452static inline __attribute__((unused)) bool is_snan(FloatClass c)
453{
454 return c == float_class_snan;
455}
456
457static inline __attribute__((unused)) bool is_qnan(FloatClass c)
458{
459 return c == float_class_qnan;
460}
461
a90119b5 462/*
0018b1f4
RH
463 * Structure holding all of the decomposed parts of a float.
464 * The exponent is unbiased and the fraction is normalized.
a90119b5 465 *
0018b1f4
RH
466 * The fraction words are stored in big-endian word ordering,
467 * so that truncation from a larger format to a smaller format
468 * can be done simply by ignoring subsequent elements.
a90119b5
AB
469 */
470
471typedef struct {
a90119b5
AB
472 FloatClass cls;
473 bool sign;
4109b9ea
RH
474 int32_t exp;
475 union {
476 /* Routines that know the structure may reference the singular name. */
477 uint64_t frac;
478 /*
479 * Routines expanded with multiple structures reference "hi" and "lo"
480 * depending on the operation. In FloatParts64, "hi" and "lo" are
481 * both the same word and aliased here.
482 */
483 uint64_t frac_hi;
484 uint64_t frac_lo;
485 };
f8155c1d 486} FloatParts64;
a90119b5 487
0018b1f4
RH
488typedef struct {
489 FloatClass cls;
490 bool sign;
491 int32_t exp;
492 uint64_t frac_hi;
493 uint64_t frac_lo;
494} FloatParts128;
495
aca84527
RH
496typedef struct {
497 FloatClass cls;
498 bool sign;
499 int32_t exp;
500 uint64_t frac_hi;
501 uint64_t frac_hm; /* high-middle */
502 uint64_t frac_lm; /* low-middle */
503 uint64_t frac_lo;
504} FloatParts256;
505
/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
a90119b5
AB
509
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_re_bias: an additional exponent offset, filled in by FLOAT_PARAMS_
 *                as 3 * 2^(exp_size - 2)
 *                NOTE(review): presumably used when re-biasing exponents on
 *                trapped overflow/underflow -- confirm against the users.
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of fraction
 *   round_mask: bits below lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 *   m68k_denormal: explicit integer bit for extended precision may be 1
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_re_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    bool m68k_denormal;
    uint64_t round_mask;
} FloatFmt;
533
/*
 * Expand fields based on the size of exponent and fraction.
 *
 * FLOAT_PARAMS_(E) fills in the exponent-related FloatFmt fields for an
 * E-bit exponent; FLOAT_PARAMS(E, F) adds the fraction-related fields for an
 * F-bit fraction.  Arguments are parenthesised so that expression arguments
 * (e.g. FLOAT_PARAMS(8, 20 + 3)) expand correctly.
 */
#define FLOAT_PARAMS_(E)                                        \
    .exp_size       = (E),                                      \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                    \
    .exp_re_bias    = (1 << ((E) - 1)) + (1 << ((E) - 2)),      \
    .exp_max        = (1 << (E)) - 1

#define FLOAT_PARAMS(E, F)                                      \
    FLOAT_PARAMS_(E),                                           \
    .frac_size      = (F),                                      \
    .frac_shift     = (-(F) - 1) & 63,                          \
    .round_mask     = (1ull << ((-(F) - 1) & 63)) - 1
a90119b5
AB
546
547static const FloatFmt float16_params = {
548 FLOAT_PARAMS(5, 10)
549};
550
6fed16b2
AB
551static const FloatFmt float16_params_ahp = {
552 FLOAT_PARAMS(5, 10),
553 .arm_althp = true
554};
555
8282310d
LZ
556static const FloatFmt bfloat16_params = {
557 FLOAT_PARAMS(8, 7)
558};
559
a90119b5
AB
560static const FloatFmt float32_params = {
561 FLOAT_PARAMS(8, 23)
562};
563
564static const FloatFmt float64_params = {
565 FLOAT_PARAMS(11, 52)
566};
567
0018b1f4
RH
568static const FloatFmt float128_params = {
569 FLOAT_PARAMS(15, 112)
570};
571
c1b6299b
RH
/*
 * FloatFmt fields for floatx80 at rounding precision R (15-bit exponent,
 * frac_shift 0).  R == 64 yields frac_size 63 and an all-ones round_mask;
 * any other R yields frac_size R and a round_mask covering the low
 * (63 - R) bits.  R is parenthesised so expression arguments expand
 * correctly.
 */
#define FLOATX80_PARAMS(R)                                      \
    FLOAT_PARAMS_(15),                                          \
    .frac_size  = (R) == 64 ? 63 : (R),                         \
    .frac_shift = 0,                                            \
    .round_mask = (R) == 64 ? -1 : (1ull << ((-(R) - 1) & 63)) - 1
578static const FloatFmt floatx80_params[3] = {
579 [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
580 [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
72246065
RH
581 [floatx80_precision_x] = {
582 FLOATX80_PARAMS(64),
583#ifdef TARGET_M68K
584 .m68k_denormal = true,
585#endif
586 },
c1b6299b
RH
587};
588
6fff2167 589/* Unpack a float to parts, but do not canonicalize. */
d8fdd172 590static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
6fff2167 591{
d8fdd172
RH
592 const int f_size = fmt->frac_size;
593 const int e_size = fmt->exp_size;
6fff2167 594
d8fdd172 595 *r = (FloatParts64) {
6fff2167 596 .cls = float_class_unclassified,
d8fdd172
RH
597 .sign = extract64(raw, f_size + e_size, 1),
598 .exp = extract64(raw, f_size, e_size),
599 .frac = extract64(raw, 0, f_size)
6fff2167
AB
600 };
601}
602
1d3daf95 603static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
6fff2167 604{
3dddb203 605 unpack_raw64(p, &float16_params, f);
6fff2167
AB
606}
607
1d3daf95 608static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
8282310d 609{
3dddb203 610 unpack_raw64(p, &bfloat16_params, f);
8282310d
LZ
611}
612
1d3daf95 613static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
6fff2167 614{
3dddb203 615 unpack_raw64(p, &float32_params, f);
6fff2167
AB
616}
617
1d3daf95 618static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
6fff2167 619{
3dddb203 620 unpack_raw64(p, &float64_params, f);
6fff2167
AB
621}
622
1d3daf95 623static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
c1b6299b
RH
624{
625 *p = (FloatParts128) {
626 .cls = float_class_unclassified,
627 .sign = extract32(f.high, 15, 1),
628 .exp = extract32(f.high, 0, 15),
629 .frac_hi = f.low
630 };
631}
632
1d3daf95 633static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
0018b1f4
RH
634{
635 const int f_size = float128_params.frac_size - 64;
636 const int e_size = float128_params.exp_size;
637
638 *p = (FloatParts128) {
639 .cls = float_class_unclassified,
640 .sign = extract64(f.high, f_size + e_size, 1),
641 .exp = extract64(f.high, f_size, e_size),
642 .frac_hi = extract64(f.high, 0, f_size),
643 .frac_lo = f.low,
644 };
645}
646
6fff2167 647/* Pack a float from parts, but do not canonicalize. */
9e4af58c 648static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
6fff2167 649{
9e4af58c
RH
650 const int f_size = fmt->frac_size;
651 const int e_size = fmt->exp_size;
652 uint64_t ret;
653
654 ret = (uint64_t)p->sign << (f_size + e_size);
655 ret = deposit64(ret, f_size, e_size, p->exp);
656 ret = deposit64(ret, 0, f_size, p->frac);
657 return ret;
6fff2167
AB
658}
659
1d3daf95 660static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
6fff2167 661{
71fd178e 662 return make_float16(pack_raw64(p, &float16_params));
6fff2167
AB
663}
664
1d3daf95 665static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
8282310d 666{
71fd178e 667 return pack_raw64(p, &bfloat16_params);
8282310d
LZ
668}
669
1d3daf95 670static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
6fff2167 671{
71fd178e 672 return make_float32(pack_raw64(p, &float32_params));
6fff2167
AB
673}
674
1d3daf95 675static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
6fff2167 676{
71fd178e 677 return make_float64(pack_raw64(p, &float64_params));
6fff2167
AB
678}
679
1d3daf95 680static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
0018b1f4
RH
681{
682 const int f_size = float128_params.frac_size - 64;
683 const int e_size = float128_params.exp_size;
684 uint64_t hi;
685
686 hi = (uint64_t)p->sign << (f_size + e_size);
687 hi = deposit64(hi, f_size, e_size, p->exp);
688 hi = deposit64(hi, 0, f_size, p->frac_hi);
689 return make_float128(hi, p->frac_lo);
690}
691
0664335a
RH
692/*----------------------------------------------------------------------------
693| Functions and definitions to determine: (1) whether tininess for underflow
694| is detected before or after rounding by default, (2) what (if anything)
695| happens when exceptions are raised, (3) how signaling NaNs are distinguished
696| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
697| are propagated from function inputs to output. These details are target-
698| specific.
699*----------------------------------------------------------------------------*/
139c1837 700#include "softfloat-specialize.c.inc"
0664335a 701
/*
 * Select parts64_NAME or parts128_NAME (and, for the _256 variant,
 * parts256_NAME) according to the pointer type of P.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME, \
                  FloatParts256 *: parts256_##NAME)
dedd123c 710
e9034ea8 711#define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
0018b1f4
RH
712#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
713
7c45bad8
RH
714static void parts64_return_nan(FloatParts64 *a, float_status *s);
715static void parts128_return_nan(FloatParts128 *a, float_status *s);
716
717#define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
0018b1f4 718
22c355f4
RH
719static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
720 float_status *s);
721static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
722 float_status *s);
723
724#define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
725
979582d0
RH
726static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
727 FloatParts64 *c, float_status *s,
728 int ab_mask, int abc_mask);
729static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
730 FloatParts128 *b,
731 FloatParts128 *c,
732 float_status *s,
733 int ab_mask, int abc_mask);
734
735#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
736 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
737
d46975bc
RH
738static void parts64_canonicalize(FloatParts64 *p, float_status *status,
739 const FloatFmt *fmt);
740static void parts128_canonicalize(FloatParts128 *p, float_status *status,
741 const FloatFmt *fmt);
742
743#define parts_canonicalize(A, S, F) \
744 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
745
25fdedf0
RH
746static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
747 const FloatFmt *fmt);
748static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
749 const FloatFmt *fmt);
750
751#define parts_uncanon_normal(A, S, F) \
752 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
753
ee6959f2
RH
754static void parts64_uncanon(FloatParts64 *p, float_status *status,
755 const FloatFmt *fmt);
756static void parts128_uncanon(FloatParts128 *p, float_status *status,
757 const FloatFmt *fmt);
758
759#define parts_uncanon(A, S, F) \
760 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
761
da10a907
RH
762static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
763static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 764static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
765
766#define parts_add_normal(A, B) \
dedd123c 767 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
da10a907
RH
768
769static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
770static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 771static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
772
773#define parts_sub_normal(A, B) \
dedd123c 774 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
da10a907
RH
775
776static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
777 float_status *s, bool subtract);
778static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
779 float_status *s, bool subtract);
780
781#define parts_addsub(A, B, S, Z) \
782 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
783
aca84527
RH
784static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
785 float_status *s);
786static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
787 float_status *s);
788
789#define parts_mul(A, B, S) \
790 PARTS_GENERIC_64_128(mul, A)(A, B, S)
791
dedd123c
RH
792static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
793 FloatParts64 *c, int flags,
794 float_status *s);
795static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
796 FloatParts128 *c, int flags,
797 float_status *s);
798
799#define parts_muladd(A, B, C, Z, S) \
800 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
801
ec961b81
RH
802static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
803 float_status *s);
804static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
805 float_status *s);
806
807#define parts_div(A, B, S) \
808 PARTS_GENERIC_64_128(div, A)(A, B, S)
809
feaf2e9c
RH
810static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
811 uint64_t *mod_quot, float_status *s);
812static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
813 uint64_t *mod_quot, float_status *s);
814
815#define parts_modrem(A, B, Q, S) \
816 PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
817
9261b245
RH
818static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
819static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
820
821#define parts_sqrt(A, S, F) \
822 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
823
afc34931
RH
824static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
825 int scale, int frac_size);
826static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
827 int scale, int frac_size);
828
829#define parts_round_to_int_normal(A, R, C, F) \
830 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
831
832static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
833 int scale, float_status *s,
834 const FloatFmt *fmt);
835static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
836 int scale, float_status *s,
837 const FloatFmt *fmt);
838
839#define parts_round_to_int(A, R, C, S, F) \
840 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
841
463b3f0d
RH
842static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
843 int scale, int64_t min, int64_t max,
844 float_status *s);
845static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
846 int scale, int64_t min, int64_t max,
847 float_status *s);
848
849#define parts_float_to_sint(P, R, Z, MN, MX, S) \
850 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
851
4ab4aef0
RH
852static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
853 int scale, uint64_t max,
854 float_status *s);
855static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
856 int scale, uint64_t max,
857 float_status *s);
858
859#define parts_float_to_uint(P, R, Z, M, S) \
860 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
861
e2041f4d
RH
862static int64_t parts64_float_to_sint_modulo(FloatParts64 *p,
863 FloatRoundMode rmode,
864 int bitsm1, float_status *s);
865static int64_t parts128_float_to_sint_modulo(FloatParts128 *p,
866 FloatRoundMode rmode,
867 int bitsm1, float_status *s);
868
869#define parts_float_to_sint_modulo(P, R, M, S) \
870 PARTS_GENERIC_64_128(float_to_sint_modulo, P)(P, R, M, S)
871
e3689519
RH
872static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
873 int scale, float_status *s);
874static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
875 int scale, float_status *s);
876
e2041f4d
RH
877#define parts_float_to_sint(P, R, Z, MN, MX, S) \
878 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
879
e3689519
RH
880#define parts_sint_to_float(P, I, Z, S) \
881 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
882
37c954a1
RH
883static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
884 int scale, float_status *s);
885static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
886 int scale, float_status *s);
887
888#define parts_uint_to_float(P, I, Z, S) \
889 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
890
e1c4667a
RH
891static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
892 float_status *s, int flags);
893static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
894 float_status *s, int flags);
895
896#define parts_minmax(A, B, S, F) \
897 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
898
b880867f
RH
899static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
900 float_status *s, bool q);
901static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
902 float_status *s, bool q);
6eb169b8
RH
903
904#define parts_compare(A, B, S, Q) \
905 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
906
39626b0c
RH
907static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
908static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
909
910#define parts_scalbn(A, N, S) \
911 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
912
2fa3546c
RH
913static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
914static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
915
916#define parts_log2(A, S, F) \
917 PARTS_GENERIC_64_128(log2, A)(A, S, F)
918
0018b1f4
RH
919/*
920 * Helper functions for softfloat-parts.c.inc, per-size operations.
921 */
922
22c355f4 923#define FRAC_GENERIC_64_128(NAME, P) \
6c06aca0
RH
924 _Generic((P), FloatParts64 *: frac64_##NAME, \
925 FloatParts128 *: frac128_##NAME)
22c355f4 926
dedd123c 927#define FRAC_GENERIC_64_128_256(NAME, P) \
6c06aca0
RH
928 _Generic((P), FloatParts64 *: frac64_##NAME, \
929 FloatParts128 *: frac128_##NAME, \
930 FloatParts256 *: frac256_##NAME)
dedd123c 931
da10a907
RH
932static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
933{
934 return uadd64_overflow(a->frac, b->frac, &r->frac);
935}
936
937static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
938{
939 bool c = 0;
940 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
941 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
942 return c;
943}
944
dedd123c
RH
945static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
946{
947 bool c = 0;
948 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
949 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
950 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
951 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
952 return c;
953}
954
955#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
da10a907 956
ee6959f2
RH
957static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
958{
959 return uadd64_overflow(a->frac, c, &r->frac);
960}
961
962static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
963{
964 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
965 return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
966}
967
968#define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
969
970static void frac64_allones(FloatParts64 *a)
971{
972 a->frac = -1;
973}
974
975static void frac128_allones(FloatParts128 *a)
976{
977 a->frac_hi = a->frac_lo = -1;
978}
979
980#define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
981
dee3fcfb 982static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
22c355f4 983{
dee3fcfb
RH
984 return (a->frac == b->frac ? float_relation_equal
985 : a->frac < b->frac ? float_relation_less
986 : float_relation_greater);
22c355f4
RH
987}
988
dee3fcfb 989static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
22c355f4
RH
990{
991 uint64_t ta = a->frac_hi, tb = b->frac_hi;
992 if (ta == tb) {
993 ta = a->frac_lo, tb = b->frac_lo;
994 if (ta == tb) {
dee3fcfb 995 return float_relation_equal;
22c355f4
RH
996 }
997 }
dee3fcfb 998 return ta < tb ? float_relation_less : float_relation_greater;
22c355f4
RH
999}
1000
1001#define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
1002
d46975bc 1003static void frac64_clear(FloatParts64 *a)
0018b1f4 1004{
d46975bc
RH
1005 a->frac = 0;
1006}
1007
1008static void frac128_clear(FloatParts128 *a)
1009{
1010 a->frac_hi = a->frac_lo = 0;
0018b1f4
RH
1011}
1012
d46975bc 1013#define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
0018b1f4 1014
ec961b81
RH
1015static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
1016{
1017 uint64_t n1, n0, r, q;
1018 bool ret;
1019
1020 /*
1021 * We want a 2*N / N-bit division to produce exactly an N-bit
1022 * result, so that we do not lose any precision and so that we
1023 * do not have to renormalize afterward. If A.frac < B.frac,
1024 * then division would produce an (N-1)-bit result; shift A left
1025 * by one to produce the an N-bit result, and return true to
1026 * decrement the exponent to match.
1027 *
1028 * The udiv_qrnnd algorithm that we're using requires normalization,
1029 * i.e. the msb of the denominator must be set, which is already true.
1030 */
1031 ret = a->frac < b->frac;
1032 if (ret) {
1033 n0 = a->frac;
1034 n1 = 0;
1035 } else {
1036 n0 = a->frac >> 1;
1037 n1 = a->frac << 63;
1038 }
1039 q = udiv_qrnnd(&r, n0, n1, b->frac);
1040
1041 /* Set lsb if there is a remainder, to set inexact. */
1042 a->frac = q | (r != 0);
1043
1044 return ret;
1045}
1046
1047static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1048{
1049 uint64_t q0, q1, a0, a1, b0, b1;
1050 uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1051 bool ret = false;
1052
1053 a0 = a->frac_hi, a1 = a->frac_lo;
1054 b0 = b->frac_hi, b1 = b->frac_lo;
1055
1056 ret = lt128(a0, a1, b0, b1);
1057 if (!ret) {
1058 a1 = shr_double(a0, a1, 1);
1059 a0 = a0 >> 1;
1060 }
1061
1062 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1063 q0 = estimateDiv128To64(a0, a1, b0);
1064
1065 /*
1066 * Estimate is high because B1 was not included (unless B1 == 0).
1067 * Reduce quotient and increase remainder until remainder is non-negative.
1068 * This loop will execute 0 to 2 times.
1069 */
1070 mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1071 sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1072 while (r0 != 0) {
1073 q0--;
1074 add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1075 }
1076
1077 /* Repeat using the remainder, producing a second word of quotient. */
1078 q1 = estimateDiv128To64(r1, r2, b0);
1079 mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1080 sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1081 while (r1 != 0) {
1082 q1--;
1083 add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1084 }
1085
1086 /* Any remainder indicates inexact; set sticky bit. */
1087 q1 |= (r2 | r3) != 0;
1088
1089 a->frac_hi = q0;
1090 a->frac_lo = q1;
1091 return ret;
1092}
1093
1094#define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1095
d46975bc 1096static bool frac64_eqz(FloatParts64 *a)
0018b1f4 1097{
d46975bc
RH
1098 return a->frac == 0;
1099}
1100
1101static bool frac128_eqz(FloatParts128 *a)
1102{
1103 return (a->frac_hi | a->frac_lo) == 0;
0018b1f4
RH
1104}
1105
d46975bc 1106#define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
0fc07cad 1107
aca84527
RH
1108static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1109{
1110 mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1111}
1112
1113static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1114{
1115 mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1116 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1117}
1118
1119#define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1120
da10a907
RH
1121static void frac64_neg(FloatParts64 *a)
1122{
1123 a->frac = -a->frac;
1124}
1125
1126static void frac128_neg(FloatParts128 *a)
1127{
1128 bool c = 0;
1129 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1130 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1131}
1132
dedd123c
RH
1133static void frac256_neg(FloatParts256 *a)
1134{
1135 bool c = 0;
1136 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1137 a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1138 a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1139 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1140}
1141
1142#define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
da10a907 1143
d46975bc 1144static int frac64_normalize(FloatParts64 *a)
6fff2167 1145{
d46975bc
RH
1146 if (a->frac) {
1147 int shift = clz64(a->frac);
1148 a->frac <<= shift;
1149 return shift;
1150 }
1151 return 64;
1152}
1153
1154static int frac128_normalize(FloatParts128 *a)
1155{
1156 if (a->frac_hi) {
1157 int shl = clz64(a->frac_hi);
463e45dc
RH
1158 a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1159 a->frac_lo <<= shl;
d46975bc
RH
1160 return shl;
1161 } else if (a->frac_lo) {
1162 int shl = clz64(a->frac_lo);
463e45dc 1163 a->frac_hi = a->frac_lo << shl;
d46975bc
RH
1164 a->frac_lo = 0;
1165 return shl + 64;
6fff2167 1166 }
d46975bc 1167 return 128;
6fff2167
AB
1168}
1169
dedd123c
RH
1170static int frac256_normalize(FloatParts256 *a)
1171{
1172 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1173 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
463e45dc 1174 int ret, shl;
dedd123c
RH
1175
1176 if (likely(a0)) {
1177 shl = clz64(a0);
1178 if (shl == 0) {
1179 return 0;
1180 }
1181 ret = shl;
1182 } else {
1183 if (a1) {
1184 ret = 64;
1185 a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1186 } else if (a2) {
1187 ret = 128;
1188 a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1189 } else if (a3) {
1190 ret = 192;
1191 a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1192 } else {
1193 ret = 256;
1194 a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1195 goto done;
1196 }
1197 shl = clz64(a0);
1198 if (shl == 0) {
1199 goto done;
1200 }
1201 ret += shl;
1202 }
1203
463e45dc
RH
1204 a0 = shl_double(a0, a1, shl);
1205 a1 = shl_double(a1, a2, shl);
1206 a2 = shl_double(a2, a3, shl);
1207 a3 <<= shl;
dedd123c
RH
1208
1209 done:
1210 a->frac_hi = a0;
1211 a->frac_hm = a1;
1212 a->frac_lm = a2;
1213 a->frac_lo = a3;
1214 return ret;
1215}
1216
1217#define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
d46975bc 1218
feaf2e9c
RH
1219static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
1220{
1221 uint64_t a0, a1, b0, t0, t1, q, quot;
1222 int exp_diff = a->exp - b->exp;
1223 int shift;
1224
1225 a0 = a->frac;
1226 a1 = 0;
1227
1228 if (exp_diff < -1) {
1229 if (mod_quot) {
1230 *mod_quot = 0;
1231 }
1232 return;
1233 }
1234 if (exp_diff == -1) {
1235 a0 >>= 1;
1236 exp_diff = 0;
1237 }
1238
1239 b0 = b->frac;
1240 quot = q = b0 <= a0;
1241 if (q) {
1242 a0 -= b0;
1243 }
1244
1245 exp_diff -= 64;
1246 while (exp_diff > 0) {
1247 q = estimateDiv128To64(a0, a1, b0);
1248 q = q > 2 ? q - 2 : 0;
1249 mul64To128(b0, q, &t0, &t1);
1250 sub128(a0, a1, t0, t1, &a0, &a1);
1251 shortShift128Left(a0, a1, 62, &a0, &a1);
1252 exp_diff -= 62;
1253 quot = (quot << 62) + q;
1254 }
1255
1256 exp_diff += 64;
1257 if (exp_diff > 0) {
1258 q = estimateDiv128To64(a0, a1, b0);
1259 q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
1260 mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
1261 sub128(a0, a1, t0, t1, &a0, &a1);
1262 shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
1263 while (le128(t0, t1, a0, a1)) {
1264 ++q;
1265 sub128(a0, a1, t0, t1, &a0, &a1);
1266 }
1267 quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1268 } else {
1269 t0 = b0;
1270 t1 = 0;
1271 }
1272
1273 if (mod_quot) {
1274 *mod_quot = quot;
1275 } else {
1276 sub128(t0, t1, a0, a1, &t0, &t1);
1277 if (lt128(t0, t1, a0, a1) ||
1278 (eq128(t0, t1, a0, a1) && (q & 1))) {
1279 a0 = t0;
1280 a1 = t1;
1281 a->sign = !a->sign;
1282 }
1283 }
1284
1285 if (likely(a0)) {
1286 shift = clz64(a0);
1287 shortShift128Left(a0, a1, shift, &a0, &a1);
1288 } else if (likely(a1)) {
1289 shift = clz64(a1);
1290 a0 = a1 << shift;
1291 a1 = 0;
1292 shift += 64;
1293 } else {
1294 a->cls = float_class_zero;
1295 return;
1296 }
1297
1298 a->exp = b->exp + exp_diff - shift;
1299 a->frac = a0 | (a1 != 0);
1300}
1301
1302static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
1303 uint64_t *mod_quot)
1304{
1305 uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
1306 int exp_diff = a->exp - b->exp;
1307 int shift;
1308
1309 a0 = a->frac_hi;
1310 a1 = a->frac_lo;
1311 a2 = 0;
1312
1313 if (exp_diff < -1) {
1314 if (mod_quot) {
1315 *mod_quot = 0;
1316 }
1317 return;
1318 }
1319 if (exp_diff == -1) {
1320 shift128Right(a0, a1, 1, &a0, &a1);
1321 exp_diff = 0;
1322 }
1323
1324 b0 = b->frac_hi;
1325 b1 = b->frac_lo;
1326
1327 quot = q = le128(b0, b1, a0, a1);
1328 if (q) {
1329 sub128(a0, a1, b0, b1, &a0, &a1);
1330 }
1331
1332 exp_diff -= 64;
1333 while (exp_diff > 0) {
1334 q = estimateDiv128To64(a0, a1, b0);
1335 q = q > 4 ? q - 4 : 0;
1336 mul128By64To192(b0, b1, q, &t0, &t1, &t2);
1337 sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1338 shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
1339 exp_diff -= 61;
1340 quot = (quot << 61) + q;
1341 }
1342
1343 exp_diff += 64;
1344 if (exp_diff > 0) {
1345 q = estimateDiv128To64(a0, a1, b0);
1346 q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
1347 mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
1348 sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1349 shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
1350 while (le192(t0, t1, t2, a0, a1, a2)) {
1351 ++q;
1352 sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1353 }
1354 quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1355 } else {
1356 t0 = b0;
1357 t1 = b1;
1358 t2 = 0;
1359 }
1360
1361 if (mod_quot) {
1362 *mod_quot = quot;
1363 } else {
1364 sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
1365 if (lt192(t0, t1, t2, a0, a1, a2) ||
1366 (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
1367 a0 = t0;
1368 a1 = t1;
1369 a2 = t2;
1370 a->sign = !a->sign;
1371 }
1372 }
1373
1374 if (likely(a0)) {
1375 shift = clz64(a0);
1376 shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
1377 } else if (likely(a1)) {
1378 shift = clz64(a1);
1379 shortShift128Left(a1, a2, shift, &a0, &a1);
1380 a2 = 0;
1381 shift += 64;
1382 } else if (likely(a2)) {
1383 shift = clz64(a2);
1384 a0 = a2 << shift;
1385 a1 = a2 = 0;
1386 shift += 128;
1387 } else {
1388 a->cls = float_class_zero;
1389 return;
1390 }
1391
1392 a->exp = b->exp + exp_diff - shift;
1393 a->frac_hi = a0;
1394 a->frac_lo = a1 | (a2 != 0);
1395}
1396
1397#define frac_modrem(A, B, Q) FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1398
d46975bc
RH
1399static void frac64_shl(FloatParts64 *a, int c)
1400{
1401 a->frac <<= c;
1402}
1403
1404static void frac128_shl(FloatParts128 *a, int c)
1405{
463e45dc
RH
1406 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1407
1408 if (c & 64) {
1409 a0 = a1, a1 = 0;
1410 }
1411
1412 c &= 63;
1413 if (c) {
1414 a0 = shl_double(a0, a1, c);
1415 a1 = a1 << c;
1416 }
1417
1418 a->frac_hi = a0;
1419 a->frac_lo = a1;
d46975bc
RH
1420}
1421
1422#define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1423
1424static void frac64_shr(FloatParts64 *a, int c)
1425{
1426 a->frac >>= c;
1427}
1428
1429static void frac128_shr(FloatParts128 *a, int c)
1430{
463e45dc
RH
1431 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1432
1433 if (c & 64) {
1434 a1 = a0, a0 = 0;
1435 }
1436
1437 c &= 63;
1438 if (c) {
1439 a1 = shr_double(a0, a1, c);
1440 a0 = a0 >> c;
1441 }
1442
1443 a->frac_hi = a0;
1444 a->frac_lo = a1;
d46975bc
RH
1445}
1446
1447#define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1448
ee6959f2 1449static void frac64_shrjam(FloatParts64 *a, int c)
6fff2167 1450{
463e45dc
RH
1451 uint64_t a0 = a->frac;
1452
1453 if (likely(c != 0)) {
1454 if (likely(c < 64)) {
1455 a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1456 } else {
1457 a0 = a0 != 0;
1458 }
1459 a->frac = a0;
1460 }
ee6959f2 1461}
6fff2167 1462
ee6959f2
RH
1463static void frac128_shrjam(FloatParts128 *a, int c)
1464{
463e45dc
RH
1465 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1466 uint64_t sticky = 0;
1467
1468 if (unlikely(c == 0)) {
1469 return;
1470 } else if (likely(c < 64)) {
1471 /* nothing */
1472 } else if (likely(c < 128)) {
1473 sticky = a1;
1474 a1 = a0;
1475 a0 = 0;
1476 c &= 63;
1477 if (c == 0) {
1478 goto done;
1479 }
1480 } else {
1481 sticky = a0 | a1;
1482 a0 = a1 = 0;
1483 goto done;
1484 }
1485
1486 sticky |= shr_double(a1, 0, c);
1487 a1 = shr_double(a0, a1, c);
1488 a0 = a0 >> c;
1489
1490 done:
1491 a->frac_lo = a1 | (sticky != 0);
1492 a->frac_hi = a0;
6fff2167
AB
1493}
1494
dedd123c
RH
1495static void frac256_shrjam(FloatParts256 *a, int c)
1496{
1497 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1498 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1499 uint64_t sticky = 0;
dedd123c
RH
1500
1501 if (unlikely(c == 0)) {
1502 return;
1503 } else if (likely(c < 64)) {
1504 /* nothing */
1505 } else if (likely(c < 256)) {
1506 if (unlikely(c & 128)) {
1507 sticky |= a2 | a3;
1508 a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1509 }
1510 if (unlikely(c & 64)) {
1511 sticky |= a3;
1512 a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1513 }
1514 c &= 63;
1515 if (c == 0) {
1516 goto done;
1517 }
1518 } else {
1519 sticky = a0 | a1 | a2 | a3;
1520 a0 = a1 = a2 = a3 = 0;
1521 goto done;
1522 }
1523
463e45dc
RH
1524 sticky |= shr_double(a3, 0, c);
1525 a3 = shr_double(a2, a3, c);
1526 a2 = shr_double(a1, a2, c);
1527 a1 = shr_double(a0, a1, c);
1528 a0 = a0 >> c;
dedd123c
RH
1529
1530 done:
1531 a->frac_lo = a3 | (sticky != 0);
1532 a->frac_lm = a2;
1533 a->frac_hm = a1;
1534 a->frac_hi = a0;
1535}
1536
1537#define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
d446830a 1538
da10a907
RH
1539static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1540{
1541 return usub64_overflow(a->frac, b->frac, &r->frac);
1542}
7c45bad8 1543
da10a907
RH
1544static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1545{
1546 bool c = 0;
1547 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1548 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1549 return c;
1550}
1551
dedd123c
RH
1552static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1553{
1554 bool c = 0;
1555 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1556 r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1557 r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1558 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1559 return c;
1560}
1561
1562#define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
da10a907 1563
aca84527
RH
1564static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1565{
1566 r->frac = a->frac_hi | (a->frac_lo != 0);
1567}
1568
1569static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1570{
1571 r->frac_hi = a->frac_hi;
1572 r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1573}
1574
1575#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1576
dedd123c
RH
1577static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1578{
1579 r->frac_hi = a->frac;
1580 r->frac_lo = 0;
1581}
1582
1583static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1584{
1585 r->frac_hi = a->frac_hi;
1586 r->frac_hm = a->frac_lo;
1587 r->frac_lm = 0;
1588 r->frac_lo = 0;
1589}
1590
1591#define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1592
9261b245
RH
/*
 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licensed.
 */
1598static const uint16_t rsqrt_tab[128] = {
1599 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1600 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1601 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1602 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1603 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1604 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1605 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1606 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1607 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1608 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1609 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1610 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1611 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1612 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1613 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1614 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1615};
1616
da10a907
RH
1617#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1618#define FloatPartsN glue(FloatParts,N)
aca84527 1619#define FloatPartsW glue(FloatParts,W)
da10a907
RH
1620
1621#define N 64
aca84527 1622#define W 128
da10a907
RH
1623
1624#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1625#include "softfloat-parts.c.inc"
1626
da10a907 1627#undef N
aca84527 1628#undef W
da10a907 1629#define N 128
aca84527 1630#define W 256
7c45bad8 1631
da10a907 1632#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1633#include "softfloat-parts.c.inc"
1634
dedd123c
RH
1635#undef N
1636#undef W
1637#define N 256
1638
1639#include "softfloat-parts-addsub.c.inc"
1640
da10a907 1641#undef N
aca84527 1642#undef W
7c45bad8
RH
1643#undef partsN
1644#undef FloatPartsN
aca84527 1645#undef FloatPartsW
7c45bad8 1646
aaffb7bf
RH
1647/*
1648 * Pack/unpack routines with a specific FloatFmt.
1649 */
1650
98e256fc
RH
1651static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1652 float_status *s, const FloatFmt *params)
aaffb7bf 1653{
98e256fc 1654 float16_unpack_raw(p, f);
d46975bc 1655 parts_canonicalize(p, s, params);
aaffb7bf
RH
1656}
1657
98e256fc
RH
1658static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1659 float_status *s)
aaffb7bf 1660{
98e256fc 1661 float16a_unpack_canonical(p, f, s, &float16_params);
aaffb7bf
RH
1662}
1663
98e256fc
RH
1664static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1665 float_status *s)
aaffb7bf 1666{
98e256fc 1667 bfloat16_unpack_raw(p, f);
d46975bc 1668 parts_canonicalize(p, s, &bfloat16_params);
aaffb7bf
RH
1669}
1670
e293e927
RH
1671static float16 float16a_round_pack_canonical(FloatParts64 *p,
1672 float_status *s,
aaffb7bf
RH
1673 const FloatFmt *params)
1674{
ee6959f2 1675 parts_uncanon(p, s, params);
e293e927 1676 return float16_pack_raw(p);
aaffb7bf
RH
1677}
1678
e293e927
RH
1679static float16 float16_round_pack_canonical(FloatParts64 *p,
1680 float_status *s)
aaffb7bf
RH
1681{
1682 return float16a_round_pack_canonical(p, s, &float16_params);
1683}
1684
e293e927
RH
1685static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1686 float_status *s)
aaffb7bf 1687{
ee6959f2 1688 parts_uncanon(p, s, &bfloat16_params);
e293e927 1689 return bfloat16_pack_raw(p);
aaffb7bf
RH
1690}
1691
98e256fc
RH
1692static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1693 float_status *s)
aaffb7bf 1694{
98e256fc 1695 float32_unpack_raw(p, f);
d46975bc 1696 parts_canonicalize(p, s, &float32_params);
aaffb7bf
RH
1697}
1698
e293e927
RH
1699static float32 float32_round_pack_canonical(FloatParts64 *p,
1700 float_status *s)
aaffb7bf 1701{
ee6959f2 1702 parts_uncanon(p, s, &float32_params);
e293e927 1703 return float32_pack_raw(p);
aaffb7bf
RH
1704}
1705
98e256fc
RH
1706static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1707 float_status *s)
aaffb7bf 1708{
98e256fc 1709 float64_unpack_raw(p, f);
d46975bc 1710 parts_canonicalize(p, s, &float64_params);
aaffb7bf
RH
1711}
1712
e293e927
RH
1713static float64 float64_round_pack_canonical(FloatParts64 *p,
1714 float_status *s)
aaffb7bf 1715{
ee6959f2 1716 parts_uncanon(p, s, &float64_params);
e293e927 1717 return float64_pack_raw(p);
aaffb7bf
RH
1718}
1719
42636fb9
RH
1720static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1721 float_status *s)
1722{
1723 parts_uncanon(p, s, &float32_params);
1724
1725 /*
1726 * In parts_uncanon, we placed the fraction for float32 at the lsb.
1727 * We need to adjust the fraction higher so that the least N bits are
1728 * zero, and the fraction is adjacent to the float64 implicit bit.
1729 */
1730 switch (p->cls) {
1731 case float_class_normal:
1732 if (unlikely(p->exp == 0)) {
1733 /*
1734 * The result is denormal for float32, but can be represented
1735 * in normalized form for float64. Adjust, per canonicalize.
1736 */
1737 int shift = frac_normalize(p);
1738 p->exp = (float32_params.frac_shift -
1739 float32_params.exp_bias - shift + 1 +
1740 float64_params.exp_bias);
1741 frac_shr(p, float64_params.frac_shift);
1742 } else {
1743 frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1744 p->exp += float64_params.exp_bias - float32_params.exp_bias;
1745 }
1746 break;
1747 case float_class_snan:
1748 case float_class_qnan:
1749 frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1750 p->exp = float64_params.exp_max;
1751 break;
1752 case float_class_inf:
1753 p->exp = float64_params.exp_max;
1754 break;
1755 case float_class_zero:
1756 break;
1757 default:
1758 g_assert_not_reached();
1759 }
1760
1761 return float64_pack_raw(p);
1762}
1763
3ff49e56
RH
1764static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1765 float_status *s)
1766{
1767 float128_unpack_raw(p, f);
1768 parts_canonicalize(p, s, &float128_params);
1769}
1770
1771static float128 float128_round_pack_canonical(FloatParts128 *p,
1772 float_status *s)
1773{
1774 parts_uncanon(p, s, &float128_params);
1775 return float128_pack_raw(p);
1776}
1777
c1b6299b
RH
1778/* Returns false if the encoding is invalid. */
1779static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1780 float_status *s)
1781{
1782 /* Ensure rounding precision is set before beginning. */
1783 switch (s->floatx80_rounding_precision) {
1784 case floatx80_precision_x:
1785 case floatx80_precision_d:
1786 case floatx80_precision_s:
1787 break;
1788 default:
1789 g_assert_not_reached();
1790 }
1791
1792 if (unlikely(floatx80_invalid_encoding(f))) {
1793 float_raise(float_flag_invalid, s);
1794 return false;
1795 }
1796
1797 floatx80_unpack_raw(p, f);
1798
1799 if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1800 parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1801 } else {
1802 /* The explicit integer bit is ignored, after invalid checks. */
1803 p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1804 p->cls = (p->frac_hi == 0 ? float_class_inf
1805 : parts_is_snan_frac(p->frac_hi, s)
1806 ? float_class_snan : float_class_qnan);
1807 }
1808 return true;
1809}
1810
1811static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1812 float_status *s)
1813{
1814 const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1815 uint64_t frac;
1816 int exp;
1817
1818 switch (p->cls) {
1819 case float_class_normal:
1820 if (s->floatx80_rounding_precision == floatx80_precision_x) {
1821 parts_uncanon_normal(p, s, fmt);
1822 frac = p->frac_hi;
1823 exp = p->exp;
1824 } else {
1825 FloatParts64 p64;
1826
1827 p64.sign = p->sign;
1828 p64.exp = p->exp;
1829 frac_truncjam(&p64, p);
1830 parts_uncanon_normal(&p64, s, fmt);
1831 frac = p64.frac;
1832 exp = p64.exp;
1833 }
1834 if (exp != fmt->exp_max) {
1835 break;
1836 }
1837 /* rounded to inf -- fall through to set frac correctly */
1838
1839 case float_class_inf:
1840 /* x86 and m68k differ in the setting of the integer bit. */
1841 frac = floatx80_infinity_low;
1842 exp = fmt->exp_max;
1843 break;
1844
1845 case float_class_zero:
1846 frac = 0;
1847 exp = 0;
1848 break;
1849
1850 case float_class_snan:
1851 case float_class_qnan:
1852 /* NaNs have the integer bit set. */
1853 frac = p->frac_hi | (1ull << 63);
1854 exp = fmt->exp_max;
1855 break;
1856
1857 default:
1858 g_assert_not_reached();
1859 }
1860
1861 return packFloatx80(p->sign, exp, frac);
1862}
1863
6fff2167 1864/*
da10a907 1865 * Addition and subtraction
6fff2167
AB
1866 */
1867
da10a907
RH
1868static float16 QEMU_FLATTEN
1869float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
6fff2167 1870{
da10a907 1871 FloatParts64 pa, pb, *pr;
98e256fc
RH
1872
1873 float16_unpack_canonical(&pa, a, status);
1874 float16_unpack_canonical(&pb, b, status);
da10a907 1875 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1876
da10a907 1877 return float16_round_pack_canonical(pr, status);
6fff2167
AB
1878}
1879
da10a907 1880float16 float16_add(float16 a, float16 b, float_status *status)
1b615d48 1881{
da10a907
RH
1882 return float16_addsub(a, b, status, false);
1883}
1b615d48 1884
da10a907
RH
1885float16 float16_sub(float16 a, float16 b, float_status *status)
1886{
1887 return float16_addsub(a, b, status, true);
1b615d48
EC
1888}
1889
1890static float32 QEMU_SOFTFLOAT_ATTR
da10a907 1891soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
6fff2167 1892{
da10a907 1893 FloatParts64 pa, pb, *pr;
98e256fc
RH
1894
1895 float32_unpack_canonical(&pa, a, status);
1896 float32_unpack_canonical(&pb, b, status);
da10a907 1897 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1898
da10a907 1899 return float32_round_pack_canonical(pr, status);
6fff2167
AB
1900}
1901
da10a907 1902static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1b615d48 1903{
da10a907 1904 return soft_f32_addsub(a, b, status, false);
1b615d48
EC
1905}
1906
da10a907 1907static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1b615d48 1908{
da10a907 1909 return soft_f32_addsub(a, b, status, true);
1b615d48
EC
1910}
1911
1912static float64 QEMU_SOFTFLOAT_ATTR
da10a907 1913soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
6fff2167 1914{
da10a907 1915 FloatParts64 pa, pb, *pr;
98e256fc
RH
1916
1917 float64_unpack_canonical(&pa, a, status);
1918 float64_unpack_canonical(&pb, b, status);
da10a907 1919 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1920
da10a907 1921 return float64_round_pack_canonical(pr, status);
6fff2167
AB
1922}
1923
da10a907 1924static float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1925{
da10a907 1926 return soft_f64_addsub(a, b, status, false);
1b615d48 1927}
6fff2167 1928
da10a907 1929static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1b615d48 1930{
da10a907 1931 return soft_f64_addsub(a, b, status, true);
6fff2167
AB
1932}
1933
/* Host single-precision addition: returns a + b. */
static float hard_f32_add(float a, float b)
{
    const float sum = a + b;

    return sum;
}
6fff2167 1938
1b615d48
EC
/* Host single-precision subtraction: returns a - b. */
static float hard_f32_sub(float a, float b)
{
    const float diff = a - b;

    return diff;
}
1943
/* Host double-precision addition: returns a + b. */
static double hard_f64_add(double a, double b)
{
    const double sum = a + b;

    return sum;
}
6fff2167 1948
1b615d48
EC
/* Host double-precision subtraction: returns a - b. */
static double hard_f64_sub(double a, double b)
{
    const double diff = a - b;

    return diff;
}
1953
b240c9c4 1954static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1b615d48
EC
1955{
1956 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1957 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1958 }
1959 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1960}
1961
b240c9c4 1962static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1b615d48
EC
1963{
1964 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1965 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1966 } else {
1967 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1968 }
1969}
1970
1971static float32 float32_addsub(float32 a, float32 b, float_status *s,
1972 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1973{
1974 return float32_gen2(a, b, s, hard, soft,
b240c9c4 1975 f32_is_zon2, f32_addsubmul_post);
1b615d48
EC
1976}
1977
1978static float64 float64_addsub(float64 a, float64 b, float_status *s,
1979 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1980{
1981 return float64_gen2(a, b, s, hard, soft,
b240c9c4 1982 f64_is_zon2, f64_addsubmul_post);
1b615d48
EC
1983}
1984
1985float32 QEMU_FLATTEN
1986float32_add(float32 a, float32 b, float_status *s)
1987{
1988 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1989}
1990
1991float32 QEMU_FLATTEN
1992float32_sub(float32 a, float32 b, float_status *s)
1993{
1994 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1995}
1996
1997float64 QEMU_FLATTEN
1998float64_add(float64 a, float64 b, float_status *s)
1999{
2000 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
2001}
2002
2003float64 QEMU_FLATTEN
2004float64_sub(float64 a, float64 b, float_status *s)
2005{
2006 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
2007}
2008
42636fb9
RH
2009static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
2010 bool subtract)
2011{
2012 FloatParts64 pa, pb, *pr;
2013
2014 float64_unpack_canonical(&pa, a, status);
2015 float64_unpack_canonical(&pb, b, status);
2016 pr = parts_addsub(&pa, &pb, status, subtract);
2017
2018 return float64r32_round_pack_canonical(pr, status);
2019}
2020
2021float64 float64r32_add(float64 a, float64 b, float_status *status)
2022{
2023 return float64r32_addsub(a, b, status, false);
2024}
2025
2026float64 float64r32_sub(float64 a, float64 b, float_status *status)
2027{
2028 return float64r32_addsub(a, b, status, true);
2029}
2030
da10a907
RH
2031static bfloat16 QEMU_FLATTEN
2032bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
8282310d 2033{
da10a907 2034 FloatParts64 pa, pb, *pr;
98e256fc
RH
2035
2036 bfloat16_unpack_canonical(&pa, a, status);
2037 bfloat16_unpack_canonical(&pb, b, status);
da10a907 2038 pr = parts_addsub(&pa, &pb, status, subtract);
8282310d 2039
da10a907 2040 return bfloat16_round_pack_canonical(pr, status);
8282310d
LZ
2041}
2042
da10a907 2043bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
8282310d 2044{
da10a907
RH
2045 return bfloat16_addsub(a, b, status, false);
2046}
8282310d 2047
da10a907
RH
2048bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2049{
2050 return bfloat16_addsub(a, b, status, true);
8282310d
LZ
2051}
2052
3ff49e56
RH
2053static float128 QEMU_FLATTEN
2054float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2055{
2056 FloatParts128 pa, pb, *pr;
2057
2058 float128_unpack_canonical(&pa, a, status);
2059 float128_unpack_canonical(&pb, b, status);
2060 pr = parts_addsub(&pa, &pb, status, subtract);
2061
2062 return float128_round_pack_canonical(pr, status);
2063}
2064
2065float128 float128_add(float128 a, float128 b, float_status *status)
2066{
2067 return float128_addsub(a, b, status, false);
2068}
2069
2070float128 float128_sub(float128 a, float128 b, float_status *status)
2071{
2072 return float128_addsub(a, b, status, true);
2073}
2074
c1b6299b
RH
2075static floatx80 QEMU_FLATTEN
2076floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2077{
2078 FloatParts128 pa, pb, *pr;
2079
2080 if (!floatx80_unpack_canonical(&pa, a, status) ||
2081 !floatx80_unpack_canonical(&pb, b, status)) {
2082 return floatx80_default_nan(status);
2083 }
2084
2085 pr = parts_addsub(&pa, &pb, status, subtract);
2086 return floatx80_round_pack_canonical(pr, status);
2087}
2088
2089floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2090{
2091 return floatx80_addsub(a, b, status, false);
2092}
2093
2094floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2095{
2096 return floatx80_addsub(a, b, status, true);
2097}
2098
/*
 * Multiplication
 */
2102
97ff87c0 2103float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2 2104{
aca84527 2105 FloatParts64 pa, pb, *pr;
98e256fc
RH
2106
2107 float16_unpack_canonical(&pa, a, status);
2108 float16_unpack_canonical(&pb, b, status);
aca84527 2109 pr = parts_mul(&pa, &pb, status);
74d707e2 2110
aca84527 2111 return float16_round_pack_canonical(pr, status);
74d707e2
AB
2112}
2113
2dfabc86
EC
2114static float32 QEMU_SOFTFLOAT_ATTR
2115soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2 2116{
aca84527 2117 FloatParts64 pa, pb, *pr;
98e256fc
RH
2118
2119 float32_unpack_canonical(&pa, a, status);
2120 float32_unpack_canonical(&pb, b, status);
aca84527 2121 pr = parts_mul(&pa, &pb, status);
74d707e2 2122
aca84527 2123 return float32_round_pack_canonical(pr, status);
74d707e2
AB
2124}
2125
2dfabc86
EC
2126static float64 QEMU_SOFTFLOAT_ATTR
2127soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2 2128{
aca84527 2129 FloatParts64 pa, pb, *pr;
98e256fc
RH
2130
2131 float64_unpack_canonical(&pa, a, status);
2132 float64_unpack_canonical(&pb, b, status);
aca84527 2133 pr = parts_mul(&pa, &pb, status);
74d707e2 2134
aca84527 2135 return float64_round_pack_canonical(pr, status);
74d707e2
AB
2136}
2137
/* Host single-precision multiplication, the "hard" op for float32_mul(). */
static float hard_f32_mul(float a, float b)
{
    return a * b;
}
2142
/* Host double-precision multiplication, the "hard" op for float64_mul(). */
static double hard_f64_mul(double a, double b)
{
    return a * b;
}
2147
2dfabc86
EC
2148float32 QEMU_FLATTEN
2149float32_mul(float32 a, float32 b, float_status *s)
2150{
2151 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
b240c9c4 2152 f32_is_zon2, f32_addsubmul_post);
2dfabc86
EC
2153}
2154
2155float64 QEMU_FLATTEN
2156float64_mul(float64 a, float64 b, float_status *s)
2157{
2158 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
b240c9c4 2159 f64_is_zon2, f64_addsubmul_post);
2dfabc86
EC
2160}
2161
42636fb9
RH
2162float64 float64r32_mul(float64 a, float64 b, float_status *status)
2163{
2164 FloatParts64 pa, pb, *pr;
2165
2166 float64_unpack_canonical(&pa, a, status);
2167 float64_unpack_canonical(&pb, b, status);
2168 pr = parts_mul(&pa, &pb, status);
2169
2170 return float64r32_round_pack_canonical(pr, status);
2171}
2172
aca84527
RH
2173bfloat16 QEMU_FLATTEN
2174bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
8282310d 2175{
aca84527 2176 FloatParts64 pa, pb, *pr;
98e256fc
RH
2177
2178 bfloat16_unpack_canonical(&pa, a, status);
2179 bfloat16_unpack_canonical(&pb, b, status);
aca84527 2180 pr = parts_mul(&pa, &pb, status);
8282310d 2181
aca84527
RH
2182 return bfloat16_round_pack_canonical(pr, status);
2183}
2184
2185float128 QEMU_FLATTEN
2186float128_mul(float128 a, float128 b, float_status *status)
2187{
2188 FloatParts128 pa, pb, *pr;
2189
2190 float128_unpack_canonical(&pa, a, status);
2191 float128_unpack_canonical(&pb, b, status);
2192 pr = parts_mul(&pa, &pb, status);
2193
2194 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2195}
2196
153f664a
RH
2197floatx80 QEMU_FLATTEN
2198floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2199{
2200 FloatParts128 pa, pb, *pr;
2201
2202 if (!floatx80_unpack_canonical(&pa, a, status) ||
2203 !floatx80_unpack_canonical(&pb, b, status)) {
2204 return floatx80_default_nan(status);
2205 }
2206
2207 pr = parts_mul(&pa, &pb, status);
2208 return floatx80_round_pack_canonical(pr, status);
2209}
2210
/*
 * Fused multiply-add
 */
2214
97ff87c0 2215float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
dedd123c 2216 int flags, float_status *status)
d446830a 2217{
dedd123c 2218 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
2219
2220 float16_unpack_canonical(&pa, a, status);
2221 float16_unpack_canonical(&pb, b, status);
2222 float16_unpack_canonical(&pc, c, status);
dedd123c 2223 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 2224
dedd123c 2225 return float16_round_pack_canonical(pr, status);
d446830a
AB
2226}
2227
ccf770ba
EC
2228static float32 QEMU_SOFTFLOAT_ATTR
2229soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2230 float_status *status)
d446830a 2231{
dedd123c 2232 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
2233
2234 float32_unpack_canonical(&pa, a, status);
2235 float32_unpack_canonical(&pb, b, status);
2236 float32_unpack_canonical(&pc, c, status);
dedd123c 2237 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 2238
dedd123c 2239 return float32_round_pack_canonical(pr, status);
d446830a
AB
2240}
2241
ccf770ba
EC
2242static float64 QEMU_SOFTFLOAT_ATTR
2243soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2244 float_status *status)
d446830a 2245{
dedd123c 2246 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
2247
2248 float64_unpack_canonical(&pa, a, status);
2249 float64_unpack_canonical(&pb, b, status);
2250 float64_unpack_canonical(&pc, c, status);
dedd123c 2251 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 2252
dedd123c 2253 return float64_round_pack_canonical(pr, status);
d446830a
AB
2254}
2255
/*
 * When true, float32_muladd() and float64_muladd() skip the host fma path
 * and always use the software implementation.
 * NOTE(review): the code that sets this flag is outside this section.
 */
static bool force_soft_fma;
2257
ccf770ba
EC
2258float32 QEMU_FLATTEN
2259float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2260{
2261 union_float32 ua, ub, uc, ur;
2262
2263 ua.s = xa;
2264 ub.s = xb;
2265 uc.s = xc;
2266
2267 if (unlikely(!can_use_fpu(s))) {
2268 goto soft;
2269 }
2270 if (unlikely(flags & float_muladd_halve_result)) {
2271 goto soft;
2272 }
2273
2274 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2275 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2276 goto soft;
2277 }
f6b3b108
EC
2278
2279 if (unlikely(force_soft_fma)) {
2280 goto soft;
2281 }
2282
ccf770ba
EC
2283 /*
2284 * When (a || b) == 0, there's no need to check for under/over flow,
2285 * since we know the addend is (normal || 0) and the product is 0.
2286 */
2287 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2288 union_float32 up;
2289 bool prod_sign;
2290
2291 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2292 prod_sign ^= !!(flags & float_muladd_negate_product);
2293 up.s = float32_set_sign(float32_zero, prod_sign);
2294
2295 if (flags & float_muladd_negate_c) {
2296 uc.h = -uc.h;
2297 }
2298 ur.h = up.h + uc.h;
2299 } else {
896f51fb
KC
2300 union_float32 ua_orig = ua;
2301 union_float32 uc_orig = uc;
2302
ccf770ba
EC
2303 if (flags & float_muladd_negate_product) {
2304 ua.h = -ua.h;
2305 }
2306 if (flags & float_muladd_negate_c) {
2307 uc.h = -uc.h;
2308 }
2309
2310 ur.h = fmaf(ua.h, ub.h, uc.h);
2311
2312 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 2313 float_raise(float_flag_overflow, s);
ccf770ba 2314 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
2315 ua = ua_orig;
2316 uc = uc_orig;
ccf770ba
EC
2317 goto soft;
2318 }
2319 }
2320 if (flags & float_muladd_negate_result) {
2321 return float32_chs(ur.s);
2322 }
2323 return ur.s;
2324
2325 soft:
2326 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2327}
2328
2329float64 QEMU_FLATTEN
2330float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2331{
2332 union_float64 ua, ub, uc, ur;
2333
2334 ua.s = xa;
2335 ub.s = xb;
2336 uc.s = xc;
2337
2338 if (unlikely(!can_use_fpu(s))) {
2339 goto soft;
2340 }
2341 if (unlikely(flags & float_muladd_halve_result)) {
2342 goto soft;
2343 }
2344
2345 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2346 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2347 goto soft;
2348 }
f6b3b108
EC
2349
2350 if (unlikely(force_soft_fma)) {
2351 goto soft;
2352 }
2353
ccf770ba
EC
2354 /*
2355 * When (a || b) == 0, there's no need to check for under/over flow,
2356 * since we know the addend is (normal || 0) and the product is 0.
2357 */
2358 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2359 union_float64 up;
2360 bool prod_sign;
2361
2362 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2363 prod_sign ^= !!(flags & float_muladd_negate_product);
2364 up.s = float64_set_sign(float64_zero, prod_sign);
2365
2366 if (flags & float_muladd_negate_c) {
2367 uc.h = -uc.h;
2368 }
2369 ur.h = up.h + uc.h;
2370 } else {
896f51fb
KC
2371 union_float64 ua_orig = ua;
2372 union_float64 uc_orig = uc;
2373
ccf770ba
EC
2374 if (flags & float_muladd_negate_product) {
2375 ua.h = -ua.h;
2376 }
2377 if (flags & float_muladd_negate_c) {
2378 uc.h = -uc.h;
2379 }
2380
2381 ur.h = fma(ua.h, ub.h, uc.h);
2382
2383 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 2384 float_raise(float_flag_overflow, s);
ccf770ba 2385 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
2386 ua = ua_orig;
2387 uc = uc_orig;
ccf770ba
EC
2388 goto soft;
2389 }
2390 }
2391 if (flags & float_muladd_negate_result) {
2392 return float64_chs(ur.s);
2393 }
2394 return ur.s;
2395
2396 soft:
2397 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2398}
2399
42636fb9
RH
2400float64 float64r32_muladd(float64 a, float64 b, float64 c,
2401 int flags, float_status *status)
2402{
2403 FloatParts64 pa, pb, pc, *pr;
2404
2405 float64_unpack_canonical(&pa, a, status);
2406 float64_unpack_canonical(&pb, b, status);
2407 float64_unpack_canonical(&pc, c, status);
2408 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2409
2410 return float64r32_round_pack_canonical(pr, status);
2411}
2412
8282310d
LZ
2413bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2414 int flags, float_status *status)
2415{
dedd123c 2416 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
2417
2418 bfloat16_unpack_canonical(&pa, a, status);
2419 bfloat16_unpack_canonical(&pb, b, status);
2420 bfloat16_unpack_canonical(&pc, c, status);
dedd123c
RH
2421 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2422
2423 return bfloat16_round_pack_canonical(pr, status);
2424}
8282310d 2425
dedd123c
RH
2426float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2427 int flags, float_status *status)
2428{
2429 FloatParts128 pa, pb, pc, *pr;
2430
2431 float128_unpack_canonical(&pa, a, status);
2432 float128_unpack_canonical(&pb, b, status);
2433 float128_unpack_canonical(&pc, c, status);
2434 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2435
2436 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2437}
2438
/*
 * Division
 */
2442
cf07323d
AB
2443float16 float16_div(float16 a, float16 b, float_status *status)
2444{
ec961b81 2445 FloatParts64 pa, pb, *pr;
98e256fc
RH
2446
2447 float16_unpack_canonical(&pa, a, status);
2448 float16_unpack_canonical(&pb, b, status);
ec961b81 2449 pr = parts_div(&pa, &pb, status);
cf07323d 2450
ec961b81 2451 return float16_round_pack_canonical(pr, status);
cf07323d
AB
2452}
2453
4a629561
EC
2454static float32 QEMU_SOFTFLOAT_ATTR
2455soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d 2456{
ec961b81 2457 FloatParts64 pa, pb, *pr;
98e256fc
RH
2458
2459 float32_unpack_canonical(&pa, a, status);
2460 float32_unpack_canonical(&pb, b, status);
ec961b81 2461 pr = parts_div(&pa, &pb, status);
cf07323d 2462
ec961b81 2463 return float32_round_pack_canonical(pr, status);
cf07323d
AB
2464}
2465
4a629561
EC
2466static float64 QEMU_SOFTFLOAT_ATTR
2467soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d 2468{
ec961b81 2469 FloatParts64 pa, pb, *pr;
98e256fc
RH
2470
2471 float64_unpack_canonical(&pa, a, status);
2472 float64_unpack_canonical(&pb, b, status);
ec961b81 2473 pr = parts_div(&pa, &pb, status);
cf07323d 2474
ec961b81 2475 return float64_round_pack_canonical(pr, status);
cf07323d
AB
2476}
2477
/* Host single-precision division, the "hard" op for float32_div(). */
static float hard_f32_div(float a, float b)
{
    return a / b;
}
2482
/* Host double-precision division, the "hard" op for float64_div(). */
static double hard_f64_div(double a, double b)
{
    return a / b;
}
2487
2488static bool f32_div_pre(union_float32 a, union_float32 b)
2489{
2490 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2491 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2492 fpclassify(b.h) == FP_NORMAL;
2493 }
2494 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2495}
2496
2497static bool f64_div_pre(union_float64 a, union_float64 b)
2498{
2499 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2500 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2501 fpclassify(b.h) == FP_NORMAL;
2502 }
2503 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2504}
2505
2506static bool f32_div_post(union_float32 a, union_float32 b)
2507{
2508 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2509 return fpclassify(a.h) != FP_ZERO;
2510 }
2511 return !float32_is_zero(a.s);
2512}
2513
2514static bool f64_div_post(union_float64 a, union_float64 b)
2515{
2516 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2517 return fpclassify(a.h) != FP_ZERO;
2518 }
2519 return !float64_is_zero(a.s);
2520}
2521
2522float32 QEMU_FLATTEN
2523float32_div(float32 a, float32 b, float_status *s)
2524{
2525 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
b240c9c4 2526 f32_div_pre, f32_div_post);
4a629561
EC
2527}
2528
2529float64 QEMU_FLATTEN
2530float64_div(float64 a, float64 b, float_status *s)
2531{
2532 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
b240c9c4 2533 f64_div_pre, f64_div_post);
4a629561
EC
2534}
2535
42636fb9
RH
2536float64 float64r32_div(float64 a, float64 b, float_status *status)
2537{
2538 FloatParts64 pa, pb, *pr;
2539
2540 float64_unpack_canonical(&pa, a, status);
2541 float64_unpack_canonical(&pb, b, status);
2542 pr = parts_div(&pa, &pb, status);
2543
2544 return float64r32_round_pack_canonical(pr, status);
2545}
2546
ec961b81
RH
2547bfloat16 QEMU_FLATTEN
2548bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
8282310d 2549{
ec961b81 2550 FloatParts64 pa, pb, *pr;
98e256fc
RH
2551
2552 bfloat16_unpack_canonical(&pa, a, status);
2553 bfloat16_unpack_canonical(&pb, b, status);
ec961b81 2554 pr = parts_div(&pa, &pb, status);
8282310d 2555
ec961b81
RH
2556 return bfloat16_round_pack_canonical(pr, status);
2557}
2558
2559float128 QEMU_FLATTEN
2560float128_div(float128 a, float128 b, float_status *status)
2561{
2562 FloatParts128 pa, pb, *pr;
2563
2564 float128_unpack_canonical(&pa, a, status);
2565 float128_unpack_canonical(&pb, b, status);
2566 pr = parts_div(&pa, &pb, status);
2567
2568 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2569}
2570
38db99e2
RH
2571floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2572{
2573 FloatParts128 pa, pb, *pr;
2574
2575 if (!floatx80_unpack_canonical(&pa, a, status) ||
2576 !floatx80_unpack_canonical(&pb, b, status)) {
2577 return floatx80_default_nan(status);
2578 }
2579
2580 pr = parts_div(&pa, &pb, status);
2581 return floatx80_round_pack_canonical(pr, status);
2582}
2583
/*
 * Remainder
 */
2587
2588float32 float32_rem(float32 a, float32 b, float_status *status)
2589{
2590 FloatParts64 pa, pb, *pr;
2591
2592 float32_unpack_canonical(&pa, a, status);
2593 float32_unpack_canonical(&pb, b, status);
2594 pr = parts_modrem(&pa, &pb, NULL, status);
2595
2596 return float32_round_pack_canonical(pr, status);
2597}
2598
2599float64 float64_rem(float64 a, float64 b, float_status *status)
2600{
2601 FloatParts64 pa, pb, *pr;
2602
2603 float64_unpack_canonical(&pa, a, status);
2604 float64_unpack_canonical(&pb, b, status);
2605 pr = parts_modrem(&pa, &pb, NULL, status);
2606
2607 return float64_round_pack_canonical(pr, status);
2608}
2609
2610float128 float128_rem(float128 a, float128 b, float_status *status)
2611{
2612 FloatParts128 pa, pb, *pr;
2613
2614 float128_unpack_canonical(&pa, a, status);
2615 float128_unpack_canonical(&pb, b, status);
2616 pr = parts_modrem(&pa, &pb, NULL, status);
2617
2618 return float128_round_pack_canonical(pr, status);
2619}
2620
2621/*
2622 * Returns the remainder of the extended double-precision floating-point value
2623 * `a' with respect to the corresponding value `b'.
2624 * If 'mod' is false, the operation is performed according to the IEC/IEEE
2625 * Standard for Binary Floating-Point Arithmetic. If 'mod' is true, return
2626 * the remainder based on truncating the quotient toward zero instead and
2627 * *quotient is set to the low 64 bits of the absolute value of the integer
2628 * quotient.
2629 */
2630floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2631 uint64_t *quotient, float_status *status)
2632{
2633 FloatParts128 pa, pb, *pr;
2634
2635 *quotient = 0;
2636 if (!floatx80_unpack_canonical(&pa, a, status) ||
2637 !floatx80_unpack_canonical(&pb, b, status)) {
2638 return floatx80_default_nan(status);
2639 }
2640 pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2641
2642 return floatx80_round_pack_canonical(pr, status);
2643}
2644
2645floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2646{
2647 uint64_t quotient;
2648 return floatx80_modrem(a, b, false, &quotient, status);
2649}
2650
2651floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2652{
2653 uint64_t quotient;
2654 return floatx80_modrem(a, b, true, &quotient, status);
2655}
2656
/*
 * Float to Float conversions
 *
 * Returns the result of converting one float format to another. The
 * conversion is performed according to the IEC/IEEE Standard for
 * Binary Floating-Point Arithmetic.
 *
 * Usually this only needs to take care of raising invalid exceptions
 * and handling the conversion on NaNs.
 */
2667
c3f1875e
RH
2668static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2669{
2670 switch (a->cls) {
c3f1875e 2671 case float_class_snan:
e706d445
RH
2672 float_raise(float_flag_invalid_snan, s);
2673 /* fall through */
2674 case float_class_qnan:
c3f1875e
RH
2675 /*
2676 * There is no NaN in the destination format. Raise Invalid
2677 * and return a zero with the sign of the input NaN.
2678 */
2679 float_raise(float_flag_invalid, s);
2680 a->cls = float_class_zero;
2681 break;
2682
2683 case float_class_inf:
2684 /*
2685 * There is no Inf in the destination format. Raise Invalid
2686 * and return the maximum normal with the correct sign.
2687 */
2688 float_raise(float_flag_invalid, s);
2689 a->cls = float_class_normal;
2690 a->exp = float16_params_ahp.exp_max;
2691 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2692 float16_params_ahp.frac_size + 1);
2693 break;
2694
2695 case float_class_normal:
2696 case float_class_zero:
2697 break;
2698
2699 default:
2700 g_assert_not_reached();
2701 }
2702}
2703
2704static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2705{
2706 if (is_nan(a->cls)) {
2707 parts_return_nan(a, s);
6fed16b2 2708 }
6fed16b2
AB
2709}
2710
c3f1875e
RH
2711static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2712{
2713 if (is_nan(a->cls)) {
2714 parts_return_nan(a, s);
2715 }
2716}
2717
/* Select parts64_/parts128_float_to_float according to the type of PARTS. */
#define parts_float_to_float(PARTS, STATUS) \
    PARTS_GENERIC_64_128(float_to_float, PARTS)(PARTS, STATUS)
2720
9882ccaf
RH
2721static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2722 float_status *s)
2723{
2724 a->cls = b->cls;
2725 a->sign = b->sign;
2726 a->exp = b->exp;
2727
2728 if (a->cls == float_class_normal) {
2729 frac_truncjam(a, b);
2730 } else if (is_nan(a->cls)) {
2731 /* Discard the low bits of the NaN. */
2732 a->frac = b->frac_hi;
2733 parts_return_nan(a, s);
2734 }
2735}
2736
2737static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2738 float_status *s)
2739{
2740 a->cls = b->cls;
2741 a->sign = b->sign;
2742 a->exp = b->exp;
2743 frac_widen(a, b);
2744
2745 if (is_nan(a->cls)) {
2746 parts_return_nan(a, s);
2747 }
2748}
2749
6fed16b2
AB
2750float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2751{
2752 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2753 FloatParts64 p;
98e256fc 2754
c3f1875e
RH
2755 float16a_unpack_canonical(&p, a, s, fmt16);
2756 parts_float_to_float(&p, s);
2757 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2758}
2759
2760float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2761{
2762 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2763 FloatParts64 p;
98e256fc 2764
c3f1875e
RH
2765 float16a_unpack_canonical(&p, a, s, fmt16);
2766 parts_float_to_float(&p, s);
2767 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2768}
2769
2770float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2771{
c3f1875e
RH
2772 FloatParts64 p;
2773 const FloatFmt *fmt;
98e256fc 2774
c3f1875e
RH
2775 float32_unpack_canonical(&p, a, s);
2776 if (ieee) {
2777 parts_float_to_float(&p, s);
2778 fmt = &float16_params;
2779 } else {
2780 parts_float_to_ahp(&p, s);
2781 fmt = &float16_params_ahp;
2782 }
2783 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2784}
2785
21381dcf
MK
2786static float64 QEMU_SOFTFLOAT_ATTR
2787soft_float32_to_float64(float32 a, float_status *s)
6fed16b2 2788{
c3f1875e 2789 FloatParts64 p;
98e256fc 2790
c3f1875e
RH
2791 float32_unpack_canonical(&p, a, s);
2792 parts_float_to_float(&p, s);
2793 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2794}
2795
21381dcf
MK
2796float64 float32_to_float64(float32 a, float_status *s)
2797{
2798 if (likely(float32_is_normal(a))) {
2799 /* Widening conversion can never produce inexact results. */
2800 union_float32 uf;
2801 union_float64 ud;
2802 uf.s = a;
2803 ud.h = uf.h;
2804 return ud.s;
2805 } else if (float32_is_zero(a)) {
2806 return float64_set_sign(float64_zero, float32_is_neg(a));
2807 } else {
2808 return soft_float32_to_float64(a, s);
2809 }
2810}
2811
6fed16b2
AB
2812float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2813{
c3f1875e
RH
2814 FloatParts64 p;
2815 const FloatFmt *fmt;
98e256fc 2816
c3f1875e
RH
2817 float64_unpack_canonical(&p, a, s);
2818 if (ieee) {
2819 parts_float_to_float(&p, s);
2820 fmt = &float16_params;
2821 } else {
2822 parts_float_to_ahp(&p, s);
2823 fmt = &float16_params_ahp;
2824 }
2825 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2826}
2827
2828float32 float64_to_float32(float64 a, float_status *s)
2829{
c3f1875e 2830 FloatParts64 p;
98e256fc 2831
c3f1875e
RH
2832 float64_unpack_canonical(&p, a, s);
2833 parts_float_to_float(&p, s);
2834 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2835}
2836
34f0c0a9
LZ
2837float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2838{
c3f1875e 2839 FloatParts64 p;
98e256fc 2840
c3f1875e
RH
2841 bfloat16_unpack_canonical(&p, a, s);
2842 parts_float_to_float(&p, s);
2843 return float32_round_pack_canonical(&p, s);
34f0c0a9
LZ
2844}
2845
2846float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2847{
c3f1875e 2848 FloatParts64 p;
98e256fc 2849
c3f1875e
RH
2850 bfloat16_unpack_canonical(&p, a, s);
2851 parts_float_to_float(&p, s);
2852 return float64_round_pack_canonical(&p, s);
34f0c0a9
LZ
2853}
2854
2855bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2856{
c3f1875e 2857 FloatParts64 p;
98e256fc 2858
c3f1875e
RH
2859 float32_unpack_canonical(&p, a, s);
2860 parts_float_to_float(&p, s);
2861 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2862}
2863
2864bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2865{
c3f1875e 2866 FloatParts64 p;
98e256fc 2867
c3f1875e
RH
2868 float64_unpack_canonical(&p, a, s);
2869 parts_float_to_float(&p, s);
2870 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2871}
2872
9882ccaf
RH
2873float32 float128_to_float32(float128 a, float_status *s)
2874{
2875 FloatParts64 p64;
2876 FloatParts128 p128;
2877
2878 float128_unpack_canonical(&p128, a, s);
2879 parts_float_to_float_narrow(&p64, &p128, s);
2880 return float32_round_pack_canonical(&p64, s);
2881}
2882
2883float64 float128_to_float64(float128 a, float_status *s)
2884{
2885 FloatParts64 p64;
2886 FloatParts128 p128;
2887
2888 float128_unpack_canonical(&p128, a, s);
2889 parts_float_to_float_narrow(&p64, &p128, s);
2890 return float64_round_pack_canonical(&p64, s);
2891}
2892
2893float128 float32_to_float128(float32 a, float_status *s)
2894{
2895 FloatParts64 p64;
2896 FloatParts128 p128;
2897
2898 float32_unpack_canonical(&p64, a, s);
2899 parts_float_to_float_widen(&p128, &p64, s);
2900 return float128_round_pack_canonical(&p128, s);
2901}
2902
2903float128 float64_to_float128(float64 a, float_status *s)
2904{
2905 FloatParts64 p64;
2906 FloatParts128 p128;
2907
2908 float64_unpack_canonical(&p64, a, s);
2909 parts_float_to_float_widen(&p128, &p64, s);
2910 return float128_round_pack_canonical(&p128, s);
2911}
2912
8ae5719c
RH
2913float32 floatx80_to_float32(floatx80 a, float_status *s)
2914{
2915 FloatParts64 p64;
2916 FloatParts128 p128;
2917
2918 if (floatx80_unpack_canonical(&p128, a, s)) {
2919 parts_float_to_float_narrow(&p64, &p128, s);
2920 } else {
2921 parts_default_nan(&p64, s);
2922 }
2923 return float32_round_pack_canonical(&p64, s);
2924}
2925
2926float64 floatx80_to_float64(floatx80 a, float_status *s)
2927{
2928 FloatParts64 p64;
2929 FloatParts128 p128;
2930
2931 if (floatx80_unpack_canonical(&p128, a, s)) {
2932 parts_float_to_float_narrow(&p64, &p128, s);
2933 } else {
2934 parts_default_nan(&p64, s);
2935 }
2936 return float64_round_pack_canonical(&p64, s);
2937}
2938
2939float128 floatx80_to_float128(floatx80 a, float_status *s)
2940{
2941 FloatParts128 p;
2942
2943 if (floatx80_unpack_canonical(&p, a, s)) {
2944 parts_float_to_float(&p, s);
2945 } else {
2946 parts_default_nan(&p, s);
2947 }
2948 return float128_round_pack_canonical(&p, s);
2949}
2950
2951floatx80 float32_to_floatx80(float32 a, float_status *s)
2952{
2953 FloatParts64 p64;
2954 FloatParts128 p128;
2955
2956 float32_unpack_canonical(&p64, a, s);
2957 parts_float_to_float_widen(&p128, &p64, s);
2958 return floatx80_round_pack_canonical(&p128, s);
2959}
2960
2961floatx80 float64_to_floatx80(float64 a, float_status *s)
2962{
2963 FloatParts64 p64;
2964 FloatParts128 p128;
2965
2966 float64_unpack_canonical(&p64, a, s);
2967 parts_float_to_float_widen(&p128, &p64, s);
2968 return floatx80_round_pack_canonical(&p128, s);
2969}
2970
2971floatx80 float128_to_floatx80(float128 a, float_status *s)
2972{
2973 FloatParts128 p;
2974
2975 float128_unpack_canonical(&p, a, s);
2976 parts_float_to_float(&p, s);
2977 return floatx80_round_pack_canonical(&p, s);
2978}
2979
/*
 * Round to integral value
 */
2983
dbe4d53a
AB
2984float16 float16_round_to_int(float16 a, float_status *s)
2985{
afc34931 2986 FloatParts64 p;
98e256fc 2987
afc34931
RH
2988 float16_unpack_canonical(&p, a, s);
2989 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2990 return float16_round_pack_canonical(&p, s);
dbe4d53a
AB
2991}
2992
2993float32 float32_round_to_int(float32 a, float_status *s)
2994{
afc34931 2995 FloatParts64 p;
98e256fc 2996
afc34931
RH
2997 float32_unpack_canonical(&p, a, s);
2998 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2999 return float32_round_pack_canonical(&p, s);
dbe4d53a
AB
3000}
3001
3002float64 float64_round_to_int(float64 a, float_status *s)
3003{
afc34931 3004 FloatParts64 p;
98e256fc 3005
afc34931
RH
3006 float64_unpack_canonical(&p, a, s);
3007 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
3008 return float64_round_pack_canonical(&p, s);
dbe4d53a
AB
3009}
3010
34f0c0a9
LZ
3011bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
3012{
afc34931 3013 FloatParts64 p;
98e256fc 3014
afc34931
RH
3015 bfloat16_unpack_canonical(&p, a, s);
3016 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
3017 return bfloat16_round_pack_canonical(&p, s);
3018}
3019
3020float128 float128_round_to_int(float128 a, float_status *s)
3021{
3022 FloatParts128 p;
3023
3024 float128_unpack_canonical(&p, a, s);
3025 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3026 return float128_round_pack_canonical(&p, s);
34f0c0a9
LZ
3027}
3028
f9a95a78
RH
3029floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3030{
3031 FloatParts128 p;
3032
3033 if (!floatx80_unpack_canonical(&p, a, status)) {
3034 return floatx80_default_nan(status);
3035 }
3036
3037 parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3038 &floatx80_params[status->floatx80_rounding_precision]);
3039 return floatx80_round_pack_canonical(&p, status);
3040}
3041
/*
 * Floating-point to signed integer conversions
 */
ab52f973 3045
0d93d8ec
FC
3046int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3047 float_status *s)
3048{
98e256fc
RH
3049 FloatParts64 p;
3050
3051 float16_unpack_canonical(&p, a, s);
463b3f0d 3052 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
0d93d8ec
FC
3053}
3054
3dede407 3055int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3056 float_status *s)
3057{
98e256fc
RH
3058 FloatParts64 p;
3059
3060 float16_unpack_canonical(&p, a, s);
463b3f0d 3061 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
3062}
3063
3dede407 3064int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3065 float_status *s)
3066{
98e256fc
RH
3067 FloatParts64 p;
3068
3069 float16_unpack_canonical(&p, a, s);
463b3f0d 3070 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
3071}
3072
3dede407 3073int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3074 float_status *s)
3075{
98e256fc
RH
3076 FloatParts64 p;
3077
3078 float16_unpack_canonical(&p, a, s);
463b3f0d 3079 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
3080}
3081
3dede407 3082int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3083 float_status *s)
3084{
98e256fc
RH
3085 FloatParts64 p;
3086
3087 float32_unpack_canonical(&p, a, s);
463b3f0d 3088 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
3089}
3090
3dede407 3091int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3092 float_status *s)
3093{
98e256fc
RH
3094 FloatParts64 p;
3095
3096 float32_unpack_canonical(&p, a, s);
463b3f0d 3097 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
3098}
3099
3dede407 3100int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3101 float_status *s)
3102{
98e256fc
RH
3103 FloatParts64 p;
3104
3105 float32_unpack_canonical(&p, a, s);
463b3f0d 3106 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
3107}
3108
3dede407 3109int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3110 float_status *s)
3111{
98e256fc
RH
3112 FloatParts64 p;
3113
3114 float64_unpack_canonical(&p, a, s);
463b3f0d 3115 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
3116}
3117
3dede407 3118int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3119 float_status *s)
3120{
98e256fc
RH
3121 FloatParts64 p;
3122
3123 float64_unpack_canonical(&p, a, s);
463b3f0d 3124 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
3125}
3126
3dede407 3127int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3128 float_status *s)
3129{
98e256fc
RH
3130 FloatParts64 p;
3131
3132 float64_unpack_canonical(&p, a, s);
463b3f0d
RH
3133 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3134}
3135
00f9ef8f
LZ
3136int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3137 float_status *s)
3138{
3139 FloatParts64 p;
3140
3141 bfloat16_unpack_canonical(&p, a, s);
3142 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3143}
3144
463b3f0d
RH
3145int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3146 float_status *s)
3147{
3148 FloatParts64 p;
3149
3150 bfloat16_unpack_canonical(&p, a, s);
3151 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3152}
3153
3154int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3155 float_status *s)
3156{
3157 FloatParts64 p;
3158
3159 bfloat16_unpack_canonical(&p, a, s);
3160 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3161}
3162
3163int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3164 float_status *s)
3165{
3166 FloatParts64 p;
3167
3168 bfloat16_unpack_canonical(&p, a, s);
3169 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3170}
3171
3172static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3173 int scale, float_status *s)
3174{
3175 FloatParts128 p;
3176
3177 float128_unpack_canonical(&p, a, s);
3178 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3179}
3180
3181static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3182 int scale, float_status *s)
3183{
3184 FloatParts128 p;
3185
3186 float128_unpack_canonical(&p, a, s);
3187 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
3188}
3189
bea59230
MF
3190static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3191 int scale, float_status *s)
3192{
3193 int flags = 0;
3194 Int128 r;
3195 FloatParts128 p;
3196
3197 float128_unpack_canonical(&p, a, s);
3198
3199 switch (p.cls) {
3200 case float_class_snan:
3201 flags |= float_flag_invalid_snan;
3202 /* fall through */
3203 case float_class_qnan:
3204 flags |= float_flag_invalid;
3205 r = UINT128_MAX;
3206 break;
3207
3208 case float_class_inf:
3209 flags = float_flag_invalid | float_flag_invalid_cvti;
3210 r = p.sign ? INT128_MIN : INT128_MAX;
3211 break;
3212
3213 case float_class_zero:
3214 return int128_zero();
3215
3216 case float_class_normal:
3217 if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3218 flags = float_flag_inexact;
3219 }
3220
3221 if (p.exp < 127) {
3222 int shift = 127 - p.exp;
3223 r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3224 if (p.sign) {
3225 r = int128_neg(r);
3226 }
3227 } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3228 p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3229 r = INT128_MIN;
3230 } else {
3231 flags = float_flag_invalid | float_flag_invalid_cvti;
3232 r = p.sign ? INT128_MIN : INT128_MAX;
3233 }
3234 break;
3235
3236 default:
3237 g_assert_not_reached();
3238 }
3239
3240 float_raise(flags, s);
3241 return r;
3242}
3243
a1fc527b
RH
3244static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3245 int scale, float_status *s)
3246{
3247 FloatParts128 p;
3248
3249 if (!floatx80_unpack_canonical(&p, a, s)) {
3250 parts_default_nan(&p, s);
3251 }
3252 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3253}
3254
3255static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3256 int scale, float_status *s)
3257{
3258 FloatParts128 p;
3259
3260 if (!floatx80_unpack_canonical(&p, a, s)) {
3261 parts_default_nan(&p, s);
3262 }
3263 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3264}
3265
0d93d8ec
FC
3266int8_t float16_to_int8(float16 a, float_status *s)
3267{
3268 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3269}
3270
2f6c74be
RH
3271int16_t float16_to_int16(float16 a, float_status *s)
3272{
3273 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3274}
3275
3276int32_t float16_to_int32(float16 a, float_status *s)
3277{
3278 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3279}
3280
3281int64_t float16_to_int64(float16 a, float_status *s)
3282{
3283 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3284}
3285
3286int16_t float32_to_int16(float32 a, float_status *s)
3287{
3288 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3289}
3290
3291int32_t float32_to_int32(float32 a, float_status *s)
3292{
3293 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3294}
3295
3296int64_t float32_to_int64(float32 a, float_status *s)
3297{
3298 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3299}
3300
3301int16_t float64_to_int16(float64 a, float_status *s)
3302{
3303 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3304}
3305
3306int32_t float64_to_int32(float64 a, float_status *s)
3307{
3308 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3309}
3310
3311int64_t float64_to_int64(float64 a, float_status *s)
3312{
3313 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3314}
3315
463b3f0d
RH
3316int32_t float128_to_int32(float128 a, float_status *s)
3317{
3318 return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3319}
3320
3321int64_t float128_to_int64(float128 a, float_status *s)
3322{
3323 return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3324}
3325
bea59230
MF
3326Int128 float128_to_int128(float128 a, float_status *s)
3327{
3328 return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3329}
3330
a1fc527b
RH
3331int32_t floatx80_to_int32(floatx80 a, float_status *s)
3332{
3333 return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3334}
3335
3336int64_t floatx80_to_int64(floatx80 a, float_status *s)
3337{
3338 return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3339}
3340
2f6c74be
RH
3341int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3342{
3343 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3344}
3345
3346int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3347{
3348 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3349}
3350
3351int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3352{
3353 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
3354}
3355
2f6c74be
RH
3356int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3357{
3358 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3359}
ab52f973 3360
2f6c74be
RH
3361int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3362{
3363 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3364}
3365
3366int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3367{
3368 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3369}
3370
3371int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3372{
3373 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3374}
ab52f973 3375
2f6c74be
RH
3376int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3377{
3378 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3379}
ab52f973 3380
2f6c74be
RH
3381int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3382{
3383 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3384}
ab52f973 3385
463b3f0d 3386int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
34f0c0a9 3387{
463b3f0d 3388 return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
3389}
3390
463b3f0d 3391int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
34f0c0a9 3392{
463b3f0d 3393 return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
3394}
3395
bea59230
MF
3396Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3397{
3398 return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3399}
3400
a1fc527b
RH
3401int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3402{
3403 return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3404}
3405
3406int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3407{
3408 return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3409}
3410
00f9ef8f
LZ
3411int8_t bfloat16_to_int8(bfloat16 a, float_status *s)
3412{
3413 return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3414}
3415
34f0c0a9
LZ
3416int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3417{
3418 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3419}
3420
3421int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3422{
3423 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3424}
3425
3426int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3427{
3428 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3429}
3430
00f9ef8f
LZ
3431int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s)
3432{
3433 return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s);
3434}
3435
34f0c0a9
LZ
3436int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3437{
3438 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3439}
3440
3441int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3442{
3443 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3444}
3445
3446int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3447{
3448 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3449}
3450
e2041f4d
RH
3451int32_t float64_to_int32_modulo(float64 a, FloatRoundMode rmode,
3452 float_status *s)
3453{
3454 FloatParts64 p;
3455
3456 float64_unpack_canonical(&p, a, s);
3457 return parts_float_to_sint_modulo(&p, rmode, 31, s);
3458}
3459
3460int64_t float64_to_int64_modulo(float64 a, FloatRoundMode rmode,
3461 float_status *s)
3462{
3463 FloatParts64 p;
3464
3465 float64_unpack_canonical(&p, a, s);
3466 return parts_float_to_sint_modulo(&p, rmode, 63, s);
3467}
3468
/*
 * Floating-point to unsigned integer conversions
 */
3472
0d93d8ec
FC
3473uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3474 float_status *s)
3475{
98e256fc
RH
3476 FloatParts64 p;
3477
3478 float16_unpack_canonical(&p, a, s);
4ab4aef0 3479 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
0d93d8ec
FC
3480}
3481
3dede407 3482uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3483 float_status *s)
3484{
98e256fc
RH
3485 FloatParts64 p;
3486
3487 float16_unpack_canonical(&p, a, s);
4ab4aef0 3488 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
3489}
3490
3dede407 3491uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3492 float_status *s)
3493{
98e256fc
RH
3494 FloatParts64 p;
3495
3496 float16_unpack_canonical(&p, a, s);
4ab4aef0 3497 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
3498}
3499
3dede407 3500uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3501 float_status *s)
3502{
98e256fc
RH
3503 FloatParts64 p;
3504
3505 float16_unpack_canonical(&p, a, s);
4ab4aef0 3506 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
3507}
3508
3dede407 3509uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3510 float_status *s)
3511{
98e256fc
RH
3512 FloatParts64 p;
3513
3514 float32_unpack_canonical(&p, a, s);
4ab4aef0 3515 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
3516}
3517
3dede407 3518uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3519 float_status *s)
3520{
98e256fc
RH
3521 FloatParts64 p;
3522
3523 float32_unpack_canonical(&p, a, s);
4ab4aef0 3524 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
3525}
3526
3dede407 3527uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3528 float_status *s)
3529{
98e256fc
RH
3530 FloatParts64 p;
3531
3532 float32_unpack_canonical(&p, a, s);
4ab4aef0 3533 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
3534}
3535
3dede407 3536uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3537 float_status *s)
3538{
98e256fc
RH
3539 FloatParts64 p;
3540
3541 float64_unpack_canonical(&p, a, s);
4ab4aef0 3542 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
3543}
3544
3dede407 3545uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3546 float_status *s)
3547{
98e256fc
RH
3548 FloatParts64 p;
3549
3550 float64_unpack_canonical(&p, a, s);
4ab4aef0 3551 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
3552}
3553
3dede407 3554uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
3555 float_status *s)
3556{
98e256fc
RH
3557 FloatParts64 p;
3558
3559 float64_unpack_canonical(&p, a, s);
4ab4aef0
RH
3560 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3561}
3562
00f9ef8f
LZ
3563uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode,
3564 int scale, float_status *s)
3565{
3566 FloatParts64 p;
3567
3568 bfloat16_unpack_canonical(&p, a, s);
3569 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3570}
3571
4ab4aef0
RH
3572uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3573 int scale, float_status *s)
3574{
3575 FloatParts64 p;
3576
3577 bfloat16_unpack_canonical(&p, a, s);
3578 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3579}
3580
3581uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3582 int scale, float_status *s)
3583{
3584 FloatParts64 p;
3585
3586 bfloat16_unpack_canonical(&p, a, s);
3587 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3588}
3589
3590uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3591 int scale, float_status *s)
3592{
3593 FloatParts64 p;
3594
3595 bfloat16_unpack_canonical(&p, a, s);
3596 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3597}
3598
3599static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3600 int scale, float_status *s)
3601{
3602 FloatParts128 p;
3603
3604 float128_unpack_canonical(&p, a, s);
3605 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3606}
3607
3608static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3609 int scale, float_status *s)
3610{
3611 FloatParts128 p;
3612
3613 float128_unpack_canonical(&p, a, s);
3614 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
3615}
3616
4de49ddf
MF
3617static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3618 int scale, float_status *s)
3619{
3620 int flags = 0;
3621 Int128 r;
3622 FloatParts128 p;
3623
3624 float128_unpack_canonical(&p, a, s);
3625
3626 switch (p.cls) {
3627 case float_class_snan:
3628 flags |= float_flag_invalid_snan;
3629 /* fall through */
3630 case float_class_qnan:
3631 flags |= float_flag_invalid;
3632 r = UINT128_MAX;
3633 break;
3634
3635 case float_class_inf:
3636 flags = float_flag_invalid | float_flag_invalid_cvti;
3637 r = p.sign ? int128_zero() : UINT128_MAX;
3638 break;
3639
3640 case float_class_zero:
3641 return int128_zero();
3642
3643 case float_class_normal:
3644 if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3645 flags = float_flag_inexact;
3646 if (p.cls == float_class_zero) {
3647 r = int128_zero();
3648 break;
3649 }
3650 }
3651
3652 if (p.sign) {
3653 flags = float_flag_invalid | float_flag_invalid_cvti;
3654 r = int128_zero();
3655 } else if (p.exp <= 127) {
3656 int shift = 127 - p.exp;
3657 r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3658 } else {
3659 flags = float_flag_invalid | float_flag_invalid_cvti;
3660 r = UINT128_MAX;
3661 }
3662 break;
3663
3664 default:
3665 g_assert_not_reached();
3666 }
3667
3668 float_raise(flags, s);
3669 return r;
3670}
3671
0d93d8ec
FC
3672uint8_t float16_to_uint8(float16 a, float_status *s)
3673{
3674 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3675}
3676
2f6c74be
RH
3677uint16_t float16_to_uint16(float16 a, float_status *s)
3678{
3679 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3680}
3681
3682uint32_t float16_to_uint32(float16 a, float_status *s)
3683{
3684 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3685}
3686
3687uint64_t float16_to_uint64(float16 a, float_status *s)
3688{
3689 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3690}
3691
3692uint16_t float32_to_uint16(float32 a, float_status *s)
3693{
3694 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3695}
3696
3697uint32_t float32_to_uint32(float32 a, float_status *s)
3698{
3699 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3700}
3701
3702uint64_t float32_to_uint64(float32 a, float_status *s)
3703{
3704 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3705}
3706
3707uint16_t float64_to_uint16(float64 a, float_status *s)
3708{
3709 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3710}
3711
3712uint32_t float64_to_uint32(float64 a, float_status *s)
3713{
3714 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3715}
3716
3717uint64_t float64_to_uint64(float64 a, float_status *s)
3718{
3719 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3720}
3721
4ab4aef0
RH
3722uint32_t float128_to_uint32(float128 a, float_status *s)
3723{
3724 return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3725}
3726
3727uint64_t float128_to_uint64(float128 a, float_status *s)
3728{
3729 return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3730}
3731
4de49ddf
MF
3732Int128 float128_to_uint128(float128 a, float_status *s)
3733{
3734 return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3735}
3736
2f6c74be
RH
3737uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3738{
3739 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3740}
3741
3742uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3743{
3744 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3745}
3746
3747uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3748{
3749 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3750}
3751
3752uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3753{
3754 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3755}
3756
3757uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3758{
3759 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3760}
3761
3762uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3763{
3764 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3765}
3766
3767uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3768{
3769 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3770}
3771
3772uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3773{
3774 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3775}
3776
3777uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3778{
3779 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3780}
ab52f973 3781
4ab4aef0 3782uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
34f0c0a9 3783{
4ab4aef0 3784 return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
3785}
3786
4ab4aef0 3787uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
34f0c0a9 3788{
4ab4aef0 3789 return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
3790}
3791
4de49ddf
MF
3792Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3793{
3794 return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3795}
3796
00f9ef8f
LZ
3797uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s)
3798{
3799 return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3800}
3801
34f0c0a9
LZ
3802uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3803{
3804 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3805}
3806
3807uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3808{
3809 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3810}
3811
3812uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3813{
3814 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3815}
3816
00f9ef8f
LZ
3817uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s)
3818{
3819 return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s);
3820}
3821
34f0c0a9
LZ
3822uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3823{
3824 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3825}
3826
3827uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3828{
3829 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3830}
3831
3832uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3833{
3834 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3835}
3836
/*
 * Signed integer to floating-point conversions
 */
3840
2abdfe24 3841float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3842{
e3689519
RH
3843 FloatParts64 p;
3844
3845 parts_sint_to_float(&p, a, scale, status);
3846 return float16_round_pack_canonical(&p, status);
c02e1fb8
AB
3847}
3848
2abdfe24
RH
3849float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3850{
3851 return int64_to_float16_scalbn(a, scale, status);
3852}
3853
3854float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3855{
3856 return int64_to_float16_scalbn(a, scale, status);
3857}
3858
3859float16 int64_to_float16(int64_t a, float_status *status)
3860{
3861 return int64_to_float16_scalbn(a, 0, status);
3862}
3863
c02e1fb8
AB
3864float16 int32_to_float16(int32_t a, float_status *status)
3865{
2abdfe24 3866 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3867}
3868
3869float16 int16_to_float16(int16_t a, float_status *status)
3870{
2abdfe24 3871 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3872}
3873
0d93d8ec
FC
3874float16 int8_to_float16(int8_t a, float_status *status)
3875{
3876 return int64_to_float16_scalbn(a, 0, status);
3877}
3878
2abdfe24 3879float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3880{
e3689519
RH
3881 FloatParts64 p;
3882
5d0204b8
RH
3883 /* Without scaling, there are no overflow concerns. */
3884 if (likely(scale == 0) && can_use_fpu(status)) {
3885 union_float32 ur;
3886 ur.h = a;
3887 return ur.s;
3888 }
3889
e3689519
RH
3890 parts64_sint_to_float(&p, a, scale, status);
3891 return float32_round_pack_canonical(&p, status);
c02e1fb8
AB
3892}
3893
2abdfe24
RH
3894float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3895{
3896 return int64_to_float32_scalbn(a, scale, status);
3897}
3898
3899float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3900{
3901 return int64_to_float32_scalbn(a, scale, status);
3902}
3903
3904float32 int64_to_float32(int64_t a, float_status *status)
3905{
3906 return int64_to_float32_scalbn(a, 0, status);
3907}
3908
c02e1fb8
AB
3909float32 int32_to_float32(int32_t a, float_status *status)
3910{
2abdfe24 3911 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3912}
3913
3914float32 int16_to_float32(int16_t a, float_status *status)
3915{
2abdfe24 3916 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3917}
3918
2abdfe24 3919float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3920{
e3689519
RH
3921 FloatParts64 p;
3922
5d0204b8
RH
3923 /* Without scaling, there are no overflow concerns. */
3924 if (likely(scale == 0) && can_use_fpu(status)) {
3925 union_float64 ur;
3926 ur.h = a;
3927 return ur.s;
3928 }
3929
e3689519
RH
3930 parts_sint_to_float(&p, a, scale, status);
3931 return float64_round_pack_canonical(&p, status);
c02e1fb8
AB
3932}
3933
2abdfe24
RH
3934float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3935{
3936 return int64_to_float64_scalbn(a, scale, status);
3937}
3938
3939float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3940{
3941 return int64_to_float64_scalbn(a, scale, status);
3942}
3943
3944float64 int64_to_float64(int64_t a, float_status *status)
3945{
3946 return int64_to_float64_scalbn(a, 0, status);
3947}
3948
c02e1fb8
AB
3949float64 int32_to_float64(int32_t a, float_status *status)
3950{
2abdfe24 3951 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3952}
3953
3954float64 int16_to_float64(int16_t a, float_status *status)
3955{
2abdfe24 3956 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3957}
3958
34f0c0a9
LZ
3959bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3960{
e3689519
RH
3961 FloatParts64 p;
3962
3963 parts_sint_to_float(&p, a, scale, status);
3964 return bfloat16_round_pack_canonical(&p, status);
34f0c0a9
LZ
3965}
3966
3967bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3968{
3969 return int64_to_bfloat16_scalbn(a, scale, status);
3970}
3971
3972bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3973{
3974 return int64_to_bfloat16_scalbn(a, scale, status);
3975}
3976
00f9ef8f
LZ
3977bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status)
3978{
3979 return int64_to_bfloat16_scalbn(a, scale, status);
3980}
3981
34f0c0a9
LZ
3982bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3983{
3984 return int64_to_bfloat16_scalbn(a, 0, status);
3985}
3986
3987bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3988{
3989 return int64_to_bfloat16_scalbn(a, 0, status);
3990}
3991
3992bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3993{
3994 return int64_to_bfloat16_scalbn(a, 0, status);
3995}
c02e1fb8 3996
00f9ef8f
LZ
3997bfloat16 int8_to_bfloat16(int8_t a, float_status *status)
3998{
3999 return int64_to_bfloat16_scalbn(a, 0, status);
4000}
4001
95c1b71e
MF
4002float128 int128_to_float128(Int128 a, float_status *status)
4003{
4004 FloatParts128 p = { };
4005 int shift;
4006
4007 if (int128_nz(a)) {
4008 p.cls = float_class_normal;
4009 if (!int128_nonneg(a)) {
4010 p.sign = true;
4011 a = int128_neg(a);
4012 }
4013
4014 shift = clz64(int128_gethi(a));
4015 if (shift == 64) {
4016 shift += clz64(int128_getlo(a));
4017 }
4018
4019 p.exp = 127 - shift;
4020 a = int128_lshift(a, shift);
4021
4022 p.frac_hi = int128_gethi(a);
4023 p.frac_lo = int128_getlo(a);
4024 } else {
4025 p.cls = float_class_zero;
4026 }
4027
4028 return float128_round_pack_canonical(&p, status);
4029}
4030
e3689519
RH
4031float128 int64_to_float128(int64_t a, float_status *status)
4032{
4033 FloatParts128 p;
4034
4035 parts_sint_to_float(&p, a, 0, status);
4036 return float128_round_pack_canonical(&p, status);
4037}
4038
4039float128 int32_to_float128(int32_t a, float_status *status)
4040{
4041 return int64_to_float128(a, status);
4042}
4043
5f952900
RH
4044floatx80 int64_to_floatx80(int64_t a, float_status *status)
4045{
4046 FloatParts128 p;
4047
4048 parts_sint_to_float(&p, a, 0, status);
4049 return floatx80_round_pack_canonical(&p, status);
4050}
4051
4052floatx80 int32_to_floatx80(int32_t a, float_status *status)
4053{
4054 return int64_to_floatx80(a, status);
4055}
4056
/*
 * Unsigned integer to floating-point conversions
 */
4060
2abdfe24 4061float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 4062{
37c954a1
RH
4063 FloatParts64 p;
4064
4065 parts_uint_to_float(&p, a, scale, status);
4066 return float16_round_pack_canonical(&p, status);
c02e1fb8
AB
4067}
4068
2abdfe24
RH
4069float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
4070{
4071 return uint64_to_float16_scalbn(a, scale, status);
4072}
4073
4074float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
4075{
4076 return uint64_to_float16_scalbn(a, scale, status);
4077}
4078
4079float16 uint64_to_float16(uint64_t a, float_status *status)
4080{
4081 return uint64_to_float16_scalbn(a, 0, status);
4082}
4083
c02e1fb8
AB
4084float16 uint32_to_float16(uint32_t a, float_status *status)
4085{
2abdfe24 4086 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
4087}
4088
4089float16 uint16_to_float16(uint16_t a, float_status *status)
4090{
2abdfe24 4091 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
4092}
4093
0d93d8ec
FC
4094float16 uint8_to_float16(uint8_t a, float_status *status)
4095{
4096 return uint64_to_float16_scalbn(a, 0, status);
4097}
4098
2abdfe24 4099float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 4100{
37c954a1
RH
4101 FloatParts64 p;
4102
5d0204b8
RH
4103 /* Without scaling, there are no overflow concerns. */
4104 if (likely(scale == 0) && can_use_fpu(status)) {
4105 union_float32 ur;
4106 ur.h = a;
4107 return ur.s;
4108 }
4109
37c954a1
RH
4110 parts_uint_to_float(&p, a, scale, status);
4111 return float32_round_pack_canonical(&p, status);
c02e1fb8
AB
4112}
4113
2abdfe24
RH
4114float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4115{
4116 return uint64_to_float32_scalbn(a, scale, status);
4117}
4118
4119float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4120{
4121 return uint64_to_float32_scalbn(a, scale, status);
4122}
4123
4124float32 uint64_to_float32(uint64_t a, float_status *status)
4125{
4126 return uint64_to_float32_scalbn(a, 0, status);
4127}
4128
c02e1fb8
AB
4129float32 uint32_to_float32(uint32_t a, float_status *status)
4130{
2abdfe24 4131 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
4132}
4133
4134float32 uint16_to_float32(uint16_t a, float_status *status)
4135{
2abdfe24 4136 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
4137}
4138
2abdfe24 4139float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 4140{
37c954a1
RH
4141 FloatParts64 p;
4142
5d0204b8
RH
4143 /* Without scaling, there are no overflow concerns. */
4144 if (likely(scale == 0) && can_use_fpu(status)) {
4145 union_float64 ur;
4146 ur.h = a;
4147 return ur.s;
4148 }
4149
37c954a1
RH
4150 parts_uint_to_float(&p, a, scale, status);
4151 return float64_round_pack_canonical(&p, status);
c02e1fb8
AB
4152}
4153
2abdfe24
RH
4154float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4155{
4156 return uint64_to_float64_scalbn(a, scale, status);
4157}
4158
4159float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4160{
4161 return uint64_to_float64_scalbn(a, scale, status);
4162}
4163
4164float64 uint64_to_float64(uint64_t a, float_status *status)
4165{
4166 return uint64_to_float64_scalbn(a, 0, status);
4167}
4168
c02e1fb8
AB
4169float64 uint32_to_float64(uint32_t a, float_status *status)
4170{
2abdfe24 4171 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
4172}
4173
4174float64 uint16_to_float64(uint16_t a, float_status *status)
4175{
2abdfe24 4176 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
4177}
4178
34f0c0a9
LZ
4179bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4180{
37c954a1
RH
4181 FloatParts64 p;
4182
4183 parts_uint_to_float(&p, a, scale, status);
4184 return bfloat16_round_pack_canonical(&p, status);
34f0c0a9
LZ
4185}
4186
4187bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4188{
4189 return uint64_to_bfloat16_scalbn(a, scale, status);
4190}
4191
4192bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4193{
4194 return uint64_to_bfloat16_scalbn(a, scale, status);
4195}
4196
00f9ef8f
LZ
4197bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status)
4198{
4199 return uint64_to_bfloat16_scalbn(a, scale, status);
4200}
4201
34f0c0a9
LZ
4202bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4203{
4204 return uint64_to_bfloat16_scalbn(a, 0, status);
4205}
4206
4207bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4208{
4209 return uint64_to_bfloat16_scalbn(a, 0, status);
4210}
4211
4212bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4213{
4214 return uint64_to_bfloat16_scalbn(a, 0, status);
4215}
4216
00f9ef8f
LZ
4217bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status)
4218{
4219 return uint64_to_bfloat16_scalbn(a, 0, status);
4220}
4221
37c954a1
RH
4222float128 uint64_to_float128(uint64_t a, float_status *status)
4223{
4224 FloatParts128 p;
4225
4226 parts_uint_to_float(&p, a, 0, status);
4227 return float128_round_pack_canonical(&p, status);
4228}
4229
f279852b
MF
4230float128 uint128_to_float128(Int128 a, float_status *status)
4231{
4232 FloatParts128 p = { };
4233 int shift;
4234
4235 if (int128_nz(a)) {
4236 p.cls = float_class_normal;
4237
4238 shift = clz64(int128_gethi(a));
4239 if (shift == 64) {
4240 shift += clz64(int128_getlo(a));
4241 }
4242
4243 p.exp = 127 - shift;
4244 a = int128_lshift(a, shift);
4245
4246 p.frac_hi = int128_gethi(a);
4247 p.frac_lo = int128_getlo(a);
4248 } else {
4249 p.cls = float_class_zero;
4250 }
4251
4252 return float128_round_pack_canonical(&p, status);
4253}
4254
e1c4667a
RH
4255/*
4256 * Minimum and maximum
89360067 4257 */
89360067 4258
e1c4667a
RH
4259static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4260{
4261 FloatParts64 pa, pb, *pr;
89360067 4262
e1c4667a
RH
4263 float16_unpack_canonical(&pa, a, s);
4264 float16_unpack_canonical(&pb, b, s);
4265 pr = parts_minmax(&pa, &pb, s, flags);
4266
4267 return float16_round_pack_canonical(pr, s);
89360067
AB
4268}
4269
e1c4667a
RH
4270static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4271 float_status *s, int flags)
4272{
4273 FloatParts64 pa, pb, *pr;
4274
4275 bfloat16_unpack_canonical(&pa, a, s);
4276 bfloat16_unpack_canonical(&pb, b, s);
4277 pr = parts_minmax(&pa, &pb, s, flags);
4278
4279 return bfloat16_round_pack_canonical(pr, s);
4280}
4281
4282static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4283{
4284 FloatParts64 pa, pb, *pr;
4285
4286 float32_unpack_canonical(&pa, a, s);
4287 float32_unpack_canonical(&pb, b, s);
4288 pr = parts_minmax(&pa, &pb, s, flags);
4289
4290 return float32_round_pack_canonical(pr, s);
4291}
4292
4293static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4294{
4295 FloatParts64 pa, pb, *pr;
4296
4297 float64_unpack_canonical(&pa, a, s);
4298 float64_unpack_canonical(&pb, b, s);
4299 pr = parts_minmax(&pa, &pb, s, flags);
4300
4301 return float64_round_pack_canonical(pr, s);
4302}
4303
ceebc129
DH
4304static float128 float128_minmax(float128 a, float128 b,
4305 float_status *s, int flags)
4306{
4307 FloatParts128 pa, pb, *pr;
4308
4309 float128_unpack_canonical(&pa, a, s);
4310 float128_unpack_canonical(&pb, b, s);
4311 pr = parts_minmax(&pa, &pb, s, flags);
4312
4313 return float128_round_pack_canonical(pr, s);
4314}
4315
e1c4667a
RH
4316#define MINMAX_1(type, name, flags) \
4317 type type##_##name(type a, type b, float_status *s) \
4318 { return type##_minmax(a, b, s, flags); }
4319
4320#define MINMAX_2(type) \
0e903037
CMC
4321 MINMAX_1(type, max, 0) \
4322 MINMAX_1(type, maxnum, minmax_isnum) \
4323 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
4324 MINMAX_1(type, maximum_number, minmax_isnumber) \
4325 MINMAX_1(type, min, minmax_ismin) \
4326 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
4327 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4328 MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber) \
e1c4667a
RH
4329
4330MINMAX_2(float16)
4331MINMAX_2(bfloat16)
4332MINMAX_2(float32)
4333MINMAX_2(float64)
ceebc129 4334MINMAX_2(float128)
e1c4667a
RH
4335
4336#undef MINMAX_1
4337#undef MINMAX_2
8282310d 4338
6eb169b8
RH
4339/*
4340 * Floating point compare
4341 */
0c4c9092 4342
6eb169b8
RH
4343static FloatRelation QEMU_FLATTEN
4344float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4345{
4346 FloatParts64 pa, pb;
0c4c9092 4347
6eb169b8
RH
4348 float16_unpack_canonical(&pa, a, s);
4349 float16_unpack_canonical(&pb, b, s);
4350 return parts_compare(&pa, &pb, s, is_quiet);
0c4c9092
AB
4351}
4352
71bfd65c 4353FloatRelation float16_compare(float16 a, float16 b, float_status *s)
d9fe9db9 4354{
6eb169b8 4355 return float16_do_compare(a, b, s, false);
d9fe9db9
EC
4356}
4357
71bfd65c 4358FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
d9fe9db9 4359{
6eb169b8
RH
4360 return float16_do_compare(a, b, s, true);
4361}
4362
4363static FloatRelation QEMU_SOFTFLOAT_ATTR
4364float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4365{
4366 FloatParts64 pa, pb;
4367
4368 float32_unpack_canonical(&pa, a, s);
4369 float32_unpack_canonical(&pb, b, s);
4370 return parts_compare(&pa, &pb, s, is_quiet);
d9fe9db9
EC
4371}
4372
71bfd65c 4373static FloatRelation QEMU_FLATTEN
6eb169b8 4374float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
d9fe9db9
EC
4375{
4376 union_float32 ua, ub;
4377
4378 ua.s = xa;
4379 ub.s = xb;
4380
4381 if (QEMU_NO_HARDFLOAT) {
4382 goto soft;
4383 }
4384
4385 float32_input_flush2(&ua.s, &ub.s, s);
4386 if (isgreaterequal(ua.h, ub.h)) {
4387 if (isgreater(ua.h, ub.h)) {
4388 return float_relation_greater;
4389 }
4390 return float_relation_equal;
4391 }
4392 if (likely(isless(ua.h, ub.h))) {
4393 return float_relation_less;
4394 }
6eb169b8
RH
4395 /*
4396 * The only condition remaining is unordered.
d9fe9db9
EC
4397 * Fall through to set flags.
4398 */
4399 soft:
6eb169b8 4400 return float32_do_compare(ua.s, ub.s, s, is_quiet);
d9fe9db9
EC
4401}
4402
71bfd65c 4403FloatRelation float32_compare(float32 a, float32 b, float_status *s)
d9fe9db9 4404{
6eb169b8 4405 return float32_hs_compare(a, b, s, false);
d9fe9db9
EC
4406}
4407
71bfd65c 4408FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
d9fe9db9 4409{
6eb169b8
RH
4410 return float32_hs_compare(a, b, s, true);
4411}
4412
4413static FloatRelation QEMU_SOFTFLOAT_ATTR
4414float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4415{
4416 FloatParts64 pa, pb;
4417
4418 float64_unpack_canonical(&pa, a, s);
4419 float64_unpack_canonical(&pb, b, s);
4420 return parts_compare(&pa, &pb, s, is_quiet);
d9fe9db9
EC
4421}
4422
71bfd65c 4423static FloatRelation QEMU_FLATTEN
6eb169b8 4424float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
d9fe9db9
EC
4425{
4426 union_float64 ua, ub;
4427
4428 ua.s = xa;
4429 ub.s = xb;
4430
4431 if (QEMU_NO_HARDFLOAT) {
4432 goto soft;
4433 }
4434
4435 float64_input_flush2(&ua.s, &ub.s, s);
4436 if (isgreaterequal(ua.h, ub.h)) {
4437 if (isgreater(ua.h, ub.h)) {
4438 return float_relation_greater;
4439 }
4440 return float_relation_equal;
4441 }
4442 if (likely(isless(ua.h, ub.h))) {
4443 return float_relation_less;
4444 }
6eb169b8
RH
4445 /*
4446 * The only condition remaining is unordered.
d9fe9db9
EC
4447 * Fall through to set flags.
4448 */
4449 soft:
6eb169b8 4450 return float64_do_compare(ua.s, ub.s, s, is_quiet);
d9fe9db9
EC
4451}
4452
71bfd65c 4453FloatRelation float64_compare(float64 a, float64 b, float_status *s)
d9fe9db9 4454{
6eb169b8 4455 return float64_hs_compare(a, b, s, false);
d9fe9db9
EC
4456}
4457
71bfd65c 4458FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
d9fe9db9 4459{
6eb169b8 4460 return float64_hs_compare(a, b, s, true);
d9fe9db9
EC
4461}
4462
8282310d 4463static FloatRelation QEMU_FLATTEN
6eb169b8 4464bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
8282310d 4465{
98e256fc
RH
4466 FloatParts64 pa, pb;
4467
4468 bfloat16_unpack_canonical(&pa, a, s);
4469 bfloat16_unpack_canonical(&pb, b, s);
6eb169b8 4470 return parts_compare(&pa, &pb, s, is_quiet);
8282310d
LZ
4471}
4472
4473FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4474{
6eb169b8 4475 return bfloat16_do_compare(a, b, s, false);
8282310d
LZ
4476}
4477
4478FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4479{
6eb169b8
RH
4480 return bfloat16_do_compare(a, b, s, true);
4481}
4482
4483static FloatRelation QEMU_FLATTEN
4484float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4485{
4486 FloatParts128 pa, pb;
4487
4488 float128_unpack_canonical(&pa, a, s);
4489 float128_unpack_canonical(&pb, b, s);
4490 return parts_compare(&pa, &pb, s, is_quiet);
4491}
4492
4493FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4494{
4495 return float128_do_compare(a, b, s, false);
4496}
4497
4498FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4499{
4500 return float128_do_compare(a, b, s, true);
8282310d
LZ
4501}
4502
1b96b006
RH
4503static FloatRelation QEMU_FLATTEN
4504floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4505{
4506 FloatParts128 pa, pb;
4507
4508 if (!floatx80_unpack_canonical(&pa, a, s) ||
4509 !floatx80_unpack_canonical(&pb, b, s)) {
4510 return float_relation_unordered;
4511 }
4512 return parts_compare(&pa, &pb, s, is_quiet);
4513}
4514
4515FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4516{
4517 return floatx80_do_compare(a, b, s, false);
4518}
4519
4520FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4521{
4522 return floatx80_do_compare(a, b, s, true);
4523}
4524
39626b0c
RH
4525/*
4526 * Scale by 2**N
4527 */
0bfc9f19
AB
4528
4529float16 float16_scalbn(float16 a, int n, float_status *status)
4530{
39626b0c 4531 FloatParts64 p;
98e256fc 4532
39626b0c
RH
4533 float16_unpack_canonical(&p, a, status);
4534 parts_scalbn(&p, n, status);
4535 return float16_round_pack_canonical(&p, status);
0bfc9f19
AB
4536}
4537
4538float32 float32_scalbn(float32 a, int n, float_status *status)
4539{
39626b0c 4540 FloatParts64 p;
98e256fc 4541
39626b0c
RH
4542 float32_unpack_canonical(&p, a, status);
4543 parts_scalbn(&p, n, status);
4544 return float32_round_pack_canonical(&p, status);
0bfc9f19
AB
4545}
4546
4547float64 float64_scalbn(float64 a, int n, float_status *status)
4548{
39626b0c 4549 FloatParts64 p;
98e256fc 4550
39626b0c
RH
4551 float64_unpack_canonical(&p, a, status);
4552 parts_scalbn(&p, n, status);
4553 return float64_round_pack_canonical(&p, status);
0bfc9f19
AB
4554}
4555
8282310d
LZ
4556bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4557{
39626b0c 4558 FloatParts64 p;
98e256fc 4559
39626b0c
RH
4560 bfloat16_unpack_canonical(&p, a, status);
4561 parts_scalbn(&p, n, status);
4562 return bfloat16_round_pack_canonical(&p, status);
4563}
4564
4565float128 float128_scalbn(float128 a, int n, float_status *status)
4566{
4567 FloatParts128 p;
4568
4569 float128_unpack_canonical(&p, a, status);
4570 parts_scalbn(&p, n, status);
4571 return float128_round_pack_canonical(&p, status);
8282310d
LZ
4572}
4573
872e6991
RH
4574floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4575{
4576 FloatParts128 p;
4577
4578 if (!floatx80_unpack_canonical(&p, a, status)) {
4579 return floatx80_default_nan(status);
4580 }
4581 parts_scalbn(&p, n, status);
4582 return floatx80_round_pack_canonical(&p, status);
4583}
4584
c13bb2da
AB
4585/*
4586 * Square Root
c13bb2da
AB
4587 */
4588
97ff87c0 4589float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da 4590{
9261b245 4591 FloatParts64 p;
98e256fc 4592
9261b245
RH
4593 float16_unpack_canonical(&p, a, status);
4594 parts_sqrt(&p, status, &float16_params);
4595 return float16_round_pack_canonical(&p, status);
c13bb2da
AB
4596}
4597
f131bae8
EC
4598static float32 QEMU_SOFTFLOAT_ATTR
4599soft_f32_sqrt(float32 a, float_status *status)
c13bb2da 4600{
9261b245 4601 FloatParts64 p;
98e256fc 4602
9261b245
RH
4603 float32_unpack_canonical(&p, a, status);
4604 parts_sqrt(&p, status, &float32_params);
4605 return float32_round_pack_canonical(&p, status);
c13bb2da
AB
4606}
4607
f131bae8
EC
4608static float64 QEMU_SOFTFLOAT_ATTR
4609soft_f64_sqrt(float64 a, float_status *status)
c13bb2da 4610{
9261b245 4611 FloatParts64 p;
98e256fc 4612
9261b245
RH
4613 float64_unpack_canonical(&p, a, status);
4614 parts_sqrt(&p, status, &float64_params);
4615 return float64_round_pack_canonical(&p, status);
c13bb2da
AB
4616}
4617
f131bae8
EC
4618float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4619{
4620 union_float32 ua, ur;
4621
4622 ua.s = xa;
4623 if (unlikely(!can_use_fpu(s))) {
4624 goto soft;
4625 }
4626
4627 float32_input_flush1(&ua.s, s);
4628 if (QEMU_HARDFLOAT_1F32_USE_FP) {
4629 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4630 fpclassify(ua.h) == FP_ZERO) ||
4631 signbit(ua.h))) {
4632 goto soft;
4633 }
4634 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4635 float32_is_neg(ua.s))) {
4636 goto soft;
4637 }
4638 ur.h = sqrtf(ua.h);
4639 return ur.s;
4640
4641 soft:
4642 return soft_f32_sqrt(ua.s, s);
4643}
4644
4645float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4646{
4647 union_float64 ua, ur;
4648
4649 ua.s = xa;
4650 if (unlikely(!can_use_fpu(s))) {
4651 goto soft;
4652 }
4653
4654 float64_input_flush1(&ua.s, s);
4655 if (QEMU_HARDFLOAT_1F64_USE_FP) {
4656 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4657 fpclassify(ua.h) == FP_ZERO) ||
4658 signbit(ua.h))) {
4659 goto soft;
4660 }
4661 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4662 float64_is_neg(ua.s))) {
4663 goto soft;
4664 }
4665 ur.h = sqrt(ua.h);
4666 return ur.s;
4667
4668 soft:
4669 return soft_f64_sqrt(ua.s, s);
4670}
4671
42636fb9
RH
4672float64 float64r32_sqrt(float64 a, float_status *status)
4673{
4674 FloatParts64 p;
4675
4676 float64_unpack_canonical(&p, a, status);
4677 parts_sqrt(&p, status, &float64_params);
4678 return float64r32_round_pack_canonical(&p, status);
4679}
4680
8282310d
LZ
4681bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4682{
9261b245 4683 FloatParts64 p;
98e256fc 4684
9261b245
RH
4685 bfloat16_unpack_canonical(&p, a, status);
4686 parts_sqrt(&p, status, &bfloat16_params);
4687 return bfloat16_round_pack_canonical(&p, status);
4688}
4689
4690float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4691{
4692 FloatParts128 p;
4693
4694 float128_unpack_canonical(&p, a, status);
4695 parts_sqrt(&p, status, &float128_params);
4696 return float128_round_pack_canonical(&p, status);
8282310d
LZ
4697}
4698
aa5e19cc
RH
4699floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4700{
4701 FloatParts128 p;
4702
4703 if (!floatx80_unpack_canonical(&p, a, s)) {
4704 return floatx80_default_nan(s);
4705 }
4706 parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4707 return floatx80_round_pack_canonical(&p, s);
4708}
4709
2fa3546c
RH
4710/*
4711 * log2
4712 */
4713float32 float32_log2(float32 a, float_status *status)
4714{
4715 FloatParts64 p;
4716
4717 float32_unpack_canonical(&p, a, status);
4718 parts_log2(&p, status, &float32_params);
4719 return float32_round_pack_canonical(&p, status);
4720}
4721
4722float64 float64_log2(float64 a, float_status *status)
4723{
4724 FloatParts64 p;
4725
4726 float64_unpack_canonical(&p, a, status);
4727 parts_log2(&p, status, &float64_params);
4728 return float64_round_pack_canonical(&p, status);
4729}
4730
0218a16e
RH
4731/*----------------------------------------------------------------------------
4732| The pattern for a default generated NaN.
4733*----------------------------------------------------------------------------*/
4734
4735float16 float16_default_nan(float_status *status)
4736{
0fc07cad
RH
4737 FloatParts64 p;
4738
4739 parts_default_nan(&p, status);
0218a16e 4740 p.frac >>= float16_params.frac_shift;
71fd178e 4741 return float16_pack_raw(&p);
0218a16e
RH
4742}
4743
4744float32 float32_default_nan(float_status *status)
4745{
0fc07cad
RH
4746 FloatParts64 p;
4747
4748 parts_default_nan(&p, status);
0218a16e 4749 p.frac >>= float32_params.frac_shift;
71fd178e 4750 return float32_pack_raw(&p);
0218a16e
RH
4751}
4752
4753float64 float64_default_nan(float_status *status)
4754{
0fc07cad
RH
4755 FloatParts64 p;
4756
4757 parts_default_nan(&p, status);
0218a16e 4758 p.frac >>= float64_params.frac_shift;
71fd178e 4759 return float64_pack_raw(&p);
0218a16e
RH
4760}
4761
4762float128 float128_default_nan(float_status *status)
4763{
e9034ea8 4764 FloatParts128 p;
0218a16e 4765
0fc07cad 4766 parts_default_nan(&p, status);
e9034ea8
RH
4767 frac_shr(&p, float128_params.frac_shift);
4768 return float128_pack_raw(&p);
0218a16e 4769}
c13bb2da 4770
8282310d
LZ
4771bfloat16 bfloat16_default_nan(float_status *status)
4772{
0fc07cad
RH
4773 FloatParts64 p;
4774
4775 parts_default_nan(&p, status);
8282310d 4776 p.frac >>= bfloat16_params.frac_shift;
71fd178e 4777 return bfloat16_pack_raw(&p);
8282310d
LZ
4778}
4779
158142c2 4780/*----------------------------------------------------------------------------
377ed926
RH
4781| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4782*----------------------------------------------------------------------------*/
4783
4784float16 float16_silence_nan(float16 a, float_status *status)
4785{
3dddb203
RH
4786 FloatParts64 p;
4787
4788 float16_unpack_raw(&p, a);
377ed926 4789 p.frac <<= float16_params.frac_shift;
92ff426d 4790 parts_silence_nan(&p, status);
377ed926 4791 p.frac >>= float16_params.frac_shift;
71fd178e 4792 return float16_pack_raw(&p);
377ed926
RH
4793}
4794
4795float32 float32_silence_nan(float32 a, float_status *status)
4796{
3dddb203
RH
4797 FloatParts64 p;
4798
4799 float32_unpack_raw(&p, a);
377ed926 4800 p.frac <<= float32_params.frac_shift;
92ff426d 4801 parts_silence_nan(&p, status);
377ed926 4802 p.frac >>= float32_params.frac_shift;
71fd178e 4803 return float32_pack_raw(&p);
377ed926
RH
4804}
4805
4806float64 float64_silence_nan(float64 a, float_status *status)
4807{
3dddb203
RH
4808 FloatParts64 p;
4809
4810 float64_unpack_raw(&p, a);
377ed926 4811 p.frac <<= float64_params.frac_shift;
92ff426d 4812 parts_silence_nan(&p, status);
377ed926 4813 p.frac >>= float64_params.frac_shift;
71fd178e 4814 return float64_pack_raw(&p);
377ed926
RH
4815}
4816
8282310d
LZ
4817bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4818{
3dddb203
RH
4819 FloatParts64 p;
4820
4821 bfloat16_unpack_raw(&p, a);
8282310d 4822 p.frac <<= bfloat16_params.frac_shift;
92ff426d 4823 parts_silence_nan(&p, status);
8282310d 4824 p.frac >>= bfloat16_params.frac_shift;
71fd178e 4825 return bfloat16_pack_raw(&p);
8282310d 4826}
e6b405fe 4827
0018b1f4
RH
4828float128 float128_silence_nan(float128 a, float_status *status)
4829{
4830 FloatParts128 p;
4831
4832 float128_unpack_raw(&p, a);
4833 frac_shl(&p, float128_params.frac_shift);
4834 parts_silence_nan(&p, status);
4835 frac_shr(&p, float128_params.frac_shift);
4836 return float128_pack_raw(&p);
4837}
4838
e6b405fe
AB
4839/*----------------------------------------------------------------------------
4840| If `a' is denormal and we are in flush-to-zero mode then set the
4841| input-denormal exception and return zero. Otherwise just return the value.
4842*----------------------------------------------------------------------------*/
4843
f8155c1d 4844static bool parts_squash_denormal(FloatParts64 p, float_status *status)
e6b405fe
AB
4845{
4846 if (p.exp == 0 && p.frac != 0) {
4847 float_raise(float_flag_input_denormal, status);
4848 return true;
4849 }
4850
4851 return false;
4852}
4853
4854float16 float16_squash_input_denormal(float16 a, float_status *status)
4855{
4856 if (status->flush_inputs_to_zero) {
3dddb203
RH
4857 FloatParts64 p;
4858
4859 float16_unpack_raw(&p, a);
e6b405fe
AB
4860 if (parts_squash_denormal(p, status)) {
4861 return float16_set_sign(float16_zero, p.sign);
4862 }
4863 }
4864 return a;
4865}
4866
4867float32 float32_squash_input_denormal(float32 a, float_status *status)
4868{
4869 if (status->flush_inputs_to_zero) {
3dddb203
RH
4870 FloatParts64 p;
4871
4872 float32_unpack_raw(&p, a);
e6b405fe
AB
4873 if (parts_squash_denormal(p, status)) {
4874 return float32_set_sign(float32_zero, p.sign);
4875 }
4876 }
4877 return a;
4878}
4879
4880float64 float64_squash_input_denormal(float64 a, float_status *status)
4881{
4882 if (status->flush_inputs_to_zero) {
3dddb203
RH
4883 FloatParts64 p;
4884
4885 float64_unpack_raw(&p, a);
e6b405fe
AB
4886 if (parts_squash_denormal(p, status)) {
4887 return float64_set_sign(float64_zero, p.sign);
4888 }
4889 }
4890 return a;
4891}
4892
8282310d
LZ
4893bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4894{
4895 if (status->flush_inputs_to_zero) {
3dddb203
RH
4896 FloatParts64 p;
4897
4898 bfloat16_unpack_raw(&p, a);
8282310d
LZ
4899 if (parts_squash_denormal(p, status)) {
4900 return bfloat16_set_sign(bfloat16_zero, p.sign);
4901 }
4902 }
4903 return a;
4904}
4905
158142c2 4906/*----------------------------------------------------------------------------
feaf2e9c
RH
4907| Normalizes the subnormal extended double-precision floating-point value
4908| represented by the denormalized significand `aSig'. The normalized exponent
4909| and significand are stored at the locations pointed to by `zExpPtr' and
158142c2
FB
4910| `zSigPtr', respectively.
4911*----------------------------------------------------------------------------*/
4912
feaf2e9c
RH
4913void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4914 uint64_t *zSigPtr)
158142c2 4915{
8f506c70 4916 int8_t shiftCount;
158142c2 4917
feaf2e9c 4918 shiftCount = clz64(aSig);
158142c2
FB
4919 *zSigPtr = aSig<<shiftCount;
4920 *zExpPtr = 1 - shiftCount;
158142c2
FB
4921}
4922
158142c2
FB
4923/*----------------------------------------------------------------------------
4924| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
feaf2e9c
RH
4925| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4926| and returns the proper extended double-precision floating-point value
4927| corresponding to the abstract input. Ordinarily, the abstract value is
4928| rounded and packed into the extended double-precision format, with the
4929| inexact exception raised if the abstract input cannot be represented
158142c2
FB
4930| exactly. However, if the abstract value is too large, the overflow and
4931| inexact exceptions are raised and an infinity or maximal finite value is
4932| returned. If the abstract value is too small, the input value is rounded to
4933| a subnormal number, and the underflow and inexact exceptions are raised if
feaf2e9c
RH
4934| the abstract input cannot be represented exactly as a subnormal extended
4935| double-precision floating-point number.
4936| If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4937| the result is rounded to the same number of bits as single or double
4938| precision, respectively. Otherwise, the result is rounded to the full
4939| precision of the extended double-precision format.
4940| The input significand must be normalized or smaller. If the input
4941| significand is not normalized, `zExp' must be 0; in that case, the result
4942| returned is a subnormal number, and it must not require rounding. The
4943| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4944| Floating-Point Arithmetic.
158142c2
FB
4945*----------------------------------------------------------------------------*/
4946
feaf2e9c
RH
4947floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4948 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4949 float_status *status)
158142c2 4950{
feaf2e9c
RH
4951 FloatRoundMode roundingMode;
4952 bool roundNearestEven, increment, isTiny;
4953 int64_t roundIncrement, roundMask, roundBits;
158142c2 4954
a2f2d288 4955 roundingMode = status->float_rounding_mode;
158142c2 4956 roundNearestEven = ( roundingMode == float_round_nearest_even );
feaf2e9c
RH
4957 switch (roundingPrecision) {
4958 case floatx80_precision_x:
4959 goto precision80;
4960 case floatx80_precision_d:
4961 roundIncrement = UINT64_C(0x0000000000000400);
4962 roundMask = UINT64_C(0x00000000000007FF);
4963 break;
4964 case floatx80_precision_s:
4965 roundIncrement = UINT64_C(0x0000008000000000);
4966 roundMask = UINT64_C(0x000000FFFFFFFFFF);
4967 break;
4968 default:
4969 g_assert_not_reached();
4970 }
4971 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
4972 switch (roundingMode) {
4973 case float_round_nearest_even:
f9288a76 4974 case float_round_ties_away:
dc355b76
PM
4975 break;
4976 case float_round_to_zero:
4977 roundIncrement = 0;
4978 break;
4979 case float_round_up:
feaf2e9c 4980 roundIncrement = zSign ? 0 : roundMask;
dc355b76
PM
4981 break;
4982 case float_round_down:
feaf2e9c 4983 roundIncrement = zSign ? roundMask : 0;
5d64abb3 4984 break;
dc355b76
PM
4985 default:
4986 abort();
158142c2 4987 }
feaf2e9c
RH
4988 roundBits = zSig0 & roundMask;
4989 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4990 if ( ( 0x7FFE < zExp )
4991 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
158142c2 4992 ) {
feaf2e9c 4993 goto overflow;
158142c2 4994 }
feaf2e9c 4995 if ( zExp <= 0 ) {
a2f2d288 4996 if (status->flush_to_zero) {
ff32e16e 4997 float_raise(float_flag_output_denormal, status);
feaf2e9c 4998 return packFloatx80(zSign, 0, 0);
e6afc87f 4999 }
a828b373 5000 isTiny = status->tininess_before_rounding
feaf2e9c
RH
5001 || (zExp < 0 )
5002 || (zSig0 <= zSig0 + roundIncrement);
5003 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
158142c2 5004 zExp = 0;
feaf2e9c 5005 roundBits = zSig0 & roundMask;
ff32e16e
PM
5006 if (isTiny && roundBits) {
5007 float_raise(float_flag_underflow, status);
5008 }
feaf2e9c
RH
5009 if (roundBits) {
5010 float_raise(float_flag_inexact, status);
5011 }
5012 zSig0 += roundIncrement;
5013 if ( (int64_t) zSig0 < 0 ) zExp = 1;
5014 roundIncrement = roundMask + 1;
5015 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5016 roundMask |= roundIncrement;
5d64abb3 5017 }
feaf2e9c
RH
5018 zSig0 &= ~ roundMask;
5019 return packFloatx80( zSign, zExp, zSig0 );
158142c2
FB
5020 }
5021 }
a2f2d288 5022 if (roundBits) {
d82f3b2d 5023 float_raise(float_flag_inexact, status);
a2f2d288 5024 }
feaf2e9c
RH
5025 zSig0 += roundIncrement;
5026 if ( zSig0 < roundIncrement ) {
5027 ++zExp;
5028 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
5029 }
5030 roundIncrement = roundMask + 1;
5031 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
5032 roundMask |= roundIncrement;
5033 }
5034 zSig0 &= ~ roundMask;
5035 if ( zSig0 == 0 ) zExp = 0;
5036 return packFloatx80( zSign, zExp, zSig0 );
5037 precision80:
dc355b76
PM
5038 switch (roundingMode) {
5039 case float_round_nearest_even:
f9288a76 5040 case float_round_ties_away:
dc355b76
PM
5041 increment = ((int64_t)zSig1 < 0);
5042 break;
5043 case float_round_to_zero:
5044 increment = 0;
5045 break;
5046 case float_round_up:
5047 increment = !zSign && zSig1;
5048 break;
5049 case float_round_down:
5050 increment = zSign && zSig1;
5051 break;
5052 default:
5053 abort();
158142c2 5054 }
bb98fe42 5055 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
5056 if ( ( 0x7FFE < zExp )
5057 || ( ( zExp == 0x7FFE )
e9321124 5058 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
5059 && increment
5060 )
5061 ) {
5062 roundMask = 0;
5063 overflow:
ff32e16e 5064 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
5065 if ( ( roundingMode == float_round_to_zero )
5066 || ( zSign && ( roundingMode == float_round_up ) )
5067 || ( ! zSign && ( roundingMode == float_round_down ) )
5068 ) {
5069 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
5070 }
0f605c88
LV
5071 return packFloatx80(zSign,
5072 floatx80_infinity_high,
5073 floatx80_infinity_low);
158142c2
FB
5074 }
5075 if ( zExp <= 0 ) {
a828b373
RH
5076 isTiny = status->tininess_before_rounding
5077 || (zExp < 0)
5078 || !increment
5079 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
5080 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
5081 zExp = 0;
ff32e16e
PM
5082 if (isTiny && zSig1) {
5083 float_raise(float_flag_underflow, status);
5084 }
a2f2d288 5085 if (zSig1) {
d82f3b2d 5086 float_raise(float_flag_inexact, status);
a2f2d288 5087 }
dc355b76
PM
5088 switch (roundingMode) {
5089 case float_round_nearest_even:
f9288a76 5090 case float_round_ties_away:
dc355b76
PM
5091 increment = ((int64_t)zSig1 < 0);
5092 break;
5093 case float_round_to_zero:
5094 increment = 0;
5095 break;
5096 case float_round_up:
5097 increment = !zSign && zSig1;
5098 break;
5099 case float_round_down:
5100 increment = zSign && zSig1;
5101 break;
5102 default:
5103 abort();
158142c2
FB
5104 }
5105 if ( increment ) {
5106 ++zSig0;
40662886
PMD
5107 if (!(zSig1 << 1) && roundNearestEven) {
5108 zSig0 &= ~1;
5109 }
bb98fe42 5110 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
5111 }
5112 return packFloatx80( zSign, zExp, zSig0 );
5113 }
5114 }
a2f2d288 5115 if (zSig1) {
d82f3b2d 5116 float_raise(float_flag_inexact, status);
a2f2d288 5117 }
158142c2
FB
5118 if ( increment ) {
5119 ++zSig0;
5120 if ( zSig0 == 0 ) {
5121 ++zExp;
e9321124 5122 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
5123 }
5124 else {
40662886
PMD
5125 if (!(zSig1 << 1) && roundNearestEven) {
5126 zSig0 &= ~1;
5127 }
158142c2
FB
5128 }
5129 }
5130 else {
5131 if ( zSig0 == 0 ) zExp = 0;
5132 }
5133 return packFloatx80( zSign, zExp, zSig0 );
5134
5135}
5136
5137/*----------------------------------------------------------------------------
5138| Takes an abstract floating-point value having sign `zSign', exponent
5139| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5140| and returns the proper extended double-precision floating-point value
5141| corresponding to the abstract input. This routine is just like
5142| `roundAndPackFloatx80' except that the input significand does not have to be
5143| normalized.
5144*----------------------------------------------------------------------------*/
5145
8da5f1db 5146floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
c120391c 5147 bool zSign, int32_t zExp,
88857aca
LV
5148 uint64_t zSig0, uint64_t zSig1,
5149 float_status *status)
158142c2 5150{
8f506c70 5151 int8_t shiftCount;
158142c2
FB
5152
5153 if ( zSig0 == 0 ) {
5154 zSig0 = zSig1;
5155 zSig1 = 0;
5156 zExp -= 64;
5157 }
0019d5c3 5158 shiftCount = clz64(zSig0);
158142c2
FB
5159 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5160 zExp -= shiftCount;
ff32e16e
PM
5161 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5162 zSig0, zSig1, status);
158142c2
FB
5163
5164}
5165
8229c991
AJ
/*----------------------------------------------------------------------------
| Returns the binary exponential of the single-precision floating-point value
| `a'. The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
|
| Uses the following identities:
|
| 1. -------------------------------------------------------------------------
|      x    x*ln(2)
|     2  = e
|
| 2. -------------------------------------------------------------------------
|                      2     3     4     5           n
|      x        x     x     x     x     x           x
|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
|               1!    2!    3!    4!    5!          n!
*----------------------------------------------------------------------------*/
5183
5184static const float64 float32_exp2_coefficients[15] =
5185{
d5138cf4
PM
5186 const_float64( 0x3ff0000000000000ll ), /* 1 */
5187 const_float64( 0x3fe0000000000000ll ), /* 2 */
5188 const_float64( 0x3fc5555555555555ll ), /* 3 */
5189 const_float64( 0x3fa5555555555555ll ), /* 4 */
5190 const_float64( 0x3f81111111111111ll ), /* 5 */
5191 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5192 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5193 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5194 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5195 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5196 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5197 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5198 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5199 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5200 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
5201};
5202
e5a41ffa 5203float32 float32_exp2(float32 a, float_status *status)
8229c991 5204{
572c4d86 5205 FloatParts64 xp, xnp, tp, rp;
8229c991 5206 int i;
8229c991 5207
572c4d86
RH
5208 float32_unpack_canonical(&xp, a, status);
5209 if (unlikely(xp.cls != float_class_normal)) {
5210 switch (xp.cls) {
5211 case float_class_snan:
5212 case float_class_qnan:
5213 parts_return_nan(&xp, status);
5214 return float32_round_pack_canonical(&xp, status);
5215 case float_class_inf:
5216 return xp.sign ? float32_zero : a;
5217 case float_class_zero:
5218 return float32_one;
5219 default:
5220 break;
ff32e16e 5221 }
572c4d86 5222 g_assert_not_reached();
8229c991
AJ
5223 }
5224
ff32e16e 5225 float_raise(float_flag_inexact, status);
8229c991 5226
f291f45f 5227 float64_unpack_canonical(&tp, float64_ln2, status);
572c4d86
RH
5228 xp = *parts_mul(&xp, &tp, status);
5229 xnp = xp;
8229c991 5230
572c4d86 5231 float64_unpack_canonical(&rp, float64_one, status);
8229c991 5232 for (i = 0 ; i < 15 ; i++) {
572c4d86 5233 float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
1098cc3f 5234 rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
572c4d86 5235 xnp = *parts_mul(&xnp, &xp, status);
8229c991
AJ
5236 }
5237
572c4d86 5238 return float32_round_pack_canonical(&rp, status);
8229c991
AJ
5239}
5240
0f721292
LV
5241/*----------------------------------------------------------------------------
5242| Rounds the extended double-precision floating-point value `a'
5243| to the precision provided by floatx80_rounding_precision and returns the
5244| result as an extended double-precision floating-point value.
5245| The operation is performed according to the IEC/IEEE Standard for Binary
5246| Floating-Point Arithmetic.
5247*----------------------------------------------------------------------------*/
5248
5249floatx80 floatx80_round(floatx80 a, float_status *status)
5250{
45a76b71
RH
5251 FloatParts128 p;
5252
5253 if (!floatx80_unpack_canonical(&p, a, status)) {
5254 return floatx80_default_nan(status);
5255 }
5256 return floatx80_round_pack_canonical(&p, status);
0f721292
LV
5257}
5258
f6b3b108
EC
5259static void __attribute__((constructor)) softfloat_init(void)
5260{
5261 union_float64 ua, ub, uc, ur;
5262
5263 if (QEMU_NO_HARDFLOAT) {
5264 return;
5265 }
5266 /*
5267 * Test that the host's FMA is not obviously broken. For example,
5268 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5269 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5270 */
5271 ua.s = 0x0020000000000001ULL;
5272 ub.s = 0x3ca0000000000000ULL;
5273 uc.s = 0x0020000000000000ULL;
5274 ur.h = fma(ua.h, ub.h, uc.h);
5275 if (ur.s != 0x0020000000000001ULL) {
5276 force_soft_fma = true;
5277 }
5278}