]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
softfloat: Reduce FloatFmt
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
d82f3b2d 135 float_raise(float_flag_input_denormal, s); \
a94b7839
EC
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
185/*
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
189 */
190#if defined(__x86_64__)
191# define QEMU_HARDFLOAT_1F32_USE_FP 0
192# define QEMU_HARDFLOAT_1F64_USE_FP 1
193# define QEMU_HARDFLOAT_2F32_USE_FP 0
194# define QEMU_HARDFLOAT_2F64_USE_FP 1
195# define QEMU_HARDFLOAT_3F32_USE_FP 0
196# define QEMU_HARDFLOAT_3F64_USE_FP 1
197#else
198# define QEMU_HARDFLOAT_1F32_USE_FP 0
199# define QEMU_HARDFLOAT_1F64_USE_FP 0
200# define QEMU_HARDFLOAT_2F32_USE_FP 0
201# define QEMU_HARDFLOAT_2F64_USE_FP 0
202# define QEMU_HARDFLOAT_3F32_USE_FP 0
203# define QEMU_HARDFLOAT_3F64_USE_FP 0
204#endif
205
206/*
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211 */
212#if defined(__x86_64__) || defined(__aarch64__)
213# define QEMU_HARDFLOAT_USE_ISINF 1
214#else
215# define QEMU_HARDFLOAT_USE_ISINF 0
216#endif
217
218/*
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
221 * already set.
222 */
223#if defined(TARGET_PPC) || defined(__FAST_MATH__)
224# if defined(__FAST_MATH__)
225# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226 IEEE implementation
227# endif
228# define QEMU_NO_HARDFLOAT 1
229# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230#else
231# define QEMU_NO_HARDFLOAT 0
232# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
a94b7839
EC
342static inline float32
343float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
b240c9c4 345 f32_check_fn pre, f32_check_fn post)
a94b7839
EC
346{
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
a94b7839
EC
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 363 float_raise(float_flag_overflow, s);
b240c9c4
RH
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
a94b7839
EC
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371}
372
373static inline float64
374float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
b240c9c4 376 f64_check_fn pre, f64_check_fn post)
a94b7839
EC
377{
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
a94b7839
EC
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 394 float_raise(float_flag_overflow, s);
b240c9c4
RH
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
a94b7839
EC
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402}
403
d97544c9
AB
404/*----------------------------------------------------------------------------
405| Returns the fraction bits of the single-precision floating-point value `a'.
406*----------------------------------------------------------------------------*/
407
408static inline uint32_t extractFloat32Frac(float32 a)
409{
410 return float32_val(a) & 0x007FFFFF;
411}
412
413/*----------------------------------------------------------------------------
414| Returns the exponent bits of the single-precision floating-point value `a'.
415*----------------------------------------------------------------------------*/
416
417static inline int extractFloat32Exp(float32 a)
418{
419 return (float32_val(a) >> 23) & 0xFF;
420}
421
422/*----------------------------------------------------------------------------
423| Returns the sign bit of the single-precision floating-point value `a'.
424*----------------------------------------------------------------------------*/
425
c120391c 426static inline bool extractFloat32Sign(float32 a)
d97544c9
AB
427{
428 return float32_val(a) >> 31;
429}
430
431/*----------------------------------------------------------------------------
432| Returns the fraction bits of the double-precision floating-point value `a'.
433*----------------------------------------------------------------------------*/
434
435static inline uint64_t extractFloat64Frac(float64 a)
436{
e9321124 437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
d97544c9
AB
438}
439
440/*----------------------------------------------------------------------------
441| Returns the exponent bits of the double-precision floating-point value `a'.
442*----------------------------------------------------------------------------*/
443
444static inline int extractFloat64Exp(float64 a)
445{
446 return (float64_val(a) >> 52) & 0x7FF;
447}
448
449/*----------------------------------------------------------------------------
450| Returns the sign bit of the double-precision floating-point value `a'.
451*----------------------------------------------------------------------------*/
452
c120391c 453static inline bool extractFloat64Sign(float64 a)
d97544c9
AB
454{
455 return float64_val(a) >> 63;
456}
457
a90119b5
AB
/*
 * Classification of a floating point number.  The two NaN classes are
 * deliberately the highest values, so that cls >= float_class_qnan
 * matches any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified = 0,
    float_class_zero         = 1,
    float_class_normal       = 2,
    float_class_inf          = 3,
    float_class_qnan         = 4,  /* all NaNs from here */
    float_class_snan         = 5,
} FloatClass;

/* Bit mask with only the bit for one FloatClass value set. */
#define float_cmask(bit)  (1u << (bit))

enum {
    /* One mask per class. */
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    /* Unions of the masks above. */
    float_cmask_infzero = float_cmask_inf | float_cmask_zero,
    float_cmask_anynan  = float_cmask_snan | float_cmask_qnan,
};
484
e1c4667a
RH
/* Flags for parts_minmax; the values are distinct bits and may be OR-ed. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
};
134eda00 494
247d1f21
RH
495/* Simple helpers for checking if, or what kind of, NaN we have */
496static inline __attribute__((unused)) bool is_nan(FloatClass c)
497{
498 return unlikely(c >= float_class_qnan);
499}
500
501static inline __attribute__((unused)) bool is_snan(FloatClass c)
502{
503 return c == float_class_snan;
504}
505
506static inline __attribute__((unused)) bool is_qnan(FloatClass c)
507{
508 return c == float_class_qnan;
509}
510
a90119b5 511/*
0018b1f4
RH
512 * Structure holding all of the decomposed parts of a float.
513 * The exponent is unbiased and the fraction is normalized.
a90119b5 514 *
0018b1f4
RH
515 * The fraction words are stored in big-endian word ordering,
516 * so that truncation from a larger format to a smaller format
517 * can be done simply by ignoring subsequent elements.
a90119b5
AB
518 */
519
520typedef struct {
a90119b5
AB
521 FloatClass cls;
522 bool sign;
4109b9ea
RH
523 int32_t exp;
524 union {
525 /* Routines that know the structure may reference the singular name. */
526 uint64_t frac;
527 /*
528 * Routines expanded with multiple structures reference "hi" and "lo"
529 * depending on the operation. In FloatParts64, "hi" and "lo" are
530 * both the same word and aliased here.
531 */
532 uint64_t frac_hi;
533 uint64_t frac_lo;
534 };
f8155c1d 535} FloatParts64;
a90119b5 536
0018b1f4
RH
537typedef struct {
538 FloatClass cls;
539 bool sign;
540 int32_t exp;
541 uint64_t frac_hi;
542 uint64_t frac_lo;
543} FloatParts128;
544
aca84527
RH
545typedef struct {
546 FloatClass cls;
547 bool sign;
548 int32_t exp;
549 uint64_t frac_hi;
550 uint64_t frac_hm; /* high-middle */
551 uint64_t frac_lm; /* low-middle */
552 uint64_t frac_lo;
553} FloatParts256;
554
0018b1f4 555/* These apply to the most significant word of each FloatPartsN. */
e99c4373 556#define DECOMPOSED_BINARY_POINT 63
a90119b5 557#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
a90119b5
AB
558
/*
 * Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of fraction
 *   round_mask: bits below lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    uint64_t round_mask;
} FloatFmt;

/*
 * Expand fields based on the size of exponent (E) and fraction (F).
 * Every use of a macro argument is parenthesized so that expression
 * arguments such as FLOAT_PARAMS(8, 20 + 3) expand correctly.
 */
#define FLOAT_PARAMS_(E, F)                             \
    .exp_size       = (E),                              \
    .exp_bias       = ((1 << (E)) - 1) >> 1,            \
    .exp_max        = (1 << (E)) - 1,                   \
    .frac_size      = (F)

/*
 * As FLOAT_PARAMS_, plus the fraction-derived fields.
 * (-(F) - 1) & 63 is 63 - (F mod 64): the left shift that brings the top
 * of the fraction field up to bit 63 of its 64-bit word.
 */
#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E, F),                                \
    .frac_shift     = (-(F) - 1) & 63,                  \
    .round_mask     = (1ull << ((-(F) - 1) & 63)) - 1
a90119b5
AB
591
592static const FloatFmt float16_params = {
593 FLOAT_PARAMS(5, 10)
594};
595
6fed16b2
AB
596static const FloatFmt float16_params_ahp = {
597 FLOAT_PARAMS(5, 10),
598 .arm_althp = true
599};
600
8282310d
LZ
601static const FloatFmt bfloat16_params = {
602 FLOAT_PARAMS(8, 7)
603};
604
a90119b5
AB
605static const FloatFmt float32_params = {
606 FLOAT_PARAMS(8, 23)
607};
608
609static const FloatFmt float64_params = {
610 FLOAT_PARAMS(11, 52)
611};
612
0018b1f4
RH
613static const FloatFmt float128_params = {
614 FLOAT_PARAMS(15, 112)
615};
616
6fff2167 617/* Unpack a float to parts, but do not canonicalize. */
d8fdd172 618static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
6fff2167 619{
d8fdd172
RH
620 const int f_size = fmt->frac_size;
621 const int e_size = fmt->exp_size;
6fff2167 622
d8fdd172 623 *r = (FloatParts64) {
6fff2167 624 .cls = float_class_unclassified,
d8fdd172
RH
625 .sign = extract64(raw, f_size + e_size, 1),
626 .exp = extract64(raw, f_size, e_size),
627 .frac = extract64(raw, 0, f_size)
6fff2167
AB
628 };
629}
630
3dddb203 631static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
6fff2167 632{
3dddb203 633 unpack_raw64(p, &float16_params, f);
6fff2167
AB
634}
635
3dddb203 636static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
8282310d 637{
3dddb203 638 unpack_raw64(p, &bfloat16_params, f);
8282310d
LZ
639}
640
3dddb203 641static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
6fff2167 642{
3dddb203 643 unpack_raw64(p, &float32_params, f);
6fff2167
AB
644}
645
3dddb203 646static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
6fff2167 647{
3dddb203 648 unpack_raw64(p, &float64_params, f);
6fff2167
AB
649}
650
0018b1f4
RH
651static void float128_unpack_raw(FloatParts128 *p, float128 f)
652{
653 const int f_size = float128_params.frac_size - 64;
654 const int e_size = float128_params.exp_size;
655
656 *p = (FloatParts128) {
657 .cls = float_class_unclassified,
658 .sign = extract64(f.high, f_size + e_size, 1),
659 .exp = extract64(f.high, f_size, e_size),
660 .frac_hi = extract64(f.high, 0, f_size),
661 .frac_lo = f.low,
662 };
663}
664
6fff2167 665/* Pack a float from parts, but do not canonicalize. */
9e4af58c 666static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
6fff2167 667{
9e4af58c
RH
668 const int f_size = fmt->frac_size;
669 const int e_size = fmt->exp_size;
670 uint64_t ret;
671
672 ret = (uint64_t)p->sign << (f_size + e_size);
673 ret = deposit64(ret, f_size, e_size, p->exp);
674 ret = deposit64(ret, 0, f_size, p->frac);
675 return ret;
6fff2167
AB
676}
677
71fd178e 678static inline float16 float16_pack_raw(const FloatParts64 *p)
6fff2167 679{
71fd178e 680 return make_float16(pack_raw64(p, &float16_params));
6fff2167
AB
681}
682
71fd178e 683static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
8282310d 684{
71fd178e 685 return pack_raw64(p, &bfloat16_params);
8282310d
LZ
686}
687
71fd178e 688static inline float32 float32_pack_raw(const FloatParts64 *p)
6fff2167 689{
71fd178e 690 return make_float32(pack_raw64(p, &float32_params));
6fff2167
AB
691}
692
71fd178e 693static inline float64 float64_pack_raw(const FloatParts64 *p)
6fff2167 694{
71fd178e 695 return make_float64(pack_raw64(p, &float64_params));
6fff2167
AB
696}
697
0018b1f4
RH
698static float128 float128_pack_raw(const FloatParts128 *p)
699{
700 const int f_size = float128_params.frac_size - 64;
701 const int e_size = float128_params.exp_size;
702 uint64_t hi;
703
704 hi = (uint64_t)p->sign << (f_size + e_size);
705 hi = deposit64(hi, f_size, e_size, p->exp);
706 hi = deposit64(hi, 0, f_size, p->frac_hi);
707 return make_float128(hi, p->frac_lo);
708}
709
0664335a
RH
710/*----------------------------------------------------------------------------
711| Functions and definitions to determine: (1) whether tininess for underflow
712| is detected before or after rounding by default, (2) what (if anything)
713| happens when exceptions are raised, (3) how signaling NaNs are distinguished
714| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
715| are propagated from function inputs to output. These details are target-
716| specific.
717*----------------------------------------------------------------------------*/
139c1837 718#include "softfloat-specialize.c.inc"
0664335a 719
0018b1f4
RH
720#define PARTS_GENERIC_64_128(NAME, P) \
721 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
722
dedd123c
RH
723#define PARTS_GENERIC_64_128_256(NAME, P) \
724 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
725 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
726
e9034ea8 727#define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
0018b1f4
RH
728#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
729
7c45bad8
RH
730static void parts64_return_nan(FloatParts64 *a, float_status *s);
731static void parts128_return_nan(FloatParts128 *a, float_status *s);
732
733#define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
0018b1f4 734
22c355f4
RH
735static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
736 float_status *s);
737static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
738 float_status *s);
739
740#define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
741
979582d0
RH
742static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
743 FloatParts64 *c, float_status *s,
744 int ab_mask, int abc_mask);
745static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
746 FloatParts128 *b,
747 FloatParts128 *c,
748 float_status *s,
749 int ab_mask, int abc_mask);
750
751#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
752 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
753
d46975bc
RH
754static void parts64_canonicalize(FloatParts64 *p, float_status *status,
755 const FloatFmt *fmt);
756static void parts128_canonicalize(FloatParts128 *p, float_status *status,
757 const FloatFmt *fmt);
758
759#define parts_canonicalize(A, S, F) \
760 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
761
25fdedf0
RH
762static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
763 const FloatFmt *fmt);
764static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
765 const FloatFmt *fmt);
766
767#define parts_uncanon_normal(A, S, F) \
768 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
769
ee6959f2
RH
770static void parts64_uncanon(FloatParts64 *p, float_status *status,
771 const FloatFmt *fmt);
772static void parts128_uncanon(FloatParts128 *p, float_status *status,
773 const FloatFmt *fmt);
774
775#define parts_uncanon(A, S, F) \
776 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
777
da10a907
RH
778static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
779static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 780static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
781
782#define parts_add_normal(A, B) \
dedd123c 783 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
da10a907
RH
784
785static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
786static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 787static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
788
789#define parts_sub_normal(A, B) \
dedd123c 790 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
da10a907
RH
791
792static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
793 float_status *s, bool subtract);
794static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
795 float_status *s, bool subtract);
796
797#define parts_addsub(A, B, S, Z) \
798 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
799
aca84527
RH
800static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
801 float_status *s);
802static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
803 float_status *s);
804
805#define parts_mul(A, B, S) \
806 PARTS_GENERIC_64_128(mul, A)(A, B, S)
807
dedd123c
RH
808static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
809 FloatParts64 *c, int flags,
810 float_status *s);
811static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
812 FloatParts128 *c, int flags,
813 float_status *s);
814
815#define parts_muladd(A, B, C, Z, S) \
816 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
817
ec961b81
RH
818static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
819 float_status *s);
820static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
821 float_status *s);
822
823#define parts_div(A, B, S) \
824 PARTS_GENERIC_64_128(div, A)(A, B, S)
825
9261b245
RH
826static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
827static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
828
829#define parts_sqrt(A, S, F) \
830 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
831
afc34931
RH
832static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
833 int scale, int frac_size);
834static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
835 int scale, int frac_size);
836
837#define parts_round_to_int_normal(A, R, C, F) \
838 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
839
840static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
841 int scale, float_status *s,
842 const FloatFmt *fmt);
843static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
844 int scale, float_status *s,
845 const FloatFmt *fmt);
846
847#define parts_round_to_int(A, R, C, S, F) \
848 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
849
463b3f0d
RH
850static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
851 int scale, int64_t min, int64_t max,
852 float_status *s);
853static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
854 int scale, int64_t min, int64_t max,
855 float_status *s);
856
857#define parts_float_to_sint(P, R, Z, MN, MX, S) \
858 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
859
4ab4aef0
RH
860static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
861 int scale, uint64_t max,
862 float_status *s);
863static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
864 int scale, uint64_t max,
865 float_status *s);
866
867#define parts_float_to_uint(P, R, Z, M, S) \
868 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
869
e3689519
RH
870static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
871 int scale, float_status *s);
872static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
873 int scale, float_status *s);
874
875#define parts_sint_to_float(P, I, Z, S) \
876 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
877
37c954a1
RH
878static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
879 int scale, float_status *s);
880static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
881 int scale, float_status *s);
882
883#define parts_uint_to_float(P, I, Z, S) \
884 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
885
e1c4667a
RH
886static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
887 float_status *s, int flags);
888static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
889 float_status *s, int flags);
890
891#define parts_minmax(A, B, S, F) \
892 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
893
6eb169b8
RH
894static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
895 float_status *s, bool q);
896static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
897 float_status *s, bool q);
898
899#define parts_compare(A, B, S, Q) \
900 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
901
39626b0c
RH
902static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
903static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
904
905#define parts_scalbn(A, N, S) \
906 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
907
0018b1f4
RH
908/*
909 * Helper functions for softfloat-parts.c.inc, per-size operations.
910 */
911
22c355f4
RH
/*
 * Pick the frac<width>_<OP> helper that matches the type of PTR.
 * FloatParts128 * (and, for the _256 flavour, FloatParts256 *) are listed
 * explicitly; frac64_<OP> is the remaining choice handed to QEMU_GENERIC.
 */
#define FRAC_GENERIC_64_128(OP, PTR) \
    QEMU_GENERIC(PTR, (FloatParts128 *, frac128_##OP), frac64_##OP)

#define FRAC_GENERIC_64_128_256(OP, PTR) \
    QEMU_GENERIC(PTR, (FloatParts256 *, frac256_##OP), \
                 (FloatParts128 *, frac128_##OP), frac64_##OP)
918
da10a907
RH
919static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
920{
921 return uadd64_overflow(a->frac, b->frac, &r->frac);
922}
923
924static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
925{
926 bool c = 0;
927 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
928 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
929 return c;
930}
931
dedd123c
RH
932static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
933{
934 bool c = 0;
935 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
936 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
937 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
938 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
939 return c;
940}
941
942#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
da10a907 943
ee6959f2
RH
944static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
945{
946 return uadd64_overflow(a->frac, c, &r->frac);
947}
948
949static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
950{
951 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
952 return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
953}
954
955#define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
956
957static void frac64_allones(FloatParts64 *a)
958{
959 a->frac = -1;
960}
961
962static void frac128_allones(FloatParts128 *a)
963{
964 a->frac_hi = a->frac_lo = -1;
965}
966
967#define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
968
22c355f4
RH
969static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
970{
971 return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
972}
973
974static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
975{
976 uint64_t ta = a->frac_hi, tb = b->frac_hi;
977 if (ta == tb) {
978 ta = a->frac_lo, tb = b->frac_lo;
979 if (ta == tb) {
980 return 0;
981 }
982 }
983 return ta < tb ? -1 : 1;
984}
985
986#define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
987
d46975bc 988static void frac64_clear(FloatParts64 *a)
0018b1f4 989{
d46975bc
RH
990 a->frac = 0;
991}
992
993static void frac128_clear(FloatParts128 *a)
994{
995 a->frac_hi = a->frac_lo = 0;
0018b1f4
RH
996}
997
d46975bc 998#define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
0018b1f4 999
ec961b81
RH
1000static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
1001{
1002 uint64_t n1, n0, r, q;
1003 bool ret;
1004
1005 /*
1006 * We want a 2*N / N-bit division to produce exactly an N-bit
1007 * result, so that we do not lose any precision and so that we
1008 * do not have to renormalize afterward. If A.frac < B.frac,
1009 * then division would produce an (N-1)-bit result; shift A left
1010 * by one to produce the an N-bit result, and return true to
1011 * decrement the exponent to match.
1012 *
1013 * The udiv_qrnnd algorithm that we're using requires normalization,
1014 * i.e. the msb of the denominator must be set, which is already true.
1015 */
1016 ret = a->frac < b->frac;
1017 if (ret) {
1018 n0 = a->frac;
1019 n1 = 0;
1020 } else {
1021 n0 = a->frac >> 1;
1022 n1 = a->frac << 63;
1023 }
1024 q = udiv_qrnnd(&r, n0, n1, b->frac);
1025
1026 /* Set lsb if there is a remainder, to set inexact. */
1027 a->frac = q | (r != 0);
1028
1029 return ret;
1030}
1031
1032static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1033{
1034 uint64_t q0, q1, a0, a1, b0, b1;
1035 uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1036 bool ret = false;
1037
1038 a0 = a->frac_hi, a1 = a->frac_lo;
1039 b0 = b->frac_hi, b1 = b->frac_lo;
1040
1041 ret = lt128(a0, a1, b0, b1);
1042 if (!ret) {
1043 a1 = shr_double(a0, a1, 1);
1044 a0 = a0 >> 1;
1045 }
1046
1047 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1048 q0 = estimateDiv128To64(a0, a1, b0);
1049
1050 /*
1051 * Estimate is high because B1 was not included (unless B1 == 0).
1052 * Reduce quotient and increase remainder until remainder is non-negative.
1053 * This loop will execute 0 to 2 times.
1054 */
1055 mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1056 sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1057 while (r0 != 0) {
1058 q0--;
1059 add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1060 }
1061
1062 /* Repeat using the remainder, producing a second word of quotient. */
1063 q1 = estimateDiv128To64(r1, r2, b0);
1064 mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1065 sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1066 while (r1 != 0) {
1067 q1--;
1068 add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1069 }
1070
1071 /* Any remainder indicates inexact; set sticky bit. */
1072 q1 |= (r2 | r3) != 0;
1073
1074 a->frac_hi = q0;
1075 a->frac_lo = q1;
1076 return ret;
1077}
1078
1079#define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1080
d46975bc 1081static bool frac64_eqz(FloatParts64 *a)
0018b1f4 1082{
d46975bc
RH
1083 return a->frac == 0;
1084}
1085
1086static bool frac128_eqz(FloatParts128 *a)
1087{
1088 return (a->frac_hi | a->frac_lo) == 0;
0018b1f4
RH
1089}
1090
d46975bc 1091#define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
0fc07cad 1092
aca84527
RH
1093static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1094{
1095 mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1096}
1097
1098static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1099{
1100 mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1101 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1102}
1103
1104#define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1105
da10a907
RH
1106static void frac64_neg(FloatParts64 *a)
1107{
1108 a->frac = -a->frac;
1109}
1110
1111static void frac128_neg(FloatParts128 *a)
1112{
1113 bool c = 0;
1114 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1115 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1116}
1117
dedd123c
RH
1118static void frac256_neg(FloatParts256 *a)
1119{
1120 bool c = 0;
1121 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1122 a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1123 a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1124 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1125}
1126
1127#define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
da10a907 1128
d46975bc 1129static int frac64_normalize(FloatParts64 *a)
6fff2167 1130{
d46975bc
RH
1131 if (a->frac) {
1132 int shift = clz64(a->frac);
1133 a->frac <<= shift;
1134 return shift;
1135 }
1136 return 64;
1137}
1138
1139static int frac128_normalize(FloatParts128 *a)
1140{
1141 if (a->frac_hi) {
1142 int shl = clz64(a->frac_hi);
463e45dc
RH
1143 a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1144 a->frac_lo <<= shl;
d46975bc
RH
1145 return shl;
1146 } else if (a->frac_lo) {
1147 int shl = clz64(a->frac_lo);
463e45dc 1148 a->frac_hi = a->frac_lo << shl;
d46975bc
RH
1149 a->frac_lo = 0;
1150 return shl + 64;
6fff2167 1151 }
d46975bc 1152 return 128;
6fff2167
AB
1153}
1154
dedd123c
RH
1155static int frac256_normalize(FloatParts256 *a)
1156{
1157 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1158 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
463e45dc 1159 int ret, shl;
dedd123c
RH
1160
1161 if (likely(a0)) {
1162 shl = clz64(a0);
1163 if (shl == 0) {
1164 return 0;
1165 }
1166 ret = shl;
1167 } else {
1168 if (a1) {
1169 ret = 64;
1170 a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1171 } else if (a2) {
1172 ret = 128;
1173 a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1174 } else if (a3) {
1175 ret = 192;
1176 a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1177 } else {
1178 ret = 256;
1179 a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1180 goto done;
1181 }
1182 shl = clz64(a0);
1183 if (shl == 0) {
1184 goto done;
1185 }
1186 ret += shl;
1187 }
1188
463e45dc
RH
1189 a0 = shl_double(a0, a1, shl);
1190 a1 = shl_double(a1, a2, shl);
1191 a2 = shl_double(a2, a3, shl);
1192 a3 <<= shl;
dedd123c
RH
1193
1194 done:
1195 a->frac_hi = a0;
1196 a->frac_hm = a1;
1197 a->frac_lm = a2;
1198 a->frac_lo = a3;
1199 return ret;
1200}
1201
1202#define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
d46975bc
RH
1203
1204static void frac64_shl(FloatParts64 *a, int c)
1205{
1206 a->frac <<= c;
1207}
1208
1209static void frac128_shl(FloatParts128 *a, int c)
1210{
463e45dc
RH
1211 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1212
1213 if (c & 64) {
1214 a0 = a1, a1 = 0;
1215 }
1216
1217 c &= 63;
1218 if (c) {
1219 a0 = shl_double(a0, a1, c);
1220 a1 = a1 << c;
1221 }
1222
1223 a->frac_hi = a0;
1224 a->frac_lo = a1;
d46975bc
RH
1225}
1226
1227#define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1228
1229static void frac64_shr(FloatParts64 *a, int c)
1230{
1231 a->frac >>= c;
1232}
1233
1234static void frac128_shr(FloatParts128 *a, int c)
1235{
463e45dc
RH
1236 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1237
1238 if (c & 64) {
1239 a1 = a0, a0 = 0;
1240 }
1241
1242 c &= 63;
1243 if (c) {
1244 a1 = shr_double(a0, a1, c);
1245 a0 = a0 >> c;
1246 }
1247
1248 a->frac_hi = a0;
1249 a->frac_lo = a1;
d46975bc
RH
1250}
1251
1252#define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1253
ee6959f2 1254static void frac64_shrjam(FloatParts64 *a, int c)
6fff2167 1255{
463e45dc
RH
1256 uint64_t a0 = a->frac;
1257
1258 if (likely(c != 0)) {
1259 if (likely(c < 64)) {
1260 a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1261 } else {
1262 a0 = a0 != 0;
1263 }
1264 a->frac = a0;
1265 }
ee6959f2 1266}
6fff2167 1267
ee6959f2
RH
1268static void frac128_shrjam(FloatParts128 *a, int c)
1269{
463e45dc
RH
1270 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1271 uint64_t sticky = 0;
1272
1273 if (unlikely(c == 0)) {
1274 return;
1275 } else if (likely(c < 64)) {
1276 /* nothing */
1277 } else if (likely(c < 128)) {
1278 sticky = a1;
1279 a1 = a0;
1280 a0 = 0;
1281 c &= 63;
1282 if (c == 0) {
1283 goto done;
1284 }
1285 } else {
1286 sticky = a0 | a1;
1287 a0 = a1 = 0;
1288 goto done;
1289 }
1290
1291 sticky |= shr_double(a1, 0, c);
1292 a1 = shr_double(a0, a1, c);
1293 a0 = a0 >> c;
1294
1295 done:
1296 a->frac_lo = a1 | (sticky != 0);
1297 a->frac_hi = a0;
6fff2167
AB
1298}
1299
dedd123c
RH
1300static void frac256_shrjam(FloatParts256 *a, int c)
1301{
1302 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1303 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1304 uint64_t sticky = 0;
dedd123c
RH
1305
1306 if (unlikely(c == 0)) {
1307 return;
1308 } else if (likely(c < 64)) {
1309 /* nothing */
1310 } else if (likely(c < 256)) {
1311 if (unlikely(c & 128)) {
1312 sticky |= a2 | a3;
1313 a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1314 }
1315 if (unlikely(c & 64)) {
1316 sticky |= a3;
1317 a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1318 }
1319 c &= 63;
1320 if (c == 0) {
1321 goto done;
1322 }
1323 } else {
1324 sticky = a0 | a1 | a2 | a3;
1325 a0 = a1 = a2 = a3 = 0;
1326 goto done;
1327 }
1328
463e45dc
RH
1329 sticky |= shr_double(a3, 0, c);
1330 a3 = shr_double(a2, a3, c);
1331 a2 = shr_double(a1, a2, c);
1332 a1 = shr_double(a0, a1, c);
1333 a0 = a0 >> c;
dedd123c
RH
1334
1335 done:
1336 a->frac_lo = a3 | (sticky != 0);
1337 a->frac_lm = a2;
1338 a->frac_hm = a1;
1339 a->frac_hi = a0;
1340}
1341
1342#define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
d446830a 1343
da10a907
RH
1344static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1345{
1346 return usub64_overflow(a->frac, b->frac, &r->frac);
1347}
7c45bad8 1348
da10a907
RH
1349static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1350{
1351 bool c = 0;
1352 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1353 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1354 return c;
1355}
1356
dedd123c
RH
1357static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1358{
1359 bool c = 0;
1360 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1361 r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1362 r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1363 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1364 return c;
1365}
1366
1367#define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
da10a907 1368
aca84527
RH
1369static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1370{
1371 r->frac = a->frac_hi | (a->frac_lo != 0);
1372}
1373
1374static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1375{
1376 r->frac_hi = a->frac_hi;
1377 r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1378}
1379
1380#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1381
dedd123c
RH
1382static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1383{
1384 r->frac_hi = a->frac;
1385 r->frac_lo = 0;
1386}
1387
1388static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1389{
1390 r->frac_hi = a->frac_hi;
1391 r->frac_hm = a->frac_lo;
1392 r->frac_lm = 0;
1393 r->frac_lo = 0;
1394}
1395
1396#define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1397
9261b245
RH
1398/*
1399 * Reciprocal sqrt table. 1 bit of exponent, 6 bits of mantissa.
1400 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1401 * and thus MIT licenced.
1402 */
/* 128 entries; the trailing comment on each row gives its first index. */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43, /*   0 */
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b, /*   8 */
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1, /*  16 */
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430, /*  24 */
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59, /*  32 */
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925, /*  40 */
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479, /*  48 */
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040, /*  56 */
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234, /*  64 */
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2, /*  72 */
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1, /*  80 */
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192, /*  88 */
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f, /*  96 */
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4, /* 104 */
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59, /* 112 */
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560, /* 120 */
};
1421
da10a907
RH
1422#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1423#define FloatPartsN glue(FloatParts,N)
aca84527 1424#define FloatPartsW glue(FloatParts,W)
da10a907
RH
1425
1426#define N 64
aca84527 1427#define W 128
da10a907
RH
1428
1429#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1430#include "softfloat-parts.c.inc"
1431
da10a907 1432#undef N
aca84527 1433#undef W
da10a907 1434#define N 128
aca84527 1435#define W 256
7c45bad8 1436
da10a907 1437#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1438#include "softfloat-parts.c.inc"
1439
dedd123c
RH
1440#undef N
1441#undef W
1442#define N 256
1443
1444#include "softfloat-parts-addsub.c.inc"
1445
da10a907 1446#undef N
aca84527 1447#undef W
7c45bad8
RH
1448#undef partsN
1449#undef FloatPartsN
aca84527 1450#undef FloatPartsW
7c45bad8 1451
aaffb7bf
RH
1452/*
1453 * Pack/unpack routines with a specific FloatFmt.
1454 */
1455
98e256fc
RH
1456static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1457 float_status *s, const FloatFmt *params)
aaffb7bf 1458{
98e256fc 1459 float16_unpack_raw(p, f);
d46975bc 1460 parts_canonicalize(p, s, params);
aaffb7bf
RH
1461}
1462
98e256fc
RH
1463static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1464 float_status *s)
aaffb7bf 1465{
98e256fc 1466 float16a_unpack_canonical(p, f, s, &float16_params);
aaffb7bf
RH
1467}
1468
98e256fc
RH
1469static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1470 float_status *s)
aaffb7bf 1471{
98e256fc 1472 bfloat16_unpack_raw(p, f);
d46975bc 1473 parts_canonicalize(p, s, &bfloat16_params);
aaffb7bf
RH
1474}
1475
e293e927
RH
1476static float16 float16a_round_pack_canonical(FloatParts64 *p,
1477 float_status *s,
aaffb7bf
RH
1478 const FloatFmt *params)
1479{
ee6959f2 1480 parts_uncanon(p, s, params);
e293e927 1481 return float16_pack_raw(p);
aaffb7bf
RH
1482}
1483
e293e927
RH
1484static float16 float16_round_pack_canonical(FloatParts64 *p,
1485 float_status *s)
aaffb7bf
RH
1486{
1487 return float16a_round_pack_canonical(p, s, &float16_params);
1488}
1489
e293e927
RH
1490static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1491 float_status *s)
aaffb7bf 1492{
ee6959f2 1493 parts_uncanon(p, s, &bfloat16_params);
e293e927 1494 return bfloat16_pack_raw(p);
aaffb7bf
RH
1495}
1496
98e256fc
RH
1497static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1498 float_status *s)
aaffb7bf 1499{
98e256fc 1500 float32_unpack_raw(p, f);
d46975bc 1501 parts_canonicalize(p, s, &float32_params);
aaffb7bf
RH
1502}
1503
e293e927
RH
1504static float32 float32_round_pack_canonical(FloatParts64 *p,
1505 float_status *s)
aaffb7bf 1506{
ee6959f2 1507 parts_uncanon(p, s, &float32_params);
e293e927 1508 return float32_pack_raw(p);
aaffb7bf
RH
1509}
1510
98e256fc
RH
1511static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1512 float_status *s)
aaffb7bf 1513{
98e256fc 1514 float64_unpack_raw(p, f);
d46975bc 1515 parts_canonicalize(p, s, &float64_params);
aaffb7bf
RH
1516}
1517
e293e927
RH
1518static float64 float64_round_pack_canonical(FloatParts64 *p,
1519 float_status *s)
aaffb7bf 1520{
ee6959f2 1521 parts_uncanon(p, s, &float64_params);
e293e927 1522 return float64_pack_raw(p);
aaffb7bf
RH
1523}
1524
3ff49e56
RH
1525static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1526 float_status *s)
1527{
1528 float128_unpack_raw(p, f);
1529 parts_canonicalize(p, s, &float128_params);
1530}
1531
1532static float128 float128_round_pack_canonical(FloatParts128 *p,
1533 float_status *s)
1534{
1535 parts_uncanon(p, s, &float128_params);
1536 return float128_pack_raw(p);
1537}
1538
6fff2167 1539/*
da10a907 1540 * Addition and subtraction
6fff2167
AB
1541 */
1542
da10a907
RH
1543static float16 QEMU_FLATTEN
1544float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
6fff2167 1545{
da10a907 1546 FloatParts64 pa, pb, *pr;
98e256fc
RH
1547
1548 float16_unpack_canonical(&pa, a, status);
1549 float16_unpack_canonical(&pb, b, status);
da10a907 1550 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1551
da10a907 1552 return float16_round_pack_canonical(pr, status);
6fff2167
AB
1553}
1554
da10a907 1555float16 float16_add(float16 a, float16 b, float_status *status)
1b615d48 1556{
da10a907
RH
1557 return float16_addsub(a, b, status, false);
1558}
1b615d48 1559
da10a907
RH
1560float16 float16_sub(float16 a, float16 b, float_status *status)
1561{
1562 return float16_addsub(a, b, status, true);
1b615d48
EC
1563}
1564
1565static float32 QEMU_SOFTFLOAT_ATTR
da10a907 1566soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
6fff2167 1567{
da10a907 1568 FloatParts64 pa, pb, *pr;
98e256fc
RH
1569
1570 float32_unpack_canonical(&pa, a, status);
1571 float32_unpack_canonical(&pb, b, status);
da10a907 1572 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1573
da10a907 1574 return float32_round_pack_canonical(pr, status);
6fff2167
AB
1575}
1576
da10a907 1577static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1b615d48 1578{
da10a907 1579 return soft_f32_addsub(a, b, status, false);
1b615d48
EC
1580}
1581
da10a907 1582static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1b615d48 1583{
da10a907 1584 return soft_f32_addsub(a, b, status, true);
1b615d48
EC
1585}
1586
1587static float64 QEMU_SOFTFLOAT_ATTR
da10a907 1588soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
6fff2167 1589{
da10a907 1590 FloatParts64 pa, pb, *pr;
98e256fc
RH
1591
1592 float64_unpack_canonical(&pa, a, status);
1593 float64_unpack_canonical(&pb, b, status);
da10a907 1594 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1595
da10a907 1596 return float64_round_pack_canonical(pr, status);
6fff2167
AB
1597}
1598
da10a907 1599static float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1600{
da10a907 1601 return soft_f64_addsub(a, b, status, false);
1b615d48 1602}
6fff2167 1603
da10a907 1604static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1b615d48 1605{
da10a907 1606 return soft_f64_addsub(a, b, status, true);
6fff2167
AB
1607}
1608
/*
 * Host floating-point add/subtract primitives.  They are passed as the
 * 'hard' callback to float32_addsub()/float64_addsub() further down.
 */

/* Host single-precision a + b. */
static float hard_f32_add(float a, float b)
{
    return (a + b);
}

/* Host single-precision a - b. */
static float hard_f32_sub(float a, float b)
{
    return (a - b);
}

/* Host double-precision a + b. */
static double hard_f64_add(double a, double b)
{
    return (a + b);
}

/* Host double-precision a - b. */
static double hard_f64_sub(double a, double b)
{
    return (a - b);
}
1628
b240c9c4 1629static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1b615d48
EC
1630{
1631 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1632 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1633 }
1634 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1635}
1636
b240c9c4 1637static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1b615d48
EC
1638{
1639 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1640 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1641 } else {
1642 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1643 }
1644}
1645
1646static float32 float32_addsub(float32 a, float32 b, float_status *s,
1647 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1648{
1649 return float32_gen2(a, b, s, hard, soft,
b240c9c4 1650 f32_is_zon2, f32_addsubmul_post);
1b615d48
EC
1651}
1652
1653static float64 float64_addsub(float64 a, float64 b, float_status *s,
1654 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1655{
1656 return float64_gen2(a, b, s, hard, soft,
b240c9c4 1657 f64_is_zon2, f64_addsubmul_post);
1b615d48
EC
1658}
1659
1660float32 QEMU_FLATTEN
1661float32_add(float32 a, float32 b, float_status *s)
1662{
1663 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1664}
1665
1666float32 QEMU_FLATTEN
1667float32_sub(float32 a, float32 b, float_status *s)
1668{
1669 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1670}
1671
1672float64 QEMU_FLATTEN
1673float64_add(float64 a, float64 b, float_status *s)
1674{
1675 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1676}
1677
1678float64 QEMU_FLATTEN
1679float64_sub(float64 a, float64 b, float_status *s)
1680{
1681 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1682}
1683
da10a907
RH
1684static bfloat16 QEMU_FLATTEN
1685bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
8282310d 1686{
da10a907 1687 FloatParts64 pa, pb, *pr;
98e256fc
RH
1688
1689 bfloat16_unpack_canonical(&pa, a, status);
1690 bfloat16_unpack_canonical(&pb, b, status);
da10a907 1691 pr = parts_addsub(&pa, &pb, status, subtract);
8282310d 1692
da10a907 1693 return bfloat16_round_pack_canonical(pr, status);
8282310d
LZ
1694}
1695
da10a907 1696bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1697{
da10a907
RH
1698 return bfloat16_addsub(a, b, status, false);
1699}
8282310d 1700
da10a907
RH
1701bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1702{
1703 return bfloat16_addsub(a, b, status, true);
8282310d
LZ
1704}
1705
3ff49e56
RH
1706static float128 QEMU_FLATTEN
1707float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1708{
1709 FloatParts128 pa, pb, *pr;
1710
1711 float128_unpack_canonical(&pa, a, status);
1712 float128_unpack_canonical(&pb, b, status);
1713 pr = parts_addsub(&pa, &pb, status, subtract);
1714
1715 return float128_round_pack_canonical(pr, status);
1716}
1717
1718float128 float128_add(float128 a, float128 b, float_status *status)
1719{
1720 return float128_addsub(a, b, status, false);
1721}
1722
1723float128 float128_sub(float128 a, float128 b, float_status *status)
1724{
1725 return float128_addsub(a, b, status, true);
1726}
1727
74d707e2 1728/*
aca84527 1729 * Multiplication
74d707e2
AB
1730 */
1731
97ff87c0 1732float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2 1733{
aca84527 1734 FloatParts64 pa, pb, *pr;
98e256fc
RH
1735
1736 float16_unpack_canonical(&pa, a, status);
1737 float16_unpack_canonical(&pb, b, status);
aca84527 1738 pr = parts_mul(&pa, &pb, status);
74d707e2 1739
aca84527 1740 return float16_round_pack_canonical(pr, status);
74d707e2
AB
1741}
1742
2dfabc86
EC
1743static float32 QEMU_SOFTFLOAT_ATTR
1744soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2 1745{
aca84527 1746 FloatParts64 pa, pb, *pr;
98e256fc
RH
1747
1748 float32_unpack_canonical(&pa, a, status);
1749 float32_unpack_canonical(&pb, b, status);
aca84527 1750 pr = parts_mul(&pa, &pb, status);
74d707e2 1751
aca84527 1752 return float32_round_pack_canonical(pr, status);
74d707e2
AB
1753}
1754
2dfabc86
EC
1755static float64 QEMU_SOFTFLOAT_ATTR
1756soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2 1757{
aca84527 1758 FloatParts64 pa, pb, *pr;
98e256fc
RH
1759
1760 float64_unpack_canonical(&pa, a, status);
1761 float64_unpack_canonical(&pb, b, status);
aca84527 1762 pr = parts_mul(&pa, &pb, status);
74d707e2 1763
aca84527 1764 return float64_round_pack_canonical(pr, status);
74d707e2
AB
1765}
1766
2dfabc86
EC
/* Host single-precision a * b; the 'hard' callback of float32_mul(). */
static float hard_f32_mul(float a, float b)
{
    return (a * b);
}

/* Host double-precision a * b; the 'hard' callback of float64_mul(). */
static double hard_f64_mul(double a, double b)
{
    return (a * b);
}
1776
2dfabc86
EC
1777float32 QEMU_FLATTEN
1778float32_mul(float32 a, float32 b, float_status *s)
1779{
1780 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
b240c9c4 1781 f32_is_zon2, f32_addsubmul_post);
2dfabc86
EC
1782}
1783
1784float64 QEMU_FLATTEN
1785float64_mul(float64 a, float64 b, float_status *s)
1786{
1787 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
b240c9c4 1788 f64_is_zon2, f64_addsubmul_post);
2dfabc86
EC
1789}
1790
aca84527
RH
1791bfloat16 QEMU_FLATTEN
1792bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1793{
aca84527 1794 FloatParts64 pa, pb, *pr;
98e256fc
RH
1795
1796 bfloat16_unpack_canonical(&pa, a, status);
1797 bfloat16_unpack_canonical(&pb, b, status);
aca84527 1798 pr = parts_mul(&pa, &pb, status);
8282310d 1799
aca84527
RH
1800 return bfloat16_round_pack_canonical(pr, status);
1801}
1802
1803float128 QEMU_FLATTEN
1804float128_mul(float128 a, float128 b, float_status *status)
1805{
1806 FloatParts128 pa, pb, *pr;
1807
1808 float128_unpack_canonical(&pa, a, status);
1809 float128_unpack_canonical(&pb, b, status);
1810 pr = parts_mul(&pa, &pb, status);
1811
1812 return float128_round_pack_canonical(pr, status);
8282310d
LZ
1813}
1814
d446830a 1815/*
dedd123c 1816 * Fused multiply-add
d446830a
AB
1817 */
1818
97ff87c0 1819float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
dedd123c 1820 int flags, float_status *status)
d446830a 1821{
dedd123c 1822 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1823
1824 float16_unpack_canonical(&pa, a, status);
1825 float16_unpack_canonical(&pb, b, status);
1826 float16_unpack_canonical(&pc, c, status);
dedd123c 1827 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1828
dedd123c 1829 return float16_round_pack_canonical(pr, status);
d446830a
AB
1830}
1831
ccf770ba
EC
1832static float32 QEMU_SOFTFLOAT_ATTR
1833soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1834 float_status *status)
d446830a 1835{
dedd123c 1836 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1837
1838 float32_unpack_canonical(&pa, a, status);
1839 float32_unpack_canonical(&pb, b, status);
1840 float32_unpack_canonical(&pc, c, status);
dedd123c 1841 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1842
dedd123c 1843 return float32_round_pack_canonical(pr, status);
d446830a
AB
1844}
1845
ccf770ba
EC
1846static float64 QEMU_SOFTFLOAT_ATTR
1847soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1848 float_status *status)
d446830a 1849{
dedd123c 1850 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1851
1852 float64_unpack_canonical(&pa, a, status);
1853 float64_unpack_canonical(&pb, b, status);
1854 float64_unpack_canonical(&pc, c, status);
dedd123c 1855 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1856
dedd123c 1857 return float64_round_pack_canonical(pr, status);
d446830a
AB
1858}
1859
f6b3b108
EC
1860static bool force_soft_fma;
1861
ccf770ba
EC
1862float32 QEMU_FLATTEN
1863float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1864{
1865 union_float32 ua, ub, uc, ur;
1866
1867 ua.s = xa;
1868 ub.s = xb;
1869 uc.s = xc;
1870
1871 if (unlikely(!can_use_fpu(s))) {
1872 goto soft;
1873 }
1874 if (unlikely(flags & float_muladd_halve_result)) {
1875 goto soft;
1876 }
1877
1878 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1879 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1880 goto soft;
1881 }
f6b3b108
EC
1882
1883 if (unlikely(force_soft_fma)) {
1884 goto soft;
1885 }
1886
ccf770ba
EC
1887 /*
1888 * When (a || b) == 0, there's no need to check for under/over flow,
1889 * since we know the addend is (normal || 0) and the product is 0.
1890 */
1891 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1892 union_float32 up;
1893 bool prod_sign;
1894
1895 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1896 prod_sign ^= !!(flags & float_muladd_negate_product);
1897 up.s = float32_set_sign(float32_zero, prod_sign);
1898
1899 if (flags & float_muladd_negate_c) {
1900 uc.h = -uc.h;
1901 }
1902 ur.h = up.h + uc.h;
1903 } else {
896f51fb
KC
1904 union_float32 ua_orig = ua;
1905 union_float32 uc_orig = uc;
1906
ccf770ba
EC
1907 if (flags & float_muladd_negate_product) {
1908 ua.h = -ua.h;
1909 }
1910 if (flags & float_muladd_negate_c) {
1911 uc.h = -uc.h;
1912 }
1913
1914 ur.h = fmaf(ua.h, ub.h, uc.h);
1915
1916 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 1917 float_raise(float_flag_overflow, s);
ccf770ba 1918 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
1919 ua = ua_orig;
1920 uc = uc_orig;
ccf770ba
EC
1921 goto soft;
1922 }
1923 }
1924 if (flags & float_muladd_negate_result) {
1925 return float32_chs(ur.s);
1926 }
1927 return ur.s;
1928
1929 soft:
1930 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1931}
1932
1933float64 QEMU_FLATTEN
1934float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1935{
1936 union_float64 ua, ub, uc, ur;
1937
1938 ua.s = xa;
1939 ub.s = xb;
1940 uc.s = xc;
1941
1942 if (unlikely(!can_use_fpu(s))) {
1943 goto soft;
1944 }
1945 if (unlikely(flags & float_muladd_halve_result)) {
1946 goto soft;
1947 }
1948
1949 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1950 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1951 goto soft;
1952 }
f6b3b108
EC
1953
1954 if (unlikely(force_soft_fma)) {
1955 goto soft;
1956 }
1957
ccf770ba
EC
1958 /*
1959 * When (a || b) == 0, there's no need to check for under/over flow,
1960 * since we know the addend is (normal || 0) and the product is 0.
1961 */
1962 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1963 union_float64 up;
1964 bool prod_sign;
1965
1966 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1967 prod_sign ^= !!(flags & float_muladd_negate_product);
1968 up.s = float64_set_sign(float64_zero, prod_sign);
1969
1970 if (flags & float_muladd_negate_c) {
1971 uc.h = -uc.h;
1972 }
1973 ur.h = up.h + uc.h;
1974 } else {
896f51fb
KC
1975 union_float64 ua_orig = ua;
1976 union_float64 uc_orig = uc;
1977
ccf770ba
EC
1978 if (flags & float_muladd_negate_product) {
1979 ua.h = -ua.h;
1980 }
1981 if (flags & float_muladd_negate_c) {
1982 uc.h = -uc.h;
1983 }
1984
1985 ur.h = fma(ua.h, ub.h, uc.h);
1986
1987 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 1988 float_raise(float_flag_overflow, s);
ccf770ba 1989 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
1990 ua = ua_orig;
1991 uc = uc_orig;
ccf770ba
EC
1992 goto soft;
1993 }
1994 }
1995 if (flags & float_muladd_negate_result) {
1996 return float64_chs(ur.s);
1997 }
1998 return ur.s;
1999
2000 soft:
2001 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2002}
2003
8282310d
LZ
2004bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2005 int flags, float_status *status)
2006{
dedd123c 2007 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
2008
2009 bfloat16_unpack_canonical(&pa, a, status);
2010 bfloat16_unpack_canonical(&pb, b, status);
2011 bfloat16_unpack_canonical(&pc, c, status);
dedd123c
RH
2012 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2013
2014 return bfloat16_round_pack_canonical(pr, status);
2015}
8282310d 2016
dedd123c
RH
2017float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2018 int flags, float_status *status)
2019{
2020 FloatParts128 pa, pb, pc, *pr;
2021
2022 float128_unpack_canonical(&pa, a, status);
2023 float128_unpack_canonical(&pb, b, status);
2024 float128_unpack_canonical(&pc, c, status);
2025 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2026
2027 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2028}
2029
cf07323d 2030/*
ec961b81 2031 * Division
cf07323d
AB
2032 */
2033
cf07323d
AB
2034float16 float16_div(float16 a, float16 b, float_status *status)
2035{
ec961b81 2036 FloatParts64 pa, pb, *pr;
98e256fc
RH
2037
2038 float16_unpack_canonical(&pa, a, status);
2039 float16_unpack_canonical(&pb, b, status);
ec961b81 2040 pr = parts_div(&pa, &pb, status);
cf07323d 2041
ec961b81 2042 return float16_round_pack_canonical(pr, status);
cf07323d
AB
2043}
2044
4a629561
EC
2045static float32 QEMU_SOFTFLOAT_ATTR
2046soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d 2047{
ec961b81 2048 FloatParts64 pa, pb, *pr;
98e256fc
RH
2049
2050 float32_unpack_canonical(&pa, a, status);
2051 float32_unpack_canonical(&pb, b, status);
ec961b81 2052 pr = parts_div(&pa, &pb, status);
cf07323d 2053
ec961b81 2054 return float32_round_pack_canonical(pr, status);
cf07323d
AB
2055}
2056
4a629561
EC
2057static float64 QEMU_SOFTFLOAT_ATTR
2058soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d 2059{
ec961b81 2060 FloatParts64 pa, pb, *pr;
98e256fc
RH
2061
2062 float64_unpack_canonical(&pa, a, status);
2063 float64_unpack_canonical(&pb, b, status);
ec961b81 2064 pr = parts_div(&pa, &pb, status);
cf07323d 2065
ec961b81 2066 return float64_round_pack_canonical(pr, status);
cf07323d
AB
2067}
2068
4a629561
EC
/* Host single-precision division; the hard callback used by float32_div(). */
static float hard_f32_div(float a, float b)
{
    const float quotient = a / b;

    return quotient;
}
2073
/* Host double-precision division; the hard callback used by float64_div(). */
static double hard_f64_div(double a, double b)
{
    const double quotient = a / b;

    return quotient;
}
2078
2079static bool f32_div_pre(union_float32 a, union_float32 b)
2080{
2081 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2082 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2083 fpclassify(b.h) == FP_NORMAL;
2084 }
2085 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2086}
2087
2088static bool f64_div_pre(union_float64 a, union_float64 b)
2089{
2090 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2091 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2092 fpclassify(b.h) == FP_NORMAL;
2093 }
2094 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2095}
2096
2097static bool f32_div_post(union_float32 a, union_float32 b)
2098{
2099 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2100 return fpclassify(a.h) != FP_ZERO;
2101 }
2102 return !float32_is_zero(a.s);
2103}
2104
2105static bool f64_div_post(union_float64 a, union_float64 b)
2106{
2107 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2108 return fpclassify(a.h) != FP_ZERO;
2109 }
2110 return !float64_is_zero(a.s);
2111}
2112
2113float32 QEMU_FLATTEN
2114float32_div(float32 a, float32 b, float_status *s)
2115{
2116 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
b240c9c4 2117 f32_div_pre, f32_div_post);
4a629561
EC
2118}
2119
2120float64 QEMU_FLATTEN
2121float64_div(float64 a, float64 b, float_status *s)
2122{
2123 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
b240c9c4 2124 f64_div_pre, f64_div_post);
4a629561
EC
2125}
2126
ec961b81
RH
2127bfloat16 QEMU_FLATTEN
2128bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
8282310d 2129{
ec961b81 2130 FloatParts64 pa, pb, *pr;
98e256fc
RH
2131
2132 bfloat16_unpack_canonical(&pa, a, status);
2133 bfloat16_unpack_canonical(&pb, b, status);
ec961b81 2134 pr = parts_div(&pa, &pb, status);
8282310d 2135
ec961b81
RH
2136 return bfloat16_round_pack_canonical(pr, status);
2137}
2138
2139float128 QEMU_FLATTEN
2140float128_div(float128 a, float128 b, float_status *status)
2141{
2142 FloatParts128 pa, pb, *pr;
2143
2144 float128_unpack_canonical(&pa, a, status);
2145 float128_unpack_canonical(&pb, b, status);
2146 pr = parts_div(&pa, &pb, status);
2147
2148 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2149}
2150
6fed16b2
AB
2151/*
2152 * Float to Float conversions
2153 *
2154 * Returns the result of converting one float format to another. The
2155 * conversion is performed according to the IEC/IEEE Standard for
2156 * Binary Floating-Point Arithmetic.
2157 *
c3f1875e
RH
2158 * Usually this only needs to take care of raising invalid exceptions
2159 * and handling the conversion on NaNs.
6fed16b2
AB
2160 */
2161
c3f1875e
RH
2162static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2163{
2164 switch (a->cls) {
2165 case float_class_qnan:
2166 case float_class_snan:
2167 /*
2168 * There is no NaN in the destination format. Raise Invalid
2169 * and return a zero with the sign of the input NaN.
2170 */
2171 float_raise(float_flag_invalid, s);
2172 a->cls = float_class_zero;
2173 break;
2174
2175 case float_class_inf:
2176 /*
2177 * There is no Inf in the destination format. Raise Invalid
2178 * and return the maximum normal with the correct sign.
2179 */
2180 float_raise(float_flag_invalid, s);
2181 a->cls = float_class_normal;
2182 a->exp = float16_params_ahp.exp_max;
2183 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2184 float16_params_ahp.frac_size + 1);
2185 break;
2186
2187 case float_class_normal:
2188 case float_class_zero:
2189 break;
2190
2191 default:
2192 g_assert_not_reached();
2193 }
2194}
2195
2196static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2197{
2198 if (is_nan(a->cls)) {
2199 parts_return_nan(a, s);
6fed16b2 2200 }
6fed16b2
AB
2201}
2202
c3f1875e
RH
2203static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2204{
2205 if (is_nan(a->cls)) {
2206 parts_return_nan(a, s);
2207 }
2208}
2209
/*
 * Dispatch on P through PARTS_GENERIC_64_128; presumably this resolves to
 * parts64_float_to_float() or parts128_float_to_float() according to the
 * pointer type of P.
 */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2212
9882ccaf
RH
2213static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2214 float_status *s)
2215{
2216 a->cls = b->cls;
2217 a->sign = b->sign;
2218 a->exp = b->exp;
2219
2220 if (a->cls == float_class_normal) {
2221 frac_truncjam(a, b);
2222 } else if (is_nan(a->cls)) {
2223 /* Discard the low bits of the NaN. */
2224 a->frac = b->frac_hi;
2225 parts_return_nan(a, s);
2226 }
2227}
2228
2229static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2230 float_status *s)
2231{
2232 a->cls = b->cls;
2233 a->sign = b->sign;
2234 a->exp = b->exp;
2235 frac_widen(a, b);
2236
2237 if (is_nan(a->cls)) {
2238 parts_return_nan(a, s);
2239 }
2240}
2241
6fed16b2
AB
2242float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2243{
2244 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2245 FloatParts64 p;
98e256fc 2246
c3f1875e
RH
2247 float16a_unpack_canonical(&p, a, s, fmt16);
2248 parts_float_to_float(&p, s);
2249 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2250}
2251
2252float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2253{
2254 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2255 FloatParts64 p;
98e256fc 2256
c3f1875e
RH
2257 float16a_unpack_canonical(&p, a, s, fmt16);
2258 parts_float_to_float(&p, s);
2259 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2260}
2261
2262float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2263{
c3f1875e
RH
2264 FloatParts64 p;
2265 const FloatFmt *fmt;
98e256fc 2266
c3f1875e
RH
2267 float32_unpack_canonical(&p, a, s);
2268 if (ieee) {
2269 parts_float_to_float(&p, s);
2270 fmt = &float16_params;
2271 } else {
2272 parts_float_to_ahp(&p, s);
2273 fmt = &float16_params_ahp;
2274 }
2275 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2276}
2277
21381dcf
MK
2278static float64 QEMU_SOFTFLOAT_ATTR
2279soft_float32_to_float64(float32 a, float_status *s)
6fed16b2 2280{
c3f1875e 2281 FloatParts64 p;
98e256fc 2282
c3f1875e
RH
2283 float32_unpack_canonical(&p, a, s);
2284 parts_float_to_float(&p, s);
2285 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2286}
2287
21381dcf
MK
2288float64 float32_to_float64(float32 a, float_status *s)
2289{
2290 if (likely(float32_is_normal(a))) {
2291 /* Widening conversion can never produce inexact results. */
2292 union_float32 uf;
2293 union_float64 ud;
2294 uf.s = a;
2295 ud.h = uf.h;
2296 return ud.s;
2297 } else if (float32_is_zero(a)) {
2298 return float64_set_sign(float64_zero, float32_is_neg(a));
2299 } else {
2300 return soft_float32_to_float64(a, s);
2301 }
2302}
2303
6fed16b2
AB
2304float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2305{
c3f1875e
RH
2306 FloatParts64 p;
2307 const FloatFmt *fmt;
98e256fc 2308
c3f1875e
RH
2309 float64_unpack_canonical(&p, a, s);
2310 if (ieee) {
2311 parts_float_to_float(&p, s);
2312 fmt = &float16_params;
2313 } else {
2314 parts_float_to_ahp(&p, s);
2315 fmt = &float16_params_ahp;
2316 }
2317 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2318}
2319
2320float32 float64_to_float32(float64 a, float_status *s)
2321{
c3f1875e 2322 FloatParts64 p;
98e256fc 2323
c3f1875e
RH
2324 float64_unpack_canonical(&p, a, s);
2325 parts_float_to_float(&p, s);
2326 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2327}
2328
34f0c0a9
LZ
2329float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2330{
c3f1875e 2331 FloatParts64 p;
98e256fc 2332
c3f1875e
RH
2333 bfloat16_unpack_canonical(&p, a, s);
2334 parts_float_to_float(&p, s);
2335 return float32_round_pack_canonical(&p, s);
34f0c0a9
LZ
2336}
2337
2338float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2339{
c3f1875e 2340 FloatParts64 p;
98e256fc 2341
c3f1875e
RH
2342 bfloat16_unpack_canonical(&p, a, s);
2343 parts_float_to_float(&p, s);
2344 return float64_round_pack_canonical(&p, s);
34f0c0a9
LZ
2345}
2346
2347bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2348{
c3f1875e 2349 FloatParts64 p;
98e256fc 2350
c3f1875e
RH
2351 float32_unpack_canonical(&p, a, s);
2352 parts_float_to_float(&p, s);
2353 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2354}
2355
2356bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2357{
c3f1875e 2358 FloatParts64 p;
98e256fc 2359
c3f1875e
RH
2360 float64_unpack_canonical(&p, a, s);
2361 parts_float_to_float(&p, s);
2362 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2363}
2364
9882ccaf
RH
2365float32 float128_to_float32(float128 a, float_status *s)
2366{
2367 FloatParts64 p64;
2368 FloatParts128 p128;
2369
2370 float128_unpack_canonical(&p128, a, s);
2371 parts_float_to_float_narrow(&p64, &p128, s);
2372 return float32_round_pack_canonical(&p64, s);
2373}
2374
2375float64 float128_to_float64(float128 a, float_status *s)
2376{
2377 FloatParts64 p64;
2378 FloatParts128 p128;
2379
2380 float128_unpack_canonical(&p128, a, s);
2381 parts_float_to_float_narrow(&p64, &p128, s);
2382 return float64_round_pack_canonical(&p64, s);
2383}
2384
2385float128 float32_to_float128(float32 a, float_status *s)
2386{
2387 FloatParts64 p64;
2388 FloatParts128 p128;
2389
2390 float32_unpack_canonical(&p64, a, s);
2391 parts_float_to_float_widen(&p128, &p64, s);
2392 return float128_round_pack_canonical(&p128, s);
2393}
2394
2395float128 float64_to_float128(float64 a, float_status *s)
2396{
2397 FloatParts64 p64;
2398 FloatParts128 p128;
2399
2400 float64_unpack_canonical(&p64, a, s);
2401 parts_float_to_float_widen(&p128, &p64, s);
2402 return float128_round_pack_canonical(&p128, s);
2403}
2404
dbe4d53a 2405/*
afc34931 2406 * Round to integral value
dbe4d53a
AB
2407 */
2408
dbe4d53a
AB
2409float16 float16_round_to_int(float16 a, float_status *s)
2410{
afc34931 2411 FloatParts64 p;
98e256fc 2412
afc34931
RH
2413 float16_unpack_canonical(&p, a, s);
2414 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2415 return float16_round_pack_canonical(&p, s);
dbe4d53a
AB
2416}
2417
2418float32 float32_round_to_int(float32 a, float_status *s)
2419{
afc34931 2420 FloatParts64 p;
98e256fc 2421
afc34931
RH
2422 float32_unpack_canonical(&p, a, s);
2423 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2424 return float32_round_pack_canonical(&p, s);
dbe4d53a
AB
2425}
2426
2427float64 float64_round_to_int(float64 a, float_status *s)
2428{
afc34931 2429 FloatParts64 p;
98e256fc 2430
afc34931
RH
2431 float64_unpack_canonical(&p, a, s);
2432 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2433 return float64_round_pack_canonical(&p, s);
dbe4d53a
AB
2434}
2435
34f0c0a9
LZ
2436bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2437{
afc34931 2438 FloatParts64 p;
98e256fc 2439
afc34931
RH
2440 bfloat16_unpack_canonical(&p, a, s);
2441 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2442 return bfloat16_round_pack_canonical(&p, s);
2443}
2444
2445float128 float128_round_to_int(float128 a, float_status *s)
2446{
2447 FloatParts128 p;
2448
2449 float128_unpack_canonical(&p, a, s);
2450 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2451 return float128_round_pack_canonical(&p, s);
34f0c0a9
LZ
2452}
2453
ab52f973 2454/*
463b3f0d
RH
2455 * Floating-point to signed integer conversions
2456 */
ab52f973 2457
0d93d8ec
FC
2458int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2459 float_status *s)
2460{
98e256fc
RH
2461 FloatParts64 p;
2462
2463 float16_unpack_canonical(&p, a, s);
463b3f0d 2464 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
0d93d8ec
FC
2465}
2466
3dede407 2467int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2468 float_status *s)
2469{
98e256fc
RH
2470 FloatParts64 p;
2471
2472 float16_unpack_canonical(&p, a, s);
463b3f0d 2473 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2474}
2475
3dede407 2476int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2477 float_status *s)
2478{
98e256fc
RH
2479 FloatParts64 p;
2480
2481 float16_unpack_canonical(&p, a, s);
463b3f0d 2482 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2483}
2484
3dede407 2485int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2486 float_status *s)
2487{
98e256fc
RH
2488 FloatParts64 p;
2489
2490 float16_unpack_canonical(&p, a, s);
463b3f0d 2491 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2492}
2493
3dede407 2494int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2495 float_status *s)
2496{
98e256fc
RH
2497 FloatParts64 p;
2498
2499 float32_unpack_canonical(&p, a, s);
463b3f0d 2500 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2501}
2502
3dede407 2503int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2504 float_status *s)
2505{
98e256fc
RH
2506 FloatParts64 p;
2507
2508 float32_unpack_canonical(&p, a, s);
463b3f0d 2509 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2510}
2511
3dede407 2512int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2513 float_status *s)
2514{
98e256fc
RH
2515 FloatParts64 p;
2516
2517 float32_unpack_canonical(&p, a, s);
463b3f0d 2518 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2519}
2520
3dede407 2521int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2522 float_status *s)
2523{
98e256fc
RH
2524 FloatParts64 p;
2525
2526 float64_unpack_canonical(&p, a, s);
463b3f0d 2527 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2528}
2529
3dede407 2530int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2531 float_status *s)
2532{
98e256fc
RH
2533 FloatParts64 p;
2534
2535 float64_unpack_canonical(&p, a, s);
463b3f0d 2536 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2537}
2538
3dede407 2539int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2540 float_status *s)
2541{
98e256fc
RH
2542 FloatParts64 p;
2543
2544 float64_unpack_canonical(&p, a, s);
463b3f0d
RH
2545 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2546}
2547
2548int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2549 float_status *s)
2550{
2551 FloatParts64 p;
2552
2553 bfloat16_unpack_canonical(&p, a, s);
2554 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2555}
2556
2557int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2558 float_status *s)
2559{
2560 FloatParts64 p;
2561
2562 bfloat16_unpack_canonical(&p, a, s);
2563 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2564}
2565
2566int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2567 float_status *s)
2568{
2569 FloatParts64 p;
2570
2571 bfloat16_unpack_canonical(&p, a, s);
2572 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2573}
2574
2575static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
2576 int scale, float_status *s)
2577{
2578 FloatParts128 p;
2579
2580 float128_unpack_canonical(&p, a, s);
2581 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2582}
2583
2584static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
2585 int scale, float_status *s)
2586{
2587 FloatParts128 p;
2588
2589 float128_unpack_canonical(&p, a, s);
2590 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2591}
2592
0d93d8ec
FC
2593int8_t float16_to_int8(float16 a, float_status *s)
2594{
2595 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2596}
2597
2f6c74be
RH
2598int16_t float16_to_int16(float16 a, float_status *s)
2599{
2600 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2601}
2602
2603int32_t float16_to_int32(float16 a, float_status *s)
2604{
2605 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2606}
2607
2608int64_t float16_to_int64(float16 a, float_status *s)
2609{
2610 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2611}
2612
2613int16_t float32_to_int16(float32 a, float_status *s)
2614{
2615 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2616}
2617
2618int32_t float32_to_int32(float32 a, float_status *s)
2619{
2620 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2621}
2622
2623int64_t float32_to_int64(float32 a, float_status *s)
2624{
2625 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2626}
2627
2628int16_t float64_to_int16(float64 a, float_status *s)
2629{
2630 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2631}
2632
2633int32_t float64_to_int32(float64 a, float_status *s)
2634{
2635 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2636}
2637
2638int64_t float64_to_int64(float64 a, float_status *s)
2639{
2640 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2641}
2642
463b3f0d
RH
2643int32_t float128_to_int32(float128 a, float_status *s)
2644{
2645 return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2646}
2647
2648int64_t float128_to_int64(float128 a, float_status *s)
2649{
2650 return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2651}
2652
2f6c74be
RH
2653int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2654{
2655 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2656}
2657
2658int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2659{
2660 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2661}
2662
2663int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2664{
2665 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2666}
2667
2f6c74be
RH
2668int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2669{
2670 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2671}
ab52f973 2672
2f6c74be
RH
2673int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2674{
2675 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2676}
2677
2678int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2679{
2680 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2681}
2682
2683int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2684{
2685 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2686}
ab52f973 2687
2f6c74be
RH
2688int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2689{
2690 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2691}
ab52f973 2692
2f6c74be
RH
2693int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2694{
2695 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2696}
ab52f973 2697
463b3f0d 2698int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
34f0c0a9 2699{
463b3f0d 2700 return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2701}
2702
463b3f0d 2703int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
34f0c0a9 2704{
463b3f0d 2705 return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2706}
2707
2708int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2709{
2710 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2711}
2712
2713int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2714{
2715 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2716}
2717
2718int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2719{
2720 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2721}
2722
2723int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2724{
2725 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2726}
2727
2728int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2729{
2730 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2731}
2732
2733int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2734{
2735 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2736}
2737
ab52f973 2738/*
4ab4aef0 2739 * Floating-point to unsigned integer conversions
ab52f973
AB
2740 */
2741
0d93d8ec
FC
2742uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2743 float_status *s)
2744{
98e256fc
RH
2745 FloatParts64 p;
2746
2747 float16_unpack_canonical(&p, a, s);
4ab4aef0 2748 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
0d93d8ec
FC
2749}
2750
3dede407 2751uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2752 float_status *s)
2753{
98e256fc
RH
2754 FloatParts64 p;
2755
2756 float16_unpack_canonical(&p, a, s);
4ab4aef0 2757 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2758}
2759
3dede407 2760uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2761 float_status *s)
2762{
98e256fc
RH
2763 FloatParts64 p;
2764
2765 float16_unpack_canonical(&p, a, s);
4ab4aef0 2766 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2767}
2768
3dede407 2769uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2770 float_status *s)
2771{
98e256fc
RH
2772 FloatParts64 p;
2773
2774 float16_unpack_canonical(&p, a, s);
4ab4aef0 2775 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2776}
2777
3dede407 2778uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2779 float_status *s)
2780{
98e256fc
RH
2781 FloatParts64 p;
2782
2783 float32_unpack_canonical(&p, a, s);
4ab4aef0 2784 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2785}
2786
3dede407 2787uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2788 float_status *s)
2789{
98e256fc
RH
2790 FloatParts64 p;
2791
2792 float32_unpack_canonical(&p, a, s);
4ab4aef0 2793 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2794}
2795
3dede407 2796uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2797 float_status *s)
2798{
98e256fc
RH
2799 FloatParts64 p;
2800
2801 float32_unpack_canonical(&p, a, s);
4ab4aef0 2802 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2803}
2804
3dede407 2805uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2806 float_status *s)
2807{
98e256fc
RH
2808 FloatParts64 p;
2809
2810 float64_unpack_canonical(&p, a, s);
4ab4aef0 2811 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2812}
2813
3dede407 2814uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2815 float_status *s)
2816{
98e256fc
RH
2817 FloatParts64 p;
2818
2819 float64_unpack_canonical(&p, a, s);
4ab4aef0 2820 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2821}
2822
3dede407 2823uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2824 float_status *s)
2825{
98e256fc
RH
2826 FloatParts64 p;
2827
2828 float64_unpack_canonical(&p, a, s);
4ab4aef0
RH
2829 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2830}
2831
2832uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2833 int scale, float_status *s)
2834{
2835 FloatParts64 p;
2836
2837 bfloat16_unpack_canonical(&p, a, s);
2838 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2839}
2840
2841uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2842 int scale, float_status *s)
2843{
2844 FloatParts64 p;
2845
2846 bfloat16_unpack_canonical(&p, a, s);
2847 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2848}
2849
2850uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
2851 int scale, float_status *s)
2852{
2853 FloatParts64 p;
2854
2855 bfloat16_unpack_canonical(&p, a, s);
2856 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2857}
2858
2859static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
2860 int scale, float_status *s)
2861{
2862 FloatParts128 p;
2863
2864 float128_unpack_canonical(&p, a, s);
2865 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2866}
2867
2868static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
2869 int scale, float_status *s)
2870{
2871 FloatParts128 p;
2872
2873 float128_unpack_canonical(&p, a, s);
2874 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2875}
2876
0d93d8ec
FC
2877uint8_t float16_to_uint8(float16 a, float_status *s)
2878{
2879 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2880}
2881
2f6c74be
RH
2882uint16_t float16_to_uint16(float16 a, float_status *s)
2883{
2884 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2885}
2886
2887uint32_t float16_to_uint32(float16 a, float_status *s)
2888{
2889 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2890}
2891
2892uint64_t float16_to_uint64(float16 a, float_status *s)
2893{
2894 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2895}
2896
2897uint16_t float32_to_uint16(float32 a, float_status *s)
2898{
2899 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2900}
2901
2902uint32_t float32_to_uint32(float32 a, float_status *s)
2903{
2904 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2905}
2906
2907uint64_t float32_to_uint64(float32 a, float_status *s)
2908{
2909 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2910}
2911
2912uint16_t float64_to_uint16(float64 a, float_status *s)
2913{
2914 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2915}
2916
2917uint32_t float64_to_uint32(float64 a, float_status *s)
2918{
2919 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2920}
2921
2922uint64_t float64_to_uint64(float64 a, float_status *s)
2923{
2924 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2925}
2926
4ab4aef0
RH
2927uint32_t float128_to_uint32(float128 a, float_status *s)
2928{
2929 return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2930}
2931
2932uint64_t float128_to_uint64(float128 a, float_status *s)
2933{
2934 return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2935}
2936
2f6c74be
RH
2937uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2938{
2939 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2940}
2941
2942uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2943{
2944 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2945}
2946
2947uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2948{
2949 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2950}
2951
2952uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2953{
2954 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2955}
2956
2957uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2958{
2959 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2960}
2961
2962uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2963{
2964 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2965}
2966
2967uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2968{
2969 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2970}
2971
2972uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2973{
2974 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2975}
2976
2977uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2978{
2979 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2980}
ab52f973 2981
4ab4aef0 2982uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
34f0c0a9 2983{
4ab4aef0 2984 return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2985}
2986
4ab4aef0 2987uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
34f0c0a9 2988{
4ab4aef0 2989 return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2990}
2991
2992uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
2993{
2994 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2995}
2996
2997uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
2998{
2999 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3000}
3001
3002uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3003{
3004 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3005}
3006
3007uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3008{
3009 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3010}
3011
3012uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3013{
3014 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3015}
3016
3017uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3018{
3019 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3020}
3021
c02e1fb8 3022/*
e3689519 3023 * Signed integer to floating-point conversions
c02e1fb8
AB
3024 */
3025
2abdfe24 3026float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3027{
e3689519
RH
3028 FloatParts64 p;
3029
3030 parts_sint_to_float(&p, a, scale, status);
3031 return float16_round_pack_canonical(&p, status);
c02e1fb8
AB
3032}
3033
2abdfe24
RH
3034float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3035{
3036 return int64_to_float16_scalbn(a, scale, status);
3037}
3038
3039float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3040{
3041 return int64_to_float16_scalbn(a, scale, status);
3042}
3043
3044float16 int64_to_float16(int64_t a, float_status *status)
3045{
3046 return int64_to_float16_scalbn(a, 0, status);
3047}
3048
c02e1fb8
AB
3049float16 int32_to_float16(int32_t a, float_status *status)
3050{
2abdfe24 3051 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3052}
3053
3054float16 int16_to_float16(int16_t a, float_status *status)
3055{
2abdfe24 3056 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3057}
3058
0d93d8ec
FC
3059float16 int8_to_float16(int8_t a, float_status *status)
3060{
3061 return int64_to_float16_scalbn(a, 0, status);
3062}
3063
2abdfe24 3064float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3065{
e3689519
RH
3066 FloatParts64 p;
3067
3068 parts64_sint_to_float(&p, a, scale, status);
3069 return float32_round_pack_canonical(&p, status);
c02e1fb8
AB
3070}
3071
2abdfe24
RH
3072float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3073{
3074 return int64_to_float32_scalbn(a, scale, status);
3075}
3076
3077float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3078{
3079 return int64_to_float32_scalbn(a, scale, status);
3080}
3081
3082float32 int64_to_float32(int64_t a, float_status *status)
3083{
3084 return int64_to_float32_scalbn(a, 0, status);
3085}
3086
c02e1fb8
AB
3087float32 int32_to_float32(int32_t a, float_status *status)
3088{
2abdfe24 3089 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3090}
3091
3092float32 int16_to_float32(int16_t a, float_status *status)
3093{
2abdfe24 3094 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3095}
3096
2abdfe24 3097float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3098{
e3689519
RH
3099 FloatParts64 p;
3100
3101 parts_sint_to_float(&p, a, scale, status);
3102 return float64_round_pack_canonical(&p, status);
c02e1fb8
AB
3103}
3104
2abdfe24
RH
3105float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3106{
3107 return int64_to_float64_scalbn(a, scale, status);
3108}
3109
3110float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3111{
3112 return int64_to_float64_scalbn(a, scale, status);
3113}
3114
3115float64 int64_to_float64(int64_t a, float_status *status)
3116{
3117 return int64_to_float64_scalbn(a, 0, status);
3118}
3119
c02e1fb8
AB
3120float64 int32_to_float64(int32_t a, float_status *status)
3121{
2abdfe24 3122 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3123}
3124
3125float64 int16_to_float64(int16_t a, float_status *status)
3126{
2abdfe24 3127 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3128}
3129
34f0c0a9
LZ
3130bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3131{
e3689519
RH
3132 FloatParts64 p;
3133
3134 parts_sint_to_float(&p, a, scale, status);
3135 return bfloat16_round_pack_canonical(&p, status);
34f0c0a9
LZ
3136}
3137
3138bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3139{
3140 return int64_to_bfloat16_scalbn(a, scale, status);
3141}
3142
3143bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3144{
3145 return int64_to_bfloat16_scalbn(a, scale, status);
3146}
3147
3148bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3149{
3150 return int64_to_bfloat16_scalbn(a, 0, status);
3151}
3152
3153bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3154{
3155 return int64_to_bfloat16_scalbn(a, 0, status);
3156}
3157
3158bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3159{
3160 return int64_to_bfloat16_scalbn(a, 0, status);
3161}
c02e1fb8 3162
e3689519
RH
3163float128 int64_to_float128(int64_t a, float_status *status)
3164{
3165 FloatParts128 p;
3166
3167 parts_sint_to_float(&p, a, 0, status);
3168 return float128_round_pack_canonical(&p, status);
3169}
3170
3171float128 int32_to_float128(int32_t a, float_status *status)
3172{
3173 return int64_to_float128(a, status);
3174}
3175
c02e1fb8 3176/*
37c954a1 3177 * Unsigned Integer to floating-point conversions
c02e1fb8
AB
3178 */
3179
2abdfe24 3180float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3181{
37c954a1
RH
3182 FloatParts64 p;
3183
3184 parts_uint_to_float(&p, a, scale, status);
3185 return float16_round_pack_canonical(&p, status);
c02e1fb8
AB
3186}
3187
2abdfe24
RH
3188float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3189{
3190 return uint64_to_float16_scalbn(a, scale, status);
3191}
3192
3193float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3194{
3195 return uint64_to_float16_scalbn(a, scale, status);
3196}
3197
3198float16 uint64_to_float16(uint64_t a, float_status *status)
3199{
3200 return uint64_to_float16_scalbn(a, 0, status);
3201}
3202
c02e1fb8
AB
3203float16 uint32_to_float16(uint32_t a, float_status *status)
3204{
2abdfe24 3205 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3206}
3207
3208float16 uint16_to_float16(uint16_t a, float_status *status)
3209{
2abdfe24 3210 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3211}
3212
0d93d8ec
FC
3213float16 uint8_to_float16(uint8_t a, float_status *status)
3214{
3215 return uint64_to_float16_scalbn(a, 0, status);
3216}
3217
2abdfe24 3218float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3219{
37c954a1
RH
3220 FloatParts64 p;
3221
3222 parts_uint_to_float(&p, a, scale, status);
3223 return float32_round_pack_canonical(&p, status);
c02e1fb8
AB
3224}
3225
2abdfe24
RH
3226float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3227{
3228 return uint64_to_float32_scalbn(a, scale, status);
3229}
3230
3231float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3232{
3233 return uint64_to_float32_scalbn(a, scale, status);
3234}
3235
3236float32 uint64_to_float32(uint64_t a, float_status *status)
3237{
3238 return uint64_to_float32_scalbn(a, 0, status);
3239}
3240
c02e1fb8
AB
3241float32 uint32_to_float32(uint32_t a, float_status *status)
3242{
2abdfe24 3243 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3244}
3245
3246float32 uint16_to_float32(uint16_t a, float_status *status)
3247{
2abdfe24 3248 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3249}
3250
2abdfe24 3251float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3252{
37c954a1
RH
3253 FloatParts64 p;
3254
3255 parts_uint_to_float(&p, a, scale, status);
3256 return float64_round_pack_canonical(&p, status);
c02e1fb8
AB
3257}
3258
2abdfe24
RH
3259float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3260{
3261 return uint64_to_float64_scalbn(a, scale, status);
3262}
3263
3264float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3265{
3266 return uint64_to_float64_scalbn(a, scale, status);
3267}
3268
3269float64 uint64_to_float64(uint64_t a, float_status *status)
3270{
3271 return uint64_to_float64_scalbn(a, 0, status);
3272}
3273
c02e1fb8
AB
3274float64 uint32_to_float64(uint32_t a, float_status *status)
3275{
2abdfe24 3276 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3277}
3278
3279float64 uint16_to_float64(uint16_t a, float_status *status)
3280{
2abdfe24 3281 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3282}
3283
34f0c0a9
LZ
3284bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3285{
37c954a1
RH
3286 FloatParts64 p;
3287
3288 parts_uint_to_float(&p, a, scale, status);
3289 return bfloat16_round_pack_canonical(&p, status);
34f0c0a9
LZ
3290}
3291
3292bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3293{
3294 return uint64_to_bfloat16_scalbn(a, scale, status);
3295}
3296
3297bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3298{
3299 return uint64_to_bfloat16_scalbn(a, scale, status);
3300}
3301
3302bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3303{
3304 return uint64_to_bfloat16_scalbn(a, 0, status);
3305}
3306
3307bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3308{
3309 return uint64_to_bfloat16_scalbn(a, 0, status);
3310}
3311
3312bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3313{
3314 return uint64_to_bfloat16_scalbn(a, 0, status);
3315}
3316
37c954a1
RH
3317float128 uint64_to_float128(uint64_t a, float_status *status)
3318{
3319 FloatParts128 p;
3320
3321 parts_uint_to_float(&p, a, 0, status);
3322 return float128_round_pack_canonical(&p, status);
3323}
3324
e1c4667a
RH
3325/*
3326 * Minimum and maximum
89360067 3327 */
89360067 3328
e1c4667a
RH
3329static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3330{
3331 FloatParts64 pa, pb, *pr;
89360067 3332
e1c4667a
RH
3333 float16_unpack_canonical(&pa, a, s);
3334 float16_unpack_canonical(&pb, b, s);
3335 pr = parts_minmax(&pa, &pb, s, flags);
3336
3337 return float16_round_pack_canonical(pr, s);
89360067
AB
3338}
3339
e1c4667a
RH
3340static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3341 float_status *s, int flags)
3342{
3343 FloatParts64 pa, pb, *pr;
3344
3345 bfloat16_unpack_canonical(&pa, a, s);
3346 bfloat16_unpack_canonical(&pb, b, s);
3347 pr = parts_minmax(&pa, &pb, s, flags);
3348
3349 return bfloat16_round_pack_canonical(pr, s);
3350}
3351
3352static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
3353{
3354 FloatParts64 pa, pb, *pr;
3355
3356 float32_unpack_canonical(&pa, a, s);
3357 float32_unpack_canonical(&pb, b, s);
3358 pr = parts_minmax(&pa, &pb, s, flags);
3359
3360 return float32_round_pack_canonical(pr, s);
3361}
3362
3363static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
3364{
3365 FloatParts64 pa, pb, *pr;
3366
3367 float64_unpack_canonical(&pa, a, s);
3368 float64_unpack_canonical(&pb, b, s);
3369 pr = parts_minmax(&pa, &pb, s, flags);
3370
3371 return float64_round_pack_canonical(pr, s);
3372}
3373
ceebc129
DH
3374static float128 float128_minmax(float128 a, float128 b,
3375 float_status *s, int flags)
3376{
3377 FloatParts128 pa, pb, *pr;
3378
3379 float128_unpack_canonical(&pa, a, s);
3380 float128_unpack_canonical(&pb, b, s);
3381 pr = parts_minmax(&pa, &pb, s, flags);
3382
3383 return float128_round_pack_canonical(pr, s);
3384}
3385
e1c4667a
RH
3386#define MINMAX_1(type, name, flags) \
3387 type type##_##name(type a, type b, float_status *s) \
3388 { return type##_minmax(a, b, s, flags); }
3389
3390#define MINMAX_2(type) \
3391 MINMAX_1(type, max, 0) \
3392 MINMAX_1(type, maxnum, minmax_isnum) \
3393 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
3394 MINMAX_1(type, min, minmax_ismin) \
3395 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
3396 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3397
3398MINMAX_2(float16)
3399MINMAX_2(bfloat16)
3400MINMAX_2(float32)
3401MINMAX_2(float64)
ceebc129 3402MINMAX_2(float128)
e1c4667a
RH
3403
3404#undef MINMAX_1
3405#undef MINMAX_2
8282310d 3406
6eb169b8
RH
3407/*
3408 * Floating point compare
3409 */
0c4c9092 3410
6eb169b8
RH
3411static FloatRelation QEMU_FLATTEN
3412float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
3413{
3414 FloatParts64 pa, pb;
0c4c9092 3415
6eb169b8
RH
3416 float16_unpack_canonical(&pa, a, s);
3417 float16_unpack_canonical(&pb, b, s);
3418 return parts_compare(&pa, &pb, s, is_quiet);
0c4c9092
AB
3419}
3420
71bfd65c 3421FloatRelation float16_compare(float16 a, float16 b, float_status *s)
d9fe9db9 3422{
6eb169b8 3423 return float16_do_compare(a, b, s, false);
d9fe9db9
EC
3424}
3425
71bfd65c 3426FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
d9fe9db9 3427{
6eb169b8
RH
3428 return float16_do_compare(a, b, s, true);
3429}
3430
3431static FloatRelation QEMU_SOFTFLOAT_ATTR
3432float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
3433{
3434 FloatParts64 pa, pb;
3435
3436 float32_unpack_canonical(&pa, a, s);
3437 float32_unpack_canonical(&pb, b, s);
3438 return parts_compare(&pa, &pb, s, is_quiet);
d9fe9db9
EC
3439}
3440
71bfd65c 3441static FloatRelation QEMU_FLATTEN
6eb169b8 3442float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
d9fe9db9
EC
3443{
3444 union_float32 ua, ub;
3445
3446 ua.s = xa;
3447 ub.s = xb;
3448
3449 if (QEMU_NO_HARDFLOAT) {
3450 goto soft;
3451 }
3452
3453 float32_input_flush2(&ua.s, &ub.s, s);
3454 if (isgreaterequal(ua.h, ub.h)) {
3455 if (isgreater(ua.h, ub.h)) {
3456 return float_relation_greater;
3457 }
3458 return float_relation_equal;
3459 }
3460 if (likely(isless(ua.h, ub.h))) {
3461 return float_relation_less;
3462 }
6eb169b8
RH
3463 /*
3464 * The only condition remaining is unordered.
d9fe9db9
EC
3465 * Fall through to set flags.
3466 */
3467 soft:
6eb169b8 3468 return float32_do_compare(ua.s, ub.s, s, is_quiet);
d9fe9db9
EC
3469}
3470
71bfd65c 3471FloatRelation float32_compare(float32 a, float32 b, float_status *s)
d9fe9db9 3472{
6eb169b8 3473 return float32_hs_compare(a, b, s, false);
d9fe9db9
EC
3474}
3475
71bfd65c 3476FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
d9fe9db9 3477{
6eb169b8
RH
3478 return float32_hs_compare(a, b, s, true);
3479}
3480
3481static FloatRelation QEMU_SOFTFLOAT_ATTR
3482float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
3483{
3484 FloatParts64 pa, pb;
3485
3486 float64_unpack_canonical(&pa, a, s);
3487 float64_unpack_canonical(&pb, b, s);
3488 return parts_compare(&pa, &pb, s, is_quiet);
d9fe9db9
EC
3489}
3490
71bfd65c 3491static FloatRelation QEMU_FLATTEN
6eb169b8 3492float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
d9fe9db9
EC
3493{
3494 union_float64 ua, ub;
3495
3496 ua.s = xa;
3497 ub.s = xb;
3498
3499 if (QEMU_NO_HARDFLOAT) {
3500 goto soft;
3501 }
3502
3503 float64_input_flush2(&ua.s, &ub.s, s);
3504 if (isgreaterequal(ua.h, ub.h)) {
3505 if (isgreater(ua.h, ub.h)) {
3506 return float_relation_greater;
3507 }
3508 return float_relation_equal;
3509 }
3510 if (likely(isless(ua.h, ub.h))) {
3511 return float_relation_less;
3512 }
6eb169b8
RH
3513 /*
3514 * The only condition remaining is unordered.
d9fe9db9
EC
3515 * Fall through to set flags.
3516 */
3517 soft:
6eb169b8 3518 return float64_do_compare(ua.s, ub.s, s, is_quiet);
d9fe9db9
EC
3519}
3520
71bfd65c 3521FloatRelation float64_compare(float64 a, float64 b, float_status *s)
d9fe9db9 3522{
6eb169b8 3523 return float64_hs_compare(a, b, s, false);
d9fe9db9
EC
3524}
3525
71bfd65c 3526FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
d9fe9db9 3527{
6eb169b8 3528 return float64_hs_compare(a, b, s, true);
d9fe9db9
EC
3529}
3530
8282310d 3531static FloatRelation QEMU_FLATTEN
6eb169b8 3532bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
8282310d 3533{
98e256fc
RH
3534 FloatParts64 pa, pb;
3535
3536 bfloat16_unpack_canonical(&pa, a, s);
3537 bfloat16_unpack_canonical(&pb, b, s);
6eb169b8 3538 return parts_compare(&pa, &pb, s, is_quiet);
8282310d
LZ
3539}
3540
3541FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3542{
6eb169b8 3543 return bfloat16_do_compare(a, b, s, false);
8282310d
LZ
3544}
3545
3546FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3547{
6eb169b8
RH
3548 return bfloat16_do_compare(a, b, s, true);
3549}
3550
3551static FloatRelation QEMU_FLATTEN
3552float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
3553{
3554 FloatParts128 pa, pb;
3555
3556 float128_unpack_canonical(&pa, a, s);
3557 float128_unpack_canonical(&pb, b, s);
3558 return parts_compare(&pa, &pb, s, is_quiet);
3559}
3560
3561FloatRelation float128_compare(float128 a, float128 b, float_status *s)
3562{
3563 return float128_do_compare(a, b, s, false);
3564}
3565
3566FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
3567{
3568 return float128_do_compare(a, b, s, true);
8282310d
LZ
3569}
3570
39626b0c
RH
3571/*
3572 * Scale by 2**N
3573 */
0bfc9f19
AB
3574
3575float16 float16_scalbn(float16 a, int n, float_status *status)
3576{
39626b0c 3577 FloatParts64 p;
98e256fc 3578
39626b0c
RH
3579 float16_unpack_canonical(&p, a, status);
3580 parts_scalbn(&p, n, status);
3581 return float16_round_pack_canonical(&p, status);
0bfc9f19
AB
3582}
3583
3584float32 float32_scalbn(float32 a, int n, float_status *status)
3585{
39626b0c 3586 FloatParts64 p;
98e256fc 3587
39626b0c
RH
3588 float32_unpack_canonical(&p, a, status);
3589 parts_scalbn(&p, n, status);
3590 return float32_round_pack_canonical(&p, status);
0bfc9f19
AB
3591}
3592
3593float64 float64_scalbn(float64 a, int n, float_status *status)
3594{
39626b0c 3595 FloatParts64 p;
98e256fc 3596
39626b0c
RH
3597 float64_unpack_canonical(&p, a, status);
3598 parts_scalbn(&p, n, status);
3599 return float64_round_pack_canonical(&p, status);
0bfc9f19
AB
3600}
3601
8282310d
LZ
3602bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3603{
39626b0c 3604 FloatParts64 p;
98e256fc 3605
39626b0c
RH
3606 bfloat16_unpack_canonical(&p, a, status);
3607 parts_scalbn(&p, n, status);
3608 return bfloat16_round_pack_canonical(&p, status);
3609}
3610
3611float128 float128_scalbn(float128 a, int n, float_status *status)
3612{
3613 FloatParts128 p;
3614
3615 float128_unpack_canonical(&p, a, status);
3616 parts_scalbn(&p, n, status);
3617 return float128_round_pack_canonical(&p, status);
8282310d
LZ
3618}
3619
c13bb2da
AB
3620/*
3621 * Square Root
c13bb2da
AB
3622 */
3623
97ff87c0 3624float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da 3625{
9261b245 3626 FloatParts64 p;
98e256fc 3627
9261b245
RH
3628 float16_unpack_canonical(&p, a, status);
3629 parts_sqrt(&p, status, &float16_params);
3630 return float16_round_pack_canonical(&p, status);
c13bb2da
AB
3631}
3632
f131bae8
EC
3633static float32 QEMU_SOFTFLOAT_ATTR
3634soft_f32_sqrt(float32 a, float_status *status)
c13bb2da 3635{
9261b245 3636 FloatParts64 p;
98e256fc 3637
9261b245
RH
3638 float32_unpack_canonical(&p, a, status);
3639 parts_sqrt(&p, status, &float32_params);
3640 return float32_round_pack_canonical(&p, status);
c13bb2da
AB
3641}
3642
f131bae8
EC
3643static float64 QEMU_SOFTFLOAT_ATTR
3644soft_f64_sqrt(float64 a, float_status *status)
c13bb2da 3645{
9261b245 3646 FloatParts64 p;
98e256fc 3647
9261b245
RH
3648 float64_unpack_canonical(&p, a, status);
3649 parts_sqrt(&p, status, &float64_params);
3650 return float64_round_pack_canonical(&p, status);
c13bb2da
AB
3651}
3652
f131bae8
EC
3653float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3654{
3655 union_float32 ua, ur;
3656
3657 ua.s = xa;
3658 if (unlikely(!can_use_fpu(s))) {
3659 goto soft;
3660 }
3661
3662 float32_input_flush1(&ua.s, s);
3663 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3664 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3665 fpclassify(ua.h) == FP_ZERO) ||
3666 signbit(ua.h))) {
3667 goto soft;
3668 }
3669 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3670 float32_is_neg(ua.s))) {
3671 goto soft;
3672 }
3673 ur.h = sqrtf(ua.h);
3674 return ur.s;
3675
3676 soft:
3677 return soft_f32_sqrt(ua.s, s);
3678}
3679
3680float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3681{
3682 union_float64 ua, ur;
3683
3684 ua.s = xa;
3685 if (unlikely(!can_use_fpu(s))) {
3686 goto soft;
3687 }
3688
3689 float64_input_flush1(&ua.s, s);
3690 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3691 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3692 fpclassify(ua.h) == FP_ZERO) ||
3693 signbit(ua.h))) {
3694 goto soft;
3695 }
3696 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3697 float64_is_neg(ua.s))) {
3698 goto soft;
3699 }
3700 ur.h = sqrt(ua.h);
3701 return ur.s;
3702
3703 soft:
3704 return soft_f64_sqrt(ua.s, s);
3705}
3706
8282310d
LZ
3707bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3708{
9261b245 3709 FloatParts64 p;
98e256fc 3710
9261b245
RH
3711 bfloat16_unpack_canonical(&p, a, status);
3712 parts_sqrt(&p, status, &bfloat16_params);
3713 return bfloat16_round_pack_canonical(&p, status);
3714}
3715
3716float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
3717{
3718 FloatParts128 p;
3719
3720 float128_unpack_canonical(&p, a, status);
3721 parts_sqrt(&p, status, &float128_params);
3722 return float128_round_pack_canonical(&p, status);
8282310d
LZ
3723}
3724
0218a16e
RH
3725/*----------------------------------------------------------------------------
3726| The pattern for a default generated NaN.
3727*----------------------------------------------------------------------------*/
3728
3729float16 float16_default_nan(float_status *status)
3730{
0fc07cad
RH
3731 FloatParts64 p;
3732
3733 parts_default_nan(&p, status);
0218a16e 3734 p.frac >>= float16_params.frac_shift;
71fd178e 3735 return float16_pack_raw(&p);
0218a16e
RH
3736}
3737
3738float32 float32_default_nan(float_status *status)
3739{
0fc07cad
RH
3740 FloatParts64 p;
3741
3742 parts_default_nan(&p, status);
0218a16e 3743 p.frac >>= float32_params.frac_shift;
71fd178e 3744 return float32_pack_raw(&p);
0218a16e
RH
3745}
3746
3747float64 float64_default_nan(float_status *status)
3748{
0fc07cad
RH
3749 FloatParts64 p;
3750
3751 parts_default_nan(&p, status);
0218a16e 3752 p.frac >>= float64_params.frac_shift;
71fd178e 3753 return float64_pack_raw(&p);
0218a16e
RH
3754}
3755
3756float128 float128_default_nan(float_status *status)
3757{
e9034ea8 3758 FloatParts128 p;
0218a16e 3759
0fc07cad 3760 parts_default_nan(&p, status);
e9034ea8
RH
3761 frac_shr(&p, float128_params.frac_shift);
3762 return float128_pack_raw(&p);
0218a16e 3763}
c13bb2da 3764
8282310d
LZ
3765bfloat16 bfloat16_default_nan(float_status *status)
3766{
0fc07cad
RH
3767 FloatParts64 p;
3768
3769 parts_default_nan(&p, status);
8282310d 3770 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3771 return bfloat16_pack_raw(&p);
8282310d
LZ
3772}
3773
158142c2 3774/*----------------------------------------------------------------------------
377ed926
RH
3775| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3776*----------------------------------------------------------------------------*/
3777
3778float16 float16_silence_nan(float16 a, float_status *status)
3779{
3dddb203
RH
3780 FloatParts64 p;
3781
3782 float16_unpack_raw(&p, a);
377ed926 3783 p.frac <<= float16_params.frac_shift;
92ff426d 3784 parts_silence_nan(&p, status);
377ed926 3785 p.frac >>= float16_params.frac_shift;
71fd178e 3786 return float16_pack_raw(&p);
377ed926
RH
3787}
3788
3789float32 float32_silence_nan(float32 a, float_status *status)
3790{
3dddb203
RH
3791 FloatParts64 p;
3792
3793 float32_unpack_raw(&p, a);
377ed926 3794 p.frac <<= float32_params.frac_shift;
92ff426d 3795 parts_silence_nan(&p, status);
377ed926 3796 p.frac >>= float32_params.frac_shift;
71fd178e 3797 return float32_pack_raw(&p);
377ed926
RH
3798}
3799
3800float64 float64_silence_nan(float64 a, float_status *status)
3801{
3dddb203
RH
3802 FloatParts64 p;
3803
3804 float64_unpack_raw(&p, a);
377ed926 3805 p.frac <<= float64_params.frac_shift;
92ff426d 3806 parts_silence_nan(&p, status);
377ed926 3807 p.frac >>= float64_params.frac_shift;
71fd178e 3808 return float64_pack_raw(&p);
377ed926
RH
3809}
3810
8282310d
LZ
3811bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3812{
3dddb203
RH
3813 FloatParts64 p;
3814
3815 bfloat16_unpack_raw(&p, a);
8282310d 3816 p.frac <<= bfloat16_params.frac_shift;
92ff426d 3817 parts_silence_nan(&p, status);
8282310d 3818 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3819 return bfloat16_pack_raw(&p);
8282310d 3820}
e6b405fe 3821
0018b1f4
RH
3822float128 float128_silence_nan(float128 a, float_status *status)
3823{
3824 FloatParts128 p;
3825
3826 float128_unpack_raw(&p, a);
3827 frac_shl(&p, float128_params.frac_shift);
3828 parts_silence_nan(&p, status);
3829 frac_shr(&p, float128_params.frac_shift);
3830 return float128_pack_raw(&p);
3831}
3832
e6b405fe
AB
3833/*----------------------------------------------------------------------------
3834| If `a' is denormal and we are in flush-to-zero mode then set the
3835| input-denormal exception and return zero. Otherwise just return the value.
3836*----------------------------------------------------------------------------*/
3837
f8155c1d 3838static bool parts_squash_denormal(FloatParts64 p, float_status *status)
e6b405fe
AB
3839{
3840 if (p.exp == 0 && p.frac != 0) {
3841 float_raise(float_flag_input_denormal, status);
3842 return true;
3843 }
3844
3845 return false;
3846}
3847
3848float16 float16_squash_input_denormal(float16 a, float_status *status)
3849{
3850 if (status->flush_inputs_to_zero) {
3dddb203
RH
3851 FloatParts64 p;
3852
3853 float16_unpack_raw(&p, a);
e6b405fe
AB
3854 if (parts_squash_denormal(p, status)) {
3855 return float16_set_sign(float16_zero, p.sign);
3856 }
3857 }
3858 return a;
3859}
3860
3861float32 float32_squash_input_denormal(float32 a, float_status *status)
3862{
3863 if (status->flush_inputs_to_zero) {
3dddb203
RH
3864 FloatParts64 p;
3865
3866 float32_unpack_raw(&p, a);
e6b405fe
AB
3867 if (parts_squash_denormal(p, status)) {
3868 return float32_set_sign(float32_zero, p.sign);
3869 }
3870 }
3871 return a;
3872}
3873
3874float64 float64_squash_input_denormal(float64 a, float_status *status)
3875{
3876 if (status->flush_inputs_to_zero) {
3dddb203
RH
3877 FloatParts64 p;
3878
3879 float64_unpack_raw(&p, a);
e6b405fe
AB
3880 if (parts_squash_denormal(p, status)) {
3881 return float64_set_sign(float64_zero, p.sign);
3882 }
3883 }
3884 return a;
3885}
3886
8282310d
LZ
3887bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3888{
3889 if (status->flush_inputs_to_zero) {
3dddb203
RH
3890 FloatParts64 p;
3891
3892 bfloat16_unpack_raw(&p, a);
8282310d
LZ
3893 if (parts_squash_denormal(p, status)) {
3894 return bfloat16_set_sign(bfloat16_zero, p.sign);
3895 }
3896 }
3897 return a;
3898}
3899
377ed926 3900/*----------------------------------------------------------------------------
158142c2
FB
3901| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3902| and 7, and returns the properly rounded 32-bit integer corresponding to the
3903| input. If `zSign' is 1, the input is negated before being converted to an
3904| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3905| is simply rounded to an integer, with the inexact exception raised if the
3906| input cannot be represented exactly as an integer. However, if the fixed-
3907| point input is too large, the invalid exception is raised and the largest
3908| positive or negative integer is returned.
3909*----------------------------------------------------------------------------*/
3910
c120391c
RH
3911static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
3912 float_status *status)
158142c2 3913{
8f506c70 3914 int8_t roundingMode;
c120391c 3915 bool roundNearestEven;
8f506c70 3916 int8_t roundIncrement, roundBits;
760e1416 3917 int32_t z;
158142c2 3918
a2f2d288 3919 roundingMode = status->float_rounding_mode;
158142c2 3920 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3921 switch (roundingMode) {
3922 case float_round_nearest_even:
f9288a76 3923 case float_round_ties_away:
dc355b76
PM
3924 roundIncrement = 0x40;
3925 break;
3926 case float_round_to_zero:
3927 roundIncrement = 0;
3928 break;
3929 case float_round_up:
3930 roundIncrement = zSign ? 0 : 0x7f;
3931 break;
3932 case float_round_down:
3933 roundIncrement = zSign ? 0x7f : 0;
3934 break;
5d64abb3
RH
3935 case float_round_to_odd:
3936 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
3937 break;
dc355b76
PM
3938 default:
3939 abort();
158142c2
FB
3940 }
3941 roundBits = absZ & 0x7F;
3942 absZ = ( absZ + roundIncrement )>>7;
40662886
PMD
3943 if (!(roundBits ^ 0x40) && roundNearestEven) {
3944 absZ &= ~1;
3945 }
158142c2
FB
3946 z = absZ;
3947 if ( zSign ) z = - z;
3948 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 3949 float_raise(float_flag_invalid, status);
2c217da0 3950 return zSign ? INT32_MIN : INT32_MAX;
158142c2 3951 }
a2f2d288 3952 if (roundBits) {
d82f3b2d 3953 float_raise(float_flag_inexact, status);
a2f2d288 3954 }
158142c2
FB
3955 return z;
3956
3957}
3958
3959/*----------------------------------------------------------------------------
3960| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3961| `absZ1', with binary point between bits 63 and 64 (between the input words),
3962| and returns the properly rounded 64-bit integer corresponding to the input.
3963| If `zSign' is 1, the input is negated before being converted to an integer.
3964| Ordinarily, the fixed-point input is simply rounded to an integer, with
3965| the inexact exception raised if the input cannot be represented exactly as
3966| an integer. However, if the fixed-point input is too large, the invalid
3967| exception is raised and the largest positive or negative integer is
3968| returned.
3969*----------------------------------------------------------------------------*/
3970
c120391c 3971static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 3972 float_status *status)
158142c2 3973{
8f506c70 3974 int8_t roundingMode;
c120391c 3975 bool roundNearestEven, increment;
760e1416 3976 int64_t z;
158142c2 3977
a2f2d288 3978 roundingMode = status->float_rounding_mode;
158142c2 3979 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3980 switch (roundingMode) {
3981 case float_round_nearest_even:
f9288a76 3982 case float_round_ties_away:
dc355b76
PM
3983 increment = ((int64_t) absZ1 < 0);
3984 break;
3985 case float_round_to_zero:
3986 increment = 0;
3987 break;
3988 case float_round_up:
3989 increment = !zSign && absZ1;
3990 break;
3991 case float_round_down:
3992 increment = zSign && absZ1;
3993 break;
5d64abb3
RH
3994 case float_round_to_odd:
3995 increment = !(absZ0 & 1) && absZ1;
3996 break;
dc355b76
PM
3997 default:
3998 abort();
158142c2
FB
3999 }
4000 if ( increment ) {
4001 ++absZ0;
4002 if ( absZ0 == 0 ) goto overflow;
40662886
PMD
4003 if (!(absZ1 << 1) && roundNearestEven) {
4004 absZ0 &= ~1;
4005 }
158142c2
FB
4006 }
4007 z = absZ0;
4008 if ( zSign ) z = - z;
4009 if ( z && ( ( z < 0 ) ^ zSign ) ) {
4010 overflow:
ff32e16e 4011 float_raise(float_flag_invalid, status);
2c217da0 4012 return zSign ? INT64_MIN : INT64_MAX;
158142c2 4013 }
a2f2d288 4014 if (absZ1) {
d82f3b2d 4015 float_raise(float_flag_inexact, status);
a2f2d288 4016 }
158142c2
FB
4017 return z;
4018
4019}
4020
158142c2
FB
4021/*----------------------------------------------------------------------------
4022| Normalizes the subnormal single-precision floating-point value represented
4023| by the denormalized significand `aSig'. The normalized exponent and
4024| significand are stored at the locations pointed to by `zExpPtr' and
4025| `zSigPtr', respectively.
4026*----------------------------------------------------------------------------*/
4027
static void
normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /*
     * Left-shift distance: eight less than the leading-zero count reported
     * by clz32(), which leaves eight zero bits above the leading one.
     */
    const int8_t lead = clz32(aSig) - 8;

    *zSigPtr = aSig << lead;
    /* Each position shifted lowers the exponent by one, starting from 1. */
    *zExpPtr = 1 - lead;
}
4038
158142c2
FB
4039/*----------------------------------------------------------------------------
4040| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4041| and significand `zSig', and returns the proper single-precision floating-
4042| point value corresponding to the abstract input. Ordinarily, the abstract
4043| value is simply rounded and packed into the single-precision format, with
4044| the inexact exception raised if the abstract input cannot be represented
4045| exactly. However, if the abstract value is too large, the overflow and
4046| inexact exceptions are raised and an infinity or maximal finite value is
4047| returned. If the abstract value is too small, the input value is rounded to
4048| a subnormal number, and the underflow and inexact exceptions are raised if
4049| the abstract input cannot be represented exactly as a subnormal single-
4050| precision floating-point number.
4051| The input significand `zSig' has its binary point between bits 30
4052| and 29, which is 7 bits to the left of the usual location. This shifted
4053| significand must be normalized or smaller. If `zSig' is not normalized,
4054| `zExp' must be 0; in that case, the result returned is a subnormal number,
4055| and it must not require rounding. In the usual case that `zSig' is
4056| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4057| The handling of underflow and overflow follows the IEC/IEEE Standard for
4058| Binary Floating-Point Arithmetic.
4059*----------------------------------------------------------------------------*/
4060
c120391c 4061static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4062 float_status *status)
158142c2 4063{
8f506c70 4064 int8_t roundingMode;
c120391c 4065 bool roundNearestEven;
8f506c70 4066 int8_t roundIncrement, roundBits;
c120391c 4067 bool isTiny;
158142c2 4068
a2f2d288 4069 roundingMode = status->float_rounding_mode;
158142c2 4070 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4071 switch (roundingMode) {
4072 case float_round_nearest_even:
f9288a76 4073 case float_round_ties_away:
dc355b76
PM
4074 roundIncrement = 0x40;
4075 break;
4076 case float_round_to_zero:
4077 roundIncrement = 0;
4078 break;
4079 case float_round_up:
4080 roundIncrement = zSign ? 0 : 0x7f;
4081 break;
4082 case float_round_down:
4083 roundIncrement = zSign ? 0x7f : 0;
4084 break;
5d64abb3
RH
4085 case float_round_to_odd:
4086 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4087 break;
dc355b76
PM
4088 default:
4089 abort();
4090 break;
158142c2
FB
4091 }
4092 roundBits = zSig & 0x7F;
bb98fe42 4093 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
4094 if ( ( 0xFD < zExp )
4095 || ( ( zExp == 0xFD )
bb98fe42 4096 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4097 ) {
5d64abb3
RH
4098 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4099 roundIncrement != 0;
ff32e16e 4100 float_raise(float_flag_overflow | float_flag_inexact, status);
5d64abb3 4101 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
158142c2
FB
4102 }
4103 if ( zExp < 0 ) {
a2f2d288 4104 if (status->flush_to_zero) {
ff32e16e 4105 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4106 return packFloat32(zSign, 0, 0);
4107 }
a828b373
RH
4108 isTiny = status->tininess_before_rounding
4109 || (zExp < -1)
4110 || (zSig + roundIncrement < 0x80000000);
158142c2
FB
4111 shift32RightJamming( zSig, - zExp, &zSig );
4112 zExp = 0;
4113 roundBits = zSig & 0x7F;
ff32e16e
PM
4114 if (isTiny && roundBits) {
4115 float_raise(float_flag_underflow, status);
4116 }
5d64abb3
RH
4117 if (roundingMode == float_round_to_odd) {
4118 /*
4119 * For round-to-odd case, the roundIncrement depends on
4120 * zSig which just changed.
4121 */
4122 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4123 }
158142c2
FB
4124 }
4125 }
a2f2d288 4126 if (roundBits) {
d82f3b2d 4127 float_raise(float_flag_inexact, status);
a2f2d288 4128 }
158142c2 4129 zSig = ( zSig + roundIncrement )>>7;
40662886
PMD
4130 if (!(roundBits ^ 0x40) && roundNearestEven) {
4131 zSig &= ~1;
4132 }
158142c2
FB
4133 if ( zSig == 0 ) zExp = 0;
4134 return packFloat32( zSign, zExp, zSig );
4135
4136}
4137
4138/*----------------------------------------------------------------------------
4139| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4140| and significand `zSig', and returns the proper single-precision floating-
4141| point value corresponding to the abstract input. This routine is just like
4142| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4143| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4144| floating-point exponent.
4145*----------------------------------------------------------------------------*/
4146
4147static float32
c120391c 4148 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4149 float_status *status)
158142c2 4150{
8f506c70 4151 int8_t shiftCount;
158142c2 4152
0019d5c3 4153 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
4154 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4155 status);
158142c2
FB
4156
4157}
4158
158142c2
FB
4159/*----------------------------------------------------------------------------
4160| Normalizes the subnormal double-precision floating-point value represented
4161| by the denormalized significand `aSig'. The normalized exponent and
4162| significand are stored at the locations pointed to by `zExpPtr' and
4163| `zSigPtr', respectively.
4164*----------------------------------------------------------------------------*/
4165
static void
normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /*
     * Left-shift distance: eleven less than the leading-zero count reported
     * by clz64(), which leaves eleven zero bits above the leading one.
     */
    const int8_t lead = clz64(aSig) - 11;

    *zSigPtr = aSig << lead;
    /* Each position shifted lowers the exponent by one, starting from 1. */
    *zExpPtr = 1 - lead;
}
4176
4177/*----------------------------------------------------------------------------
4178| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4179| double-precision floating-point value, returning the result. After being
4180| shifted into the proper positions, the three fields are simply added
4181| together to form the result. This means that any integer portion of `zSig'
4182| will be added into the exponent. Since a properly normalized significand
4183| will have an integer portion equal to 1, the `zExp' input should be 1 less
4184| than the desired result exponent whenever `zSig' is a complete, normalized
4185| significand.
4186*----------------------------------------------------------------------------*/
4187
c120391c 4188static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
158142c2
FB
4189{
4190
f090c9d4 4191 return make_float64(
bb98fe42 4192 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
4193
4194}
4195
4196/*----------------------------------------------------------------------------
4197| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4198| and significand `zSig', and returns the proper double-precision floating-
4199| point value corresponding to the abstract input. Ordinarily, the abstract
4200| value is simply rounded and packed into the double-precision format, with
4201| the inexact exception raised if the abstract input cannot be represented
4202| exactly. However, if the abstract value is too large, the overflow and
4203| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
4204| returned. If the abstract value is too small, the input value is rounded to
4205| a subnormal number, and the underflow and inexact exceptions are raised if
4206| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
4207| precision floating-point number.
4208| The input significand `zSig' has its binary point between bits 62
4209| and 61, which is 10 bits to the left of the usual location. This shifted
4210| significand must be normalized or smaller. If `zSig' is not normalized,
4211| `zExp' must be 0; in that case, the result returned is a subnormal number,
4212| and it must not require rounding. In the usual case that `zSig' is
4213| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4214| The handling of underflow and overflow follows the IEC/IEEE Standard for
4215| Binary Floating-Point Arithmetic.
4216*----------------------------------------------------------------------------*/
4217
c120391c 4218static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4219 float_status *status)
158142c2 4220{
8f506c70 4221 int8_t roundingMode;
c120391c 4222 bool roundNearestEven;
0c48262d 4223 int roundIncrement, roundBits;
c120391c 4224 bool isTiny;
158142c2 4225
a2f2d288 4226 roundingMode = status->float_rounding_mode;
158142c2 4227 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4228 switch (roundingMode) {
4229 case float_round_nearest_even:
f9288a76 4230 case float_round_ties_away:
dc355b76
PM
4231 roundIncrement = 0x200;
4232 break;
4233 case float_round_to_zero:
4234 roundIncrement = 0;
4235 break;
4236 case float_round_up:
4237 roundIncrement = zSign ? 0 : 0x3ff;
4238 break;
4239 case float_round_down:
4240 roundIncrement = zSign ? 0x3ff : 0;
4241 break;
9ee6f678
BR
4242 case float_round_to_odd:
4243 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4244 break;
dc355b76
PM
4245 default:
4246 abort();
158142c2
FB
4247 }
4248 roundBits = zSig & 0x3FF;
bb98fe42 4249 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
4250 if ( ( 0x7FD < zExp )
4251 || ( ( zExp == 0x7FD )
bb98fe42 4252 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4253 ) {
9ee6f678
BR
4254 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4255 roundIncrement != 0;
ff32e16e 4256 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 4257 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
4258 }
4259 if ( zExp < 0 ) {
a2f2d288 4260 if (status->flush_to_zero) {
ff32e16e 4261 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4262 return packFloat64(zSign, 0, 0);
4263 }
a828b373
RH
4264 isTiny = status->tininess_before_rounding
4265 || (zExp < -1)
4266 || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
158142c2
FB
4267 shift64RightJamming( zSig, - zExp, &zSig );
4268 zExp = 0;
4269 roundBits = zSig & 0x3FF;
ff32e16e
PM
4270 if (isTiny && roundBits) {
4271 float_raise(float_flag_underflow, status);
4272 }
9ee6f678
BR
4273 if (roundingMode == float_round_to_odd) {
4274 /*
4275 * For round-to-odd case, the roundIncrement depends on
4276 * zSig which just changed.
4277 */
4278 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4279 }
158142c2
FB
4280 }
4281 }
a2f2d288 4282 if (roundBits) {
d82f3b2d 4283 float_raise(float_flag_inexact, status);
a2f2d288 4284 }
158142c2 4285 zSig = ( zSig + roundIncrement )>>10;
40662886
PMD
4286 if (!(roundBits ^ 0x200) && roundNearestEven) {
4287 zSig &= ~1;
4288 }
158142c2
FB
4289 if ( zSig == 0 ) zExp = 0;
4290 return packFloat64( zSign, zExp, zSig );
4291
4292}
4293
4294/*----------------------------------------------------------------------------
4295| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4296| and significand `zSig', and returns the proper double-precision floating-
4297| point value corresponding to the abstract input. This routine is just like
4298| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4299| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4300| floating-point exponent.
4301*----------------------------------------------------------------------------*/
4302
4303static float64
c120391c 4304 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4305 float_status *status)
158142c2 4306{
8f506c70 4307 int8_t shiftCount;
158142c2 4308
0019d5c3 4309 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
4310 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4311 status);
158142c2
FB
4312
4313}
4314
158142c2
FB
4315/*----------------------------------------------------------------------------
4316| Normalizes the subnormal extended double-precision floating-point value
4317| represented by the denormalized significand `aSig'. The normalized exponent
4318| and significand are stored at the locations pointed to by `zExpPtr' and
4319| `zSigPtr', respectively.
4320*----------------------------------------------------------------------------*/
4321
88857aca
LV
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* Shift by the full leading-zero count reported by clz64(). */
    const int8_t lead = clz64(aSig);

    *zSigPtr = aSig << lead;
    /* Each position shifted lowers the exponent by one, starting from 1. */
    *zExpPtr = 1 - lead;
}
4331
4332/*----------------------------------------------------------------------------
4333| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4334| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4335| and returns the proper extended double-precision floating-point value
4336| corresponding to the abstract input. Ordinarily, the abstract value is
4337| rounded and packed into the extended double-precision format, with the
4338| inexact exception raised if the abstract input cannot be represented
4339| exactly. However, if the abstract value is too large, the overflow and
4340| inexact exceptions are raised and an infinity or maximal finite value is
4341| returned. If the abstract value is too small, the input value is rounded to
4342| a subnormal number, and the underflow and inexact exceptions are raised if
4343| the abstract input cannot be represented exactly as a subnormal extended
4344| double-precision floating-point number.
4345| If `roundingPrecision' is 32 or 64, the result is rounded to the same
4346| number of bits as single or double precision, respectively. Otherwise, the
4347| result is rounded to the full precision of the extended double-precision
4348| format.
4349| The input significand must be normalized or smaller. If the input
4350| significand is not normalized, `zExp' must be 0; in that case, the result
4351| returned is a subnormal number, and it must not require rounding. The
4352| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4353| Floating-Point Arithmetic.
4354*----------------------------------------------------------------------------*/
4355
c120391c 4356floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
88857aca
LV
4357 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4358 float_status *status)
158142c2 4359{
8f506c70 4360 int8_t roundingMode;
c120391c 4361 bool roundNearestEven, increment, isTiny;
f42c2224 4362 int64_t roundIncrement, roundMask, roundBits;
158142c2 4363
a2f2d288 4364 roundingMode = status->float_rounding_mode;
158142c2
FB
4365 roundNearestEven = ( roundingMode == float_round_nearest_even );
4366 if ( roundingPrecision == 80 ) goto precision80;
4367 if ( roundingPrecision == 64 ) {
e9321124
AB
4368 roundIncrement = UINT64_C(0x0000000000000400);
4369 roundMask = UINT64_C(0x00000000000007FF);
158142c2
FB
4370 }
4371 else if ( roundingPrecision == 32 ) {
e9321124
AB
4372 roundIncrement = UINT64_C(0x0000008000000000);
4373 roundMask = UINT64_C(0x000000FFFFFFFFFF);
158142c2
FB
4374 }
4375 else {
4376 goto precision80;
4377 }
4378 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
4379 switch (roundingMode) {
4380 case float_round_nearest_even:
f9288a76 4381 case float_round_ties_away:
dc355b76
PM
4382 break;
4383 case float_round_to_zero:
4384 roundIncrement = 0;
4385 break;
4386 case float_round_up:
4387 roundIncrement = zSign ? 0 : roundMask;
4388 break;
4389 case float_round_down:
4390 roundIncrement = zSign ? roundMask : 0;
4391 break;
4392 default:
4393 abort();
158142c2
FB
4394 }
4395 roundBits = zSig0 & roundMask;
bb98fe42 4396 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4397 if ( ( 0x7FFE < zExp )
4398 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4399 ) {
4400 goto overflow;
4401 }
4402 if ( zExp <= 0 ) {
a2f2d288 4403 if (status->flush_to_zero) {
ff32e16e 4404 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4405 return packFloatx80(zSign, 0, 0);
4406 }
a828b373
RH
4407 isTiny = status->tininess_before_rounding
4408 || (zExp < 0 )
4409 || (zSig0 <= zSig0 + roundIncrement);
158142c2
FB
4410 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4411 zExp = 0;
4412 roundBits = zSig0 & roundMask;
ff32e16e
PM
4413 if (isTiny && roundBits) {
4414 float_raise(float_flag_underflow, status);
4415 }
a2f2d288 4416 if (roundBits) {
d82f3b2d 4417 float_raise(float_flag_inexact, status);
a2f2d288 4418 }
158142c2 4419 zSig0 += roundIncrement;
bb98fe42 4420 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4421 roundIncrement = roundMask + 1;
4422 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4423 roundMask |= roundIncrement;
4424 }
4425 zSig0 &= ~ roundMask;
4426 return packFloatx80( zSign, zExp, zSig0 );
4427 }
4428 }
a2f2d288 4429 if (roundBits) {
d82f3b2d 4430 float_raise(float_flag_inexact, status);
a2f2d288 4431 }
158142c2
FB
4432 zSig0 += roundIncrement;
4433 if ( zSig0 < roundIncrement ) {
4434 ++zExp;
e9321124 4435 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4436 }
4437 roundIncrement = roundMask + 1;
4438 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4439 roundMask |= roundIncrement;
4440 }
4441 zSig0 &= ~ roundMask;
4442 if ( zSig0 == 0 ) zExp = 0;
4443 return packFloatx80( zSign, zExp, zSig0 );
4444 precision80:
dc355b76
PM
4445 switch (roundingMode) {
4446 case float_round_nearest_even:
f9288a76 4447 case float_round_ties_away:
dc355b76
PM
4448 increment = ((int64_t)zSig1 < 0);
4449 break;
4450 case float_round_to_zero:
4451 increment = 0;
4452 break;
4453 case float_round_up:
4454 increment = !zSign && zSig1;
4455 break;
4456 case float_round_down:
4457 increment = zSign && zSig1;
4458 break;
4459 default:
4460 abort();
158142c2 4461 }
bb98fe42 4462 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4463 if ( ( 0x7FFE < zExp )
4464 || ( ( zExp == 0x7FFE )
e9321124 4465 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
4466 && increment
4467 )
4468 ) {
4469 roundMask = 0;
4470 overflow:
ff32e16e 4471 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4472 if ( ( roundingMode == float_round_to_zero )
4473 || ( zSign && ( roundingMode == float_round_up ) )
4474 || ( ! zSign && ( roundingMode == float_round_down ) )
4475 ) {
4476 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4477 }
0f605c88
LV
4478 return packFloatx80(zSign,
4479 floatx80_infinity_high,
4480 floatx80_infinity_low);
158142c2
FB
4481 }
4482 if ( zExp <= 0 ) {
a828b373
RH
4483 isTiny = status->tininess_before_rounding
4484 || (zExp < 0)
4485 || !increment
4486 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4487 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4488 zExp = 0;
ff32e16e
PM
4489 if (isTiny && zSig1) {
4490 float_raise(float_flag_underflow, status);
4491 }
a2f2d288 4492 if (zSig1) {
d82f3b2d 4493 float_raise(float_flag_inexact, status);
a2f2d288 4494 }
dc355b76
PM
4495 switch (roundingMode) {
4496 case float_round_nearest_even:
f9288a76 4497 case float_round_ties_away:
dc355b76
PM
4498 increment = ((int64_t)zSig1 < 0);
4499 break;
4500 case float_round_to_zero:
4501 increment = 0;
4502 break;
4503 case float_round_up:
4504 increment = !zSign && zSig1;
4505 break;
4506 case float_round_down:
4507 increment = zSign && zSig1;
4508 break;
4509 default:
4510 abort();
158142c2
FB
4511 }
4512 if ( increment ) {
4513 ++zSig0;
40662886
PMD
4514 if (!(zSig1 << 1) && roundNearestEven) {
4515 zSig0 &= ~1;
4516 }
bb98fe42 4517 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4518 }
4519 return packFloatx80( zSign, zExp, zSig0 );
4520 }
4521 }
a2f2d288 4522 if (zSig1) {
d82f3b2d 4523 float_raise(float_flag_inexact, status);
a2f2d288 4524 }
158142c2
FB
4525 if ( increment ) {
4526 ++zSig0;
4527 if ( zSig0 == 0 ) {
4528 ++zExp;
e9321124 4529 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4530 }
4531 else {
40662886
PMD
4532 if (!(zSig1 << 1) && roundNearestEven) {
4533 zSig0 &= ~1;
4534 }
158142c2
FB
4535 }
4536 }
4537 else {
4538 if ( zSig0 == 0 ) zExp = 0;
4539 }
4540 return packFloatx80( zSign, zExp, zSig0 );
4541
4542}
4543
4544/*----------------------------------------------------------------------------
4545| Takes an abstract floating-point value having sign `zSign', exponent
4546| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4547| and returns the proper extended double-precision floating-point value
4548| corresponding to the abstract input. This routine is just like
4549| `roundAndPackFloatx80' except that the input significand does not have to be
4550| normalized.
4551*----------------------------------------------------------------------------*/
4552
88857aca 4553floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
c120391c 4554 bool zSign, int32_t zExp,
88857aca
LV
4555 uint64_t zSig0, uint64_t zSig1,
4556 float_status *status)
158142c2 4557{
8f506c70 4558 int8_t shiftCount;
158142c2
FB
4559
4560 if ( zSig0 == 0 ) {
4561 zSig0 = zSig1;
4562 zSig1 = 0;
4563 zExp -= 64;
4564 }
0019d5c3 4565 shiftCount = clz64(zSig0);
158142c2
FB
4566 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4567 zExp -= shiftCount;
ff32e16e
PM
4568 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4569 zSig0, zSig1, status);
158142c2
FB
4570
4571}
4572
158142c2
FB
4573/*----------------------------------------------------------------------------
4574| Returns the least-significant 64 fraction bits of the quadruple-precision
4575| floating-point value `a'.
4576*----------------------------------------------------------------------------*/
4577
a49db98d 4578static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4579{
4580
4581 return a.low;
4582
4583}
4584
4585/*----------------------------------------------------------------------------
4586| Returns the most-significant 48 fraction bits of the quadruple-precision
4587| floating-point value `a'.
4588*----------------------------------------------------------------------------*/
4589
a49db98d 4590static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4591{
4592
e9321124 4593 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
158142c2
FB
4594
4595}
4596
4597/*----------------------------------------------------------------------------
4598| Returns the exponent bits of the quadruple-precision floating-point value
4599| `a'.
4600*----------------------------------------------------------------------------*/
4601
f4014512 4602static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4603{
4604
4605 return ( a.high>>48 ) & 0x7FFF;
4606
4607}
4608
4609/*----------------------------------------------------------------------------
4610| Returns the sign bit of the quadruple-precision floating-point value `a'.
4611*----------------------------------------------------------------------------*/
4612
c120391c 4613static inline bool extractFloat128Sign(float128 a)
158142c2 4614{
c120391c 4615 return a.high >> 63;
158142c2
FB
4616}
4617
4618/*----------------------------------------------------------------------------
4619| Normalizes the subnormal quadruple-precision floating-point value
4620| represented by the denormalized significand formed by the concatenation of
4621| `aSig0' and `aSig1'. The normalized exponent is stored at the location
4622| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4623| significand are stored at the location pointed to by `zSig0Ptr', and the
4624| least significant 64 bits of the normalized significand are stored at the
4625| location pointed to by `zSig1Ptr'.
4626*----------------------------------------------------------------------------*/
4627
static void
normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1, int32_t *zExpPtr,
                           uint64_t *zSig0Ptr, uint64_t *zSig1Ptr)
{
    int8_t lead;

    if (aSig0 != 0) {
        /* The leading one is in the high word: a short 128-bit shift. */
        lead = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, lead, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - lead;
        return;
    }

    /* High word empty: the low word provides all significant bits. */
    lead = clz64(aSig1) - 15;
    if (lead < 0) {
        /* Fewer than 15 leading zeros: split aSig1 across both words. */
        *zSig0Ptr = aSig1 >> (-lead);
        *zSig1Ptr = aSig1 << (lead & 63);
    } else {
        /* Everything fits in the high output word. */
        *zSig0Ptr = aSig1 << lead;
        *zSig1Ptr = 0;
    }
    *zExpPtr = -lead - 63;
}
4658
4659/*----------------------------------------------------------------------------
4660| Packs the sign `zSign', the exponent `zExp', and the significand formed
4661| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4662| floating-point value, returning the result. After being shifted into the
4663| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4664| added together to form the most significant 64 bits of the result. This
4665| means that any integer portion of `zSig0' will be added into the exponent.
4666| Since a properly normalized significand will have an integer portion equal
4667| to 1, the `zExp' input should be 1 less than the desired result exponent
4668| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4669| significand.
4670*----------------------------------------------------------------------------*/
4671
a49db98d 4672static inline float128
c120391c 4673packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
158142c2
FB
4674{
4675 float128 z;
4676
4677 z.low = zSig1;
c120391c 4678 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
158142c2 4679 return z;
158142c2
FB
4680}
4681
4682/*----------------------------------------------------------------------------
4683| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4684| and extended significand formed by the concatenation of `zSig0', `zSig1',
4685| and `zSig2', and returns the proper quadruple-precision floating-point value
4686| corresponding to the abstract input. Ordinarily, the abstract value is
4687| simply rounded and packed into the quadruple-precision format, with the
4688| inexact exception raised if the abstract input cannot be represented
4689| exactly. However, if the abstract value is too large, the overflow and
4690| inexact exceptions are raised and an infinity or maximal finite value is
4691| returned. If the abstract value is too small, the input value is rounded to
4692| a subnormal number, and the underflow and inexact exceptions are raised if
4693| the abstract input cannot be represented exactly as a subnormal quadruple-
4694| precision floating-point number.
4695| The input significand must be normalized or smaller. If the input
4696| significand is not normalized, `zExp' must be 0; in that case, the result
4697| returned is a subnormal number, and it must not require rounding. In the
4698| usual case that the input significand is normalized, `zExp' must be 1 less
4699| than the ``true'' floating-point exponent. The handling of underflow and
4700| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4701*----------------------------------------------------------------------------*/
4702
c120391c 4703static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4704 uint64_t zSig0, uint64_t zSig1,
4705 uint64_t zSig2, float_status *status)
158142c2 4706{
8f506c70 4707 int8_t roundingMode;
c120391c 4708 bool roundNearestEven, increment, isTiny;
158142c2 4709
a2f2d288 4710 roundingMode = status->float_rounding_mode;
158142c2 4711 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4712 switch (roundingMode) {
4713 case float_round_nearest_even:
f9288a76 4714 case float_round_ties_away:
dc355b76
PM
4715 increment = ((int64_t)zSig2 < 0);
4716 break;
4717 case float_round_to_zero:
4718 increment = 0;
4719 break;
4720 case float_round_up:
4721 increment = !zSign && zSig2;
4722 break;
4723 case float_round_down:
4724 increment = zSign && zSig2;
4725 break;
9ee6f678
BR
4726 case float_round_to_odd:
4727 increment = !(zSig1 & 0x1) && zSig2;
4728 break;
dc355b76
PM
4729 default:
4730 abort();
158142c2 4731 }
bb98fe42 4732 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
4733 if ( ( 0x7FFD < zExp )
4734 || ( ( zExp == 0x7FFD )
4735 && eq128(
e9321124
AB
4736 UINT64_C(0x0001FFFFFFFFFFFF),
4737 UINT64_C(0xFFFFFFFFFFFFFFFF),
158142c2
FB
4738 zSig0,
4739 zSig1
4740 )
4741 && increment
4742 )
4743 ) {
ff32e16e 4744 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4745 if ( ( roundingMode == float_round_to_zero )
4746 || ( zSign && ( roundingMode == float_round_up ) )
4747 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4748 || (roundingMode == float_round_to_odd)
158142c2
FB
4749 ) {
4750 return
4751 packFloat128(
4752 zSign,
4753 0x7FFE,
e9321124
AB
4754 UINT64_C(0x0000FFFFFFFFFFFF),
4755 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
4756 );
4757 }
4758 return packFloat128( zSign, 0x7FFF, 0, 0 );
4759 }
4760 if ( zExp < 0 ) {
a2f2d288 4761 if (status->flush_to_zero) {
ff32e16e 4762 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4763 return packFloat128(zSign, 0, 0, 0);
4764 }
a828b373
RH
4765 isTiny = status->tininess_before_rounding
4766 || (zExp < -1)
4767 || !increment
4768 || lt128(zSig0, zSig1,
4769 UINT64_C(0x0001FFFFFFFFFFFF),
4770 UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4771 shift128ExtraRightJamming(
4772 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4773 zExp = 0;
ff32e16e
PM
4774 if (isTiny && zSig2) {
4775 float_raise(float_flag_underflow, status);
4776 }
dc355b76
PM
4777 switch (roundingMode) {
4778 case float_round_nearest_even:
f9288a76 4779 case float_round_ties_away:
dc355b76
PM
4780 increment = ((int64_t)zSig2 < 0);
4781 break;
4782 case float_round_to_zero:
4783 increment = 0;
4784 break;
4785 case float_round_up:
4786 increment = !zSign && zSig2;
4787 break;
4788 case float_round_down:
4789 increment = zSign && zSig2;
4790 break;
9ee6f678
BR
4791 case float_round_to_odd:
4792 increment = !(zSig1 & 0x1) && zSig2;
4793 break;
dc355b76
PM
4794 default:
4795 abort();
158142c2
FB
4796 }
4797 }
4798 }
a2f2d288 4799 if (zSig2) {
d82f3b2d 4800 float_raise(float_flag_inexact, status);
a2f2d288 4801 }
158142c2
FB
4802 if ( increment ) {
4803 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
40662886
PMD
4804 if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4805 zSig1 &= ~1;
4806 }
158142c2
FB
4807 }
4808 else {
4809 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4810 }
4811 return packFloat128( zSign, zExp, zSig0, zSig1 );
4812
4813}
4814
4815/*----------------------------------------------------------------------------
4816| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4817| and significand formed by the concatenation of `zSig0' and `zSig1', and
4818| returns the proper quadruple-precision floating-point value corresponding
4819| to the abstract input. This routine is just like `roundAndPackFloat128'
4820| except that the input significand has fewer bits and does not have to be
4821| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4822| point exponent.
4823*----------------------------------------------------------------------------*/
4824
c120391c 4825static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4826 uint64_t zSig0, uint64_t zSig1,
4827 float_status *status)
158142c2 4828{
8f506c70 4829 int8_t shiftCount;
bb98fe42 4830 uint64_t zSig2;
158142c2
FB
4831
4832 if ( zSig0 == 0 ) {
4833 zSig0 = zSig1;
4834 zSig1 = 0;
4835 zExp -= 64;
4836 }
0019d5c3 4837 shiftCount = clz64(zSig0) - 15;
158142c2
FB
4838 if ( 0 <= shiftCount ) {
4839 zSig2 = 0;
4840 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4841 }
4842 else {
4843 shift128ExtraRightJamming(
4844 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4845 }
4846 zExp -= shiftCount;
ff32e16e 4847 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
4848
4849}
4850
158142c2 4851
158142c2
FB
4852/*----------------------------------------------------------------------------
4853| Returns the result of converting the 32-bit two's complement integer `a'
4854| to the extended double-precision floating-point format. The conversion
4855| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4856| Arithmetic.
4857*----------------------------------------------------------------------------*/
4858
e5a41ffa 4859floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2 4860{
c120391c 4861 bool zSign;
3a87d009 4862 uint32_t absA;
8f506c70 4863 int8_t shiftCount;
bb98fe42 4864 uint64_t zSig;
158142c2
FB
4865
4866 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4867 zSign = ( a < 0 );
4868 absA = zSign ? - a : a;
0019d5c3 4869 shiftCount = clz32(absA) + 32;
158142c2
FB
4870 zSig = absA;
4871 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4872
4873}
4874
158142c2
FB
4875/*----------------------------------------------------------------------------
4876| Returns the result of converting the 64-bit two's complement integer `a'
4877| to the extended double-precision floating-point format. The conversion
4878| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4879| Arithmetic.
4880*----------------------------------------------------------------------------*/
4881
e5a41ffa 4882floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2 4883{
c120391c 4884 bool zSign;
182f42fd 4885 uint64_t absA;
8f506c70 4886 int8_t shiftCount;
158142c2
FB
4887
4888 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4889 zSign = ( a < 0 );
4890 absA = zSign ? - a : a;
0019d5c3 4891 shiftCount = clz64(absA);
158142c2
FB
4892 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4893
4894}
4895
158142c2
FB
4896/*----------------------------------------------------------------------------
4897| Returns the result of converting the single-precision floating-point value
4898| `a' to the extended double-precision floating-point format. The conversion
4899| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4900| Arithmetic.
4901*----------------------------------------------------------------------------*/
4902
e5a41ffa 4903floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2 4904{
c120391c 4905 bool aSign;
0c48262d 4906 int aExp;
bb98fe42 4907 uint32_t aSig;
158142c2 4908
ff32e16e 4909 a = float32_squash_input_denormal(a, status);
158142c2
FB
4910 aSig = extractFloat32Frac( a );
4911 aExp = extractFloat32Exp( a );
4912 aSign = extractFloat32Sign( a );
4913 if ( aExp == 0xFF ) {
ff32e16e 4914 if (aSig) {
7537c2b4
JM
4915 floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
4916 status);
4917 return floatx80_silence_nan(res, status);
ff32e16e 4918 }
0f605c88
LV
4919 return packFloatx80(aSign,
4920 floatx80_infinity_high,
4921 floatx80_infinity_low);
158142c2
FB
4922 }
4923 if ( aExp == 0 ) {
4924 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4925 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4926 }
4927 aSig |= 0x00800000;
bb98fe42 4928 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
4929
4930}
4931
158142c2
FB
4932/*----------------------------------------------------------------------------
4933| Returns the remainder of the single-precision floating-point value `a'
4934| with respect to the corresponding value `b'. The operation is performed
4935| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4936*----------------------------------------------------------------------------*/
4937
e5a41ffa 4938float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 4939{
c120391c 4940 bool aSign, zSign;
0c48262d 4941 int aExp, bExp, expDiff;
bb98fe42
AF
4942 uint32_t aSig, bSig;
4943 uint32_t q;
4944 uint64_t aSig64, bSig64, q64;
4945 uint32_t alternateASig;
4946 int32_t sigMean;
ff32e16e
PM
4947 a = float32_squash_input_denormal(a, status);
4948 b = float32_squash_input_denormal(b, status);
158142c2
FB
4949
4950 aSig = extractFloat32Frac( a );
4951 aExp = extractFloat32Exp( a );
4952 aSign = extractFloat32Sign( a );
4953 bSig = extractFloat32Frac( b );
4954 bExp = extractFloat32Exp( b );
158142c2
FB
4955 if ( aExp == 0xFF ) {
4956 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 4957 return propagateFloat32NaN(a, b, status);
158142c2 4958 }
ff32e16e 4959 float_raise(float_flag_invalid, status);
af39bc8c 4960 return float32_default_nan(status);
158142c2
FB
4961 }
4962 if ( bExp == 0xFF ) {
ff32e16e
PM
4963 if (bSig) {
4964 return propagateFloat32NaN(a, b, status);
4965 }
158142c2
FB
4966 return a;
4967 }
4968 if ( bExp == 0 ) {
4969 if ( bSig == 0 ) {
ff32e16e 4970 float_raise(float_flag_invalid, status);
af39bc8c 4971 return float32_default_nan(status);
158142c2
FB
4972 }
4973 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
4974 }
4975 if ( aExp == 0 ) {
4976 if ( aSig == 0 ) return a;
4977 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4978 }
4979 expDiff = aExp - bExp;
4980 aSig |= 0x00800000;
4981 bSig |= 0x00800000;
4982 if ( expDiff < 32 ) {
4983 aSig <<= 8;
4984 bSig <<= 8;
4985 if ( expDiff < 0 ) {
4986 if ( expDiff < -1 ) return a;
4987 aSig >>= 1;
4988 }
4989 q = ( bSig <= aSig );
4990 if ( q ) aSig -= bSig;
4991 if ( 0 < expDiff ) {
bb98fe42 4992 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
4993 q >>= 32 - expDiff;
4994 bSig >>= 2;
4995 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4996 }
4997 else {
4998 aSig >>= 2;
4999 bSig >>= 2;
5000 }
5001 }
5002 else {
5003 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
5004 aSig64 = ( (uint64_t) aSig )<<40;
5005 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
5006 expDiff -= 64;
5007 while ( 0 < expDiff ) {
5008 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5009 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5010 aSig64 = - ( ( bSig * q64 )<<38 );
5011 expDiff -= 62;
5012 }
5013 expDiff += 64;
5014 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5015 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5016 q = q64>>( 64 - expDiff );
5017 bSig <<= 6;
5018 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5019 }
5020 do {
5021 alternateASig = aSig;
5022 ++q;
5023 aSig -= bSig;
bb98fe42 5024 } while ( 0 <= (int32_t) aSig );
158142c2
FB
5025 sigMean = aSig + alternateASig;
5026 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5027 aSig = alternateASig;
5028 }
bb98fe42 5029 zSign = ( (int32_t) aSig < 0 );
158142c2 5030 if ( zSign ) aSig = - aSig;
ff32e16e 5031 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5032}
5033
369be8f6 5034
158142c2 5035
8229c991
AJ
5036/*----------------------------------------------------------------------------
5037| Returns the binary exponential of the single-precision floating-point value
5038| `a'. The operation is performed according to the IEC/IEEE Standard for
5039| Binary Floating-Point Arithmetic.
5040|
5041| Uses the following identities:
5042|
5043| 1. -------------------------------------------------------------------------
5044| x x*ln(2)
5045| 2 = e
5046|
5047| 2. -------------------------------------------------------------------------
5048| 2 3 4 5 n
5049| x x x x x x x
5050| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5051| 1! 2! 3! 4! 5! n!
5052*----------------------------------------------------------------------------*/
5053
5054static const float64 float32_exp2_coefficients[15] =
5055{
d5138cf4
PM
5056 const_float64( 0x3ff0000000000000ll ), /* 1 */
5057 const_float64( 0x3fe0000000000000ll ), /* 2 */
5058 const_float64( 0x3fc5555555555555ll ), /* 3 */
5059 const_float64( 0x3fa5555555555555ll ), /* 4 */
5060 const_float64( 0x3f81111111111111ll ), /* 5 */
5061 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5062 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5063 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5064 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5065 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5066 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5067 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5068 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5069 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5070 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
5071};
5072
e5a41ffa 5073float32 float32_exp2(float32 a, float_status *status)
8229c991 5074{
c120391c 5075 bool aSign;
0c48262d 5076 int aExp;
bb98fe42 5077 uint32_t aSig;
8229c991
AJ
5078 float64 r, x, xn;
5079 int i;
ff32e16e 5080 a = float32_squash_input_denormal(a, status);
8229c991
AJ
5081
5082 aSig = extractFloat32Frac( a );
5083 aExp = extractFloat32Exp( a );
5084 aSign = extractFloat32Sign( a );
5085
5086 if ( aExp == 0xFF) {
ff32e16e
PM
5087 if (aSig) {
5088 return propagateFloat32NaN(a, float32_zero, status);
5089 }
8229c991
AJ
5090 return (aSign) ? float32_zero : a;
5091 }
5092 if (aExp == 0) {
5093 if (aSig == 0) return float32_one;
5094 }
5095
ff32e16e 5096 float_raise(float_flag_inexact, status);
8229c991
AJ
5097
5098 /* ******************************* */
5099 /* using float64 for approximation */
5100 /* ******************************* */
ff32e16e
PM
5101 x = float32_to_float64(a, status);
5102 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
5103
5104 xn = x;
5105 r = float64_one;
5106 for (i = 0 ; i < 15 ; i++) {
5107 float64 f;
5108
ff32e16e
PM
5109 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5110 r = float64_add(r, f, status);
8229c991 5111
ff32e16e 5112 xn = float64_mul(xn, x, status);
8229c991
AJ
5113 }
5114
5115 return float64_to_float32(r, status);
5116}
5117
374dfc33
AJ
5118/*----------------------------------------------------------------------------
5119| Returns the binary log of the single-precision floating-point value `a'.
5120| The operation is performed according to the IEC/IEEE Standard for Binary
5121| Floating-Point Arithmetic.
5122*----------------------------------------------------------------------------*/
e5a41ffa 5123float32 float32_log2(float32 a, float_status *status)
374dfc33 5124{
c120391c 5125 bool aSign, zSign;
0c48262d 5126 int aExp;
bb98fe42 5127 uint32_t aSig, zSig, i;
374dfc33 5128
ff32e16e 5129 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
5130 aSig = extractFloat32Frac( a );
5131 aExp = extractFloat32Exp( a );
5132 aSign = extractFloat32Sign( a );
5133
5134 if ( aExp == 0 ) {
5135 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5136 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5137 }
5138 if ( aSign ) {
ff32e16e 5139 float_raise(float_flag_invalid, status);
af39bc8c 5140 return float32_default_nan(status);
374dfc33
AJ
5141 }
5142 if ( aExp == 0xFF ) {
ff32e16e
PM
5143 if (aSig) {
5144 return propagateFloat32NaN(a, float32_zero, status);
5145 }
374dfc33
AJ
5146 return a;
5147 }
5148
5149 aExp -= 0x7F;
5150 aSig |= 0x00800000;
5151 zSign = aExp < 0;
5152 zSig = aExp << 23;
5153
5154 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 5155 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
5156 if ( aSig & 0x01000000 ) {
5157 aSig >>= 1;
5158 zSig |= i;
5159 }
5160 }
5161
5162 if ( zSign )
5163 zSig = -zSig;
5164
ff32e16e 5165 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
5166}
5167
158142c2 5168/*----------------------------------------------------------------------------
158142c2
FB
5169| Returns the result of converting the double-precision floating-point value
5170| `a' to the extended double-precision floating-point format. The conversion
5171| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5172| Arithmetic.
5173*----------------------------------------------------------------------------*/
5174
e5a41ffa 5175floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2 5176{
c120391c 5177 bool aSign;
0c48262d 5178 int aExp;
bb98fe42 5179 uint64_t aSig;
158142c2 5180
ff32e16e 5181 a = float64_squash_input_denormal(a, status);
158142c2
FB
5182 aSig = extractFloat64Frac( a );
5183 aExp = extractFloat64Exp( a );
5184 aSign = extractFloat64Sign( a );
5185 if ( aExp == 0x7FF ) {
ff32e16e 5186 if (aSig) {
7537c2b4
JM
5187 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5188 status);
5189 return floatx80_silence_nan(res, status);
ff32e16e 5190 }
0f605c88
LV
5191 return packFloatx80(aSign,
5192 floatx80_infinity_high,
5193 floatx80_infinity_low);
158142c2
FB
5194 }
5195 if ( aExp == 0 ) {
5196 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5197 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5198 }
5199 return
5200 packFloatx80(
e9321124 5201 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
158142c2
FB
5202
5203}
5204
158142c2
FB
5205/*----------------------------------------------------------------------------
5206| Returns the remainder of the double-precision floating-point value `a'
5207| with respect to the corresponding value `b'. The operation is performed
5208| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5209*----------------------------------------------------------------------------*/
5210
e5a41ffa 5211float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5212{
c120391c 5213 bool aSign, zSign;
0c48262d 5214 int aExp, bExp, expDiff;
bb98fe42
AF
5215 uint64_t aSig, bSig;
5216 uint64_t q, alternateASig;
5217 int64_t sigMean;
158142c2 5218
ff32e16e
PM
5219 a = float64_squash_input_denormal(a, status);
5220 b = float64_squash_input_denormal(b, status);
158142c2
FB
5221 aSig = extractFloat64Frac( a );
5222 aExp = extractFloat64Exp( a );
5223 aSign = extractFloat64Sign( a );
5224 bSig = extractFloat64Frac( b );
5225 bExp = extractFloat64Exp( b );
158142c2
FB
5226 if ( aExp == 0x7FF ) {
5227 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5228 return propagateFloat64NaN(a, b, status);
158142c2 5229 }
ff32e16e 5230 float_raise(float_flag_invalid, status);
af39bc8c 5231 return float64_default_nan(status);
158142c2
FB
5232 }
5233 if ( bExp == 0x7FF ) {
ff32e16e
PM
5234 if (bSig) {
5235 return propagateFloat64NaN(a, b, status);
5236 }
158142c2
FB
5237 return a;
5238 }
5239 if ( bExp == 0 ) {
5240 if ( bSig == 0 ) {
ff32e16e 5241 float_raise(float_flag_invalid, status);
af39bc8c 5242 return float64_default_nan(status);
158142c2
FB
5243 }
5244 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5245 }
5246 if ( aExp == 0 ) {
5247 if ( aSig == 0 ) return a;
5248 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5249 }
5250 expDiff = aExp - bExp;
e9321124
AB
5251 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5252 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
158142c2
FB
5253 if ( expDiff < 0 ) {
5254 if ( expDiff < -1 ) return a;
5255 aSig >>= 1;
5256 }
5257 q = ( bSig <= aSig );
5258 if ( q ) aSig -= bSig;
5259 expDiff -= 64;
5260 while ( 0 < expDiff ) {
5261 q = estimateDiv128To64( aSig, 0, bSig );
5262 q = ( 2 < q ) ? q - 2 : 0;
5263 aSig = - ( ( bSig>>2 ) * q );
5264 expDiff -= 62;
5265 }
5266 expDiff += 64;
5267 if ( 0 < expDiff ) {
5268 q = estimateDiv128To64( aSig, 0, bSig );
5269 q = ( 2 < q ) ? q - 2 : 0;
5270 q >>= 64 - expDiff;
5271 bSig >>= 2;
5272 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5273 }
5274 else {
5275 aSig >>= 2;
5276 bSig >>= 2;
5277 }
5278 do {
5279 alternateASig = aSig;
5280 ++q;
5281 aSig -= bSig;
bb98fe42 5282 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5283 sigMean = aSig + alternateASig;
5284 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5285 aSig = alternateASig;
5286 }
bb98fe42 5287 zSign = ( (int64_t) aSig < 0 );
158142c2 5288 if ( zSign ) aSig = - aSig;
ff32e16e 5289 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5290
5291}
5292
374dfc33
AJ
5293/*----------------------------------------------------------------------------
5294| Returns the binary log of the double-precision floating-point value `a'.
5295| The operation is performed according to the IEC/IEEE Standard for Binary
5296| Floating-Point Arithmetic.
5297*----------------------------------------------------------------------------*/
e5a41ffa 5298float64 float64_log2(float64 a, float_status *status)
374dfc33 5299{
c120391c 5300 bool aSign, zSign;
0c48262d 5301 int aExp;
bb98fe42 5302 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5303 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5304
5305 aSig = extractFloat64Frac( a );
5306 aExp = extractFloat64Exp( a );
5307 aSign = extractFloat64Sign( a );
5308
5309 if ( aExp == 0 ) {
5310 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5311 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5312 }
5313 if ( aSign ) {
ff32e16e 5314 float_raise(float_flag_invalid, status);
af39bc8c 5315 return float64_default_nan(status);
374dfc33
AJ
5316 }
5317 if ( aExp == 0x7FF ) {
ff32e16e
PM
5318 if (aSig) {
5319 return propagateFloat64NaN(a, float64_zero, status);
5320 }
374dfc33
AJ
5321 return a;
5322 }
5323
5324 aExp -= 0x3FF;
e9321124 5325 aSig |= UINT64_C(0x0010000000000000);
374dfc33 5326 zSign = aExp < 0;
bb98fe42 5327 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5328 for (i = 1LL << 51; i > 0; i >>= 1) {
5329 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5330 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
e9321124 5331 if ( aSig & UINT64_C(0x0020000000000000) ) {
374dfc33
AJ
5332 aSig >>= 1;
5333 zSig |= i;
5334 }
5335 }
5336
5337 if ( zSign )
5338 zSig = -zSig;
ff32e16e 5339 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5340}
5341
158142c2
FB
5342/*----------------------------------------------------------------------------
5343| Returns the result of converting the extended double-precision floating-
5344| point value `a' to the 32-bit two's complement integer format. The
5345| conversion is performed according to the IEC/IEEE Standard for Binary
5346| Floating-Point Arithmetic---which means in particular that the conversion
5347| is rounded according to the current rounding mode. If `a' is a NaN, the
5348| largest positive integer is returned. Otherwise, if the conversion
5349| overflows, the largest integer with the same sign as `a' is returned.
5350*----------------------------------------------------------------------------*/
5351
f4014512 5352int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2 5353{
c120391c 5354 bool aSign;
f4014512 5355 int32_t aExp, shiftCount;
bb98fe42 5356 uint64_t aSig;
158142c2 5357
d1eb8f2a
AD
5358 if (floatx80_invalid_encoding(a)) {
5359 float_raise(float_flag_invalid, status);
5360 return 1 << 31;
5361 }
158142c2
FB
5362 aSig = extractFloatx80Frac( a );
5363 aExp = extractFloatx80Exp( a );
5364 aSign = extractFloatx80Sign( a );
bb98fe42 5365 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5366 shiftCount = 0x4037 - aExp;
5367 if ( shiftCount <= 0 ) shiftCount = 1;
5368 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5369 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5370
5371}
5372
5373/*----------------------------------------------------------------------------
5374| Returns the result of converting the extended double-precision floating-
5375| point value `a' to the 32-bit two's complement integer format. The
5376| conversion is performed according to the IEC/IEEE Standard for Binary
5377| Floating-Point Arithmetic, except that the conversion is always rounded
5378| toward zero. If `a' is a NaN, the largest positive integer is returned.
5379| Otherwise, if the conversion overflows, the largest integer with the same
5380| sign as `a' is returned.
5381*----------------------------------------------------------------------------*/
5382
f4014512 5383int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2 5384{
c120391c 5385 bool aSign;
f4014512 5386 int32_t aExp, shiftCount;
bb98fe42 5387 uint64_t aSig, savedASig;
b3a6a2e0 5388 int32_t z;
158142c2 5389
d1eb8f2a
AD
5390 if (floatx80_invalid_encoding(a)) {
5391 float_raise(float_flag_invalid, status);
5392 return 1 << 31;
5393 }
158142c2
FB
5394 aSig = extractFloatx80Frac( a );
5395 aExp = extractFloatx80Exp( a );
5396 aSign = extractFloatx80Sign( a );
5397 if ( 0x401E < aExp ) {
bb98fe42 5398 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5399 goto invalid;
5400 }
5401 else if ( aExp < 0x3FFF ) {
a2f2d288 5402 if (aExp || aSig) {
d82f3b2d 5403 float_raise(float_flag_inexact, status);
a2f2d288 5404 }
158142c2
FB
5405 return 0;
5406 }
5407 shiftCount = 0x403E - aExp;
5408 savedASig = aSig;
5409 aSig >>= shiftCount;
5410 z = aSig;
5411 if ( aSign ) z = - z;
5412 if ( ( z < 0 ) ^ aSign ) {
5413 invalid:
ff32e16e 5414 float_raise(float_flag_invalid, status);
bb98fe42 5415 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5416 }
5417 if ( ( aSig<<shiftCount ) != savedASig ) {
d82f3b2d 5418 float_raise(float_flag_inexact, status);
158142c2
FB
5419 }
5420 return z;
5421
5422}
5423
5424/*----------------------------------------------------------------------------
5425| Returns the result of converting the extended double-precision floating-
5426| point value `a' to the 64-bit two's complement integer format. The
5427| conversion is performed according to the IEC/IEEE Standard for Binary
5428| Floating-Point Arithmetic---which means in particular that the conversion
5429| is rounded according to the current rounding mode. If `a' is a NaN,
5430| the largest positive integer is returned. Otherwise, if the conversion
5431| overflows, the largest integer with the same sign as `a' is returned.
5432*----------------------------------------------------------------------------*/
5433
f42c2224 5434int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2 5435{
c120391c 5436 bool aSign;
f4014512 5437 int32_t aExp, shiftCount;
bb98fe42 5438 uint64_t aSig, aSigExtra;
158142c2 5439
d1eb8f2a
AD
5440 if (floatx80_invalid_encoding(a)) {
5441 float_raise(float_flag_invalid, status);
5442 return 1ULL << 63;
5443 }
158142c2
FB
5444 aSig = extractFloatx80Frac( a );
5445 aExp = extractFloatx80Exp( a );
5446 aSign = extractFloatx80Sign( a );
5447 shiftCount = 0x403E - aExp;
5448 if ( shiftCount <= 0 ) {
5449 if ( shiftCount ) {
ff32e16e 5450 float_raise(float_flag_invalid, status);
0f605c88 5451 if (!aSign || floatx80_is_any_nan(a)) {
2c217da0 5452 return INT64_MAX;
158142c2 5453 }
2c217da0 5454 return INT64_MIN;
158142c2
FB
5455 }
5456 aSigExtra = 0;
5457 }
5458 else {
5459 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5460 }
ff32e16e 5461 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5462
5463}
5464
5465/*----------------------------------------------------------------------------
5466| Returns the result of converting the extended double-precision floating-
5467| point value `a' to the 64-bit two's complement integer format. The
5468| conversion is performed according to the IEC/IEEE Standard for Binary
5469| Floating-Point Arithmetic, except that the conversion is always rounded
5470| toward zero. If `a' is a NaN, the largest positive integer is returned.
5471| Otherwise, if the conversion overflows, the largest integer with the same
5472| sign as `a' is returned.
5473*----------------------------------------------------------------------------*/
5474
f42c2224 5475int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2 5476{
c120391c 5477 bool aSign;
f4014512 5478 int32_t aExp, shiftCount;
bb98fe42 5479 uint64_t aSig;
f42c2224 5480 int64_t z;
158142c2 5481
d1eb8f2a
AD
5482 if (floatx80_invalid_encoding(a)) {
5483 float_raise(float_flag_invalid, status);
5484 return 1ULL << 63;
5485 }
158142c2
FB
5486 aSig = extractFloatx80Frac( a );
5487 aExp = extractFloatx80Exp( a );
5488 aSign = extractFloatx80Sign( a );
5489 shiftCount = aExp - 0x403E;
5490 if ( 0 <= shiftCount ) {
e9321124 5491 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
158142c2 5492 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5493 float_raise(float_flag_invalid, status);
158142c2 5494 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
2c217da0 5495 return INT64_MAX;
158142c2
FB
5496 }
5497 }
2c217da0 5498 return INT64_MIN;
158142c2
FB
5499 }
5500 else if ( aExp < 0x3FFF ) {
a2f2d288 5501 if (aExp | aSig) {
d82f3b2d 5502 float_raise(float_flag_inexact, status);
a2f2d288 5503 }
158142c2
FB
5504 return 0;
5505 }
5506 z = aSig>>( - shiftCount );
bb98fe42 5507 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
d82f3b2d 5508 float_raise(float_flag_inexact, status);
158142c2
FB
5509 }
5510 if ( aSign ) z = - z;
5511 return z;
5512
5513}
5514
5515/*----------------------------------------------------------------------------
5516| Returns the result of converting the extended double-precision floating-
5517| point value `a' to the single-precision floating-point format. The
5518| conversion is performed according to the IEC/IEEE Standard for Binary
5519| Floating-Point Arithmetic.
5520*----------------------------------------------------------------------------*/
5521
e5a41ffa 5522float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2 5523{
c120391c 5524 bool aSign;
f4014512 5525 int32_t aExp;
bb98fe42 5526 uint64_t aSig;
158142c2 5527
d1eb8f2a
AD
5528 if (floatx80_invalid_encoding(a)) {
5529 float_raise(float_flag_invalid, status);
5530 return float32_default_nan(status);
5531 }
158142c2
FB
5532 aSig = extractFloatx80Frac( a );
5533 aExp = extractFloatx80Exp( a );
5534 aSign = extractFloatx80Sign( a );
5535 if ( aExp == 0x7FFF ) {
bb98fe42 5536 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5537 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5538 status);
5539 return float32_silence_nan(res, status);
158142c2
FB
5540 }
5541 return packFloat32( aSign, 0xFF, 0 );
5542 }
5543 shift64RightJamming( aSig, 33, &aSig );
5544 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5545 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5546
5547}
5548
5549/*----------------------------------------------------------------------------
5550| Returns the result of converting the extended double-precision floating-
5551| point value `a' to the double-precision floating-point format. The
5552| conversion is performed according to the IEC/IEEE Standard for Binary
5553| Floating-Point Arithmetic.
5554*----------------------------------------------------------------------------*/
5555
e5a41ffa 5556float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2 5557{
c120391c 5558 bool aSign;
f4014512 5559 int32_t aExp;
bb98fe42 5560 uint64_t aSig, zSig;
158142c2 5561
d1eb8f2a
AD
5562 if (floatx80_invalid_encoding(a)) {
5563 float_raise(float_flag_invalid, status);
5564 return float64_default_nan(status);
5565 }
158142c2
FB
5566 aSig = extractFloatx80Frac( a );
5567 aExp = extractFloatx80Exp( a );
5568 aSign = extractFloatx80Sign( a );
5569 if ( aExp == 0x7FFF ) {
bb98fe42 5570 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5571 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5572 status);
5573 return float64_silence_nan(res, status);
158142c2
FB
5574 }
5575 return packFloat64( aSign, 0x7FF, 0 );
5576 }
5577 shift64RightJamming( aSig, 1, &zSig );
5578 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5579 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5580
5581}
5582
158142c2
FB
5583/*----------------------------------------------------------------------------
5584| Returns the result of converting the extended double-precision floating-
5585| point value `a' to the quadruple-precision floating-point format. The
5586| conversion is performed according to the IEC/IEEE Standard for Binary
5587| Floating-Point Arithmetic.
5588*----------------------------------------------------------------------------*/
5589
e5a41ffa 5590float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2 5591{
c120391c 5592 bool aSign;
0c48262d 5593 int aExp;
bb98fe42 5594 uint64_t aSig, zSig0, zSig1;
158142c2 5595
d1eb8f2a
AD
5596 if (floatx80_invalid_encoding(a)) {
5597 float_raise(float_flag_invalid, status);
5598 return float128_default_nan(status);
5599 }
158142c2
FB
5600 aSig = extractFloatx80Frac( a );
5601 aExp = extractFloatx80Exp( a );
5602 aSign = extractFloatx80Sign( a );
bb98fe42 5603 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5604 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5605 status);
5606 return float128_silence_nan(res, status);
158142c2
FB
5607 }
5608 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5609 return packFloat128( aSign, aExp, zSig0, zSig1 );
5610
5611}
5612
0f721292
LV
5613/*----------------------------------------------------------------------------
5614| Rounds the extended double-precision floating-point value `a'
5615| to the precision provided by floatx80_rounding_precision and returns the
5616| result as an extended double-precision floating-point value.
5617| The operation is performed according to the IEC/IEEE Standard for Binary
5618| Floating-Point Arithmetic.
5619*----------------------------------------------------------------------------*/
5620
5621floatx80 floatx80_round(floatx80 a, float_status *status)
5622{
5623 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5624 extractFloatx80Sign(a),
5625 extractFloatx80Exp(a),
5626 extractFloatx80Frac(a), 0, status);
5627}
5628
158142c2
FB
5629/*----------------------------------------------------------------------------
5630| Rounds the extended double-precision floating-point value `a' to an integer,
5631| and returns the result as an extended double-precision floating-point
5632| value. The operation is performed according to the IEC/IEEE Standard for
5633| Binary Floating-Point Arithmetic.
5634*----------------------------------------------------------------------------*/
5635
e5a41ffa 5636floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2 5637{
c120391c 5638 bool aSign;
f4014512 5639 int32_t aExp;
bb98fe42 5640 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5641 floatx80 z;
5642
d1eb8f2a
AD
5643 if (floatx80_invalid_encoding(a)) {
5644 float_raise(float_flag_invalid, status);
5645 return floatx80_default_nan(status);
5646 }
158142c2
FB
5647 aExp = extractFloatx80Exp( a );
5648 if ( 0x403E <= aExp ) {
bb98fe42 5649 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5650 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5651 }
5652 return a;
5653 }
5654 if ( aExp < 0x3FFF ) {
5655 if ( ( aExp == 0 )
9ecaf5cc 5656 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
158142c2
FB
5657 return a;
5658 }
d82f3b2d 5659 float_raise(float_flag_inexact, status);
158142c2 5660 aSign = extractFloatx80Sign( a );
a2f2d288 5661 switch (status->float_rounding_mode) {
158142c2 5662 case float_round_nearest_even:
bb98fe42 5663 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5664 ) {
5665 return
e9321124 5666 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5667 }
5668 break;
f9288a76
PM
5669 case float_round_ties_away:
5670 if (aExp == 0x3FFE) {
e9321124 5671 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
f9288a76
PM
5672 }
5673 break;
158142c2
FB
5674 case float_round_down:
5675 return
5676 aSign ?
e9321124 5677 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
158142c2
FB
5678 : packFloatx80( 0, 0, 0 );
5679 case float_round_up:
5680 return
5681 aSign ? packFloatx80( 1, 0, 0 )
e9321124 5682 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
3dede407
RH
5683
5684 case float_round_to_zero:
5685 break;
5686 default:
5687 g_assert_not_reached();
158142c2
FB
5688 }
5689 return packFloatx80( aSign, 0, 0 );
5690 }
5691 lastBitMask = 1;
5692 lastBitMask <<= 0x403E - aExp;
5693 roundBitsMask = lastBitMask - 1;
5694 z = a;
a2f2d288 5695 switch (status->float_rounding_mode) {
dc355b76 5696 case float_round_nearest_even:
158142c2 5697 z.low += lastBitMask>>1;
dc355b76
PM
5698 if ((z.low & roundBitsMask) == 0) {
5699 z.low &= ~lastBitMask;
5700 }
5701 break;
f9288a76
PM
5702 case float_round_ties_away:
5703 z.low += lastBitMask >> 1;
5704 break;
dc355b76
PM
5705 case float_round_to_zero:
5706 break;
5707 case float_round_up:
5708 if (!extractFloatx80Sign(z)) {
5709 z.low += roundBitsMask;
5710 }
5711 break;
5712 case float_round_down:
5713 if (extractFloatx80Sign(z)) {
158142c2
FB
5714 z.low += roundBitsMask;
5715 }
dc355b76
PM
5716 break;
5717 default:
5718 abort();
158142c2
FB
5719 }
5720 z.low &= ~ roundBitsMask;
5721 if ( z.low == 0 ) {
5722 ++z.high;
e9321124 5723 z.low = UINT64_C(0x8000000000000000);
158142c2 5724 }
a2f2d288 5725 if (z.low != a.low) {
d82f3b2d 5726 float_raise(float_flag_inexact, status);
a2f2d288 5727 }
158142c2
FB
5728 return z;
5729
5730}
5731
5732/*----------------------------------------------------------------------------
5733| Returns the result of adding the absolute values of the extended double-
5734| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5735| negated before being returned. `zSign' is ignored if the result is a NaN.
5736| The addition is performed according to the IEC/IEEE Standard for Binary
5737| Floating-Point Arithmetic.
5738*----------------------------------------------------------------------------*/
5739
c120391c 5740static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5741 float_status *status)
158142c2 5742{
f4014512 5743 int32_t aExp, bExp, zExp;
bb98fe42 5744 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5745 int32_t expDiff;
158142c2
FB
5746
5747 aSig = extractFloatx80Frac( a );
5748 aExp = extractFloatx80Exp( a );
5749 bSig = extractFloatx80Frac( b );
5750 bExp = extractFloatx80Exp( b );
5751 expDiff = aExp - bExp;
5752 if ( 0 < expDiff ) {
5753 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5754 if ((uint64_t)(aSig << 1)) {
5755 return propagateFloatx80NaN(a, b, status);
5756 }
158142c2
FB
5757 return a;
5758 }
5759 if ( bExp == 0 ) --expDiff;
5760 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5761 zExp = aExp;
5762 }
5763 else if ( expDiff < 0 ) {
5764 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5765 if ((uint64_t)(bSig << 1)) {
5766 return propagateFloatx80NaN(a, b, status);
5767 }
0f605c88
LV
5768 return packFloatx80(zSign,
5769 floatx80_infinity_high,
5770 floatx80_infinity_low);
158142c2
FB
5771 }
5772 if ( aExp == 0 ) ++expDiff;
5773 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5774 zExp = bExp;
5775 }
5776 else {
5777 if ( aExp == 0x7FFF ) {
bb98fe42 5778 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5779 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5780 }
5781 return a;
5782 }
5783 zSig1 = 0;
5784 zSig0 = aSig + bSig;
5785 if ( aExp == 0 ) {
41602807
JM
5786 if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
5787 /* At least one of the values is a pseudo-denormal,
5788 * and there is a carry out of the result. */
5789 zExp = 1;
5790 goto shiftRight1;
5791 }
2f311075
RH
5792 if (zSig0 == 0) {
5793 return packFloatx80(zSign, 0, 0);
5794 }
158142c2
FB
5795 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5796 goto roundAndPack;
5797 }
5798 zExp = aExp;
5799 goto shiftRight1;
5800 }
5801 zSig0 = aSig + bSig;
bb98fe42 5802 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5803 shiftRight1:
5804 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
e9321124 5805 zSig0 |= UINT64_C(0x8000000000000000);
158142c2
FB
5806 ++zExp;
5807 roundAndPack:
a2f2d288 5808 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5809 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5810}
5811
5812/*----------------------------------------------------------------------------
5813| Returns the result of subtracting the absolute values of the extended
5814| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5815| difference is negated before being returned. `zSign' is ignored if the
5816| result is a NaN. The subtraction is performed according to the IEC/IEEE
5817| Standard for Binary Floating-Point Arithmetic.
5818*----------------------------------------------------------------------------*/
5819
c120391c 5820static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5821 float_status *status)
158142c2 5822{
f4014512 5823 int32_t aExp, bExp, zExp;
bb98fe42 5824 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5825 int32_t expDiff;
158142c2
FB
5826
5827 aSig = extractFloatx80Frac( a );
5828 aExp = extractFloatx80Exp( a );
5829 bSig = extractFloatx80Frac( b );
5830 bExp = extractFloatx80Exp( b );
5831 expDiff = aExp - bExp;
5832 if ( 0 < expDiff ) goto aExpBigger;
5833 if ( expDiff < 0 ) goto bExpBigger;
5834 if ( aExp == 0x7FFF ) {
bb98fe42 5835 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5836 return propagateFloatx80NaN(a, b, status);
158142c2 5837 }
ff32e16e 5838 float_raise(float_flag_invalid, status);
af39bc8c 5839 return floatx80_default_nan(status);
158142c2
FB
5840 }
5841 if ( aExp == 0 ) {
5842 aExp = 1;
5843 bExp = 1;
5844 }
5845 zSig1 = 0;
5846 if ( bSig < aSig ) goto aBigger;
5847 if ( aSig < bSig ) goto bBigger;
a2f2d288 5848 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5849 bExpBigger:
5850 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5851 if ((uint64_t)(bSig << 1)) {
5852 return propagateFloatx80NaN(a, b, status);
5853 }
0f605c88
LV
5854 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5855 floatx80_infinity_low);
158142c2
FB
5856 }
5857 if ( aExp == 0 ) ++expDiff;
5858 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5859 bBigger:
5860 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5861 zExp = bExp;
5862 zSign ^= 1;
5863 goto normalizeRoundAndPack;
5864 aExpBigger:
5865 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5866 if ((uint64_t)(aSig << 1)) {
5867 return propagateFloatx80NaN(a, b, status);
5868 }
158142c2
FB
5869 return a;
5870 }
5871 if ( bExp == 0 ) --expDiff;
5872 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5873 aBigger:
5874 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5875 zExp = aExp;
5876 normalizeRoundAndPack:
a2f2d288 5877 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5878 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5879}
5880
5881/*----------------------------------------------------------------------------
5882| Returns the result of adding the extended double-precision floating-point
5883| values `a' and `b'. The operation is performed according to the IEC/IEEE
5884| Standard for Binary Floating-Point Arithmetic.
5885*----------------------------------------------------------------------------*/
5886
e5a41ffa 5887floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2 5888{
c120391c 5889 bool aSign, bSign;
158142c2 5890
d1eb8f2a
AD
5891 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5892 float_raise(float_flag_invalid, status);
5893 return floatx80_default_nan(status);
5894 }
158142c2
FB
5895 aSign = extractFloatx80Sign( a );
5896 bSign = extractFloatx80Sign( b );
5897 if ( aSign == bSign ) {
ff32e16e 5898 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5899 }
5900 else {
ff32e16e 5901 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5902 }
5903
5904}
5905
5906/*----------------------------------------------------------------------------
5907| Returns the result of subtracting the extended double-precision floating-
5908| point values `a' and `b'. The operation is performed according to the
5909| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5910*----------------------------------------------------------------------------*/
5911
e5a41ffa 5912floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2 5913{
c120391c 5914 bool aSign, bSign;
158142c2 5915
d1eb8f2a
AD
5916 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5917 float_raise(float_flag_invalid, status);
5918 return floatx80_default_nan(status);
5919 }
158142c2
FB
5920 aSign = extractFloatx80Sign( a );
5921 bSign = extractFloatx80Sign( b );
5922 if ( aSign == bSign ) {
ff32e16e 5923 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5924 }
5925 else {
ff32e16e 5926 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5927 }
5928
5929}
5930
5931/*----------------------------------------------------------------------------
5932| Returns the result of multiplying the extended double-precision floating-
5933| point values `a' and `b'. The operation is performed according to the
5934| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5935*----------------------------------------------------------------------------*/
5936
e5a41ffa 5937floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2 5938{
c120391c 5939 bool aSign, bSign, zSign;
f4014512 5940 int32_t aExp, bExp, zExp;
bb98fe42 5941 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 5942
d1eb8f2a
AD
5943 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5944 float_raise(float_flag_invalid, status);
5945 return floatx80_default_nan(status);
5946 }
158142c2
FB
5947 aSig = extractFloatx80Frac( a );
5948 aExp = extractFloatx80Exp( a );
5949 aSign = extractFloatx80Sign( a );
5950 bSig = extractFloatx80Frac( b );
5951 bExp = extractFloatx80Exp( b );
5952 bSign = extractFloatx80Sign( b );
5953 zSign = aSign ^ bSign;
5954 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5955 if ( (uint64_t) ( aSig<<1 )
5956 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5957 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5958 }
5959 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
5960 return packFloatx80(zSign, floatx80_infinity_high,
5961 floatx80_infinity_low);
158142c2
FB
5962 }
5963 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5964 if ((uint64_t)(bSig << 1)) {
5965 return propagateFloatx80NaN(a, b, status);
5966 }
158142c2
FB
5967 if ( ( aExp | aSig ) == 0 ) {
5968 invalid:
ff32e16e 5969 float_raise(float_flag_invalid, status);
af39bc8c 5970 return floatx80_default_nan(status);
158142c2 5971 }
0f605c88
LV
5972 return packFloatx80(zSign, floatx80_infinity_high,
5973 floatx80_infinity_low);
158142c2
FB
5974 }
5975 if ( aExp == 0 ) {
5976 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5977 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5978 }
5979 if ( bExp == 0 ) {
5980 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5981 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5982 }
5983 zExp = aExp + bExp - 0x3FFE;
5984 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 5985 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
5986 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5987 --zExp;
5988 }
a2f2d288 5989 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5990 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5991}
5992
5993/*----------------------------------------------------------------------------
5994| Returns the result of dividing the extended double-precision floating-point
5995| value `a' by the corresponding value `b'. The operation is performed
5996| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5997*----------------------------------------------------------------------------*/
5998
/*
 * Divides `a' by `b' and returns the quotient, rounded by
 * roundAndPackFloatx80() at status->floatx80_rounding_precision.
 * Raises float_flag_invalid for invalid encodings, infinity/infinity and
 * zero/zero, and float_flag_divbyzero for a finite non-zero `a' over zero.
 */
e5a41ffa 5999floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2 6000{
c120391c 6001 bool aSign, bSign, zSign;
f4014512 6002 int32_t aExp, bExp, zExp;
bb98fe42
AF
6003 uint64_t aSig, bSig, zSig0, zSig1;
6004 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6005
d1eb8f2a
AD
6006 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6007 float_raise(float_flag_invalid, status);
6008 return floatx80_default_nan(status);
6009 }
158142c2
FB
6010 aSig = extractFloatx80Frac( a );
6011 aExp = extractFloatx80Exp( a );
6012 aSign = extractFloatx80Sign( a );
6013 bSig = extractFloatx80Frac( b );
6014 bExp = extractFloatx80Exp( b );
6015 bSign = extractFloatx80Sign( b );
/* The sign of the quotient is the XOR of the operand signs. */
6016 zSign = aSign ^ bSign;
/* `a' has the maximum exponent: it is a NaN or an infinity. */
6017 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6018 if ((uint64_t)(aSig << 1)) {
6019 return propagateFloatx80NaN(a, b, status);
6020 }
158142c2 6021 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6022 if ((uint64_t)(bSig << 1)) {
6023 return propagateFloatx80NaN(a, b, status);
6024 }
158142c2
FB
/* Infinity divided by infinity. */
6025 goto invalid;
6026 }
0f605c88
LV
6027 return packFloatx80(zSign, floatx80_infinity_high,
6028 floatx80_infinity_low);
158142c2
FB
6029 }
/* `b' is a NaN or an infinity while `a' is finite: NaN or signed zero. */
6030 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6031 if ((uint64_t)(bSig << 1)) {
6032 return propagateFloatx80NaN(a, b, status);
6033 }
158142c2
FB
6034 return packFloatx80( zSign, 0, 0 );
6035 }
6036 if ( bExp == 0 ) {
6037 if ( bSig == 0 ) {
/* Divisor is zero: 0/0 is invalid, anything else is a divide-by-zero. */
6038 if ( ( aExp | aSig ) == 0 ) {
6039 invalid:
ff32e16e 6040 float_raise(float_flag_invalid, status);
af39bc8c 6041 return floatx80_default_nan(status);
158142c2 6042 }
ff32e16e 6043 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6044 return packFloatx80(zSign, floatx80_infinity_high,
6045 floatx80_infinity_low);
158142c2
FB
6046 }
6047 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6048 }
6049 if ( aExp == 0 ) {
6050 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6051 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6052 }
6053 zExp = aExp - bExp + 0x3FFE;
6054 rem1 = 0;
/*
 * When the dividend significand is not smaller than the divisor's, halve
 * it (keeping the shifted-out bit in rem1) and compensate in the exponent.
 * NOTE(review): presumably this keeps the quotient estimate within 64 bits.
 */
6055 if ( bSig <= aSig ) {
6056 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6057 ++zExp;
6058 }
/*
 * High quotient word: estimate, compute the remainder, and decrement the
 * estimate while that remainder is negative.
 */
6059 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6060 mul64To128( bSig, zSig0, &term0, &term1 );
6061 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 6062 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6063 --zSig0;
6064 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6065 }
/*
 * Low quotient word: the estimate is only corrected, and a sticky bit for
 * a non-zero remainder only ORed in, when (zSig1 << 1) <= 8.
 */
6066 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 6067 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6068 mul64To128( bSig, zSig1, &term1, &term2 );
6069 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6070 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6071 --zSig1;
6072 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6073 }
6074 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6075 }
a2f2d288 6076 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6077 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6078}
6079
6080/*----------------------------------------------------------------------------
6081| Returns the remainder of the extended double-precision floating-point value
6082| `a' with respect to the corresponding value `b'. The operation is performed
6b8b0136
JM
6083| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6084| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
445810ec
JM
6085| the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6086| the absolute value of the integer quotient.
158142c2
FB
6087*----------------------------------------------------------------------------*/
6088
/*
 * Computes the remainder of `a' with respect to `b'.
 *   mod == false: the quotient is rounded to nearest, ties to even.
 *   mod == true:  the quotient is truncated (no final adjustment is made).
 * `*quotient' receives low-order bits of the magnitude of the integer
 * quotient; it is set to 0 first, so every early return leaves it at 0.
 * Raises float_flag_invalid for invalid encodings, an infinite `a', or a
 * zero `b'.
 */
445810ec 6089floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6b8b0136 6090 float_status *status)
158142c2 6091{
c120391c 6092 bool aSign, zSign;
b662495d 6093 int32_t aExp, bExp, expDiff, aExpOrig;
bb98fe42
AF
6094 uint64_t aSig0, aSig1, bSig;
6095 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6096
445810ec 6097 *quotient = 0;
d1eb8f2a
AD
6098 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6099 float_raise(float_flag_invalid, status);
6100 return floatx80_default_nan(status);
6101 }
158142c2 6102 aSig0 = extractFloatx80Frac( a );
/* aExpOrig keeps the exponent field as encoded, before any normalization. */
b662495d 6103 aExpOrig = aExp = extractFloatx80Exp( a );
158142c2
FB
6104 aSign = extractFloatx80Sign( a );
6105 bSig = extractFloatx80Frac( b );
6106 bExp = extractFloatx80Exp( b );
/* `a' is a NaN or an infinity: propagate a NaN, otherwise invalid. */
158142c2 6107 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6108 if ( (uint64_t) ( aSig0<<1 )
6109 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6110 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6111 }
6112 goto invalid;
6113 }
/* `b' is a NaN or an infinity: a NaN propagates, otherwise `a' is the result. */
6114 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6115 if ((uint64_t)(bSig << 1)) {
6116 return propagateFloatx80NaN(a, b, status);
6117 }
b662495d
JM
6118 if (aExp == 0 && aSig0 >> 63) {
6119 /*
6120 * Pseudo-denormal argument must be returned in normalized
6121 * form.
6122 */
6123 return packFloatx80(aSign, 1, aSig0);
6124 }
158142c2
FB
6125 return a;
6126 }
6127 if ( bExp == 0 ) {
6128 if ( bSig == 0 ) {
6129 invalid:
ff32e16e 6130 float_raise(float_flag_invalid, status);
af39bc8c 6131 return floatx80_default_nan(status);
158142c2
FB
6132 }
6133 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6134 }
6135 if ( aExp == 0 ) {
/* A zero `a' is returned unchanged. */
499a2f7b 6136 if ( aSig0 == 0 ) return a;
158142c2
FB
6137 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6138 }
158142c2
FB
6139 zSign = aSign;
6140 expDiff = aExp - bExp;
6141 aSig1 = 0;
/*
 * `a' has the smaller exponent.  For mod, or when the exponents differ by
 * more than one, `a' itself is the result.  Otherwise (remainder with a
 * difference of exactly one) halve `a' and continue with expDiff = 0.
 */
6142 if ( expDiff < 0 ) {
b662495d
JM
6143 if ( mod || expDiff < -1 ) {
6144 if (aExp == 1 && aExpOrig == 0) {
6145 /*
6146 * Pseudo-denormal argument must be returned in
6147 * normalized form.
6148 */
6149 return packFloatx80(aSign, aExp, aSig0);
6150 }
6151 return a;
6152 }
158142c2
FB
6153 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6154 expDiff = 0;
6155 }
/* First quotient bit: 1 when the divisor significand fits into aSig0. */
445810ec 6156 *quotient = q = ( bSig <= aSig0 );
158142c2
FB
6157 if ( q ) aSig0 -= bSig;
6158 expDiff -= 64;
/*
 * Produce 62 quotient bits per iteration.  The estimate is lowered by 2
 * (clamped at 0) before use.
 * NOTE(review): presumably so the partial remainder never goes negative.
 */
6159 while ( 0 < expDiff ) {
6160 q = estimateDiv128To64( aSig0, aSig1, bSig );
6161 q = ( 2 < q ) ? q - 2 : 0;
6162 mul64To128( bSig, q, &term0, &term1 );
6163 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6164 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6165 expDiff -= 62;
445810ec
JM
6166 *quotient <<= 62;
6167 *quotient += q;
158142c2
FB
6168 }
6169 expDiff += 64;
/* Final partial step covering the remaining expDiff quotient bits. */
6170 if ( 0 < expDiff ) {
6171 q = estimateDiv128To64( aSig0, aSig1, bSig );
6172 q = ( 2 < q ) ? q - 2 : 0;
6173 q >>= 64 - expDiff;
6174 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6175 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6176 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
/* Raise q until the remainder is below the aligned divisor in term0:term1. */
6177 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6178 ++q;
6179 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6180 }
445810ec
JM
/* A shift by 64 would be undefined, so that case clears the value instead. */
6181 if (expDiff < 64) {
6182 *quotient <<= expDiff;
6183 } else {
6184 *quotient = 0;
6185 }
6186 *quotient += q;
158142c2
FB
6187 }
6188 else {
6189 term1 = 0;
6190 term0 = bSig;
6191 }
6b8b0136
JM
/*
 * Round-to-nearest remainder only: if (divisor - remainder) is smaller
 * than the remainder, or equal to it with an odd q, use that alternate
 * value, flip the result sign and increment the quotient.
 */
6192 if (!mod) {
6193 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6194 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6195 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6196 && ( q & 1 ) )
6197 ) {
6198 aSig0 = alternateASig0;
6199 aSig1 = alternateASig1;
6200 zSign = ! zSign;
445810ec 6201 ++*quotient;
6b8b0136 6202 }
158142c2
FB
6203 }
/* The result is packed with a fixed precision argument of 80. */
6204 return
6205 normalizeRoundAndPackFloatx80(
ff32e16e 6206 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6207
6208}
6209
6b8b0136
JM
6210/*----------------------------------------------------------------------------
6211| Returns the remainder of the extended double-precision floating-point value
6212| `a' with respect to the corresponding value `b'. The operation is performed
6213| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6214*----------------------------------------------------------------------------*/
6215
6216floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6217{
445810ec
JM
6218 uint64_t quotient;
6219 return floatx80_modrem(a, b, false, &quotient, status);
6b8b0136
JM
6220}
6221
6222/*----------------------------------------------------------------------------
6223| Returns the remainder of the extended double-precision floating-point value
6224| `a' with respect to the corresponding value `b', with the quotient truncated
6225| toward zero.
6226*----------------------------------------------------------------------------*/
6227
6228floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6229{
445810ec
JM
6230 uint64_t quotient;
6231 return floatx80_modrem(a, b, true, &quotient, status);
6b8b0136
JM
6232}
6233
158142c2
FB
6234/*----------------------------------------------------------------------------
6235| Returns the square root of the extended double-precision floating-point
6236| value `a'. The operation is performed according to the IEC/IEEE Standard
6237| for Binary Floating-Point Arithmetic.
6238*----------------------------------------------------------------------------*/
6239
/*
 * Returns the square root of `a', rounded by roundAndPackFloatx80() at
 * status->floatx80_rounding_precision.
 * Raises float_flag_invalid for invalid encodings and for negative inputs
 * other than an exponent-0, significand-0 value (negative zero), which is
 * returned unchanged.
 */
e5a41ffa 6240floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2 6241{
c120391c 6242 bool aSign;
f4014512 6243 int32_t aExp, zExp;
bb98fe42
AF
6244 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6245 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6246
d1eb8f2a
AD
6247 if (floatx80_invalid_encoding(a)) {
6248 float_raise(float_flag_invalid, status);
6249 return floatx80_default_nan(status);
6250 }
158142c2
FB
6251 aSig0 = extractFloatx80Frac( a );
6252 aExp = extractFloatx80Exp( a );
6253 aSign = extractFloatx80Sign( a );
/* Maximum exponent: propagate a NaN; return a positive input unchanged;
 * a negative one is invalid. */
6254 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6255 if ((uint64_t)(aSig0 << 1)) {
6256 return propagateFloatx80NaN(a, a, status);
6257 }
158142c2
FB
6258 if ( ! aSign ) return a;
6259 goto invalid;
6260 }
6261 if ( aSign ) {
/* Negative zero is returned as is; any other negative value is invalid. */
6262 if ( ( aExp | aSig0 ) == 0 ) return a;
6263 invalid:
ff32e16e 6264 float_raise(float_flag_invalid, status);
af39bc8c 6265 return floatx80_default_nan(status);
158142c2
FB
6266 }
6267 if ( aExp == 0 ) {
6268 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6269 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6270 }
/* Halve the exponent after removing the 0x3FFF offset, then add it back. */
6271 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
/* Initial estimate from the top 32 significand bits, refined by a division. */
6272 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
/* The shift amount depends on the exponent's parity (aExp & 1). */
6273 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6274 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6275 doubleZSig0 = zSig0<<1;
/* Remainder = a - zSig0^2; lower zSig0 while that remainder is negative. */
6276 mul64To128( zSig0, zSig0, &term0, &term1 );
6277 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6278 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6279 --zSig0;
6280 doubleZSig0 -= 2;
6281 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6282 }
/*
 * Low result word.  It is only corrected, and a sticky bit for a non-zero
 * remainder only ORed in, when its low 62 bits are <= 5.
 */
6283 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
e9321124 6284 if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
158142c2
FB
6285 if ( zSig1 == 0 ) zSig1 = 1;
6286 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6287 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6288 mul64To128( zSig1, zSig1, &term2, &term3 );
6289 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6290 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6291 --zSig1;
6292 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6293 term3 |= 1;
6294 term2 |= doubleZSig0;
6295 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6296 }
6297 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6298 }
/* Combine both words: zSig1 shifted left by one, ORed with doubleZSig0. */
6299 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6300 zSig0 |= doubleZSig0;
a2f2d288
PM
/* The result sign passed here is always 0. */
6301 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6302 0, zExp, zSig0, zSig1, status);
158142c2
FB
6303}
6304
158142c2
FB
6305/*----------------------------------------------------------------------------
6306| Returns the result of converting the quadruple-precision floating-point
6307| value `a' to the extended double-precision floating-point format. The
6308| conversion is performed according to the IEC/IEEE Standard for Binary
6309| Floating-Point Arithmetic.
6310*----------------------------------------------------------------------------*/
6311
e5a41ffa 6312floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2 6313{
c120391c 6314 bool aSign;
f4014512 6315 int32_t aExp;
bb98fe42 6316 uint64_t aSig0, aSig1;
158142c2
FB
6317
6318 aSig1 = extractFloat128Frac1( a );
6319 aSig0 = extractFloat128Frac0( a );
6320 aExp = extractFloat128Exp( a );
6321 aSign = extractFloat128Sign( a );
6322 if ( aExp == 0x7FFF ) {
6323 if ( aSig0 | aSig1 ) {
7537c2b4
JM
6324 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6325 status);
6326 return floatx80_silence_nan(res, status);
158142c2 6327 }
0f605c88
LV
6328 return packFloatx80(aSign, floatx80_infinity_high,
6329 floatx80_infinity_low);
158142c2
FB
6330 }
6331 if ( aExp == 0 ) {
6332 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6333 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6334 }
6335 else {
e9321124 6336 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6337 }
6338 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6339 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6340
6341}
6342
158142c2
FB
6343/*----------------------------------------------------------------------------
6344| Returns the remainder of the quadruple-precision floating-point value `a'
6345| with respect to the corresponding value `b'. The operation is performed
6346| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6347*----------------------------------------------------------------------------*/
6348
/*
 * Returns the remainder of `a' with respect to `b' in quadruple precision.
 * Raises float_flag_invalid when `a' is an infinity or `b' is zero.
 * When `a' has an exponent at least two below `b', or `b' is an infinity,
 * `a' is returned unchanged.
 */
e5a41ffa 6349float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 6350{
c120391c 6351 bool aSign, zSign;
f4014512 6352 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6353 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6354 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6355 int64_t sigMean0;
158142c2
FB
6356
6357 aSig1 = extractFloat128Frac1( a );
6358 aSig0 = extractFloat128Frac0( a );
6359 aExp = extractFloat128Exp( a );
6360 aSign = extractFloat128Sign( a );
6361 bSig1 = extractFloat128Frac1( b );
6362 bSig0 = extractFloat128Frac0( b );
6363 bExp = extractFloat128Exp( b );
158142c2
FB
/* `a' is a NaN or an infinity: propagate a NaN, otherwise invalid. */
6364 if ( aExp == 0x7FFF ) {
6365 if ( ( aSig0 | aSig1 )
6366 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6367 return propagateFloat128NaN(a, b, status);
158142c2
FB
6368 }
6369 goto invalid;
6370 }
/* `b' is a NaN or an infinity: a NaN propagates, otherwise `a' is the result. */
6371 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6372 if (bSig0 | bSig1) {
6373 return propagateFloat128NaN(a, b, status);
6374 }
158142c2
FB
6375 return a;
6376 }
6377 if ( bExp == 0 ) {
6378 if ( ( bSig0 | bSig1 ) == 0 ) {
6379 invalid:
ff32e16e 6380 float_raise(float_flag_invalid, status);
af39bc8c 6381 return float128_default_nan(status);
158142c2
FB
6382 }
6383 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6384 }
6385 if ( aExp == 0 ) {
6386 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6387 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6388 }
6389 expDiff = aExp - bExp;
6390 if ( expDiff < -1 ) return a;
/*
 * OR 0x0001000000000000 (the bit above the 48 high fraction bits) into
 * both significands and shift them left by 15; `a' is shifted one bit
 * less when its exponent is one below `b'.
 */
6391 shortShift128Left(
e9321124 6392 aSig0 | UINT64_C(0x0001000000000000),
158142c2
FB
6393 aSig1,
6394 15 - ( expDiff < 0 ),
6395 &aSig0,
6396 &aSig1
6397 );
6398 shortShift128Left(
e9321124 6399 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
/* First quotient bit. */
6400 q = le128( bSig0, bSig1, aSig0, aSig1 );
6401 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6402 expDiff -= 64;
/*
 * Reduce expDiff by 61 per iteration.  The estimate is lowered by 4
 * (clamped at 0) before use.
 * NOTE(review): presumably so the partial remainder never goes negative.
 */
6403 while ( 0 < expDiff ) {
6404 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6405 q = ( 4 < q ) ? q - 4 : 0;
6406 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6407 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6408 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6409 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6410 expDiff -= 61;
6411 }
/* Final partial step; otherwise just shift both significands right by 12. */
6412 if ( -64 < expDiff ) {
6413 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6414 q = ( 4 < q ) ? q - 4 : 0;
6415 q >>= - expDiff;
6416 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6417 expDiff += 52;
6418 if ( expDiff < 0 ) {
6419 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6420 }
6421 else {
6422 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6423 }
6424 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6425 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6426 }
6427 else {
6428 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6429 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6430 }
/*
 * Keep subtracting the divisor until the remainder turns negative,
 * remembering the last non-negative remainder in alternateASig0/1.
 */
6431 do {
6432 alternateASig0 = aSig0;
6433 alternateASig1 = aSig1;
6434 ++q;
6435 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 6436 } while ( 0 <= (int64_t) aSig0 );
/*
 * The sign of (negative remainder + last non-negative remainder) decides
 * which of the two is smaller in magnitude; on a tie an odd q selects the
 * non-negative one.
 */
158142c2 6437 add128(
bb98fe42 6438 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
6439 if ( ( sigMean0 < 0 )
6440 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6441 aSig0 = alternateASig0;
6442 aSig1 = alternateASig1;
6443 }
/* A negative remainder is negated and its sign folded into the result sign. */
bb98fe42 6444 zSign = ( (int64_t) aSig0 < 0 );
158142c2 6445 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
6446 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6447 status);
158142c2
FB
6448}
6449
71bfd65c
RH
6450static inline FloatRelation
6451floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
6452 float_status *status)
f6714d36 6453{
c120391c 6454 bool aSign, bSign;
f6714d36 6455
d1eb8f2a
AD
6456 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6457 float_raise(float_flag_invalid, status);
6458 return float_relation_unordered;
6459 }
f6714d36
AJ
6460 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6461 ( extractFloatx80Frac( a )<<1 ) ) ||
6462 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6463 ( extractFloatx80Frac( b )<<1 ) )) {
6464 if (!is_quiet ||
af39bc8c
AM
6465 floatx80_is_signaling_nan(a, status) ||
6466 floatx80_is_signaling_nan(b, status)) {
ff32e16e 6467 float_raise(float_flag_invalid, status);
f6714d36
AJ
6468 }
6469 return float_relation_unordered;
6470 }
6471 aSign = extractFloatx80Sign( a );
6472 bSign = extractFloatx80Sign( b );
6473 if ( aSign != bSign ) {
6474
6475 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6476 ( ( a.low | b.low ) == 0 ) ) {
6477 /* zero case */
6478 return float_relation_equal;
6479 } else {
6480 return 1 - (2 * aSign);
6481 }
6482 } else {
be53fa78
JM
6483 /* Normalize pseudo-denormals before comparison. */
6484 if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
6485 ++a.high;
6486 }
6487 if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
6488 ++b.high;
6489 }
f6714d36
AJ
6490 if (a.low == b.low && a.high == b.high) {
6491 return float_relation_equal;
6492 } else {
6493 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6494 }
6495 }
6496}
6497
71bfd65c 6498FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 6499{
ff32e16e 6500 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
6501}
6502
71bfd65c
RH
6503FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6504 float_status *status)
f6714d36 6505{
ff32e16e 6506 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
6507}
6508
e5a41ffa 6509floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb 6510{
c120391c 6511 bool aSign;
326b9e98 6512 int32_t aExp;
bb98fe42 6513 uint64_t aSig;
9ee6e8bb 6514
d1eb8f2a
AD
6515 if (floatx80_invalid_encoding(a)) {
6516 float_raise(float_flag_invalid, status);
6517 return floatx80_default_nan(status);
6518 }
9ee6e8bb
PB
6519 aSig = extractFloatx80Frac( a );
6520 aExp = extractFloatx80Exp( a );
6521 aSign = extractFloatx80Sign( a );
6522
326b9e98
AJ
6523 if ( aExp == 0x7FFF ) {
6524 if ( aSig<<1 ) {
ff32e16e 6525 return propagateFloatx80NaN(a, a, status);
326b9e98 6526 }
9ee6e8bb
PB
6527 return a;
6528 }
326b9e98 6529
3c85c37f
PM
6530 if (aExp == 0) {
6531 if (aSig == 0) {
6532 return a;
6533 }
6534 aExp++;
6535 }
69397542 6536
326b9e98
AJ
6537 if (n > 0x10000) {
6538 n = 0x10000;
6539 } else if (n < -0x10000) {
6540 n = -0x10000;
6541 }
6542
9ee6e8bb 6543 aExp += n;
a2f2d288
PM
6544 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6545 aSign, aExp, aSig, 0, status);
9ee6e8bb 6546}
9ee6e8bb 6547
f6b3b108
EC
6548static void __attribute__((constructor)) softfloat_init(void)
6549{
6550 union_float64 ua, ub, uc, ur;
6551
6552 if (QEMU_NO_HARDFLOAT) {
6553 return;
6554 }
6555 /*
6556 * Test that the host's FMA is not obviously broken. For example,
6557 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6558 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6559 */
6560 ua.s = 0x0020000000000001ULL;
6561 ub.s = 0x3ca0000000000000ULL;
6562 uc.s = 0x0020000000000000ULL;
6563 ur.h = fma(ua.h, ub.h, uc.h);
6564 if (ur.s != 0x0020000000000001ULL) {
6565 force_soft_fma = true;
6566 }
6567}