]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
softfloat: Move sqrt_float to softfloat-parts.c.inc
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
d82f3b2d 135 float_raise(float_flag_input_denormal, s); \
a94b7839
EC
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
/*
 * Choose whether to use fpclassify (value 1) or the float32/64_* primitives
 * (value 0) in the generated hardfloat functions.  Each combination of
 * number of inputs and float size gets its own value.
 *
 * The single-precision settings are the same on every host; only the
 * double-precision settings depend on the host architecture.
 */
#define QEMU_HARDFLOAT_1F32_USE_FP 0
#define QEMU_HARDFLOAT_2F32_USE_FP 0
#define QEMU_HARDFLOAT_3F32_USE_FP 0

#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205
/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%,
 * so every other host keeps the softfloat predicate.
 */
#if !defined(__x86_64__) && !defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF 0
#else
# define QEMU_HARDFLOAT_USE_ISINF 1
#endif
217
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.  Building with -ffast-math disables hardfloat as well.
 *
 * QEMU_NO_HARDFLOAT is 1 when hardfloat is disabled, 0 otherwise;
 * QEMU_SOFTFLOAT_ATTR additionally carries noinline only when hardfloat
 * is enabled.
 */
#if defined(__FAST_MATH__)
# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
  IEEE implementation
#endif

#if !defined(TARGET_PPC) && !defined(__FAST_MATH__)
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#else
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
a94b7839
EC
342static inline float32
343float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
b240c9c4 345 f32_check_fn pre, f32_check_fn post)
a94b7839
EC
346{
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
a94b7839
EC
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 363 float_raise(float_flag_overflow, s);
b240c9c4
RH
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
a94b7839
EC
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371}
372
373static inline float64
374float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
b240c9c4 376 f64_check_fn pre, f64_check_fn post)
a94b7839
EC
377{
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
a94b7839
EC
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 394 float_raise(float_flag_overflow, s);
b240c9c4
RH
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
a94b7839
EC
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402}
403
d97544c9
AB
404/*----------------------------------------------------------------------------
405| Returns the fraction bits of the single-precision floating-point value `a'.
406*----------------------------------------------------------------------------*/
407
408static inline uint32_t extractFloat32Frac(float32 a)
409{
410 return float32_val(a) & 0x007FFFFF;
411}
412
413/*----------------------------------------------------------------------------
414| Returns the exponent bits of the single-precision floating-point value `a'.
415*----------------------------------------------------------------------------*/
416
417static inline int extractFloat32Exp(float32 a)
418{
419 return (float32_val(a) >> 23) & 0xFF;
420}
421
422/*----------------------------------------------------------------------------
423| Returns the sign bit of the single-precision floating-point value `a'.
424*----------------------------------------------------------------------------*/
425
c120391c 426static inline bool extractFloat32Sign(float32 a)
d97544c9
AB
427{
428 return float32_val(a) >> 31;
429}
430
431/*----------------------------------------------------------------------------
432| Returns the fraction bits of the double-precision floating-point value `a'.
433*----------------------------------------------------------------------------*/
434
435static inline uint64_t extractFloat64Frac(float64 a)
436{
e9321124 437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
d97544c9
AB
438}
439
440/*----------------------------------------------------------------------------
441| Returns the exponent bits of the double-precision floating-point value `a'.
442*----------------------------------------------------------------------------*/
443
444static inline int extractFloat64Exp(float64 a)
445{
446 return (float64_val(a) >> 52) & 0x7FF;
447}
448
449/*----------------------------------------------------------------------------
450| Returns the sign bit of the double-precision floating-point value `a'.
451*----------------------------------------------------------------------------*/
452
c120391c 453static inline bool extractFloat64Sign(float64 a)
d97544c9
AB
454{
455 return float64_val(a) >> 63;
456}
457
a90119b5
AB
/*
 * Classification of a floating point number.
 *
 * The enumerator order is significant: the NaN classes come last, so
 * "cls >= float_class_qnan" tests for any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,   /* all NaNs from here */
    float_class_snan,
} FloatClass;
471
134eda00
RH
472#define float_cmask(bit) (1u << (bit))
473
474enum {
475 float_cmask_zero = float_cmask(float_class_zero),
476 float_cmask_normal = float_cmask(float_class_normal),
477 float_cmask_inf = float_cmask(float_class_inf),
478 float_cmask_qnan = float_cmask(float_class_qnan),
479 float_cmask_snan = float_cmask(float_class_snan),
480
481 float_cmask_infzero = float_cmask_zero | float_cmask_inf,
482 float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
483};
484
e1c4667a
RH
/* Flags for parts_minmax; independent bits that may be OR-ed together. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1 << 0,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 1 << 1,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 1 << 2,
};
134eda00 494
247d1f21
RH
495/* Simple helpers for checking if, or what kind of, NaN we have */
496static inline __attribute__((unused)) bool is_nan(FloatClass c)
497{
498 return unlikely(c >= float_class_qnan);
499}
500
501static inline __attribute__((unused)) bool is_snan(FloatClass c)
502{
503 return c == float_class_snan;
504}
505
506static inline __attribute__((unused)) bool is_qnan(FloatClass c)
507{
508 return c == float_class_qnan;
509}
510
a90119b5 511/*
0018b1f4
RH
512 * Structure holding all of the decomposed parts of a float.
513 * The exponent is unbiased and the fraction is normalized.
a90119b5 514 *
0018b1f4
RH
515 * The fraction words are stored in big-endian word ordering,
516 * so that truncation from a larger format to a smaller format
517 * can be done simply by ignoring subsequent elements.
a90119b5
AB
518 */
519
520typedef struct {
a90119b5
AB
521 FloatClass cls;
522 bool sign;
4109b9ea
RH
523 int32_t exp;
524 union {
525 /* Routines that know the structure may reference the singular name. */
526 uint64_t frac;
527 /*
528 * Routines expanded with multiple structures reference "hi" and "lo"
529 * depending on the operation. In FloatParts64, "hi" and "lo" are
530 * both the same word and aliased here.
531 */
532 uint64_t frac_hi;
533 uint64_t frac_lo;
534 };
f8155c1d 535} FloatParts64;
a90119b5 536
0018b1f4
RH
537typedef struct {
538 FloatClass cls;
539 bool sign;
540 int32_t exp;
541 uint64_t frac_hi;
542 uint64_t frac_lo;
543} FloatParts128;
544
aca84527
RH
545typedef struct {
546 FloatClass cls;
547 bool sign;
548 int32_t exp;
549 uint64_t frac_hi;
550 uint64_t frac_hm; /* high-middle */
551 uint64_t frac_lm; /* low-middle */
552 uint64_t frac_lo;
553} FloatParts256;
554
/*
 * These apply to the most significant word of each FloatPartsN:
 * the binary point sits at bit 63, and the implicit bit is that top bit.
 */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
a90119b5
AB
558
/*
 * Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction:
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;
584
/*
 * Expand the FloatFmt fields that follow from the exponent width E and the
 * fraction width F (both in bits), as a list of designated initializers.
 *
 * frac_shift is (-F - 1) modulo 64, i.e. the distance from the fraction's
 * least significant bit up to bit 63 of its word; the lsb and rounding
 * masks are derived from the same shift.
 *
 * E and F are parenthesized at every use so that expression arguments
 * (e.g. FLOAT_PARAMS(8, 20 + 3)) expand with the intended precedence.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = (E),                                           \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                         \
    .exp_max        = (1 << (E)) - 1,                                \
    .frac_size      = (F),                                           \
    .frac_shift     = (-(F) - 1) & 63,                               \
    .frac_lsb       = 1ull << ((-(F) - 1) & 63),                     \
    .frac_lsbm1     = 1ull << ((-(F) - 2) & 63),                     \
    .round_mask     = (1ull << ((-(F) - 1) & 63)) - 1,               \
    .roundeven_mask = (2ull << ((-(F) - 1) & 63)) - 1
a90119b5
AB
596
597static const FloatFmt float16_params = {
598 FLOAT_PARAMS(5, 10)
599};
600
6fed16b2
AB
601static const FloatFmt float16_params_ahp = {
602 FLOAT_PARAMS(5, 10),
603 .arm_althp = true
604};
605
8282310d
LZ
606static const FloatFmt bfloat16_params = {
607 FLOAT_PARAMS(8, 7)
608};
609
a90119b5
AB
610static const FloatFmt float32_params = {
611 FLOAT_PARAMS(8, 23)
612};
613
614static const FloatFmt float64_params = {
615 FLOAT_PARAMS(11, 52)
616};
617
0018b1f4
RH
618static const FloatFmt float128_params = {
619 FLOAT_PARAMS(15, 112)
620};
621
6fff2167 622/* Unpack a float to parts, but do not canonicalize. */
d8fdd172 623static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
6fff2167 624{
d8fdd172
RH
625 const int f_size = fmt->frac_size;
626 const int e_size = fmt->exp_size;
6fff2167 627
d8fdd172 628 *r = (FloatParts64) {
6fff2167 629 .cls = float_class_unclassified,
d8fdd172
RH
630 .sign = extract64(raw, f_size + e_size, 1),
631 .exp = extract64(raw, f_size, e_size),
632 .frac = extract64(raw, 0, f_size)
6fff2167
AB
633 };
634}
635
3dddb203 636static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
6fff2167 637{
3dddb203 638 unpack_raw64(p, &float16_params, f);
6fff2167
AB
639}
640
3dddb203 641static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
8282310d 642{
3dddb203 643 unpack_raw64(p, &bfloat16_params, f);
8282310d
LZ
644}
645
3dddb203 646static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
6fff2167 647{
3dddb203 648 unpack_raw64(p, &float32_params, f);
6fff2167
AB
649}
650
3dddb203 651static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
6fff2167 652{
3dddb203 653 unpack_raw64(p, &float64_params, f);
6fff2167
AB
654}
655
0018b1f4
RH
656static void float128_unpack_raw(FloatParts128 *p, float128 f)
657{
658 const int f_size = float128_params.frac_size - 64;
659 const int e_size = float128_params.exp_size;
660
661 *p = (FloatParts128) {
662 .cls = float_class_unclassified,
663 .sign = extract64(f.high, f_size + e_size, 1),
664 .exp = extract64(f.high, f_size, e_size),
665 .frac_hi = extract64(f.high, 0, f_size),
666 .frac_lo = f.low,
667 };
668}
669
6fff2167 670/* Pack a float from parts, but do not canonicalize. */
9e4af58c 671static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
6fff2167 672{
9e4af58c
RH
673 const int f_size = fmt->frac_size;
674 const int e_size = fmt->exp_size;
675 uint64_t ret;
676
677 ret = (uint64_t)p->sign << (f_size + e_size);
678 ret = deposit64(ret, f_size, e_size, p->exp);
679 ret = deposit64(ret, 0, f_size, p->frac);
680 return ret;
6fff2167
AB
681}
682
71fd178e 683static inline float16 float16_pack_raw(const FloatParts64 *p)
6fff2167 684{
71fd178e 685 return make_float16(pack_raw64(p, &float16_params));
6fff2167
AB
686}
687
71fd178e 688static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
8282310d 689{
71fd178e 690 return pack_raw64(p, &bfloat16_params);
8282310d
LZ
691}
692
71fd178e 693static inline float32 float32_pack_raw(const FloatParts64 *p)
6fff2167 694{
71fd178e 695 return make_float32(pack_raw64(p, &float32_params));
6fff2167
AB
696}
697
71fd178e 698static inline float64 float64_pack_raw(const FloatParts64 *p)
6fff2167 699{
71fd178e 700 return make_float64(pack_raw64(p, &float64_params));
6fff2167
AB
701}
702
0018b1f4
RH
703static float128 float128_pack_raw(const FloatParts128 *p)
704{
705 const int f_size = float128_params.frac_size - 64;
706 const int e_size = float128_params.exp_size;
707 uint64_t hi;
708
709 hi = (uint64_t)p->sign << (f_size + e_size);
710 hi = deposit64(hi, f_size, e_size, p->exp);
711 hi = deposit64(hi, 0, f_size, p->frac_hi);
712 return make_float128(hi, p->frac_lo);
713}
714
0664335a
RH
715/*----------------------------------------------------------------------------
716| Functions and definitions to determine: (1) whether tininess for underflow
717| is detected before or after rounding by default, (2) what (if anything)
718| happens when exceptions are raised, (3) how signaling NaNs are distinguished
719| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
720| are propagated from function inputs to output. These details are target-
721| specific.
722*----------------------------------------------------------------------------*/
139c1837 723#include "softfloat-specialize.c.inc"
0664335a 724
0018b1f4
RH
/*
 * Size dispatch for the parts_* operations, via QEMU_GENERIC on the type
 * of P: FloatParts128 * selects parts128_NAME (and FloatParts256 *
 * selects parts256_NAME in the three-way form), otherwise parts64_NAME.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define parts_default_nan(P, S)   PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)   PARTS_GENERIC_64_128(silence_nan, P)(P, S)
734
7c45bad8
RH
735static void parts64_return_nan(FloatParts64 *a, float_status *s);
736static void parts128_return_nan(FloatParts128 *a, float_status *s);
737
738#define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
0018b1f4 739
22c355f4
RH
740static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
741 float_status *s);
742static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
743 float_status *s);
744
745#define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
746
979582d0
RH
747static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
748 FloatParts64 *c, float_status *s,
749 int ab_mask, int abc_mask);
750static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
751 FloatParts128 *b,
752 FloatParts128 *c,
753 float_status *s,
754 int ab_mask, int abc_mask);
755
756#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
757 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
758
d46975bc
RH
759static void parts64_canonicalize(FloatParts64 *p, float_status *status,
760 const FloatFmt *fmt);
761static void parts128_canonicalize(FloatParts128 *p, float_status *status,
762 const FloatFmt *fmt);
763
764#define parts_canonicalize(A, S, F) \
765 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
766
ee6959f2
RH
767static void parts64_uncanon(FloatParts64 *p, float_status *status,
768 const FloatFmt *fmt);
769static void parts128_uncanon(FloatParts128 *p, float_status *status,
770 const FloatFmt *fmt);
771
772#define parts_uncanon(A, S, F) \
773 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
774
da10a907
RH
775static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
776static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 777static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
778
779#define parts_add_normal(A, B) \
dedd123c 780 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
da10a907
RH
781
782static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
783static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 784static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
785
786#define parts_sub_normal(A, B) \
dedd123c 787 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
da10a907
RH
788
789static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
790 float_status *s, bool subtract);
791static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
792 float_status *s, bool subtract);
793
794#define parts_addsub(A, B, S, Z) \
795 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
796
aca84527
RH
797static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
798 float_status *s);
799static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
800 float_status *s);
801
802#define parts_mul(A, B, S) \
803 PARTS_GENERIC_64_128(mul, A)(A, B, S)
804
dedd123c
RH
805static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
806 FloatParts64 *c, int flags,
807 float_status *s);
808static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
809 FloatParts128 *c, int flags,
810 float_status *s);
811
812#define parts_muladd(A, B, C, Z, S) \
813 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
814
ec961b81
RH
815static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
816 float_status *s);
817static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
818 float_status *s);
819
820#define parts_div(A, B, S) \
821 PARTS_GENERIC_64_128(div, A)(A, B, S)
822
9261b245
RH
823static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
824static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
825
826#define parts_sqrt(A, S, F) \
827 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
828
afc34931
RH
829static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
830 int scale, int frac_size);
831static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
832 int scale, int frac_size);
833
834#define parts_round_to_int_normal(A, R, C, F) \
835 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
836
837static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
838 int scale, float_status *s,
839 const FloatFmt *fmt);
840static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
841 int scale, float_status *s,
842 const FloatFmt *fmt);
843
844#define parts_round_to_int(A, R, C, S, F) \
845 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
846
463b3f0d
RH
847static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
848 int scale, int64_t min, int64_t max,
849 float_status *s);
850static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
851 int scale, int64_t min, int64_t max,
852 float_status *s);
853
854#define parts_float_to_sint(P, R, Z, MN, MX, S) \
855 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
856
4ab4aef0
RH
857static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
858 int scale, uint64_t max,
859 float_status *s);
860static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
861 int scale, uint64_t max,
862 float_status *s);
863
864#define parts_float_to_uint(P, R, Z, M, S) \
865 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
866
e3689519
RH
867static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
868 int scale, float_status *s);
869static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
870 int scale, float_status *s);
871
872#define parts_sint_to_float(P, I, Z, S) \
873 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
874
37c954a1
RH
875static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
876 int scale, float_status *s);
877static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
878 int scale, float_status *s);
879
880#define parts_uint_to_float(P, I, Z, S) \
881 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
882
e1c4667a
RH
883static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
884 float_status *s, int flags);
885static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
886 float_status *s, int flags);
887
888#define parts_minmax(A, B, S, F) \
889 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
890
6eb169b8
RH
891static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
892 float_status *s, bool q);
893static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
894 float_status *s, bool q);
895
896#define parts_compare(A, B, S, Q) \
897 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
898
39626b0c
RH
899static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
900static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
901
902#define parts_scalbn(A, N, S) \
903 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
904
0018b1f4
RH
905/*
906 * Helper functions for softfloat-parts.c.inc, per-size operations.
907 */
908
22c355f4
RH
909#define FRAC_GENERIC_64_128(NAME, P) \
910 QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
911
dedd123c
RH
912#define FRAC_GENERIC_64_128_256(NAME, P) \
913 QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
914 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
915
da10a907
RH
916static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
917{
918 return uadd64_overflow(a->frac, b->frac, &r->frac);
919}
920
921static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
922{
923 bool c = 0;
924 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
925 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
926 return c;
927}
928
dedd123c
RH
929static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
930{
931 bool c = 0;
932 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
933 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
934 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
935 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
936 return c;
937}
938
939#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
da10a907 940
ee6959f2
RH
941static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
942{
943 return uadd64_overflow(a->frac, c, &r->frac);
944}
945
946static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
947{
948 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
949 return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
950}
951
952#define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
953
954static void frac64_allones(FloatParts64 *a)
955{
956 a->frac = -1;
957}
958
959static void frac128_allones(FloatParts128 *a)
960{
961 a->frac_hi = a->frac_lo = -1;
962}
963
964#define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
965
22c355f4
RH
966static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
967{
968 return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
969}
970
971static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
972{
973 uint64_t ta = a->frac_hi, tb = b->frac_hi;
974 if (ta == tb) {
975 ta = a->frac_lo, tb = b->frac_lo;
976 if (ta == tb) {
977 return 0;
978 }
979 }
980 return ta < tb ? -1 : 1;
981}
982
983#define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
984
d46975bc 985static void frac64_clear(FloatParts64 *a)
0018b1f4 986{
d46975bc
RH
987 a->frac = 0;
988}
989
990static void frac128_clear(FloatParts128 *a)
991{
992 a->frac_hi = a->frac_lo = 0;
0018b1f4
RH
993}
994
d46975bc 995#define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
0018b1f4 996
ec961b81
RH
997static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
998{
999 uint64_t n1, n0, r, q;
1000 bool ret;
1001
1002 /*
1003 * We want a 2*N / N-bit division to produce exactly an N-bit
1004 * result, so that we do not lose any precision and so that we
1005 * do not have to renormalize afterward. If A.frac < B.frac,
1006 * then division would produce an (N-1)-bit result; shift A left
1007 * by one to produce the an N-bit result, and return true to
1008 * decrement the exponent to match.
1009 *
1010 * The udiv_qrnnd algorithm that we're using requires normalization,
1011 * i.e. the msb of the denominator must be set, which is already true.
1012 */
1013 ret = a->frac < b->frac;
1014 if (ret) {
1015 n0 = a->frac;
1016 n1 = 0;
1017 } else {
1018 n0 = a->frac >> 1;
1019 n1 = a->frac << 63;
1020 }
1021 q = udiv_qrnnd(&r, n0, n1, b->frac);
1022
1023 /* Set lsb if there is a remainder, to set inexact. */
1024 a->frac = q | (r != 0);
1025
1026 return ret;
1027}
1028
1029static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1030{
1031 uint64_t q0, q1, a0, a1, b0, b1;
1032 uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1033 bool ret = false;
1034
1035 a0 = a->frac_hi, a1 = a->frac_lo;
1036 b0 = b->frac_hi, b1 = b->frac_lo;
1037
1038 ret = lt128(a0, a1, b0, b1);
1039 if (!ret) {
1040 a1 = shr_double(a0, a1, 1);
1041 a0 = a0 >> 1;
1042 }
1043
1044 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1045 q0 = estimateDiv128To64(a0, a1, b0);
1046
1047 /*
1048 * Estimate is high because B1 was not included (unless B1 == 0).
1049 * Reduce quotient and increase remainder until remainder is non-negative.
1050 * This loop will execute 0 to 2 times.
1051 */
1052 mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1053 sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1054 while (r0 != 0) {
1055 q0--;
1056 add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1057 }
1058
1059 /* Repeat using the remainder, producing a second word of quotient. */
1060 q1 = estimateDiv128To64(r1, r2, b0);
1061 mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1062 sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1063 while (r1 != 0) {
1064 q1--;
1065 add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1066 }
1067
1068 /* Any remainder indicates inexact; set sticky bit. */
1069 q1 |= (r2 | r3) != 0;
1070
1071 a->frac_hi = q0;
1072 a->frac_lo = q1;
1073 return ret;
1074}
1075
1076#define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1077
d46975bc 1078static bool frac64_eqz(FloatParts64 *a)
0018b1f4 1079{
d46975bc
RH
1080 return a->frac == 0;
1081}
1082
1083static bool frac128_eqz(FloatParts128 *a)
1084{
1085 return (a->frac_hi | a->frac_lo) == 0;
0018b1f4
RH
1086}
1087
d46975bc 1088#define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
0fc07cad 1089
aca84527
RH
1090static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1091{
1092 mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1093}
1094
1095static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1096{
1097 mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1098 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1099}
1100
1101#define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1102
da10a907
RH
1103static void frac64_neg(FloatParts64 *a)
1104{
1105 a->frac = -a->frac;
1106}
1107
1108static void frac128_neg(FloatParts128 *a)
1109{
1110 bool c = 0;
1111 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1112 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1113}
1114
dedd123c
RH
1115static void frac256_neg(FloatParts256 *a)
1116{
1117 bool c = 0;
1118 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1119 a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1120 a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1121 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1122}
1123
1124#define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
da10a907 1125
d46975bc 1126static int frac64_normalize(FloatParts64 *a)
6fff2167 1127{
d46975bc
RH
1128 if (a->frac) {
1129 int shift = clz64(a->frac);
1130 a->frac <<= shift;
1131 return shift;
1132 }
1133 return 64;
1134}
1135
1136static int frac128_normalize(FloatParts128 *a)
1137{
1138 if (a->frac_hi) {
1139 int shl = clz64(a->frac_hi);
463e45dc
RH
1140 a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1141 a->frac_lo <<= shl;
d46975bc
RH
1142 return shl;
1143 } else if (a->frac_lo) {
1144 int shl = clz64(a->frac_lo);
463e45dc 1145 a->frac_hi = a->frac_lo << shl;
d46975bc
RH
1146 a->frac_lo = 0;
1147 return shl + 64;
6fff2167 1148 }
d46975bc 1149 return 128;
6fff2167
AB
1150}
1151
dedd123c
RH
1152static int frac256_normalize(FloatParts256 *a)
1153{
1154 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1155 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
463e45dc 1156 int ret, shl;
dedd123c
RH
1157
1158 if (likely(a0)) {
1159 shl = clz64(a0);
1160 if (shl == 0) {
1161 return 0;
1162 }
1163 ret = shl;
1164 } else {
1165 if (a1) {
1166 ret = 64;
1167 a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1168 } else if (a2) {
1169 ret = 128;
1170 a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1171 } else if (a3) {
1172 ret = 192;
1173 a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1174 } else {
1175 ret = 256;
1176 a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1177 goto done;
1178 }
1179 shl = clz64(a0);
1180 if (shl == 0) {
1181 goto done;
1182 }
1183 ret += shl;
1184 }
1185
463e45dc
RH
1186 a0 = shl_double(a0, a1, shl);
1187 a1 = shl_double(a1, a2, shl);
1188 a2 = shl_double(a2, a3, shl);
1189 a3 <<= shl;
dedd123c
RH
1190
1191 done:
1192 a->frac_hi = a0;
1193 a->frac_hm = a1;
1194 a->frac_lm = a2;
1195 a->frac_lo = a3;
1196 return ret;
1197}
1198
1199#define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
d46975bc
RH
1200
1201static void frac64_shl(FloatParts64 *a, int c)
1202{
1203 a->frac <<= c;
1204}
1205
1206static void frac128_shl(FloatParts128 *a, int c)
1207{
463e45dc
RH
1208 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1209
1210 if (c & 64) {
1211 a0 = a1, a1 = 0;
1212 }
1213
1214 c &= 63;
1215 if (c) {
1216 a0 = shl_double(a0, a1, c);
1217 a1 = a1 << c;
1218 }
1219
1220 a->frac_hi = a0;
1221 a->frac_lo = a1;
d46975bc
RH
1222}
1223
1224#define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1225
1226static void frac64_shr(FloatParts64 *a, int c)
1227{
1228 a->frac >>= c;
1229}
1230
1231static void frac128_shr(FloatParts128 *a, int c)
1232{
463e45dc
RH
1233 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1234
1235 if (c & 64) {
1236 a1 = a0, a0 = 0;
1237 }
1238
1239 c &= 63;
1240 if (c) {
1241 a1 = shr_double(a0, a1, c);
1242 a0 = a0 >> c;
1243 }
1244
1245 a->frac_hi = a0;
1246 a->frac_lo = a1;
d46975bc
RH
1247}
1248
1249#define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1250
ee6959f2 1251static void frac64_shrjam(FloatParts64 *a, int c)
6fff2167 1252{
463e45dc
RH
1253 uint64_t a0 = a->frac;
1254
1255 if (likely(c != 0)) {
1256 if (likely(c < 64)) {
1257 a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1258 } else {
1259 a0 = a0 != 0;
1260 }
1261 a->frac = a0;
1262 }
ee6959f2 1263}
6fff2167 1264
ee6959f2
RH
1265static void frac128_shrjam(FloatParts128 *a, int c)
1266{
463e45dc
RH
1267 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1268 uint64_t sticky = 0;
1269
1270 if (unlikely(c == 0)) {
1271 return;
1272 } else if (likely(c < 64)) {
1273 /* nothing */
1274 } else if (likely(c < 128)) {
1275 sticky = a1;
1276 a1 = a0;
1277 a0 = 0;
1278 c &= 63;
1279 if (c == 0) {
1280 goto done;
1281 }
1282 } else {
1283 sticky = a0 | a1;
1284 a0 = a1 = 0;
1285 goto done;
1286 }
1287
1288 sticky |= shr_double(a1, 0, c);
1289 a1 = shr_double(a0, a1, c);
1290 a0 = a0 >> c;
1291
1292 done:
1293 a->frac_lo = a1 | (sticky != 0);
1294 a->frac_hi = a0;
6fff2167
AB
1295}
1296
dedd123c
RH
1297static void frac256_shrjam(FloatParts256 *a, int c)
1298{
1299 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1300 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1301 uint64_t sticky = 0;
dedd123c
RH
1302
1303 if (unlikely(c == 0)) {
1304 return;
1305 } else if (likely(c < 64)) {
1306 /* nothing */
1307 } else if (likely(c < 256)) {
1308 if (unlikely(c & 128)) {
1309 sticky |= a2 | a3;
1310 a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1311 }
1312 if (unlikely(c & 64)) {
1313 sticky |= a3;
1314 a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1315 }
1316 c &= 63;
1317 if (c == 0) {
1318 goto done;
1319 }
1320 } else {
1321 sticky = a0 | a1 | a2 | a3;
1322 a0 = a1 = a2 = a3 = 0;
1323 goto done;
1324 }
1325
463e45dc
RH
1326 sticky |= shr_double(a3, 0, c);
1327 a3 = shr_double(a2, a3, c);
1328 a2 = shr_double(a1, a2, c);
1329 a1 = shr_double(a0, a1, c);
1330 a0 = a0 >> c;
dedd123c
RH
1331
1332 done:
1333 a->frac_lo = a3 | (sticky != 0);
1334 a->frac_lm = a2;
1335 a->frac_hm = a1;
1336 a->frac_hi = a0;
1337}
1338
1339#define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
d446830a 1340
da10a907
RH
1341static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1342{
1343 return usub64_overflow(a->frac, b->frac, &r->frac);
1344}
7c45bad8 1345
da10a907
RH
1346static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1347{
1348 bool c = 0;
1349 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1350 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1351 return c;
1352}
1353
dedd123c
RH
1354static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1355{
1356 bool c = 0;
1357 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1358 r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1359 r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1360 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1361 return c;
1362}
1363
1364#define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
da10a907 1365
aca84527
RH
1366static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1367{
1368 r->frac = a->frac_hi | (a->frac_lo != 0);
1369}
1370
1371static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1372{
1373 r->frac_hi = a->frac_hi;
1374 r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1375}
1376
1377#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1378
dedd123c
RH
1379static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1380{
1381 r->frac_hi = a->frac;
1382 r->frac_lo = 0;
1383}
1384
1385static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1386{
1387 r->frac_hi = a->frac_hi;
1388 r->frac_hm = a->frac_lo;
1389 r->frac_lm = 0;
1390 r->frac_lo = 0;
1391}
1392
1393#define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1394
9261b245
RH
/*
 * Reciprocal square root table, indexed by 1 bit of exponent and
 * 6 bits of mantissa (2^7 = 128 entries).
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licensed.
 */
static const uint16_t rsqrt_tab[128] = {
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
1418
da10a907
RH
1419#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1420#define FloatPartsN glue(FloatParts,N)
aca84527 1421#define FloatPartsW glue(FloatParts,W)
da10a907
RH
1422
1423#define N 64
aca84527 1424#define W 128
da10a907
RH
1425
1426#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1427#include "softfloat-parts.c.inc"
1428
da10a907 1429#undef N
aca84527 1430#undef W
da10a907 1431#define N 128
aca84527 1432#define W 256
7c45bad8 1433
da10a907 1434#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1435#include "softfloat-parts.c.inc"
1436
dedd123c
RH
1437#undef N
1438#undef W
1439#define N 256
1440
1441#include "softfloat-parts-addsub.c.inc"
1442
da10a907 1443#undef N
aca84527 1444#undef W
7c45bad8
RH
1445#undef partsN
1446#undef FloatPartsN
aca84527 1447#undef FloatPartsW
7c45bad8 1448
aaffb7bf
RH
1449/*
1450 * Pack/unpack routines with a specific FloatFmt.
1451 */
1452
98e256fc
RH
1453static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1454 float_status *s, const FloatFmt *params)
aaffb7bf 1455{
98e256fc 1456 float16_unpack_raw(p, f);
d46975bc 1457 parts_canonicalize(p, s, params);
aaffb7bf
RH
1458}
1459
98e256fc
RH
1460static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1461 float_status *s)
aaffb7bf 1462{
98e256fc 1463 float16a_unpack_canonical(p, f, s, &float16_params);
aaffb7bf
RH
1464}
1465
98e256fc
RH
1466static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1467 float_status *s)
aaffb7bf 1468{
98e256fc 1469 bfloat16_unpack_raw(p, f);
d46975bc 1470 parts_canonicalize(p, s, &bfloat16_params);
aaffb7bf
RH
1471}
1472
e293e927
RH
1473static float16 float16a_round_pack_canonical(FloatParts64 *p,
1474 float_status *s,
aaffb7bf
RH
1475 const FloatFmt *params)
1476{
ee6959f2 1477 parts_uncanon(p, s, params);
e293e927 1478 return float16_pack_raw(p);
aaffb7bf
RH
1479}
1480
e293e927
RH
1481static float16 float16_round_pack_canonical(FloatParts64 *p,
1482 float_status *s)
aaffb7bf
RH
1483{
1484 return float16a_round_pack_canonical(p, s, &float16_params);
1485}
1486
e293e927
RH
1487static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1488 float_status *s)
aaffb7bf 1489{
ee6959f2 1490 parts_uncanon(p, s, &bfloat16_params);
e293e927 1491 return bfloat16_pack_raw(p);
aaffb7bf
RH
1492}
1493
98e256fc
RH
1494static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1495 float_status *s)
aaffb7bf 1496{
98e256fc 1497 float32_unpack_raw(p, f);
d46975bc 1498 parts_canonicalize(p, s, &float32_params);
aaffb7bf
RH
1499}
1500
e293e927
RH
1501static float32 float32_round_pack_canonical(FloatParts64 *p,
1502 float_status *s)
aaffb7bf 1503{
ee6959f2 1504 parts_uncanon(p, s, &float32_params);
e293e927 1505 return float32_pack_raw(p);
aaffb7bf
RH
1506}
1507
98e256fc
RH
1508static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1509 float_status *s)
aaffb7bf 1510{
98e256fc 1511 float64_unpack_raw(p, f);
d46975bc 1512 parts_canonicalize(p, s, &float64_params);
aaffb7bf
RH
1513}
1514
e293e927
RH
1515static float64 float64_round_pack_canonical(FloatParts64 *p,
1516 float_status *s)
aaffb7bf 1517{
ee6959f2 1518 parts_uncanon(p, s, &float64_params);
e293e927 1519 return float64_pack_raw(p);
aaffb7bf
RH
1520}
1521
3ff49e56
RH
1522static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1523 float_status *s)
1524{
1525 float128_unpack_raw(p, f);
1526 parts_canonicalize(p, s, &float128_params);
1527}
1528
1529static float128 float128_round_pack_canonical(FloatParts128 *p,
1530 float_status *s)
1531{
1532 parts_uncanon(p, s, &float128_params);
1533 return float128_pack_raw(p);
1534}
1535
6fff2167 1536/*
da10a907 1537 * Addition and subtraction
6fff2167
AB
1538 */
1539
da10a907
RH
1540static float16 QEMU_FLATTEN
1541float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
6fff2167 1542{
da10a907 1543 FloatParts64 pa, pb, *pr;
98e256fc
RH
1544
1545 float16_unpack_canonical(&pa, a, status);
1546 float16_unpack_canonical(&pb, b, status);
da10a907 1547 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1548
da10a907 1549 return float16_round_pack_canonical(pr, status);
6fff2167
AB
1550}
1551
da10a907 1552float16 float16_add(float16 a, float16 b, float_status *status)
1b615d48 1553{
da10a907
RH
1554 return float16_addsub(a, b, status, false);
1555}
1b615d48 1556
da10a907
RH
1557float16 float16_sub(float16 a, float16 b, float_status *status)
1558{
1559 return float16_addsub(a, b, status, true);
1b615d48
EC
1560}
1561
1562static float32 QEMU_SOFTFLOAT_ATTR
da10a907 1563soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
6fff2167 1564{
da10a907 1565 FloatParts64 pa, pb, *pr;
98e256fc
RH
1566
1567 float32_unpack_canonical(&pa, a, status);
1568 float32_unpack_canonical(&pb, b, status);
da10a907 1569 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1570
da10a907 1571 return float32_round_pack_canonical(pr, status);
6fff2167
AB
1572}
1573
da10a907 1574static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1b615d48 1575{
da10a907 1576 return soft_f32_addsub(a, b, status, false);
1b615d48
EC
1577}
1578
da10a907 1579static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1b615d48 1580{
da10a907 1581 return soft_f32_addsub(a, b, status, true);
1b615d48
EC
1582}
1583
1584static float64 QEMU_SOFTFLOAT_ATTR
da10a907 1585soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
6fff2167 1586{
da10a907 1587 FloatParts64 pa, pb, *pr;
98e256fc
RH
1588
1589 float64_unpack_canonical(&pa, a, status);
1590 float64_unpack_canonical(&pb, b, status);
da10a907 1591 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1592
da10a907 1593 return float64_round_pack_canonical(pr, status);
6fff2167
AB
1594}
1595
da10a907 1596static float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1597{
da10a907 1598 return soft_f64_addsub(a, b, status, false);
1b615d48 1599}
6fff2167 1600
da10a907 1601static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1b615d48 1602{
da10a907 1603 return soft_f64_addsub(a, b, status, true);
6fff2167
AB
1604}
1605
/* Single-precision addition using the host's native float arithmetic. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;

    return sum;
}
6fff2167 1610
1b615d48
EC
/* Single-precision subtraction using the host's native float arithmetic. */
static float hard_f32_sub(float a, float b)
{
    float diff = a - b;

    return diff;
}
1615
/* Double-precision addition using the host's native double arithmetic. */
static double hard_f64_add(double a, double b)
{
    double sum = a + b;

    return sum;
}
6fff2167 1620
1b615d48
EC
/* Double-precision subtraction using the host's native double arithmetic. */
static double hard_f64_sub(double a, double b)
{
    double diff = a - b;

    return diff;
}
1625
b240c9c4 1626static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1b615d48
EC
1627{
1628 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1629 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1630 }
1631 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1632}
1633
b240c9c4 1634static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1b615d48
EC
1635{
1636 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1637 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1638 } else {
1639 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1640 }
1641}
1642
1643static float32 float32_addsub(float32 a, float32 b, float_status *s,
1644 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1645{
1646 return float32_gen2(a, b, s, hard, soft,
b240c9c4 1647 f32_is_zon2, f32_addsubmul_post);
1b615d48
EC
1648}
1649
1650static float64 float64_addsub(float64 a, float64 b, float_status *s,
1651 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1652{
1653 return float64_gen2(a, b, s, hard, soft,
b240c9c4 1654 f64_is_zon2, f64_addsubmul_post);
1b615d48
EC
1655}
1656
1657float32 QEMU_FLATTEN
1658float32_add(float32 a, float32 b, float_status *s)
1659{
1660 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1661}
1662
1663float32 QEMU_FLATTEN
1664float32_sub(float32 a, float32 b, float_status *s)
1665{
1666 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1667}
1668
1669float64 QEMU_FLATTEN
1670float64_add(float64 a, float64 b, float_status *s)
1671{
1672 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1673}
1674
1675float64 QEMU_FLATTEN
1676float64_sub(float64 a, float64 b, float_status *s)
1677{
1678 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1679}
1680
da10a907
RH
1681static bfloat16 QEMU_FLATTEN
1682bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
8282310d 1683{
da10a907 1684 FloatParts64 pa, pb, *pr;
98e256fc
RH
1685
1686 bfloat16_unpack_canonical(&pa, a, status);
1687 bfloat16_unpack_canonical(&pb, b, status);
da10a907 1688 pr = parts_addsub(&pa, &pb, status, subtract);
8282310d 1689
da10a907 1690 return bfloat16_round_pack_canonical(pr, status);
8282310d
LZ
1691}
1692
da10a907 1693bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1694{
da10a907
RH
1695 return bfloat16_addsub(a, b, status, false);
1696}
8282310d 1697
da10a907
RH
1698bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1699{
1700 return bfloat16_addsub(a, b, status, true);
8282310d
LZ
1701}
1702
3ff49e56
RH
1703static float128 QEMU_FLATTEN
1704float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1705{
1706 FloatParts128 pa, pb, *pr;
1707
1708 float128_unpack_canonical(&pa, a, status);
1709 float128_unpack_canonical(&pb, b, status);
1710 pr = parts_addsub(&pa, &pb, status, subtract);
1711
1712 return float128_round_pack_canonical(pr, status);
1713}
1714
1715float128 float128_add(float128 a, float128 b, float_status *status)
1716{
1717 return float128_addsub(a, b, status, false);
1718}
1719
1720float128 float128_sub(float128 a, float128 b, float_status *status)
1721{
1722 return float128_addsub(a, b, status, true);
1723}
1724
74d707e2 1725/*
aca84527 1726 * Multiplication
74d707e2
AB
1727 */
1728
97ff87c0 1729float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2 1730{
aca84527 1731 FloatParts64 pa, pb, *pr;
98e256fc
RH
1732
1733 float16_unpack_canonical(&pa, a, status);
1734 float16_unpack_canonical(&pb, b, status);
aca84527 1735 pr = parts_mul(&pa, &pb, status);
74d707e2 1736
aca84527 1737 return float16_round_pack_canonical(pr, status);
74d707e2
AB
1738}
1739
2dfabc86
EC
1740static float32 QEMU_SOFTFLOAT_ATTR
1741soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2 1742{
aca84527 1743 FloatParts64 pa, pb, *pr;
98e256fc
RH
1744
1745 float32_unpack_canonical(&pa, a, status);
1746 float32_unpack_canonical(&pb, b, status);
aca84527 1747 pr = parts_mul(&pa, &pb, status);
74d707e2 1748
aca84527 1749 return float32_round_pack_canonical(pr, status);
74d707e2
AB
1750}
1751
2dfabc86
EC
1752static float64 QEMU_SOFTFLOAT_ATTR
1753soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2 1754{
aca84527 1755 FloatParts64 pa, pb, *pr;
98e256fc
RH
1756
1757 float64_unpack_canonical(&pa, a, status);
1758 float64_unpack_canonical(&pb, b, status);
aca84527 1759 pr = parts_mul(&pa, &pb, status);
74d707e2 1760
aca84527 1761 return float64_round_pack_canonical(pr, status);
74d707e2
AB
1762}
1763
2dfabc86
EC
/* Single-precision multiplication using the host's native float arithmetic. */
static float hard_f32_mul(float a, float b)
{
    float product = a * b;

    return product;
}
1768
/* Double-precision multiplication using the host's native double arithmetic. */
static double hard_f64_mul(double a, double b)
{
    double product = a * b;

    return product;
}
1773
2dfabc86
EC
1774float32 QEMU_FLATTEN
1775float32_mul(float32 a, float32 b, float_status *s)
1776{
1777 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
b240c9c4 1778 f32_is_zon2, f32_addsubmul_post);
2dfabc86
EC
1779}
1780
1781float64 QEMU_FLATTEN
1782float64_mul(float64 a, float64 b, float_status *s)
1783{
1784 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
b240c9c4 1785 f64_is_zon2, f64_addsubmul_post);
2dfabc86
EC
1786}
1787
aca84527
RH
1788bfloat16 QEMU_FLATTEN
1789bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1790{
aca84527 1791 FloatParts64 pa, pb, *pr;
98e256fc
RH
1792
1793 bfloat16_unpack_canonical(&pa, a, status);
1794 bfloat16_unpack_canonical(&pb, b, status);
aca84527 1795 pr = parts_mul(&pa, &pb, status);
8282310d 1796
aca84527
RH
1797 return bfloat16_round_pack_canonical(pr, status);
1798}
1799
1800float128 QEMU_FLATTEN
1801float128_mul(float128 a, float128 b, float_status *status)
1802{
1803 FloatParts128 pa, pb, *pr;
1804
1805 float128_unpack_canonical(&pa, a, status);
1806 float128_unpack_canonical(&pb, b, status);
1807 pr = parts_mul(&pa, &pb, status);
1808
1809 return float128_round_pack_canonical(pr, status);
8282310d
LZ
1810}
1811
d446830a 1812/*
dedd123c 1813 * Fused multiply-add
d446830a
AB
1814 */
1815
97ff87c0 1816float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
dedd123c 1817 int flags, float_status *status)
d446830a 1818{
dedd123c 1819 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1820
1821 float16_unpack_canonical(&pa, a, status);
1822 float16_unpack_canonical(&pb, b, status);
1823 float16_unpack_canonical(&pc, c, status);
dedd123c 1824 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1825
dedd123c 1826 return float16_round_pack_canonical(pr, status);
d446830a
AB
1827}
1828
ccf770ba
EC
1829static float32 QEMU_SOFTFLOAT_ATTR
1830soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1831 float_status *status)
d446830a 1832{
dedd123c 1833 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1834
1835 float32_unpack_canonical(&pa, a, status);
1836 float32_unpack_canonical(&pb, b, status);
1837 float32_unpack_canonical(&pc, c, status);
dedd123c 1838 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1839
dedd123c 1840 return float32_round_pack_canonical(pr, status);
d446830a
AB
1841}
1842
ccf770ba
EC
1843static float64 QEMU_SOFTFLOAT_ATTR
1844soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1845 float_status *status)
d446830a 1846{
dedd123c 1847 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1848
1849 float64_unpack_canonical(&pa, a, status);
1850 float64_unpack_canonical(&pb, b, status);
1851 float64_unpack_canonical(&pc, c, status);
dedd123c 1852 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1853
dedd123c 1854 return float64_round_pack_canonical(pr, status);
d446830a
AB
1855}
1856
f6b3b108
EC
1857static bool force_soft_fma;
1858
ccf770ba
EC
1859float32 QEMU_FLATTEN
1860float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1861{
1862 union_float32 ua, ub, uc, ur;
1863
1864 ua.s = xa;
1865 ub.s = xb;
1866 uc.s = xc;
1867
1868 if (unlikely(!can_use_fpu(s))) {
1869 goto soft;
1870 }
1871 if (unlikely(flags & float_muladd_halve_result)) {
1872 goto soft;
1873 }
1874
1875 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1876 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1877 goto soft;
1878 }
f6b3b108
EC
1879
1880 if (unlikely(force_soft_fma)) {
1881 goto soft;
1882 }
1883
ccf770ba
EC
1884 /*
1885 * When (a || b) == 0, there's no need to check for under/over flow,
1886 * since we know the addend is (normal || 0) and the product is 0.
1887 */
1888 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1889 union_float32 up;
1890 bool prod_sign;
1891
1892 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1893 prod_sign ^= !!(flags & float_muladd_negate_product);
1894 up.s = float32_set_sign(float32_zero, prod_sign);
1895
1896 if (flags & float_muladd_negate_c) {
1897 uc.h = -uc.h;
1898 }
1899 ur.h = up.h + uc.h;
1900 } else {
896f51fb
KC
1901 union_float32 ua_orig = ua;
1902 union_float32 uc_orig = uc;
1903
ccf770ba
EC
1904 if (flags & float_muladd_negate_product) {
1905 ua.h = -ua.h;
1906 }
1907 if (flags & float_muladd_negate_c) {
1908 uc.h = -uc.h;
1909 }
1910
1911 ur.h = fmaf(ua.h, ub.h, uc.h);
1912
1913 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 1914 float_raise(float_flag_overflow, s);
ccf770ba 1915 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
1916 ua = ua_orig;
1917 uc = uc_orig;
ccf770ba
EC
1918 goto soft;
1919 }
1920 }
1921 if (flags & float_muladd_negate_result) {
1922 return float32_chs(ur.s);
1923 }
1924 return ur.s;
1925
1926 soft:
1927 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1928}
1929
1930float64 QEMU_FLATTEN
1931float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1932{
1933 union_float64 ua, ub, uc, ur;
1934
1935 ua.s = xa;
1936 ub.s = xb;
1937 uc.s = xc;
1938
1939 if (unlikely(!can_use_fpu(s))) {
1940 goto soft;
1941 }
1942 if (unlikely(flags & float_muladd_halve_result)) {
1943 goto soft;
1944 }
1945
1946 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1947 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1948 goto soft;
1949 }
f6b3b108
EC
1950
1951 if (unlikely(force_soft_fma)) {
1952 goto soft;
1953 }
1954
ccf770ba
EC
1955 /*
1956 * When (a || b) == 0, there's no need to check for under/over flow,
1957 * since we know the addend is (normal || 0) and the product is 0.
1958 */
1959 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1960 union_float64 up;
1961 bool prod_sign;
1962
1963 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1964 prod_sign ^= !!(flags & float_muladd_negate_product);
1965 up.s = float64_set_sign(float64_zero, prod_sign);
1966
1967 if (flags & float_muladd_negate_c) {
1968 uc.h = -uc.h;
1969 }
1970 ur.h = up.h + uc.h;
1971 } else {
896f51fb
KC
1972 union_float64 ua_orig = ua;
1973 union_float64 uc_orig = uc;
1974
ccf770ba
EC
1975 if (flags & float_muladd_negate_product) {
1976 ua.h = -ua.h;
1977 }
1978 if (flags & float_muladd_negate_c) {
1979 uc.h = -uc.h;
1980 }
1981
1982 ur.h = fma(ua.h, ub.h, uc.h);
1983
1984 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 1985 float_raise(float_flag_overflow, s);
ccf770ba 1986 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
1987 ua = ua_orig;
1988 uc = uc_orig;
ccf770ba
EC
1989 goto soft;
1990 }
1991 }
1992 if (flags & float_muladd_negate_result) {
1993 return float64_chs(ur.s);
1994 }
1995 return ur.s;
1996
1997 soft:
1998 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1999}
2000
8282310d
LZ
2001bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2002 int flags, float_status *status)
2003{
dedd123c 2004 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
2005
2006 bfloat16_unpack_canonical(&pa, a, status);
2007 bfloat16_unpack_canonical(&pb, b, status);
2008 bfloat16_unpack_canonical(&pc, c, status);
dedd123c
RH
2009 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2010
2011 return bfloat16_round_pack_canonical(pr, status);
2012}
8282310d 2013
dedd123c
RH
2014float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2015 int flags, float_status *status)
2016{
2017 FloatParts128 pa, pb, pc, *pr;
2018
2019 float128_unpack_canonical(&pa, a, status);
2020 float128_unpack_canonical(&pb, b, status);
2021 float128_unpack_canonical(&pc, c, status);
2022 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2023
2024 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2025}
2026
cf07323d 2027/*
ec961b81 2028 * Division
cf07323d
AB
2029 */
2030
cf07323d
AB
2031float16 float16_div(float16 a, float16 b, float_status *status)
2032{
ec961b81 2033 FloatParts64 pa, pb, *pr;
98e256fc
RH
2034
2035 float16_unpack_canonical(&pa, a, status);
2036 float16_unpack_canonical(&pb, b, status);
ec961b81 2037 pr = parts_div(&pa, &pb, status);
cf07323d 2038
ec961b81 2039 return float16_round_pack_canonical(pr, status);
cf07323d
AB
2040}
2041
4a629561
EC
2042static float32 QEMU_SOFTFLOAT_ATTR
2043soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d 2044{
ec961b81 2045 FloatParts64 pa, pb, *pr;
98e256fc
RH
2046
2047 float32_unpack_canonical(&pa, a, status);
2048 float32_unpack_canonical(&pb, b, status);
ec961b81 2049 pr = parts_div(&pa, &pb, status);
cf07323d 2050
ec961b81 2051 return float32_round_pack_canonical(pr, status);
cf07323d
AB
2052}
2053
4a629561
EC
2054static float64 QEMU_SOFTFLOAT_ATTR
2055soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d 2056{
ec961b81 2057 FloatParts64 pa, pb, *pr;
98e256fc
RH
2058
2059 float64_unpack_canonical(&pa, a, status);
2060 float64_unpack_canonical(&pb, b, status);
ec961b81 2061 pr = parts_div(&pa, &pb, status);
cf07323d 2062
ec961b81 2063 return float64_round_pack_canonical(pr, status);
cf07323d
AB
2064}
2065
4a629561
EC
/* Host single-precision division, a / b. */
static float hard_f32_div(float a, float b)
{
    const float quotient = a / b;

    return quotient;
}
2070
/* Host double-precision division, a / b. */
static double hard_f64_div(double a, double b)
{
    const double quotient = a / b;

    return quotient;
}
2075
2076static bool f32_div_pre(union_float32 a, union_float32 b)
2077{
2078 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2079 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2080 fpclassify(b.h) == FP_NORMAL;
2081 }
2082 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2083}
2084
2085static bool f64_div_pre(union_float64 a, union_float64 b)
2086{
2087 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2088 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2089 fpclassify(b.h) == FP_NORMAL;
2090 }
2091 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2092}
2093
2094static bool f32_div_post(union_float32 a, union_float32 b)
2095{
2096 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2097 return fpclassify(a.h) != FP_ZERO;
2098 }
2099 return !float32_is_zero(a.s);
2100}
2101
2102static bool f64_div_post(union_float64 a, union_float64 b)
2103{
2104 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2105 return fpclassify(a.h) != FP_ZERO;
2106 }
2107 return !float64_is_zero(a.s);
2108}
2109
2110float32 QEMU_FLATTEN
2111float32_div(float32 a, float32 b, float_status *s)
2112{
2113 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
b240c9c4 2114 f32_div_pre, f32_div_post);
4a629561
EC
2115}
2116
2117float64 QEMU_FLATTEN
2118float64_div(float64 a, float64 b, float_status *s)
2119{
2120 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
b240c9c4 2121 f64_div_pre, f64_div_post);
4a629561
EC
2122}
2123
ec961b81
RH
2124bfloat16 QEMU_FLATTEN
2125bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
8282310d 2126{
ec961b81 2127 FloatParts64 pa, pb, *pr;
98e256fc
RH
2128
2129 bfloat16_unpack_canonical(&pa, a, status);
2130 bfloat16_unpack_canonical(&pb, b, status);
ec961b81 2131 pr = parts_div(&pa, &pb, status);
8282310d 2132
ec961b81
RH
2133 return bfloat16_round_pack_canonical(pr, status);
2134}
2135
2136float128 QEMU_FLATTEN
2137float128_div(float128 a, float128 b, float_status *status)
2138{
2139 FloatParts128 pa, pb, *pr;
2140
2141 float128_unpack_canonical(&pa, a, status);
2142 float128_unpack_canonical(&pb, b, status);
2143 pr = parts_div(&pa, &pb, status);
2144
2145 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2146}
2147
6fed16b2
AB
2148/*
2149 * Float to Float conversions
2150 *
2151 * Returns the result of converting one float format to another. The
2152 * conversion is performed according to the IEC/IEEE Standard for
2153 * Binary Floating-Point Arithmetic.
2154 *
c3f1875e
RH
2155 * Usually this only needs to take care of raising invalid exceptions
2156 * and handling the conversion on NaNs.
6fed16b2
AB
2157 */
2158
c3f1875e
RH
2159static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2160{
2161 switch (a->cls) {
2162 case float_class_qnan:
2163 case float_class_snan:
2164 /*
2165 * There is no NaN in the destination format. Raise Invalid
2166 * and return a zero with the sign of the input NaN.
2167 */
2168 float_raise(float_flag_invalid, s);
2169 a->cls = float_class_zero;
2170 break;
2171
2172 case float_class_inf:
2173 /*
2174 * There is no Inf in the destination format. Raise Invalid
2175 * and return the maximum normal with the correct sign.
2176 */
2177 float_raise(float_flag_invalid, s);
2178 a->cls = float_class_normal;
2179 a->exp = float16_params_ahp.exp_max;
2180 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2181 float16_params_ahp.frac_size + 1);
2182 break;
2183
2184 case float_class_normal:
2185 case float_class_zero:
2186 break;
2187
2188 default:
2189 g_assert_not_reached();
2190 }
2191}
2192
2193static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2194{
2195 if (is_nan(a->cls)) {
2196 parts_return_nan(a, s);
6fed16b2 2197 }
6fed16b2
AB
2198}
2199
c3f1875e
RH
2200static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2201{
2202 if (is_nan(a->cls)) {
2203 parts_return_nan(a, s);
2204 }
2205}
2206
/*
 * Invoke the float_to_float helper chosen by PARTS_GENERIC_64_128 from P
 * (presumably parts64_float_to_float or parts128_float_to_float depending
 * on P's type -- the selector macro is defined elsewhere).
 */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2209
9882ccaf
RH
2210static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2211 float_status *s)
2212{
2213 a->cls = b->cls;
2214 a->sign = b->sign;
2215 a->exp = b->exp;
2216
2217 if (a->cls == float_class_normal) {
2218 frac_truncjam(a, b);
2219 } else if (is_nan(a->cls)) {
2220 /* Discard the low bits of the NaN. */
2221 a->frac = b->frac_hi;
2222 parts_return_nan(a, s);
2223 }
2224}
2225
2226static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2227 float_status *s)
2228{
2229 a->cls = b->cls;
2230 a->sign = b->sign;
2231 a->exp = b->exp;
2232 frac_widen(a, b);
2233
2234 if (is_nan(a->cls)) {
2235 parts_return_nan(a, s);
2236 }
2237}
2238
6fed16b2
AB
2239float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2240{
2241 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2242 FloatParts64 p;
98e256fc 2243
c3f1875e
RH
2244 float16a_unpack_canonical(&p, a, s, fmt16);
2245 parts_float_to_float(&p, s);
2246 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2247}
2248
2249float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2250{
2251 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2252 FloatParts64 p;
98e256fc 2253
c3f1875e
RH
2254 float16a_unpack_canonical(&p, a, s, fmt16);
2255 parts_float_to_float(&p, s);
2256 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2257}
2258
2259float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2260{
c3f1875e
RH
2261 FloatParts64 p;
2262 const FloatFmt *fmt;
98e256fc 2263
c3f1875e
RH
2264 float32_unpack_canonical(&p, a, s);
2265 if (ieee) {
2266 parts_float_to_float(&p, s);
2267 fmt = &float16_params;
2268 } else {
2269 parts_float_to_ahp(&p, s);
2270 fmt = &float16_params_ahp;
2271 }
2272 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2273}
2274
21381dcf
MK
2275static float64 QEMU_SOFTFLOAT_ATTR
2276soft_float32_to_float64(float32 a, float_status *s)
6fed16b2 2277{
c3f1875e 2278 FloatParts64 p;
98e256fc 2279
c3f1875e
RH
2280 float32_unpack_canonical(&p, a, s);
2281 parts_float_to_float(&p, s);
2282 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2283}
2284
21381dcf
MK
2285float64 float32_to_float64(float32 a, float_status *s)
2286{
2287 if (likely(float32_is_normal(a))) {
2288 /* Widening conversion can never produce inexact results. */
2289 union_float32 uf;
2290 union_float64 ud;
2291 uf.s = a;
2292 ud.h = uf.h;
2293 return ud.s;
2294 } else if (float32_is_zero(a)) {
2295 return float64_set_sign(float64_zero, float32_is_neg(a));
2296 } else {
2297 return soft_float32_to_float64(a, s);
2298 }
2299}
2300
6fed16b2
AB
2301float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2302{
c3f1875e
RH
2303 FloatParts64 p;
2304 const FloatFmt *fmt;
98e256fc 2305
c3f1875e
RH
2306 float64_unpack_canonical(&p, a, s);
2307 if (ieee) {
2308 parts_float_to_float(&p, s);
2309 fmt = &float16_params;
2310 } else {
2311 parts_float_to_ahp(&p, s);
2312 fmt = &float16_params_ahp;
2313 }
2314 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2315}
2316
2317float32 float64_to_float32(float64 a, float_status *s)
2318{
c3f1875e 2319 FloatParts64 p;
98e256fc 2320
c3f1875e
RH
2321 float64_unpack_canonical(&p, a, s);
2322 parts_float_to_float(&p, s);
2323 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2324}
2325
34f0c0a9
LZ
2326float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2327{
c3f1875e 2328 FloatParts64 p;
98e256fc 2329
c3f1875e
RH
2330 bfloat16_unpack_canonical(&p, a, s);
2331 parts_float_to_float(&p, s);
2332 return float32_round_pack_canonical(&p, s);
34f0c0a9
LZ
2333}
2334
2335float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2336{
c3f1875e 2337 FloatParts64 p;
98e256fc 2338
c3f1875e
RH
2339 bfloat16_unpack_canonical(&p, a, s);
2340 parts_float_to_float(&p, s);
2341 return float64_round_pack_canonical(&p, s);
34f0c0a9
LZ
2342}
2343
2344bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2345{
c3f1875e 2346 FloatParts64 p;
98e256fc 2347
c3f1875e
RH
2348 float32_unpack_canonical(&p, a, s);
2349 parts_float_to_float(&p, s);
2350 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2351}
2352
2353bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2354{
c3f1875e 2355 FloatParts64 p;
98e256fc 2356
c3f1875e
RH
2357 float64_unpack_canonical(&p, a, s);
2358 parts_float_to_float(&p, s);
2359 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2360}
2361
9882ccaf
RH
2362float32 float128_to_float32(float128 a, float_status *s)
2363{
2364 FloatParts64 p64;
2365 FloatParts128 p128;
2366
2367 float128_unpack_canonical(&p128, a, s);
2368 parts_float_to_float_narrow(&p64, &p128, s);
2369 return float32_round_pack_canonical(&p64, s);
2370}
2371
2372float64 float128_to_float64(float128 a, float_status *s)
2373{
2374 FloatParts64 p64;
2375 FloatParts128 p128;
2376
2377 float128_unpack_canonical(&p128, a, s);
2378 parts_float_to_float_narrow(&p64, &p128, s);
2379 return float64_round_pack_canonical(&p64, s);
2380}
2381
2382float128 float32_to_float128(float32 a, float_status *s)
2383{
2384 FloatParts64 p64;
2385 FloatParts128 p128;
2386
2387 float32_unpack_canonical(&p64, a, s);
2388 parts_float_to_float_widen(&p128, &p64, s);
2389 return float128_round_pack_canonical(&p128, s);
2390}
2391
2392float128 float64_to_float128(float64 a, float_status *s)
2393{
2394 FloatParts64 p64;
2395 FloatParts128 p128;
2396
2397 float64_unpack_canonical(&p64, a, s);
2398 parts_float_to_float_widen(&p128, &p64, s);
2399 return float128_round_pack_canonical(&p128, s);
2400}
2401
dbe4d53a 2402/*
afc34931 2403 * Round to integral value
dbe4d53a
AB
2404 */
2405
dbe4d53a
AB
2406float16 float16_round_to_int(float16 a, float_status *s)
2407{
afc34931 2408 FloatParts64 p;
98e256fc 2409
afc34931
RH
2410 float16_unpack_canonical(&p, a, s);
2411 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2412 return float16_round_pack_canonical(&p, s);
dbe4d53a
AB
2413}
2414
2415float32 float32_round_to_int(float32 a, float_status *s)
2416{
afc34931 2417 FloatParts64 p;
98e256fc 2418
afc34931
RH
2419 float32_unpack_canonical(&p, a, s);
2420 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2421 return float32_round_pack_canonical(&p, s);
dbe4d53a
AB
2422}
2423
2424float64 float64_round_to_int(float64 a, float_status *s)
2425{
afc34931 2426 FloatParts64 p;
98e256fc 2427
afc34931
RH
2428 float64_unpack_canonical(&p, a, s);
2429 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2430 return float64_round_pack_canonical(&p, s);
dbe4d53a
AB
2431}
2432
34f0c0a9
LZ
2433bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2434{
afc34931 2435 FloatParts64 p;
98e256fc 2436
afc34931
RH
2437 bfloat16_unpack_canonical(&p, a, s);
2438 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2439 return bfloat16_round_pack_canonical(&p, s);
2440}
2441
2442float128 float128_round_to_int(float128 a, float_status *s)
2443{
2444 FloatParts128 p;
2445
2446 float128_unpack_canonical(&p, a, s);
2447 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2448 return float128_round_pack_canonical(&p, s);
34f0c0a9
LZ
2449}
2450
ab52f973 2451/*
463b3f0d
RH
2452 * Floating-point to signed integer conversions
2453 */
ab52f973 2454
0d93d8ec
FC
2455int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2456 float_status *s)
2457{
98e256fc
RH
2458 FloatParts64 p;
2459
2460 float16_unpack_canonical(&p, a, s);
463b3f0d 2461 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
0d93d8ec
FC
2462}
2463
3dede407 2464int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2465 float_status *s)
2466{
98e256fc
RH
2467 FloatParts64 p;
2468
2469 float16_unpack_canonical(&p, a, s);
463b3f0d 2470 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2471}
2472
3dede407 2473int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2474 float_status *s)
2475{
98e256fc
RH
2476 FloatParts64 p;
2477
2478 float16_unpack_canonical(&p, a, s);
463b3f0d 2479 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2480}
2481
3dede407 2482int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2483 float_status *s)
2484{
98e256fc
RH
2485 FloatParts64 p;
2486
2487 float16_unpack_canonical(&p, a, s);
463b3f0d 2488 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2489}
2490
3dede407 2491int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2492 float_status *s)
2493{
98e256fc
RH
2494 FloatParts64 p;
2495
2496 float32_unpack_canonical(&p, a, s);
463b3f0d 2497 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2498}
2499
3dede407 2500int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2501 float_status *s)
2502{
98e256fc
RH
2503 FloatParts64 p;
2504
2505 float32_unpack_canonical(&p, a, s);
463b3f0d 2506 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2507}
2508
3dede407 2509int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2510 float_status *s)
2511{
98e256fc
RH
2512 FloatParts64 p;
2513
2514 float32_unpack_canonical(&p, a, s);
463b3f0d 2515 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2516}
2517
3dede407 2518int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2519 float_status *s)
2520{
98e256fc
RH
2521 FloatParts64 p;
2522
2523 float64_unpack_canonical(&p, a, s);
463b3f0d 2524 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2525}
2526
3dede407 2527int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2528 float_status *s)
2529{
98e256fc
RH
2530 FloatParts64 p;
2531
2532 float64_unpack_canonical(&p, a, s);
463b3f0d 2533 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2534}
2535
3dede407 2536int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2537 float_status *s)
2538{
98e256fc
RH
2539 FloatParts64 p;
2540
2541 float64_unpack_canonical(&p, a, s);
463b3f0d
RH
2542 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2543}
2544
2545int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2546 float_status *s)
2547{
2548 FloatParts64 p;
2549
2550 bfloat16_unpack_canonical(&p, a, s);
2551 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2552}
2553
2554int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2555 float_status *s)
2556{
2557 FloatParts64 p;
2558
2559 bfloat16_unpack_canonical(&p, a, s);
2560 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2561}
2562
2563int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2564 float_status *s)
2565{
2566 FloatParts64 p;
2567
2568 bfloat16_unpack_canonical(&p, a, s);
2569 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2570}
2571
2572static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
2573 int scale, float_status *s)
2574{
2575 FloatParts128 p;
2576
2577 float128_unpack_canonical(&p, a, s);
2578 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2579}
2580
2581static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
2582 int scale, float_status *s)
2583{
2584 FloatParts128 p;
2585
2586 float128_unpack_canonical(&p, a, s);
2587 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2588}
2589
0d93d8ec
FC
2590int8_t float16_to_int8(float16 a, float_status *s)
2591{
2592 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2593}
2594
2f6c74be
RH
2595int16_t float16_to_int16(float16 a, float_status *s)
2596{
2597 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2598}
2599
2600int32_t float16_to_int32(float16 a, float_status *s)
2601{
2602 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2603}
2604
2605int64_t float16_to_int64(float16 a, float_status *s)
2606{
2607 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2608}
2609
2610int16_t float32_to_int16(float32 a, float_status *s)
2611{
2612 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2613}
2614
2615int32_t float32_to_int32(float32 a, float_status *s)
2616{
2617 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2618}
2619
2620int64_t float32_to_int64(float32 a, float_status *s)
2621{
2622 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2623}
2624
2625int16_t float64_to_int16(float64 a, float_status *s)
2626{
2627 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2628}
2629
2630int32_t float64_to_int32(float64 a, float_status *s)
2631{
2632 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2633}
2634
2635int64_t float64_to_int64(float64 a, float_status *s)
2636{
2637 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2638}
2639
463b3f0d
RH
2640int32_t float128_to_int32(float128 a, float_status *s)
2641{
2642 return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2643}
2644
2645int64_t float128_to_int64(float128 a, float_status *s)
2646{
2647 return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2648}
2649
2f6c74be
RH
2650int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2651{
2652 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2653}
2654
2655int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2656{
2657 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2658}
2659
2660int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2661{
2662 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2663}
2664
2f6c74be
RH
2665int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2666{
2667 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2668}
ab52f973 2669
2f6c74be
RH
2670int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2671{
2672 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2673}
2674
2675int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2676{
2677 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2678}
2679
2680int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2681{
2682 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2683}
ab52f973 2684
2f6c74be
RH
2685int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2686{
2687 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2688}
ab52f973 2689
2f6c74be
RH
2690int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2691{
2692 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2693}
ab52f973 2694
463b3f0d 2695int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
34f0c0a9 2696{
463b3f0d 2697 return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2698}
2699
463b3f0d 2700int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
34f0c0a9 2701{
463b3f0d 2702 return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2703}
2704
2705int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2706{
2707 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2708}
2709
2710int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2711{
2712 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2713}
2714
2715int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2716{
2717 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2718}
2719
2720int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2721{
2722 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2723}
2724
2725int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2726{
2727 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2728}
2729
2730int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2731{
2732 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2733}
2734
ab52f973 2735/*
4ab4aef0 2736 * Floating-point to unsigned integer conversions
ab52f973
AB
2737 */
2738
0d93d8ec
FC
2739uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2740 float_status *s)
2741{
98e256fc
RH
2742 FloatParts64 p;
2743
2744 float16_unpack_canonical(&p, a, s);
4ab4aef0 2745 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
0d93d8ec
FC
2746}
2747
3dede407 2748uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2749 float_status *s)
2750{
98e256fc
RH
2751 FloatParts64 p;
2752
2753 float16_unpack_canonical(&p, a, s);
4ab4aef0 2754 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2755}
2756
3dede407 2757uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2758 float_status *s)
2759{
98e256fc
RH
2760 FloatParts64 p;
2761
2762 float16_unpack_canonical(&p, a, s);
4ab4aef0 2763 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2764}
2765
3dede407 2766uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2767 float_status *s)
2768{
98e256fc
RH
2769 FloatParts64 p;
2770
2771 float16_unpack_canonical(&p, a, s);
4ab4aef0 2772 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2773}
2774
3dede407 2775uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2776 float_status *s)
2777{
98e256fc
RH
2778 FloatParts64 p;
2779
2780 float32_unpack_canonical(&p, a, s);
4ab4aef0 2781 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2782}
2783
3dede407 2784uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2785 float_status *s)
2786{
98e256fc
RH
2787 FloatParts64 p;
2788
2789 float32_unpack_canonical(&p, a, s);
4ab4aef0 2790 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2791}
2792
3dede407 2793uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2794 float_status *s)
2795{
98e256fc
RH
2796 FloatParts64 p;
2797
2798 float32_unpack_canonical(&p, a, s);
4ab4aef0 2799 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2800}
2801
3dede407 2802uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2803 float_status *s)
2804{
98e256fc
RH
2805 FloatParts64 p;
2806
2807 float64_unpack_canonical(&p, a, s);
4ab4aef0 2808 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2809}
2810
3dede407 2811uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2812 float_status *s)
2813{
98e256fc
RH
2814 FloatParts64 p;
2815
2816 float64_unpack_canonical(&p, a, s);
4ab4aef0 2817 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2818}
2819
3dede407 2820uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2821 float_status *s)
2822{
98e256fc
RH
2823 FloatParts64 p;
2824
2825 float64_unpack_canonical(&p, a, s);
4ab4aef0
RH
2826 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2827}
2828
2829uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2830 int scale, float_status *s)
2831{
2832 FloatParts64 p;
2833
2834 bfloat16_unpack_canonical(&p, a, s);
2835 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2836}
2837
2838uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2839 int scale, float_status *s)
2840{
2841 FloatParts64 p;
2842
2843 bfloat16_unpack_canonical(&p, a, s);
2844 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2845}
2846
2847uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
2848 int scale, float_status *s)
2849{
2850 FloatParts64 p;
2851
2852 bfloat16_unpack_canonical(&p, a, s);
2853 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2854}
2855
2856static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
2857 int scale, float_status *s)
2858{
2859 FloatParts128 p;
2860
2861 float128_unpack_canonical(&p, a, s);
2862 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2863}
2864
2865static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
2866 int scale, float_status *s)
2867{
2868 FloatParts128 p;
2869
2870 float128_unpack_canonical(&p, a, s);
2871 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2872}
2873
0d93d8ec
FC
2874uint8_t float16_to_uint8(float16 a, float_status *s)
2875{
2876 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2877}
2878
2f6c74be
RH
2879uint16_t float16_to_uint16(float16 a, float_status *s)
2880{
2881 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2882}
2883
2884uint32_t float16_to_uint32(float16 a, float_status *s)
2885{
2886 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2887}
2888
2889uint64_t float16_to_uint64(float16 a, float_status *s)
2890{
2891 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2892}
2893
2894uint16_t float32_to_uint16(float32 a, float_status *s)
2895{
2896 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2897}
2898
2899uint32_t float32_to_uint32(float32 a, float_status *s)
2900{
2901 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2902}
2903
2904uint64_t float32_to_uint64(float32 a, float_status *s)
2905{
2906 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2907}
2908
2909uint16_t float64_to_uint16(float64 a, float_status *s)
2910{
2911 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2912}
2913
2914uint32_t float64_to_uint32(float64 a, float_status *s)
2915{
2916 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2917}
2918
2919uint64_t float64_to_uint64(float64 a, float_status *s)
2920{
2921 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2922}
2923
4ab4aef0
RH
2924uint32_t float128_to_uint32(float128 a, float_status *s)
2925{
2926 return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2927}
2928
2929uint64_t float128_to_uint64(float128 a, float_status *s)
2930{
2931 return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2932}
2933
2f6c74be
RH
2934uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2935{
2936 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2937}
2938
2939uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2940{
2941 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2942}
2943
2944uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2945{
2946 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2947}
2948
2949uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2950{
2951 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2952}
2953
2954uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2955{
2956 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2957}
2958
2959uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2960{
2961 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2962}
2963
2964uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2965{
2966 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2967}
2968
2969uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2970{
2971 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2972}
2973
2974uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2975{
2976 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2977}
ab52f973 2978
4ab4aef0 2979uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
34f0c0a9 2980{
4ab4aef0 2981 return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2982}
2983
4ab4aef0 2984uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
34f0c0a9 2985{
4ab4aef0 2986 return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2987}
2988
2989uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
2990{
2991 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2992}
2993
2994uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
2995{
2996 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2997}
2998
2999uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3000{
3001 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3002}
3003
3004uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3005{
3006 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3007}
3008
3009uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3010{
3011 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3012}
3013
3014uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3015{
3016 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3017}
3018
c02e1fb8 3019/*
e3689519 3020 * Signed integer to floating-point conversions
c02e1fb8
AB
3021 */
3022
2abdfe24 3023float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3024{
e3689519
RH
3025 FloatParts64 p;
3026
3027 parts_sint_to_float(&p, a, scale, status);
3028 return float16_round_pack_canonical(&p, status);
c02e1fb8
AB
3029}
3030
2abdfe24
RH
3031float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3032{
3033 return int64_to_float16_scalbn(a, scale, status);
3034}
3035
3036float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3037{
3038 return int64_to_float16_scalbn(a, scale, status);
3039}
3040
3041float16 int64_to_float16(int64_t a, float_status *status)
3042{
3043 return int64_to_float16_scalbn(a, 0, status);
3044}
3045
c02e1fb8
AB
3046float16 int32_to_float16(int32_t a, float_status *status)
3047{
2abdfe24 3048 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3049}
3050
3051float16 int16_to_float16(int16_t a, float_status *status)
3052{
2abdfe24 3053 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3054}
3055
0d93d8ec
FC
3056float16 int8_to_float16(int8_t a, float_status *status)
3057{
3058 return int64_to_float16_scalbn(a, 0, status);
3059}
3060
2abdfe24 3061float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3062{
e3689519
RH
3063 FloatParts64 p;
3064
3065 parts64_sint_to_float(&p, a, scale, status);
3066 return float32_round_pack_canonical(&p, status);
c02e1fb8
AB
3067}
3068
2abdfe24
RH
3069float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3070{
3071 return int64_to_float32_scalbn(a, scale, status);
3072}
3073
3074float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3075{
3076 return int64_to_float32_scalbn(a, scale, status);
3077}
3078
3079float32 int64_to_float32(int64_t a, float_status *status)
3080{
3081 return int64_to_float32_scalbn(a, 0, status);
3082}
3083
c02e1fb8
AB
3084float32 int32_to_float32(int32_t a, float_status *status)
3085{
2abdfe24 3086 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3087}
3088
3089float32 int16_to_float32(int16_t a, float_status *status)
3090{
2abdfe24 3091 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3092}
3093
2abdfe24 3094float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3095{
e3689519
RH
3096 FloatParts64 p;
3097
3098 parts_sint_to_float(&p, a, scale, status);
3099 return float64_round_pack_canonical(&p, status);
c02e1fb8
AB
3100}
3101
2abdfe24
RH
3102float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3103{
3104 return int64_to_float64_scalbn(a, scale, status);
3105}
3106
3107float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3108{
3109 return int64_to_float64_scalbn(a, scale, status);
3110}
3111
3112float64 int64_to_float64(int64_t a, float_status *status)
3113{
3114 return int64_to_float64_scalbn(a, 0, status);
3115}
3116
c02e1fb8
AB
3117float64 int32_to_float64(int32_t a, float_status *status)
3118{
2abdfe24 3119 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3120}
3121
3122float64 int16_to_float64(int16_t a, float_status *status)
3123{
2abdfe24 3124 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3125}
3126
34f0c0a9
LZ
3127bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3128{
e3689519
RH
3129 FloatParts64 p;
3130
3131 parts_sint_to_float(&p, a, scale, status);
3132 return bfloat16_round_pack_canonical(&p, status);
34f0c0a9
LZ
3133}
3134
3135bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3136{
3137 return int64_to_bfloat16_scalbn(a, scale, status);
3138}
3139
3140bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3141{
3142 return int64_to_bfloat16_scalbn(a, scale, status);
3143}
3144
3145bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3146{
3147 return int64_to_bfloat16_scalbn(a, 0, status);
3148}
3149
3150bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3151{
3152 return int64_to_bfloat16_scalbn(a, 0, status);
3153}
3154
3155bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3156{
3157 return int64_to_bfloat16_scalbn(a, 0, status);
3158}
c02e1fb8 3159
e3689519
RH
3160float128 int64_to_float128(int64_t a, float_status *status)
3161{
3162 FloatParts128 p;
3163
3164 parts_sint_to_float(&p, a, 0, status);
3165 return float128_round_pack_canonical(&p, status);
3166}
3167
3168float128 int32_to_float128(int32_t a, float_status *status)
3169{
3170 return int64_to_float128(a, status);
3171}
3172
c02e1fb8 3173/*
37c954a1 3174 * Unsigned Integer to floating-point conversions
c02e1fb8
AB
3175 */
3176
2abdfe24 3177float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3178{
37c954a1
RH
3179 FloatParts64 p;
3180
3181 parts_uint_to_float(&p, a, scale, status);
3182 return float16_round_pack_canonical(&p, status);
c02e1fb8
AB
3183}
3184
2abdfe24
RH
3185float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3186{
3187 return uint64_to_float16_scalbn(a, scale, status);
3188}
3189
3190float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3191{
3192 return uint64_to_float16_scalbn(a, scale, status);
3193}
3194
3195float16 uint64_to_float16(uint64_t a, float_status *status)
3196{
3197 return uint64_to_float16_scalbn(a, 0, status);
3198}
3199
c02e1fb8
AB
3200float16 uint32_to_float16(uint32_t a, float_status *status)
3201{
2abdfe24 3202 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3203}
3204
3205float16 uint16_to_float16(uint16_t a, float_status *status)
3206{
2abdfe24 3207 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3208}
3209
0d93d8ec
FC
3210float16 uint8_to_float16(uint8_t a, float_status *status)
3211{
3212 return uint64_to_float16_scalbn(a, 0, status);
3213}
3214
2abdfe24 3215float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3216{
37c954a1
RH
3217 FloatParts64 p;
3218
3219 parts_uint_to_float(&p, a, scale, status);
3220 return float32_round_pack_canonical(&p, status);
c02e1fb8
AB
3221}
3222
2abdfe24
RH
3223float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3224{
3225 return uint64_to_float32_scalbn(a, scale, status);
3226}
3227
3228float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3229{
3230 return uint64_to_float32_scalbn(a, scale, status);
3231}
3232
3233float32 uint64_to_float32(uint64_t a, float_status *status)
3234{
3235 return uint64_to_float32_scalbn(a, 0, status);
3236}
3237
c02e1fb8
AB
3238float32 uint32_to_float32(uint32_t a, float_status *status)
3239{
2abdfe24 3240 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3241}
3242
3243float32 uint16_to_float32(uint16_t a, float_status *status)
3244{
2abdfe24 3245 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3246}
3247
2abdfe24 3248float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3249{
37c954a1
RH
3250 FloatParts64 p;
3251
3252 parts_uint_to_float(&p, a, scale, status);
3253 return float64_round_pack_canonical(&p, status);
c02e1fb8
AB
3254}
3255
2abdfe24
RH
3256float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3257{
3258 return uint64_to_float64_scalbn(a, scale, status);
3259}
3260
3261float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3262{
3263 return uint64_to_float64_scalbn(a, scale, status);
3264}
3265
3266float64 uint64_to_float64(uint64_t a, float_status *status)
3267{
3268 return uint64_to_float64_scalbn(a, 0, status);
3269}
3270
c02e1fb8
AB
3271float64 uint32_to_float64(uint32_t a, float_status *status)
3272{
2abdfe24 3273 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3274}
3275
3276float64 uint16_to_float64(uint16_t a, float_status *status)
3277{
2abdfe24 3278 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3279}
3280
34f0c0a9
LZ
3281bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3282{
37c954a1
RH
3283 FloatParts64 p;
3284
3285 parts_uint_to_float(&p, a, scale, status);
3286 return bfloat16_round_pack_canonical(&p, status);
34f0c0a9
LZ
3287}
3288
3289bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3290{
3291 return uint64_to_bfloat16_scalbn(a, scale, status);
3292}
3293
3294bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3295{
3296 return uint64_to_bfloat16_scalbn(a, scale, status);
3297}
3298
3299bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3300{
3301 return uint64_to_bfloat16_scalbn(a, 0, status);
3302}
3303
3304bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3305{
3306 return uint64_to_bfloat16_scalbn(a, 0, status);
3307}
3308
3309bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3310{
3311 return uint64_to_bfloat16_scalbn(a, 0, status);
3312}
3313
37c954a1
RH
3314float128 uint64_to_float128(uint64_t a, float_status *status)
3315{
3316 FloatParts128 p;
3317
3318 parts_uint_to_float(&p, a, 0, status);
3319 return float128_round_pack_canonical(&p, status);
3320}
3321
e1c4667a
RH
3322/*
3323 * Minimum and maximum
89360067 3324 */
89360067 3325
e1c4667a
RH
3326static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3327{
3328 FloatParts64 pa, pb, *pr;
89360067 3329
e1c4667a
RH
3330 float16_unpack_canonical(&pa, a, s);
3331 float16_unpack_canonical(&pb, b, s);
3332 pr = parts_minmax(&pa, &pb, s, flags);
3333
3334 return float16_round_pack_canonical(pr, s);
89360067
AB
3335}
3336
e1c4667a
RH
3337static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3338 float_status *s, int flags)
3339{
3340 FloatParts64 pa, pb, *pr;
3341
3342 bfloat16_unpack_canonical(&pa, a, s);
3343 bfloat16_unpack_canonical(&pb, b, s);
3344 pr = parts_minmax(&pa, &pb, s, flags);
3345
3346 return bfloat16_round_pack_canonical(pr, s);
3347}
3348
3349static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
3350{
3351 FloatParts64 pa, pb, *pr;
3352
3353 float32_unpack_canonical(&pa, a, s);
3354 float32_unpack_canonical(&pb, b, s);
3355 pr = parts_minmax(&pa, &pb, s, flags);
3356
3357 return float32_round_pack_canonical(pr, s);
3358}
3359
3360static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
3361{
3362 FloatParts64 pa, pb, *pr;
3363
3364 float64_unpack_canonical(&pa, a, s);
3365 float64_unpack_canonical(&pb, b, s);
3366 pr = parts_minmax(&pa, &pb, s, flags);
3367
3368 return float64_round_pack_canonical(pr, s);
3369}
3370
ceebc129
DH
3371static float128 float128_minmax(float128 a, float128 b,
3372 float_status *s, int flags)
3373{
3374 FloatParts128 pa, pb, *pr;
3375
3376 float128_unpack_canonical(&pa, a, s);
3377 float128_unpack_canonical(&pb, b, s);
3378 pr = parts_minmax(&pa, &pb, s, flags);
3379
3380 return float128_round_pack_canonical(pr, s);
3381}
3382
e1c4667a
RH
3383#define MINMAX_1(type, name, flags) \
3384 type type##_##name(type a, type b, float_status *s) \
3385 { return type##_minmax(a, b, s, flags); }
3386
3387#define MINMAX_2(type) \
3388 MINMAX_1(type, max, 0) \
3389 MINMAX_1(type, maxnum, minmax_isnum) \
3390 MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag) \
3391 MINMAX_1(type, min, minmax_ismin) \
3392 MINMAX_1(type, minnum, minmax_ismin | minmax_isnum) \
3393 MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)
3394
3395MINMAX_2(float16)
3396MINMAX_2(bfloat16)
3397MINMAX_2(float32)
3398MINMAX_2(float64)
ceebc129 3399MINMAX_2(float128)
e1c4667a
RH
3400
3401#undef MINMAX_1
3402#undef MINMAX_2
8282310d 3403
6eb169b8
RH
3404/*
3405 * Floating point compare
3406 */
0c4c9092 3407
6eb169b8
RH
3408static FloatRelation QEMU_FLATTEN
3409float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
3410{
3411 FloatParts64 pa, pb;
0c4c9092 3412
6eb169b8
RH
3413 float16_unpack_canonical(&pa, a, s);
3414 float16_unpack_canonical(&pb, b, s);
3415 return parts_compare(&pa, &pb, s, is_quiet);
0c4c9092
AB
3416}
3417
71bfd65c 3418FloatRelation float16_compare(float16 a, float16 b, float_status *s)
d9fe9db9 3419{
6eb169b8 3420 return float16_do_compare(a, b, s, false);
d9fe9db9
EC
3421}
3422
71bfd65c 3423FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
d9fe9db9 3424{
6eb169b8
RH
3425 return float16_do_compare(a, b, s, true);
3426}
3427
3428static FloatRelation QEMU_SOFTFLOAT_ATTR
3429float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
3430{
3431 FloatParts64 pa, pb;
3432
3433 float32_unpack_canonical(&pa, a, s);
3434 float32_unpack_canonical(&pb, b, s);
3435 return parts_compare(&pa, &pb, s, is_quiet);
d9fe9db9
EC
3436}
3437
71bfd65c 3438static FloatRelation QEMU_FLATTEN
6eb169b8 3439float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
d9fe9db9
EC
3440{
3441 union_float32 ua, ub;
3442
3443 ua.s = xa;
3444 ub.s = xb;
3445
3446 if (QEMU_NO_HARDFLOAT) {
3447 goto soft;
3448 }
3449
3450 float32_input_flush2(&ua.s, &ub.s, s);
3451 if (isgreaterequal(ua.h, ub.h)) {
3452 if (isgreater(ua.h, ub.h)) {
3453 return float_relation_greater;
3454 }
3455 return float_relation_equal;
3456 }
3457 if (likely(isless(ua.h, ub.h))) {
3458 return float_relation_less;
3459 }
6eb169b8
RH
3460 /*
3461 * The only condition remaining is unordered.
d9fe9db9
EC
3462 * Fall through to set flags.
3463 */
3464 soft:
6eb169b8 3465 return float32_do_compare(ua.s, ub.s, s, is_quiet);
d9fe9db9
EC
3466}
3467
71bfd65c 3468FloatRelation float32_compare(float32 a, float32 b, float_status *s)
d9fe9db9 3469{
6eb169b8 3470 return float32_hs_compare(a, b, s, false);
d9fe9db9
EC
3471}
3472
71bfd65c 3473FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
d9fe9db9 3474{
6eb169b8
RH
3475 return float32_hs_compare(a, b, s, true);
3476}
3477
3478static FloatRelation QEMU_SOFTFLOAT_ATTR
3479float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
3480{
3481 FloatParts64 pa, pb;
3482
3483 float64_unpack_canonical(&pa, a, s);
3484 float64_unpack_canonical(&pb, b, s);
3485 return parts_compare(&pa, &pb, s, is_quiet);
d9fe9db9
EC
3486}
3487
71bfd65c 3488static FloatRelation QEMU_FLATTEN
6eb169b8 3489float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
d9fe9db9
EC
3490{
3491 union_float64 ua, ub;
3492
3493 ua.s = xa;
3494 ub.s = xb;
3495
3496 if (QEMU_NO_HARDFLOAT) {
3497 goto soft;
3498 }
3499
3500 float64_input_flush2(&ua.s, &ub.s, s);
3501 if (isgreaterequal(ua.h, ub.h)) {
3502 if (isgreater(ua.h, ub.h)) {
3503 return float_relation_greater;
3504 }
3505 return float_relation_equal;
3506 }
3507 if (likely(isless(ua.h, ub.h))) {
3508 return float_relation_less;
3509 }
6eb169b8
RH
3510 /*
3511 * The only condition remaining is unordered.
d9fe9db9
EC
3512 * Fall through to set flags.
3513 */
3514 soft:
6eb169b8 3515 return float64_do_compare(ua.s, ub.s, s, is_quiet);
d9fe9db9
EC
3516}
3517
71bfd65c 3518FloatRelation float64_compare(float64 a, float64 b, float_status *s)
d9fe9db9 3519{
6eb169b8 3520 return float64_hs_compare(a, b, s, false);
d9fe9db9
EC
3521}
3522
71bfd65c 3523FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
d9fe9db9 3524{
6eb169b8 3525 return float64_hs_compare(a, b, s, true);
d9fe9db9
EC
3526}
3527
8282310d 3528static FloatRelation QEMU_FLATTEN
6eb169b8 3529bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
8282310d 3530{
98e256fc
RH
3531 FloatParts64 pa, pb;
3532
3533 bfloat16_unpack_canonical(&pa, a, s);
3534 bfloat16_unpack_canonical(&pb, b, s);
6eb169b8 3535 return parts_compare(&pa, &pb, s, is_quiet);
8282310d
LZ
3536}
3537
3538FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3539{
6eb169b8 3540 return bfloat16_do_compare(a, b, s, false);
8282310d
LZ
3541}
3542
3543FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3544{
6eb169b8
RH
3545 return bfloat16_do_compare(a, b, s, true);
3546}
3547
3548static FloatRelation QEMU_FLATTEN
3549float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
3550{
3551 FloatParts128 pa, pb;
3552
3553 float128_unpack_canonical(&pa, a, s);
3554 float128_unpack_canonical(&pb, b, s);
3555 return parts_compare(&pa, &pb, s, is_quiet);
3556}
3557
3558FloatRelation float128_compare(float128 a, float128 b, float_status *s)
3559{
3560 return float128_do_compare(a, b, s, false);
3561}
3562
3563FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
3564{
3565 return float128_do_compare(a, b, s, true);
8282310d
LZ
3566}
3567
39626b0c
RH
3568/*
3569 * Scale by 2**N
3570 */
0bfc9f19
AB
3571
3572float16 float16_scalbn(float16 a, int n, float_status *status)
3573{
39626b0c 3574 FloatParts64 p;
98e256fc 3575
39626b0c
RH
3576 float16_unpack_canonical(&p, a, status);
3577 parts_scalbn(&p, n, status);
3578 return float16_round_pack_canonical(&p, status);
0bfc9f19
AB
3579}
3580
3581float32 float32_scalbn(float32 a, int n, float_status *status)
3582{
39626b0c 3583 FloatParts64 p;
98e256fc 3584
39626b0c
RH
3585 float32_unpack_canonical(&p, a, status);
3586 parts_scalbn(&p, n, status);
3587 return float32_round_pack_canonical(&p, status);
0bfc9f19
AB
3588}
3589
3590float64 float64_scalbn(float64 a, int n, float_status *status)
3591{
39626b0c 3592 FloatParts64 p;
98e256fc 3593
39626b0c
RH
3594 float64_unpack_canonical(&p, a, status);
3595 parts_scalbn(&p, n, status);
3596 return float64_round_pack_canonical(&p, status);
0bfc9f19
AB
3597}
3598
8282310d
LZ
3599bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3600{
39626b0c 3601 FloatParts64 p;
98e256fc 3602
39626b0c
RH
3603 bfloat16_unpack_canonical(&p, a, status);
3604 parts_scalbn(&p, n, status);
3605 return bfloat16_round_pack_canonical(&p, status);
3606}
3607
3608float128 float128_scalbn(float128 a, int n, float_status *status)
3609{
3610 FloatParts128 p;
3611
3612 float128_unpack_canonical(&p, a, status);
3613 parts_scalbn(&p, n, status);
3614 return float128_round_pack_canonical(&p, status);
8282310d
LZ
3615}
3616
c13bb2da
AB
3617/*
3618 * Square Root
c13bb2da
AB
3619 */
3620
97ff87c0 3621float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da 3622{
9261b245 3623 FloatParts64 p;
98e256fc 3624
9261b245
RH
3625 float16_unpack_canonical(&p, a, status);
3626 parts_sqrt(&p, status, &float16_params);
3627 return float16_round_pack_canonical(&p, status);
c13bb2da
AB
3628}
3629
f131bae8
EC
3630static float32 QEMU_SOFTFLOAT_ATTR
3631soft_f32_sqrt(float32 a, float_status *status)
c13bb2da 3632{
9261b245 3633 FloatParts64 p;
98e256fc 3634
9261b245
RH
3635 float32_unpack_canonical(&p, a, status);
3636 parts_sqrt(&p, status, &float32_params);
3637 return float32_round_pack_canonical(&p, status);
c13bb2da
AB
3638}
3639
f131bae8
EC
3640static float64 QEMU_SOFTFLOAT_ATTR
3641soft_f64_sqrt(float64 a, float_status *status)
c13bb2da 3642{
9261b245 3643 FloatParts64 p;
98e256fc 3644
9261b245
RH
3645 float64_unpack_canonical(&p, a, status);
3646 parts_sqrt(&p, status, &float64_params);
3647 return float64_round_pack_canonical(&p, status);
c13bb2da
AB
3648}
3649
f131bae8
EC
3650float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3651{
3652 union_float32 ua, ur;
3653
3654 ua.s = xa;
3655 if (unlikely(!can_use_fpu(s))) {
3656 goto soft;
3657 }
3658
3659 float32_input_flush1(&ua.s, s);
3660 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3661 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3662 fpclassify(ua.h) == FP_ZERO) ||
3663 signbit(ua.h))) {
3664 goto soft;
3665 }
3666 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3667 float32_is_neg(ua.s))) {
3668 goto soft;
3669 }
3670 ur.h = sqrtf(ua.h);
3671 return ur.s;
3672
3673 soft:
3674 return soft_f32_sqrt(ua.s, s);
3675}
3676
3677float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3678{
3679 union_float64 ua, ur;
3680
3681 ua.s = xa;
3682 if (unlikely(!can_use_fpu(s))) {
3683 goto soft;
3684 }
3685
3686 float64_input_flush1(&ua.s, s);
3687 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3688 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3689 fpclassify(ua.h) == FP_ZERO) ||
3690 signbit(ua.h))) {
3691 goto soft;
3692 }
3693 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3694 float64_is_neg(ua.s))) {
3695 goto soft;
3696 }
3697 ur.h = sqrt(ua.h);
3698 return ur.s;
3699
3700 soft:
3701 return soft_f64_sqrt(ua.s, s);
3702}
3703
8282310d
LZ
3704bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3705{
9261b245 3706 FloatParts64 p;
98e256fc 3707
9261b245
RH
3708 bfloat16_unpack_canonical(&p, a, status);
3709 parts_sqrt(&p, status, &bfloat16_params);
3710 return bfloat16_round_pack_canonical(&p, status);
3711}
3712
3713float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
3714{
3715 FloatParts128 p;
3716
3717 float128_unpack_canonical(&p, a, status);
3718 parts_sqrt(&p, status, &float128_params);
3719 return float128_round_pack_canonical(&p, status);
8282310d
LZ
3720}
3721
0218a16e
RH
3722/*----------------------------------------------------------------------------
3723| The pattern for a default generated NaN.
3724*----------------------------------------------------------------------------*/
3725
3726float16 float16_default_nan(float_status *status)
3727{
0fc07cad
RH
3728 FloatParts64 p;
3729
3730 parts_default_nan(&p, status);
0218a16e 3731 p.frac >>= float16_params.frac_shift;
71fd178e 3732 return float16_pack_raw(&p);
0218a16e
RH
3733}
3734
3735float32 float32_default_nan(float_status *status)
3736{
0fc07cad
RH
3737 FloatParts64 p;
3738
3739 parts_default_nan(&p, status);
0218a16e 3740 p.frac >>= float32_params.frac_shift;
71fd178e 3741 return float32_pack_raw(&p);
0218a16e
RH
3742}
3743
3744float64 float64_default_nan(float_status *status)
3745{
0fc07cad
RH
3746 FloatParts64 p;
3747
3748 parts_default_nan(&p, status);
0218a16e 3749 p.frac >>= float64_params.frac_shift;
71fd178e 3750 return float64_pack_raw(&p);
0218a16e
RH
3751}
3752
3753float128 float128_default_nan(float_status *status)
3754{
e9034ea8 3755 FloatParts128 p;
0218a16e 3756
0fc07cad 3757 parts_default_nan(&p, status);
e9034ea8
RH
3758 frac_shr(&p, float128_params.frac_shift);
3759 return float128_pack_raw(&p);
0218a16e 3760}
c13bb2da 3761
8282310d
LZ
3762bfloat16 bfloat16_default_nan(float_status *status)
3763{
0fc07cad
RH
3764 FloatParts64 p;
3765
3766 parts_default_nan(&p, status);
8282310d 3767 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3768 return bfloat16_pack_raw(&p);
8282310d
LZ
3769}
3770
158142c2 3771/*----------------------------------------------------------------------------
377ed926
RH
3772| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3773*----------------------------------------------------------------------------*/
3774
3775float16 float16_silence_nan(float16 a, float_status *status)
3776{
3dddb203
RH
3777 FloatParts64 p;
3778
3779 float16_unpack_raw(&p, a);
377ed926 3780 p.frac <<= float16_params.frac_shift;
92ff426d 3781 parts_silence_nan(&p, status);
377ed926 3782 p.frac >>= float16_params.frac_shift;
71fd178e 3783 return float16_pack_raw(&p);
377ed926
RH
3784}
3785
3786float32 float32_silence_nan(float32 a, float_status *status)
3787{
3dddb203
RH
3788 FloatParts64 p;
3789
3790 float32_unpack_raw(&p, a);
377ed926 3791 p.frac <<= float32_params.frac_shift;
92ff426d 3792 parts_silence_nan(&p, status);
377ed926 3793 p.frac >>= float32_params.frac_shift;
71fd178e 3794 return float32_pack_raw(&p);
377ed926
RH
3795}
3796
3797float64 float64_silence_nan(float64 a, float_status *status)
3798{
3dddb203
RH
3799 FloatParts64 p;
3800
3801 float64_unpack_raw(&p, a);
377ed926 3802 p.frac <<= float64_params.frac_shift;
92ff426d 3803 parts_silence_nan(&p, status);
377ed926 3804 p.frac >>= float64_params.frac_shift;
71fd178e 3805 return float64_pack_raw(&p);
377ed926
RH
3806}
3807
8282310d
LZ
3808bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3809{
3dddb203
RH
3810 FloatParts64 p;
3811
3812 bfloat16_unpack_raw(&p, a);
8282310d 3813 p.frac <<= bfloat16_params.frac_shift;
92ff426d 3814 parts_silence_nan(&p, status);
8282310d 3815 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3816 return bfloat16_pack_raw(&p);
8282310d 3817}
e6b405fe 3818
0018b1f4
RH
3819float128 float128_silence_nan(float128 a, float_status *status)
3820{
3821 FloatParts128 p;
3822
3823 float128_unpack_raw(&p, a);
3824 frac_shl(&p, float128_params.frac_shift);
3825 parts_silence_nan(&p, status);
3826 frac_shr(&p, float128_params.frac_shift);
3827 return float128_pack_raw(&p);
3828}
3829
e6b405fe
AB
3830/*----------------------------------------------------------------------------
3831| If `a' is denormal and we are in flush-to-zero mode then set the
3832| input-denormal exception and return zero. Otherwise just return the value.
3833*----------------------------------------------------------------------------*/
3834
f8155c1d 3835static bool parts_squash_denormal(FloatParts64 p, float_status *status)
e6b405fe
AB
3836{
3837 if (p.exp == 0 && p.frac != 0) {
3838 float_raise(float_flag_input_denormal, status);
3839 return true;
3840 }
3841
3842 return false;
3843}
3844
3845float16 float16_squash_input_denormal(float16 a, float_status *status)
3846{
3847 if (status->flush_inputs_to_zero) {
3dddb203
RH
3848 FloatParts64 p;
3849
3850 float16_unpack_raw(&p, a);
e6b405fe
AB
3851 if (parts_squash_denormal(p, status)) {
3852 return float16_set_sign(float16_zero, p.sign);
3853 }
3854 }
3855 return a;
3856}
3857
3858float32 float32_squash_input_denormal(float32 a, float_status *status)
3859{
3860 if (status->flush_inputs_to_zero) {
3dddb203
RH
3861 FloatParts64 p;
3862
3863 float32_unpack_raw(&p, a);
e6b405fe
AB
3864 if (parts_squash_denormal(p, status)) {
3865 return float32_set_sign(float32_zero, p.sign);
3866 }
3867 }
3868 return a;
3869}
3870
3871float64 float64_squash_input_denormal(float64 a, float_status *status)
3872{
3873 if (status->flush_inputs_to_zero) {
3dddb203
RH
3874 FloatParts64 p;
3875
3876 float64_unpack_raw(&p, a);
e6b405fe
AB
3877 if (parts_squash_denormal(p, status)) {
3878 return float64_set_sign(float64_zero, p.sign);
3879 }
3880 }
3881 return a;
3882}
3883
8282310d
LZ
3884bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3885{
3886 if (status->flush_inputs_to_zero) {
3dddb203
RH
3887 FloatParts64 p;
3888
3889 bfloat16_unpack_raw(&p, a);
8282310d
LZ
3890 if (parts_squash_denormal(p, status)) {
3891 return bfloat16_set_sign(bfloat16_zero, p.sign);
3892 }
3893 }
3894 return a;
3895}
3896
377ed926 3897/*----------------------------------------------------------------------------
158142c2
FB
3898| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3899| and 7, and returns the properly rounded 32-bit integer corresponding to the
3900| input. If `zSign' is 1, the input is negated before being converted to an
3901| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3902| is simply rounded to an integer, with the inexact exception raised if the
3903| input cannot be represented exactly as an integer. However, if the fixed-
3904| point input is too large, the invalid exception is raised and the largest
3905| positive or negative integer is returned.
3906*----------------------------------------------------------------------------*/
3907
c120391c
RH
3908static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
3909 float_status *status)
158142c2 3910{
8f506c70 3911 int8_t roundingMode;
c120391c 3912 bool roundNearestEven;
8f506c70 3913 int8_t roundIncrement, roundBits;
760e1416 3914 int32_t z;
158142c2 3915
a2f2d288 3916 roundingMode = status->float_rounding_mode;
158142c2 3917 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3918 switch (roundingMode) {
3919 case float_round_nearest_even:
f9288a76 3920 case float_round_ties_away:
dc355b76
PM
3921 roundIncrement = 0x40;
3922 break;
3923 case float_round_to_zero:
3924 roundIncrement = 0;
3925 break;
3926 case float_round_up:
3927 roundIncrement = zSign ? 0 : 0x7f;
3928 break;
3929 case float_round_down:
3930 roundIncrement = zSign ? 0x7f : 0;
3931 break;
5d64abb3
RH
3932 case float_round_to_odd:
3933 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
3934 break;
dc355b76
PM
3935 default:
3936 abort();
158142c2
FB
3937 }
3938 roundBits = absZ & 0x7F;
3939 absZ = ( absZ + roundIncrement )>>7;
40662886
PMD
3940 if (!(roundBits ^ 0x40) && roundNearestEven) {
3941 absZ &= ~1;
3942 }
158142c2
FB
3943 z = absZ;
3944 if ( zSign ) z = - z;
3945 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 3946 float_raise(float_flag_invalid, status);
2c217da0 3947 return zSign ? INT32_MIN : INT32_MAX;
158142c2 3948 }
a2f2d288 3949 if (roundBits) {
d82f3b2d 3950 float_raise(float_flag_inexact, status);
a2f2d288 3951 }
158142c2
FB
3952 return z;
3953
3954}
3955
3956/*----------------------------------------------------------------------------
3957| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3958| `absZ1', with binary point between bits 63 and 64 (between the input words),
3959| and returns the properly rounded 64-bit integer corresponding to the input.
3960| If `zSign' is 1, the input is negated before being converted to an integer.
3961| Ordinarily, the fixed-point input is simply rounded to an integer, with
3962| the inexact exception raised if the input cannot be represented exactly as
3963| an integer. However, if the fixed-point input is too large, the invalid
3964| exception is raised and the largest positive or negative integer is
3965| returned.
3966*----------------------------------------------------------------------------*/
3967
c120391c 3968static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 3969 float_status *status)
158142c2 3970{
8f506c70 3971 int8_t roundingMode;
c120391c 3972 bool roundNearestEven, increment;
760e1416 3973 int64_t z;
158142c2 3974
a2f2d288 3975 roundingMode = status->float_rounding_mode;
158142c2 3976 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3977 switch (roundingMode) {
3978 case float_round_nearest_even:
f9288a76 3979 case float_round_ties_away:
dc355b76
PM
3980 increment = ((int64_t) absZ1 < 0);
3981 break;
3982 case float_round_to_zero:
3983 increment = 0;
3984 break;
3985 case float_round_up:
3986 increment = !zSign && absZ1;
3987 break;
3988 case float_round_down:
3989 increment = zSign && absZ1;
3990 break;
5d64abb3
RH
3991 case float_round_to_odd:
3992 increment = !(absZ0 & 1) && absZ1;
3993 break;
dc355b76
PM
3994 default:
3995 abort();
158142c2
FB
3996 }
3997 if ( increment ) {
3998 ++absZ0;
3999 if ( absZ0 == 0 ) goto overflow;
40662886
PMD
4000 if (!(absZ1 << 1) && roundNearestEven) {
4001 absZ0 &= ~1;
4002 }
158142c2
FB
4003 }
4004 z = absZ0;
4005 if ( zSign ) z = - z;
4006 if ( z && ( ( z < 0 ) ^ zSign ) ) {
4007 overflow:
ff32e16e 4008 float_raise(float_flag_invalid, status);
2c217da0 4009 return zSign ? INT64_MIN : INT64_MAX;
158142c2 4010 }
a2f2d288 4011 if (absZ1) {
d82f3b2d 4012 float_raise(float_flag_inexact, status);
a2f2d288 4013 }
158142c2
FB
4014 return z;
4015
4016}
4017
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision significand `aSig'.  The
| significand is shifted left by `clz32(aSig) - 8' bits; the shifted
| significand is written to `*zSigPtr' and the matching exponent
| (1 minus the shift distance) to `*zExpPtr'.
*----------------------------------------------------------------------------*/

static void normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr,
                                      uint32_t *zSigPtr)
{
    const int8_t leftShift = clz32(aSig) - 8;

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
4035
158142c2
FB
4036/*----------------------------------------------------------------------------
4037| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4038| and significand `zSig', and returns the proper single-precision floating-
4039| point value corresponding to the abstract input. Ordinarily, the abstract
4040| value is simply rounded and packed into the single-precision format, with
4041| the inexact exception raised if the abstract input cannot be represented
4042| exactly. However, if the abstract value is too large, the overflow and
4043| inexact exceptions are raised and an infinity or maximal finite value is
4044| returned. If the abstract value is too small, the input value is rounded to
4045| a subnormal number, and the underflow and inexact exceptions are raised if
4046| the abstract input cannot be represented exactly as a subnormal single-
4047| precision floating-point number.
4048| The input significand `zSig' has its binary point between bits 30
4049| and 29, which is 7 bits to the left of the usual location. This shifted
4050| significand must be normalized or smaller. If `zSig' is not normalized,
4051| `zExp' must be 0; in that case, the result returned is a subnormal number,
4052| and it must not require rounding. In the usual case that `zSig' is
4053| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4054| The handling of underflow and overflow follows the IEC/IEEE Standard for
4055| Binary Floating-Point Arithmetic.
4056*----------------------------------------------------------------------------*/
4057
c120391c 4058static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4059 float_status *status)
158142c2 4060{
8f506c70 4061 int8_t roundingMode;
c120391c 4062 bool roundNearestEven;
8f506c70 4063 int8_t roundIncrement, roundBits;
c120391c 4064 bool isTiny;
158142c2 4065
a2f2d288 4066 roundingMode = status->float_rounding_mode;
158142c2 4067 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4068 switch (roundingMode) {
4069 case float_round_nearest_even:
f9288a76 4070 case float_round_ties_away:
dc355b76
PM
4071 roundIncrement = 0x40;
4072 break;
4073 case float_round_to_zero:
4074 roundIncrement = 0;
4075 break;
4076 case float_round_up:
4077 roundIncrement = zSign ? 0 : 0x7f;
4078 break;
4079 case float_round_down:
4080 roundIncrement = zSign ? 0x7f : 0;
4081 break;
5d64abb3
RH
4082 case float_round_to_odd:
4083 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4084 break;
dc355b76
PM
4085 default:
4086 abort();
4087 break;
158142c2
FB
4088 }
4089 roundBits = zSig & 0x7F;
bb98fe42 4090 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
4091 if ( ( 0xFD < zExp )
4092 || ( ( zExp == 0xFD )
bb98fe42 4093 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4094 ) {
5d64abb3
RH
4095 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4096 roundIncrement != 0;
ff32e16e 4097 float_raise(float_flag_overflow | float_flag_inexact, status);
5d64abb3 4098 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
158142c2
FB
4099 }
4100 if ( zExp < 0 ) {
a2f2d288 4101 if (status->flush_to_zero) {
ff32e16e 4102 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4103 return packFloat32(zSign, 0, 0);
4104 }
a828b373
RH
4105 isTiny = status->tininess_before_rounding
4106 || (zExp < -1)
4107 || (zSig + roundIncrement < 0x80000000);
158142c2
FB
4108 shift32RightJamming( zSig, - zExp, &zSig );
4109 zExp = 0;
4110 roundBits = zSig & 0x7F;
ff32e16e
PM
4111 if (isTiny && roundBits) {
4112 float_raise(float_flag_underflow, status);
4113 }
5d64abb3
RH
4114 if (roundingMode == float_round_to_odd) {
4115 /*
4116 * For round-to-odd case, the roundIncrement depends on
4117 * zSig which just changed.
4118 */
4119 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4120 }
158142c2
FB
4121 }
4122 }
a2f2d288 4123 if (roundBits) {
d82f3b2d 4124 float_raise(float_flag_inexact, status);
a2f2d288 4125 }
158142c2 4126 zSig = ( zSig + roundIncrement )>>7;
40662886
PMD
4127 if (!(roundBits ^ 0x40) && roundNearestEven) {
4128 zSig &= ~1;
4129 }
158142c2
FB
4130 if ( zSig == 0 ) zExp = 0;
4131 return packFloat32( zSign, zExp, zSig );
4132
4133}
4134
4135/*----------------------------------------------------------------------------
4136| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4137| and significand `zSig', and returns the proper single-precision floating-
4138| point value corresponding to the abstract input. This routine is just like
4139| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4140| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4141| floating-point exponent.
4142*----------------------------------------------------------------------------*/
4143
4144static float32
c120391c 4145 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4146 float_status *status)
158142c2 4147{
8f506c70 4148 int8_t shiftCount;
158142c2 4149
0019d5c3 4150 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
4151 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4152 status);
158142c2
FB
4153
4154}
4155
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision significand `aSig'.  The
| significand is shifted left by `clz64(aSig) - 11' bits; the shifted
| significand is written to `*zSigPtr' and the matching exponent
| (1 minus the shift distance) to `*zExpPtr'.
*----------------------------------------------------------------------------*/

static void normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr,
                                      uint64_t *zSigPtr)
{
    const int8_t leftShift = clz64(aSig) - 11;

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
4173
4174/*----------------------------------------------------------------------------
4175| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4176| double-precision floating-point value, returning the result. After being
4177| shifted into the proper positions, the three fields are simply added
4178| together to form the result. This means that any integer portion of `zSig'
4179| will be added into the exponent. Since a properly normalized significand
4180| will have an integer portion equal to 1, the `zExp' input should be 1 less
4181| than the desired result exponent whenever `zSig' is a complete, normalized
4182| significand.
4183*----------------------------------------------------------------------------*/
4184
c120391c 4185static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
158142c2
FB
4186{
4187
f090c9d4 4188 return make_float64(
bb98fe42 4189 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
4190
4191}
4192
4193/*----------------------------------------------------------------------------
4194| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4195| and significand `zSig', and returns the proper double-precision floating-
4196| point value corresponding to the abstract input. Ordinarily, the abstract
4197| value is simply rounded and packed into the double-precision format, with
4198| the inexact exception raised if the abstract input cannot be represented
4199| exactly. However, if the abstract value is too large, the overflow and
4200| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
4201| returned. If the abstract value is too small, the input value is rounded to
4202| a subnormal number, and the underflow and inexact exceptions are raised if
4203| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
4204| precision floating-point number.
4205| The input significand `zSig' has its binary point between bits 62
4206| and 61, which is 10 bits to the left of the usual location. This shifted
4207| significand must be normalized or smaller. If `zSig' is not normalized,
4208| `zExp' must be 0; in that case, the result returned is a subnormal number,
4209| and it must not require rounding. In the usual case that `zSig' is
4210| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4211| The handling of underflow and overflow follows the IEC/IEEE Standard for
4212| Binary Floating-Point Arithmetic.
4213*----------------------------------------------------------------------------*/
4214
c120391c 4215static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4216 float_status *status)
158142c2 4217{
8f506c70 4218 int8_t roundingMode;
c120391c 4219 bool roundNearestEven;
0c48262d 4220 int roundIncrement, roundBits;
c120391c 4221 bool isTiny;
158142c2 4222
a2f2d288 4223 roundingMode = status->float_rounding_mode;
158142c2 4224 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4225 switch (roundingMode) {
4226 case float_round_nearest_even:
f9288a76 4227 case float_round_ties_away:
dc355b76
PM
4228 roundIncrement = 0x200;
4229 break;
4230 case float_round_to_zero:
4231 roundIncrement = 0;
4232 break;
4233 case float_round_up:
4234 roundIncrement = zSign ? 0 : 0x3ff;
4235 break;
4236 case float_round_down:
4237 roundIncrement = zSign ? 0x3ff : 0;
4238 break;
9ee6f678
BR
4239 case float_round_to_odd:
4240 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4241 break;
dc355b76
PM
4242 default:
4243 abort();
158142c2
FB
4244 }
4245 roundBits = zSig & 0x3FF;
bb98fe42 4246 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
4247 if ( ( 0x7FD < zExp )
4248 || ( ( zExp == 0x7FD )
bb98fe42 4249 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4250 ) {
9ee6f678
BR
4251 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4252 roundIncrement != 0;
ff32e16e 4253 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 4254 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
4255 }
4256 if ( zExp < 0 ) {
a2f2d288 4257 if (status->flush_to_zero) {
ff32e16e 4258 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4259 return packFloat64(zSign, 0, 0);
4260 }
a828b373
RH
4261 isTiny = status->tininess_before_rounding
4262 || (zExp < -1)
4263 || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
158142c2
FB
4264 shift64RightJamming( zSig, - zExp, &zSig );
4265 zExp = 0;
4266 roundBits = zSig & 0x3FF;
ff32e16e
PM
4267 if (isTiny && roundBits) {
4268 float_raise(float_flag_underflow, status);
4269 }
9ee6f678
BR
4270 if (roundingMode == float_round_to_odd) {
4271 /*
4272 * For round-to-odd case, the roundIncrement depends on
4273 * zSig which just changed.
4274 */
4275 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4276 }
158142c2
FB
4277 }
4278 }
a2f2d288 4279 if (roundBits) {
d82f3b2d 4280 float_raise(float_flag_inexact, status);
a2f2d288 4281 }
158142c2 4282 zSig = ( zSig + roundIncrement )>>10;
40662886
PMD
4283 if (!(roundBits ^ 0x200) && roundNearestEven) {
4284 zSig &= ~1;
4285 }
158142c2
FB
4286 if ( zSig == 0 ) zExp = 0;
4287 return packFloat64( zSign, zExp, zSig );
4288
4289}
4290
4291/*----------------------------------------------------------------------------
4292| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4293| and significand `zSig', and returns the proper double-precision floating-
4294| point value corresponding to the abstract input. This routine is just like
4295| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4296| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4297| floating-point exponent.
4298*----------------------------------------------------------------------------*/
4299
4300static float64
c120391c 4301 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4302 float_status *status)
158142c2 4303{
8f506c70 4304 int8_t shiftCount;
158142c2 4305
0019d5c3 4306 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
4307 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4308 status);
158142c2
FB
4309
4310}
4311
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision significand `aSig'.
| The significand is shifted left by `clz64(aSig)' bits; the shifted
| significand is written to `*zSigPtr' and the matching exponent
| (1 minus the shift distance) to `*zExpPtr'.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    const int8_t leftShift = clz64(aSig);

    *zSigPtr = aSig << leftShift;
    *zExpPtr = 1 - leftShift;
}
4328
4329/*----------------------------------------------------------------------------
4330| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4331| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4332| and returns the proper extended double-precision floating-point value
4333| corresponding to the abstract input. Ordinarily, the abstract value is
4334| rounded and packed into the extended double-precision format, with the
4335| inexact exception raised if the abstract input cannot be represented
4336| exactly. However, if the abstract value is too large, the overflow and
4337| inexact exceptions are raised and an infinity or maximal finite value is
4338| returned. If the abstract value is too small, the input value is rounded to
4339| a subnormal number, and the underflow and inexact exceptions are raised if
4340| the abstract input cannot be represented exactly as a subnormal extended
4341| double-precision floating-point number.
4342| If `roundingPrecision' is 32 or 64, the result is rounded to the same
4343| number of bits as single or double precision, respectively. Otherwise, the
4344| result is rounded to the full precision of the extended double-precision
4345| format.
4346| The input significand must be normalized or smaller. If the input
4347| significand is not normalized, `zExp' must be 0; in that case, the result
4348| returned is a subnormal number, and it must not require rounding. The
4349| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4350| Floating-Point Arithmetic.
4351*----------------------------------------------------------------------------*/
4352
c120391c 4353floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
88857aca
LV
4354 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4355 float_status *status)
158142c2 4356{
8f506c70 4357 int8_t roundingMode;
c120391c 4358 bool roundNearestEven, increment, isTiny;
f42c2224 4359 int64_t roundIncrement, roundMask, roundBits;
158142c2 4360
a2f2d288 4361 roundingMode = status->float_rounding_mode;
158142c2
FB
4362 roundNearestEven = ( roundingMode == float_round_nearest_even );
4363 if ( roundingPrecision == 80 ) goto precision80;
4364 if ( roundingPrecision == 64 ) {
e9321124
AB
4365 roundIncrement = UINT64_C(0x0000000000000400);
4366 roundMask = UINT64_C(0x00000000000007FF);
158142c2
FB
4367 }
4368 else if ( roundingPrecision == 32 ) {
e9321124
AB
4369 roundIncrement = UINT64_C(0x0000008000000000);
4370 roundMask = UINT64_C(0x000000FFFFFFFFFF);
158142c2
FB
4371 }
4372 else {
4373 goto precision80;
4374 }
4375 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
4376 switch (roundingMode) {
4377 case float_round_nearest_even:
f9288a76 4378 case float_round_ties_away:
dc355b76
PM
4379 break;
4380 case float_round_to_zero:
4381 roundIncrement = 0;
4382 break;
4383 case float_round_up:
4384 roundIncrement = zSign ? 0 : roundMask;
4385 break;
4386 case float_round_down:
4387 roundIncrement = zSign ? roundMask : 0;
4388 break;
4389 default:
4390 abort();
158142c2
FB
4391 }
4392 roundBits = zSig0 & roundMask;
bb98fe42 4393 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4394 if ( ( 0x7FFE < zExp )
4395 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4396 ) {
4397 goto overflow;
4398 }
4399 if ( zExp <= 0 ) {
a2f2d288 4400 if (status->flush_to_zero) {
ff32e16e 4401 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4402 return packFloatx80(zSign, 0, 0);
4403 }
a828b373
RH
4404 isTiny = status->tininess_before_rounding
4405 || (zExp < 0 )
4406 || (zSig0 <= zSig0 + roundIncrement);
158142c2
FB
4407 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4408 zExp = 0;
4409 roundBits = zSig0 & roundMask;
ff32e16e
PM
4410 if (isTiny && roundBits) {
4411 float_raise(float_flag_underflow, status);
4412 }
a2f2d288 4413 if (roundBits) {
d82f3b2d 4414 float_raise(float_flag_inexact, status);
a2f2d288 4415 }
158142c2 4416 zSig0 += roundIncrement;
bb98fe42 4417 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4418 roundIncrement = roundMask + 1;
4419 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4420 roundMask |= roundIncrement;
4421 }
4422 zSig0 &= ~ roundMask;
4423 return packFloatx80( zSign, zExp, zSig0 );
4424 }
4425 }
a2f2d288 4426 if (roundBits) {
d82f3b2d 4427 float_raise(float_flag_inexact, status);
a2f2d288 4428 }
158142c2
FB
4429 zSig0 += roundIncrement;
4430 if ( zSig0 < roundIncrement ) {
4431 ++zExp;
e9321124 4432 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4433 }
4434 roundIncrement = roundMask + 1;
4435 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4436 roundMask |= roundIncrement;
4437 }
4438 zSig0 &= ~ roundMask;
4439 if ( zSig0 == 0 ) zExp = 0;
4440 return packFloatx80( zSign, zExp, zSig0 );
4441 precision80:
dc355b76
PM
4442 switch (roundingMode) {
4443 case float_round_nearest_even:
f9288a76 4444 case float_round_ties_away:
dc355b76
PM
4445 increment = ((int64_t)zSig1 < 0);
4446 break;
4447 case float_round_to_zero:
4448 increment = 0;
4449 break;
4450 case float_round_up:
4451 increment = !zSign && zSig1;
4452 break;
4453 case float_round_down:
4454 increment = zSign && zSig1;
4455 break;
4456 default:
4457 abort();
158142c2 4458 }
bb98fe42 4459 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4460 if ( ( 0x7FFE < zExp )
4461 || ( ( zExp == 0x7FFE )
e9321124 4462 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
4463 && increment
4464 )
4465 ) {
4466 roundMask = 0;
4467 overflow:
ff32e16e 4468 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4469 if ( ( roundingMode == float_round_to_zero )
4470 || ( zSign && ( roundingMode == float_round_up ) )
4471 || ( ! zSign && ( roundingMode == float_round_down ) )
4472 ) {
4473 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4474 }
0f605c88
LV
4475 return packFloatx80(zSign,
4476 floatx80_infinity_high,
4477 floatx80_infinity_low);
158142c2
FB
4478 }
4479 if ( zExp <= 0 ) {
a828b373
RH
4480 isTiny = status->tininess_before_rounding
4481 || (zExp < 0)
4482 || !increment
4483 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4484 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4485 zExp = 0;
ff32e16e
PM
4486 if (isTiny && zSig1) {
4487 float_raise(float_flag_underflow, status);
4488 }
a2f2d288 4489 if (zSig1) {
d82f3b2d 4490 float_raise(float_flag_inexact, status);
a2f2d288 4491 }
dc355b76
PM
4492 switch (roundingMode) {
4493 case float_round_nearest_even:
f9288a76 4494 case float_round_ties_away:
dc355b76
PM
4495 increment = ((int64_t)zSig1 < 0);
4496 break;
4497 case float_round_to_zero:
4498 increment = 0;
4499 break;
4500 case float_round_up:
4501 increment = !zSign && zSig1;
4502 break;
4503 case float_round_down:
4504 increment = zSign && zSig1;
4505 break;
4506 default:
4507 abort();
158142c2
FB
4508 }
4509 if ( increment ) {
4510 ++zSig0;
40662886
PMD
4511 if (!(zSig1 << 1) && roundNearestEven) {
4512 zSig0 &= ~1;
4513 }
bb98fe42 4514 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4515 }
4516 return packFloatx80( zSign, zExp, zSig0 );
4517 }
4518 }
a2f2d288 4519 if (zSig1) {
d82f3b2d 4520 float_raise(float_flag_inexact, status);
a2f2d288 4521 }
158142c2
FB
4522 if ( increment ) {
4523 ++zSig0;
4524 if ( zSig0 == 0 ) {
4525 ++zExp;
e9321124 4526 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4527 }
4528 else {
40662886
PMD
4529 if (!(zSig1 << 1) && roundNearestEven) {
4530 zSig0 &= ~1;
4531 }
158142c2
FB
4532 }
4533 }
4534 else {
4535 if ( zSig0 == 0 ) zExp = 0;
4536 }
4537 return packFloatx80( zSign, zExp, zSig0 );
4538
4539}
4540
4541/*----------------------------------------------------------------------------
4542| Takes an abstract floating-point value having sign `zSign', exponent
4543| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4544| and returns the proper extended double-precision floating-point value
4545| corresponding to the abstract input. This routine is just like
4546| `roundAndPackFloatx80' except that the input significand does not have to be
4547| normalized.
4548*----------------------------------------------------------------------------*/
4549
88857aca 4550floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
c120391c 4551 bool zSign, int32_t zExp,
88857aca
LV
4552 uint64_t zSig0, uint64_t zSig1,
4553 float_status *status)
158142c2 4554{
8f506c70 4555 int8_t shiftCount;
158142c2
FB
4556
4557 if ( zSig0 == 0 ) {
4558 zSig0 = zSig1;
4559 zSig1 = 0;
4560 zExp -= 64;
4561 }
0019d5c3 4562 shiftCount = clz64(zSig0);
158142c2
FB
4563 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4564 zExp -= shiftCount;
ff32e16e
PM
4565 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4566 zSig0, zSig1, status);
158142c2
FB
4567
4568}
4569
158142c2
FB
4570/*----------------------------------------------------------------------------
4571| Returns the least-significant 64 fraction bits of the quadruple-precision
4572| floating-point value `a'.
4573*----------------------------------------------------------------------------*/
4574
a49db98d 4575static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4576{
4577
4578 return a.low;
4579
4580}
4581
4582/*----------------------------------------------------------------------------
4583| Returns the most-significant 48 fraction bits of the quadruple-precision
4584| floating-point value `a'.
4585*----------------------------------------------------------------------------*/
4586
a49db98d 4587static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4588{
4589
e9321124 4590 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
158142c2
FB
4591
4592}
4593
4594/*----------------------------------------------------------------------------
4595| Returns the exponent bits of the quadruple-precision floating-point value
4596| `a'.
4597*----------------------------------------------------------------------------*/
4598
f4014512 4599static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4600{
4601
4602 return ( a.high>>48 ) & 0x7FFF;
4603
4604}
4605
4606/*----------------------------------------------------------------------------
4607| Returns the sign bit of the quadruple-precision floating-point value `a'.
4608*----------------------------------------------------------------------------*/
4609
c120391c 4610static inline bool extractFloat128Sign(float128 a)
158142c2 4611{
c120391c 4612 return a.high >> 63;
158142c2
FB
4613}
4614
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision significand formed by the
| concatenation of `aSig0' and `aSig1'.  The normalized exponent is stored at
| `*zExpPtr'.  The most significant 49 bits of the normalized significand
| are stored at `*zSig0Ptr' and the least significant 64 bits at `*zSig1Ptr'.
*----------------------------------------------------------------------------*/

static void normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                                       int32_t *zExpPtr,
                                       uint64_t *zSig0Ptr,
                                       uint64_t *zSig1Ptr)
{
    int8_t shift;

    if (aSig0 != 0) {
        /* Leading bit is in the high word: one short 128-bit left shift. */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /* High word is empty, so the whole significand comes from aSig1. */
    shift = clz64(aSig1) - 15;
    if (shift >= 0) {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        /* Negative shift: aSig1 straddles both output words. */
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
4655
4656/*----------------------------------------------------------------------------
4657| Packs the sign `zSign', the exponent `zExp', and the significand formed
4658| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4659| floating-point value, returning the result. After being shifted into the
4660| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4661| added together to form the most significant 32 bits of the result. This
4662| means that any integer portion of `zSig0' will be added into the exponent.
4663| Since a properly normalized significand will have an integer portion equal
4664| to 1, the `zExp' input should be 1 less than the desired result exponent
4665| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4666| significand.
4667*----------------------------------------------------------------------------*/
4668
a49db98d 4669static inline float128
c120391c 4670packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
158142c2
FB
4671{
4672 float128 z;
4673
4674 z.low = zSig1;
c120391c 4675 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
158142c2 4676 return z;
158142c2
FB
4677}
4678
4679/*----------------------------------------------------------------------------
4680| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4681| and extended significand formed by the concatenation of `zSig0', `zSig1',
4682| and `zSig2', and returns the proper quadruple-precision floating-point value
4683| corresponding to the abstract input. Ordinarily, the abstract value is
4684| simply rounded and packed into the quadruple-precision format, with the
4685| inexact exception raised if the abstract input cannot be represented
4686| exactly. However, if the abstract value is too large, the overflow and
4687| inexact exceptions are raised and an infinity or maximal finite value is
4688| returned. If the abstract value is too small, the input value is rounded to
4689| a subnormal number, and the underflow and inexact exceptions are raised if
4690| the abstract input cannot be represented exactly as a subnormal quadruple-
4691| precision floating-point number.
4692| The input significand must be normalized or smaller. If the input
4693| significand is not normalized, `zExp' must be 0; in that case, the result
4694| returned is a subnormal number, and it must not require rounding. In the
4695| usual case that the input significand is normalized, `zExp' must be 1 less
4696| than the ``true'' floating-point exponent. The handling of underflow and
4697| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4698*----------------------------------------------------------------------------*/
4699
c120391c 4700static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4701 uint64_t zSig0, uint64_t zSig1,
4702 uint64_t zSig2, float_status *status)
158142c2 4703{
8f506c70 4704 int8_t roundingMode;
c120391c 4705 bool roundNearestEven, increment, isTiny;
158142c2 4706
a2f2d288 4707 roundingMode = status->float_rounding_mode;
158142c2 4708 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4709 switch (roundingMode) {
4710 case float_round_nearest_even:
f9288a76 4711 case float_round_ties_away:
dc355b76
PM
4712 increment = ((int64_t)zSig2 < 0);
4713 break;
4714 case float_round_to_zero:
4715 increment = 0;
4716 break;
4717 case float_round_up:
4718 increment = !zSign && zSig2;
4719 break;
4720 case float_round_down:
4721 increment = zSign && zSig2;
4722 break;
9ee6f678
BR
4723 case float_round_to_odd:
4724 increment = !(zSig1 & 0x1) && zSig2;
4725 break;
dc355b76
PM
4726 default:
4727 abort();
158142c2 4728 }
bb98fe42 4729 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
4730 if ( ( 0x7FFD < zExp )
4731 || ( ( zExp == 0x7FFD )
4732 && eq128(
e9321124
AB
4733 UINT64_C(0x0001FFFFFFFFFFFF),
4734 UINT64_C(0xFFFFFFFFFFFFFFFF),
158142c2
FB
4735 zSig0,
4736 zSig1
4737 )
4738 && increment
4739 )
4740 ) {
ff32e16e 4741 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4742 if ( ( roundingMode == float_round_to_zero )
4743 || ( zSign && ( roundingMode == float_round_up ) )
4744 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4745 || (roundingMode == float_round_to_odd)
158142c2
FB
4746 ) {
4747 return
4748 packFloat128(
4749 zSign,
4750 0x7FFE,
e9321124
AB
4751 UINT64_C(0x0000FFFFFFFFFFFF),
4752 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
4753 );
4754 }
4755 return packFloat128( zSign, 0x7FFF, 0, 0 );
4756 }
4757 if ( zExp < 0 ) {
a2f2d288 4758 if (status->flush_to_zero) {
ff32e16e 4759 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4760 return packFloat128(zSign, 0, 0, 0);
4761 }
a828b373
RH
4762 isTiny = status->tininess_before_rounding
4763 || (zExp < -1)
4764 || !increment
4765 || lt128(zSig0, zSig1,
4766 UINT64_C(0x0001FFFFFFFFFFFF),
4767 UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4768 shift128ExtraRightJamming(
4769 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4770 zExp = 0;
ff32e16e
PM
4771 if (isTiny && zSig2) {
4772 float_raise(float_flag_underflow, status);
4773 }
dc355b76
PM
4774 switch (roundingMode) {
4775 case float_round_nearest_even:
f9288a76 4776 case float_round_ties_away:
dc355b76
PM
4777 increment = ((int64_t)zSig2 < 0);
4778 break;
4779 case float_round_to_zero:
4780 increment = 0;
4781 break;
4782 case float_round_up:
4783 increment = !zSign && zSig2;
4784 break;
4785 case float_round_down:
4786 increment = zSign && zSig2;
4787 break;
9ee6f678
BR
4788 case float_round_to_odd:
4789 increment = !(zSig1 & 0x1) && zSig2;
4790 break;
dc355b76
PM
4791 default:
4792 abort();
158142c2
FB
4793 }
4794 }
4795 }
a2f2d288 4796 if (zSig2) {
d82f3b2d 4797 float_raise(float_flag_inexact, status);
a2f2d288 4798 }
158142c2
FB
4799 if ( increment ) {
4800 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
40662886
PMD
4801 if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4802 zSig1 &= ~1;
4803 }
158142c2
FB
4804 }
4805 else {
4806 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4807 }
4808 return packFloat128( zSign, zExp, zSig0, zSig1 );
4809
4810}
4811
4812/*----------------------------------------------------------------------------
4813| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4814| and significand formed by the concatenation of `zSig0' and `zSig1', and
4815| returns the proper quadruple-precision floating-point value corresponding
4816| to the abstract input. This routine is just like `roundAndPackFloat128'
4817| except that the input significand has fewer bits and does not have to be
4818| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4819| point exponent.
4820*----------------------------------------------------------------------------*/
4821
c120391c 4822static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4823 uint64_t zSig0, uint64_t zSig1,
4824 float_status *status)
158142c2 4825{
8f506c70 4826 int8_t shiftCount;
bb98fe42 4827 uint64_t zSig2;
158142c2
FB
4828
4829 if ( zSig0 == 0 ) {
4830 zSig0 = zSig1;
4831 zSig1 = 0;
4832 zExp -= 64;
4833 }
0019d5c3 4834 shiftCount = clz64(zSig0) - 15;
158142c2
FB
4835 if ( 0 <= shiftCount ) {
4836 zSig2 = 0;
4837 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4838 }
4839 else {
4840 shift128ExtraRightJamming(
4841 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4842 }
4843 zExp -= shiftCount;
ff32e16e 4844 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
4845
4846}
4847
158142c2 4848
158142c2
FB
4849/*----------------------------------------------------------------------------
4850| Returns the result of converting the 32-bit two's complement integer `a'
4851| to the extended double-precision floating-point format. The conversion
4852| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4853| Arithmetic.
4854*----------------------------------------------------------------------------*/
4855
e5a41ffa 4856floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2 4857{
c120391c 4858 bool zSign;
3a87d009 4859 uint32_t absA;
8f506c70 4860 int8_t shiftCount;
bb98fe42 4861 uint64_t zSig;
158142c2
FB
4862
4863 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4864 zSign = ( a < 0 );
4865 absA = zSign ? - a : a;
0019d5c3 4866 shiftCount = clz32(absA) + 32;
158142c2
FB
4867 zSig = absA;
4868 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4869
4870}
4871
158142c2
FB
4872/*----------------------------------------------------------------------------
4873| Returns the result of converting the 64-bit two's complement integer `a'
4874| to the extended double-precision floating-point format. The conversion
4875| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4876| Arithmetic.
4877*----------------------------------------------------------------------------*/
4878
e5a41ffa 4879floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2 4880{
c120391c 4881 bool zSign;
182f42fd 4882 uint64_t absA;
8f506c70 4883 int8_t shiftCount;
158142c2
FB
4884
4885 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4886 zSign = ( a < 0 );
4887 absA = zSign ? - a : a;
0019d5c3 4888 shiftCount = clz64(absA);
158142c2
FB
4889 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4890
4891}
4892
158142c2
FB
4893/*----------------------------------------------------------------------------
4894| Returns the result of converting the single-precision floating-point value
4895| `a' to the extended double-precision floating-point format. The conversion
4896| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4897| Arithmetic.
4898*----------------------------------------------------------------------------*/
4899
e5a41ffa 4900floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2 4901{
c120391c 4902 bool aSign;
0c48262d 4903 int aExp;
bb98fe42 4904 uint32_t aSig;
158142c2 4905
ff32e16e 4906 a = float32_squash_input_denormal(a, status);
158142c2
FB
4907 aSig = extractFloat32Frac( a );
4908 aExp = extractFloat32Exp( a );
4909 aSign = extractFloat32Sign( a );
4910 if ( aExp == 0xFF ) {
ff32e16e 4911 if (aSig) {
7537c2b4
JM
4912 floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
4913 status);
4914 return floatx80_silence_nan(res, status);
ff32e16e 4915 }
0f605c88
LV
4916 return packFloatx80(aSign,
4917 floatx80_infinity_high,
4918 floatx80_infinity_low);
158142c2
FB
4919 }
4920 if ( aExp == 0 ) {
4921 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4922 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4923 }
4924 aSig |= 0x00800000;
bb98fe42 4925 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
4926
4927}
4928
158142c2
FB
4929/*----------------------------------------------------------------------------
4930| Returns the remainder of the single-precision floating-point value `a'
4931| with respect to the corresponding value `b'. The operation is performed
4932| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4933*----------------------------------------------------------------------------*/
4934
e5a41ffa 4935float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 4936{
c120391c 4937 bool aSign, zSign;
0c48262d 4938 int aExp, bExp, expDiff;
bb98fe42
AF
4939 uint32_t aSig, bSig;
4940 uint32_t q;
4941 uint64_t aSig64, bSig64, q64;
4942 uint32_t alternateASig;
4943 int32_t sigMean;
ff32e16e
PM
4944 a = float32_squash_input_denormal(a, status);
4945 b = float32_squash_input_denormal(b, status);
158142c2
FB
4946
4947 aSig = extractFloat32Frac( a );
4948 aExp = extractFloat32Exp( a );
4949 aSign = extractFloat32Sign( a );
4950 bSig = extractFloat32Frac( b );
4951 bExp = extractFloat32Exp( b );
158142c2
FB
4952 if ( aExp == 0xFF ) {
4953 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 4954 return propagateFloat32NaN(a, b, status);
158142c2 4955 }
ff32e16e 4956 float_raise(float_flag_invalid, status);
af39bc8c 4957 return float32_default_nan(status);
158142c2
FB
4958 }
4959 if ( bExp == 0xFF ) {
ff32e16e
PM
4960 if (bSig) {
4961 return propagateFloat32NaN(a, b, status);
4962 }
158142c2
FB
4963 return a;
4964 }
4965 if ( bExp == 0 ) {
4966 if ( bSig == 0 ) {
ff32e16e 4967 float_raise(float_flag_invalid, status);
af39bc8c 4968 return float32_default_nan(status);
158142c2
FB
4969 }
4970 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
4971 }
4972 if ( aExp == 0 ) {
4973 if ( aSig == 0 ) return a;
4974 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4975 }
4976 expDiff = aExp - bExp;
4977 aSig |= 0x00800000;
4978 bSig |= 0x00800000;
4979 if ( expDiff < 32 ) {
4980 aSig <<= 8;
4981 bSig <<= 8;
4982 if ( expDiff < 0 ) {
4983 if ( expDiff < -1 ) return a;
4984 aSig >>= 1;
4985 }
4986 q = ( bSig <= aSig );
4987 if ( q ) aSig -= bSig;
4988 if ( 0 < expDiff ) {
bb98fe42 4989 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
4990 q >>= 32 - expDiff;
4991 bSig >>= 2;
4992 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4993 }
4994 else {
4995 aSig >>= 2;
4996 bSig >>= 2;
4997 }
4998 }
4999 else {
5000 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
5001 aSig64 = ( (uint64_t) aSig )<<40;
5002 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
5003 expDiff -= 64;
5004 while ( 0 < expDiff ) {
5005 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5006 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5007 aSig64 = - ( ( bSig * q64 )<<38 );
5008 expDiff -= 62;
5009 }
5010 expDiff += 64;
5011 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5012 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5013 q = q64>>( 64 - expDiff );
5014 bSig <<= 6;
5015 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5016 }
5017 do {
5018 alternateASig = aSig;
5019 ++q;
5020 aSig -= bSig;
bb98fe42 5021 } while ( 0 <= (int32_t) aSig );
158142c2
FB
5022 sigMean = aSig + alternateASig;
5023 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5024 aSig = alternateASig;
5025 }
bb98fe42 5026 zSign = ( (int32_t) aSig < 0 );
158142c2 5027 if ( zSign ) aSig = - aSig;
ff32e16e 5028 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5029}
5030
369be8f6 5031
158142c2 5032
8229c991
AJ
5033/*----------------------------------------------------------------------------
5034| Returns the binary exponential of the single-precision floating-point value
5035| `a'. The operation is performed according to the IEC/IEEE Standard for
5036| Binary Floating-Point Arithmetic.
5037|
5038| Uses the following identities:
5039|
5040| 1. -------------------------------------------------------------------------
5041| x x*ln(2)
5042| 2 = e
5043|
5044| 2. -------------------------------------------------------------------------
5045| 2 3 4 5 n
5046| x x x x x x x
5047| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5048| 1! 2! 3! 4! 5! n!
5049*----------------------------------------------------------------------------*/
5050
5051static const float64 float32_exp2_coefficients[15] =
5052{
d5138cf4
PM
5053 const_float64( 0x3ff0000000000000ll ), /* 1 */
5054 const_float64( 0x3fe0000000000000ll ), /* 2 */
5055 const_float64( 0x3fc5555555555555ll ), /* 3 */
5056 const_float64( 0x3fa5555555555555ll ), /* 4 */
5057 const_float64( 0x3f81111111111111ll ), /* 5 */
5058 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5059 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5060 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5061 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5062 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5063 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5064 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5065 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5066 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5067 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
5068};
5069
e5a41ffa 5070float32 float32_exp2(float32 a, float_status *status)
8229c991 5071{
c120391c 5072 bool aSign;
0c48262d 5073 int aExp;
bb98fe42 5074 uint32_t aSig;
8229c991
AJ
5075 float64 r, x, xn;
5076 int i;
ff32e16e 5077 a = float32_squash_input_denormal(a, status);
8229c991
AJ
5078
5079 aSig = extractFloat32Frac( a );
5080 aExp = extractFloat32Exp( a );
5081 aSign = extractFloat32Sign( a );
5082
5083 if ( aExp == 0xFF) {
ff32e16e
PM
5084 if (aSig) {
5085 return propagateFloat32NaN(a, float32_zero, status);
5086 }
8229c991
AJ
5087 return (aSign) ? float32_zero : a;
5088 }
5089 if (aExp == 0) {
5090 if (aSig == 0) return float32_one;
5091 }
5092
ff32e16e 5093 float_raise(float_flag_inexact, status);
8229c991
AJ
5094
5095 /* ******************************* */
5096 /* using float64 for approximation */
5097 /* ******************************* */
ff32e16e
PM
5098 x = float32_to_float64(a, status);
5099 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
5100
5101 xn = x;
5102 r = float64_one;
5103 for (i = 0 ; i < 15 ; i++) {
5104 float64 f;
5105
ff32e16e
PM
5106 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5107 r = float64_add(r, f, status);
8229c991 5108
ff32e16e 5109 xn = float64_mul(xn, x, status);
8229c991
AJ
5110 }
5111
5112 return float64_to_float32(r, status);
5113}
5114
374dfc33
AJ
5115/*----------------------------------------------------------------------------
5116| Returns the binary log of the single-precision floating-point value `a'.
5117| The operation is performed according to the IEC/IEEE Standard for Binary
5118| Floating-Point Arithmetic.
5119*----------------------------------------------------------------------------*/
e5a41ffa 5120float32 float32_log2(float32 a, float_status *status)
374dfc33 5121{
c120391c 5122 bool aSign, zSign;
0c48262d 5123 int aExp;
bb98fe42 5124 uint32_t aSig, zSig, i;
374dfc33 5125
ff32e16e 5126 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
5127 aSig = extractFloat32Frac( a );
5128 aExp = extractFloat32Exp( a );
5129 aSign = extractFloat32Sign( a );
5130
5131 if ( aExp == 0 ) {
5132 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5133 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5134 }
5135 if ( aSign ) {
ff32e16e 5136 float_raise(float_flag_invalid, status);
af39bc8c 5137 return float32_default_nan(status);
374dfc33
AJ
5138 }
5139 if ( aExp == 0xFF ) {
ff32e16e
PM
5140 if (aSig) {
5141 return propagateFloat32NaN(a, float32_zero, status);
5142 }
374dfc33
AJ
5143 return a;
5144 }
5145
5146 aExp -= 0x7F;
5147 aSig |= 0x00800000;
5148 zSign = aExp < 0;
5149 zSig = aExp << 23;
5150
5151 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 5152 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
5153 if ( aSig & 0x01000000 ) {
5154 aSig >>= 1;
5155 zSig |= i;
5156 }
5157 }
5158
5159 if ( zSign )
5160 zSig = -zSig;
5161
ff32e16e 5162 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
5163}
5164
158142c2 5165/*----------------------------------------------------------------------------
158142c2
FB
5166| Returns the result of converting the double-precision floating-point value
5167| `a' to the extended double-precision floating-point format. The conversion
5168| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5169| Arithmetic.
5170*----------------------------------------------------------------------------*/
5171
e5a41ffa 5172floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2 5173{
c120391c 5174 bool aSign;
0c48262d 5175 int aExp;
bb98fe42 5176 uint64_t aSig;
158142c2 5177
ff32e16e 5178 a = float64_squash_input_denormal(a, status);
158142c2
FB
5179 aSig = extractFloat64Frac( a );
5180 aExp = extractFloat64Exp( a );
5181 aSign = extractFloat64Sign( a );
5182 if ( aExp == 0x7FF ) {
ff32e16e 5183 if (aSig) {
7537c2b4
JM
5184 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5185 status);
5186 return floatx80_silence_nan(res, status);
ff32e16e 5187 }
0f605c88
LV
5188 return packFloatx80(aSign,
5189 floatx80_infinity_high,
5190 floatx80_infinity_low);
158142c2
FB
5191 }
5192 if ( aExp == 0 ) {
5193 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5194 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5195 }
5196 return
5197 packFloatx80(
e9321124 5198 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
158142c2
FB
5199
5200}
5201
158142c2
FB
5202/*----------------------------------------------------------------------------
5203| Returns the remainder of the double-precision floating-point value `a'
5204| with respect to the corresponding value `b'. The operation is performed
5205| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5206*----------------------------------------------------------------------------*/
5207
e5a41ffa 5208float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5209{
c120391c 5210 bool aSign, zSign;
0c48262d 5211 int aExp, bExp, expDiff;
bb98fe42
AF
5212 uint64_t aSig, bSig;
5213 uint64_t q, alternateASig;
5214 int64_t sigMean;
158142c2 5215
ff32e16e
PM
5216 a = float64_squash_input_denormal(a, status);
5217 b = float64_squash_input_denormal(b, status);
158142c2
FB
5218 aSig = extractFloat64Frac( a );
5219 aExp = extractFloat64Exp( a );
5220 aSign = extractFloat64Sign( a );
5221 bSig = extractFloat64Frac( b );
5222 bExp = extractFloat64Exp( b );
158142c2
FB
5223 if ( aExp == 0x7FF ) {
5224 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5225 return propagateFloat64NaN(a, b, status);
158142c2 5226 }
ff32e16e 5227 float_raise(float_flag_invalid, status);
af39bc8c 5228 return float64_default_nan(status);
158142c2
FB
5229 }
5230 if ( bExp == 0x7FF ) {
ff32e16e
PM
5231 if (bSig) {
5232 return propagateFloat64NaN(a, b, status);
5233 }
158142c2
FB
5234 return a;
5235 }
5236 if ( bExp == 0 ) {
5237 if ( bSig == 0 ) {
ff32e16e 5238 float_raise(float_flag_invalid, status);
af39bc8c 5239 return float64_default_nan(status);
158142c2
FB
5240 }
5241 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5242 }
5243 if ( aExp == 0 ) {
5244 if ( aSig == 0 ) return a;
5245 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5246 }
5247 expDiff = aExp - bExp;
e9321124
AB
5248 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5249 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
158142c2
FB
5250 if ( expDiff < 0 ) {
5251 if ( expDiff < -1 ) return a;
5252 aSig >>= 1;
5253 }
5254 q = ( bSig <= aSig );
5255 if ( q ) aSig -= bSig;
5256 expDiff -= 64;
5257 while ( 0 < expDiff ) {
5258 q = estimateDiv128To64( aSig, 0, bSig );
5259 q = ( 2 < q ) ? q - 2 : 0;
5260 aSig = - ( ( bSig>>2 ) * q );
5261 expDiff -= 62;
5262 }
5263 expDiff += 64;
5264 if ( 0 < expDiff ) {
5265 q = estimateDiv128To64( aSig, 0, bSig );
5266 q = ( 2 < q ) ? q - 2 : 0;
5267 q >>= 64 - expDiff;
5268 bSig >>= 2;
5269 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5270 }
5271 else {
5272 aSig >>= 2;
5273 bSig >>= 2;
5274 }
5275 do {
5276 alternateASig = aSig;
5277 ++q;
5278 aSig -= bSig;
bb98fe42 5279 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5280 sigMean = aSig + alternateASig;
5281 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5282 aSig = alternateASig;
5283 }
bb98fe42 5284 zSign = ( (int64_t) aSig < 0 );
158142c2 5285 if ( zSign ) aSig = - aSig;
ff32e16e 5286 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5287
5288}
5289
374dfc33
AJ
5290/*----------------------------------------------------------------------------
5291| Returns the binary log of the double-precision floating-point value `a'.
5292| The operation is performed according to the IEC/IEEE Standard for Binary
5293| Floating-Point Arithmetic.
5294*----------------------------------------------------------------------------*/
e5a41ffa 5295float64 float64_log2(float64 a, float_status *status)
374dfc33 5296{
c120391c 5297 bool aSign, zSign;
0c48262d 5298 int aExp;
bb98fe42 5299 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5300 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5301
5302 aSig = extractFloat64Frac( a );
5303 aExp = extractFloat64Exp( a );
5304 aSign = extractFloat64Sign( a );
5305
5306 if ( aExp == 0 ) {
5307 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5308 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5309 }
5310 if ( aSign ) {
ff32e16e 5311 float_raise(float_flag_invalid, status);
af39bc8c 5312 return float64_default_nan(status);
374dfc33
AJ
5313 }
5314 if ( aExp == 0x7FF ) {
ff32e16e
PM
5315 if (aSig) {
5316 return propagateFloat64NaN(a, float64_zero, status);
5317 }
374dfc33
AJ
5318 return a;
5319 }
5320
5321 aExp -= 0x3FF;
e9321124 5322 aSig |= UINT64_C(0x0010000000000000);
374dfc33 5323 zSign = aExp < 0;
bb98fe42 5324 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5325 for (i = 1LL << 51; i > 0; i >>= 1) {
5326 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5327 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
e9321124 5328 if ( aSig & UINT64_C(0x0020000000000000) ) {
374dfc33
AJ
5329 aSig >>= 1;
5330 zSig |= i;
5331 }
5332 }
5333
5334 if ( zSign )
5335 zSig = -zSig;
ff32e16e 5336 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5337}
5338
158142c2
FB
5339/*----------------------------------------------------------------------------
5340| Returns the result of converting the extended double-precision floating-
5341| point value `a' to the 32-bit two's complement integer format. The
5342| conversion is performed according to the IEC/IEEE Standard for Binary
5343| Floating-Point Arithmetic---which means in particular that the conversion
5344| is rounded according to the current rounding mode. If `a' is a NaN, the
5345| largest positive integer is returned. Otherwise, if the conversion
5346| overflows, the largest integer with the same sign as `a' is returned.
5347*----------------------------------------------------------------------------*/
5348
f4014512 5349int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2 5350{
c120391c 5351 bool aSign;
f4014512 5352 int32_t aExp, shiftCount;
bb98fe42 5353 uint64_t aSig;
158142c2 5354
d1eb8f2a
AD
5355 if (floatx80_invalid_encoding(a)) {
5356 float_raise(float_flag_invalid, status);
5357 return 1 << 31;
5358 }
158142c2
FB
5359 aSig = extractFloatx80Frac( a );
5360 aExp = extractFloatx80Exp( a );
5361 aSign = extractFloatx80Sign( a );
bb98fe42 5362 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5363 shiftCount = 0x4037 - aExp;
5364 if ( shiftCount <= 0 ) shiftCount = 1;
5365 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5366 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5367
5368}
5369
5370/*----------------------------------------------------------------------------
5371| Returns the result of converting the extended double-precision floating-
5372| point value `a' to the 32-bit two's complement integer format. The
5373| conversion is performed according to the IEC/IEEE Standard for Binary
5374| Floating-Point Arithmetic, except that the conversion is always rounded
5375| toward zero. If `a' is a NaN, the largest positive integer is returned.
5376| Otherwise, if the conversion overflows, the largest integer with the same
5377| sign as `a' is returned.
5378*----------------------------------------------------------------------------*/
5379
f4014512 5380int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2 5381{
c120391c 5382 bool aSign;
f4014512 5383 int32_t aExp, shiftCount;
bb98fe42 5384 uint64_t aSig, savedASig;
b3a6a2e0 5385 int32_t z;
158142c2 5386
d1eb8f2a
AD
5387 if (floatx80_invalid_encoding(a)) {
5388 float_raise(float_flag_invalid, status);
5389 return 1 << 31;
5390 }
158142c2
FB
5391 aSig = extractFloatx80Frac( a );
5392 aExp = extractFloatx80Exp( a );
5393 aSign = extractFloatx80Sign( a );
5394 if ( 0x401E < aExp ) {
bb98fe42 5395 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5396 goto invalid;
5397 }
5398 else if ( aExp < 0x3FFF ) {
a2f2d288 5399 if (aExp || aSig) {
d82f3b2d 5400 float_raise(float_flag_inexact, status);
a2f2d288 5401 }
158142c2
FB
5402 return 0;
5403 }
5404 shiftCount = 0x403E - aExp;
5405 savedASig = aSig;
5406 aSig >>= shiftCount;
5407 z = aSig;
5408 if ( aSign ) z = - z;
5409 if ( ( z < 0 ) ^ aSign ) {
5410 invalid:
ff32e16e 5411 float_raise(float_flag_invalid, status);
bb98fe42 5412 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5413 }
5414 if ( ( aSig<<shiftCount ) != savedASig ) {
d82f3b2d 5415 float_raise(float_flag_inexact, status);
158142c2
FB
5416 }
5417 return z;
5418
5419}
5420
5421/*----------------------------------------------------------------------------
5422| Returns the result of converting the extended double-precision floating-
5423| point value `a' to the 64-bit two's complement integer format. The
5424| conversion is performed according to the IEC/IEEE Standard for Binary
5425| Floating-Point Arithmetic---which means in particular that the conversion
5426| is rounded according to the current rounding mode. If `a' is a NaN,
5427| the largest positive integer is returned. Otherwise, if the conversion
5428| overflows, the largest integer with the same sign as `a' is returned.
5429*----------------------------------------------------------------------------*/
5430
f42c2224 5431int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2 5432{
c120391c 5433 bool aSign;
f4014512 5434 int32_t aExp, shiftCount;
bb98fe42 5435 uint64_t aSig, aSigExtra;
158142c2 5436
d1eb8f2a
AD
5437 if (floatx80_invalid_encoding(a)) {
5438 float_raise(float_flag_invalid, status);
5439 return 1ULL << 63;
5440 }
158142c2
FB
5441 aSig = extractFloatx80Frac( a );
5442 aExp = extractFloatx80Exp( a );
5443 aSign = extractFloatx80Sign( a );
5444 shiftCount = 0x403E - aExp;
5445 if ( shiftCount <= 0 ) {
5446 if ( shiftCount ) {
ff32e16e 5447 float_raise(float_flag_invalid, status);
0f605c88 5448 if (!aSign || floatx80_is_any_nan(a)) {
2c217da0 5449 return INT64_MAX;
158142c2 5450 }
2c217da0 5451 return INT64_MIN;
158142c2
FB
5452 }
5453 aSigExtra = 0;
5454 }
5455 else {
5456 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5457 }
ff32e16e 5458 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5459
5460}
5461
5462/*----------------------------------------------------------------------------
5463| Returns the result of converting the extended double-precision floating-
5464| point value `a' to the 64-bit two's complement integer format. The
5465| conversion is performed according to the IEC/IEEE Standard for Binary
5466| Floating-Point Arithmetic, except that the conversion is always rounded
5467| toward zero. If `a' is a NaN, the largest positive integer is returned.
5468| Otherwise, if the conversion overflows, the largest integer with the same
5469| sign as `a' is returned.
5470*----------------------------------------------------------------------------*/
5471
f42c2224 5472int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2 5473{
c120391c 5474 bool aSign;
f4014512 5475 int32_t aExp, shiftCount;
bb98fe42 5476 uint64_t aSig;
f42c2224 5477 int64_t z;
158142c2 5478
d1eb8f2a
AD
5479 if (floatx80_invalid_encoding(a)) {
5480 float_raise(float_flag_invalid, status);
5481 return 1ULL << 63;
5482 }
158142c2
FB
5483 aSig = extractFloatx80Frac( a );
5484 aExp = extractFloatx80Exp( a );
5485 aSign = extractFloatx80Sign( a );
5486 shiftCount = aExp - 0x403E;
5487 if ( 0 <= shiftCount ) {
e9321124 5488 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
158142c2 5489 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5490 float_raise(float_flag_invalid, status);
158142c2 5491 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
2c217da0 5492 return INT64_MAX;
158142c2
FB
5493 }
5494 }
2c217da0 5495 return INT64_MIN;
158142c2
FB
5496 }
5497 else if ( aExp < 0x3FFF ) {
a2f2d288 5498 if (aExp | aSig) {
d82f3b2d 5499 float_raise(float_flag_inexact, status);
a2f2d288 5500 }
158142c2
FB
5501 return 0;
5502 }
5503 z = aSig>>( - shiftCount );
bb98fe42 5504 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
d82f3b2d 5505 float_raise(float_flag_inexact, status);
158142c2
FB
5506 }
5507 if ( aSign ) z = - z;
5508 return z;
5509
5510}
5511
5512/*----------------------------------------------------------------------------
5513| Returns the result of converting the extended double-precision floating-
5514| point value `a' to the single-precision floating-point format. The
5515| conversion is performed according to the IEC/IEEE Standard for Binary
5516| Floating-Point Arithmetic.
5517*----------------------------------------------------------------------------*/
5518
e5a41ffa 5519float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2 5520{
c120391c 5521 bool aSign;
f4014512 5522 int32_t aExp;
bb98fe42 5523 uint64_t aSig;
158142c2 5524
d1eb8f2a
AD
5525 if (floatx80_invalid_encoding(a)) {
5526 float_raise(float_flag_invalid, status);
5527 return float32_default_nan(status);
5528 }
158142c2
FB
5529 aSig = extractFloatx80Frac( a );
5530 aExp = extractFloatx80Exp( a );
5531 aSign = extractFloatx80Sign( a );
5532 if ( aExp == 0x7FFF ) {
bb98fe42 5533 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5534 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5535 status);
5536 return float32_silence_nan(res, status);
158142c2
FB
5537 }
5538 return packFloat32( aSign, 0xFF, 0 );
5539 }
5540 shift64RightJamming( aSig, 33, &aSig );
5541 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5542 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5543
5544}
5545
5546/*----------------------------------------------------------------------------
5547| Returns the result of converting the extended double-precision floating-
5548| point value `a' to the double-precision floating-point format. The
5549| conversion is performed according to the IEC/IEEE Standard for Binary
5550| Floating-Point Arithmetic.
5551*----------------------------------------------------------------------------*/
5552
e5a41ffa 5553float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2 5554{
c120391c 5555 bool aSign;
f4014512 5556 int32_t aExp;
bb98fe42 5557 uint64_t aSig, zSig;
158142c2 5558
d1eb8f2a
AD
5559 if (floatx80_invalid_encoding(a)) {
5560 float_raise(float_flag_invalid, status);
5561 return float64_default_nan(status);
5562 }
158142c2
FB
5563 aSig = extractFloatx80Frac( a );
5564 aExp = extractFloatx80Exp( a );
5565 aSign = extractFloatx80Sign( a );
5566 if ( aExp == 0x7FFF ) {
bb98fe42 5567 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5568 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5569 status);
5570 return float64_silence_nan(res, status);
158142c2
FB
5571 }
5572 return packFloat64( aSign, 0x7FF, 0 );
5573 }
5574 shift64RightJamming( aSig, 1, &zSig );
5575 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5576 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5577
5578}
5579
158142c2
FB
5580/*----------------------------------------------------------------------------
5581| Returns the result of converting the extended double-precision floating-
5582| point value `a' to the quadruple-precision floating-point format. The
5583| conversion is performed according to the IEC/IEEE Standard for Binary
5584| Floating-Point Arithmetic.
5585*----------------------------------------------------------------------------*/
5586
e5a41ffa 5587float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2 5588{
c120391c 5589 bool aSign;
0c48262d 5590 int aExp;
bb98fe42 5591 uint64_t aSig, zSig0, zSig1;
158142c2 5592
d1eb8f2a
AD
5593 if (floatx80_invalid_encoding(a)) {
5594 float_raise(float_flag_invalid, status);
5595 return float128_default_nan(status);
5596 }
158142c2
FB
5597 aSig = extractFloatx80Frac( a );
5598 aExp = extractFloatx80Exp( a );
5599 aSign = extractFloatx80Sign( a );
bb98fe42 5600 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5601 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5602 status);
5603 return float128_silence_nan(res, status);
158142c2
FB
5604 }
5605 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5606 return packFloat128( aSign, aExp, zSig0, zSig1 );
5607
5608}
5609
0f721292
LV
5610/*----------------------------------------------------------------------------
5611| Rounds the extended double-precision floating-point value `a'
5612| to the precision provided by floatx80_rounding_precision and returns the
5613| result as an extended double-precision floating-point value.
5614| The operation is performed according to the IEC/IEEE Standard for Binary
5615| Floating-Point Arithmetic.
5616*----------------------------------------------------------------------------*/
5617
5618floatx80 floatx80_round(floatx80 a, float_status *status)
5619{
5620 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5621 extractFloatx80Sign(a),
5622 extractFloatx80Exp(a),
5623 extractFloatx80Frac(a), 0, status);
5624}
5625
158142c2
FB
5626/*----------------------------------------------------------------------------
5627| Rounds the extended double-precision floating-point value `a' to an integer,
5628| and returns the result as an extended double-precision floating-point
5629| value. The operation is performed according to the IEC/IEEE Standard for
5630| Binary Floating-Point Arithmetic.
5631*----------------------------------------------------------------------------*/
5632
e5a41ffa 5633floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2 5634{
c120391c 5635 bool aSign;
f4014512 5636 int32_t aExp;
bb98fe42 5637 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5638 floatx80 z;
5639
d1eb8f2a
AD
5640 if (floatx80_invalid_encoding(a)) {
5641 float_raise(float_flag_invalid, status);
5642 return floatx80_default_nan(status);
5643 }
158142c2
FB
5644 aExp = extractFloatx80Exp( a );
5645 if ( 0x403E <= aExp ) {
bb98fe42 5646 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5647 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5648 }
5649 return a;
5650 }
5651 if ( aExp < 0x3FFF ) {
5652 if ( ( aExp == 0 )
9ecaf5cc 5653 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
158142c2
FB
5654 return a;
5655 }
d82f3b2d 5656 float_raise(float_flag_inexact, status);
158142c2 5657 aSign = extractFloatx80Sign( a );
a2f2d288 5658 switch (status->float_rounding_mode) {
158142c2 5659 case float_round_nearest_even:
bb98fe42 5660 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5661 ) {
5662 return
e9321124 5663 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5664 }
5665 break;
f9288a76
PM
5666 case float_round_ties_away:
5667 if (aExp == 0x3FFE) {
e9321124 5668 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
f9288a76
PM
5669 }
5670 break;
158142c2
FB
5671 case float_round_down:
5672 return
5673 aSign ?
e9321124 5674 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
158142c2
FB
5675 : packFloatx80( 0, 0, 0 );
5676 case float_round_up:
5677 return
5678 aSign ? packFloatx80( 1, 0, 0 )
e9321124 5679 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
3dede407
RH
5680
5681 case float_round_to_zero:
5682 break;
5683 default:
5684 g_assert_not_reached();
158142c2
FB
5685 }
5686 return packFloatx80( aSign, 0, 0 );
5687 }
5688 lastBitMask = 1;
5689 lastBitMask <<= 0x403E - aExp;
5690 roundBitsMask = lastBitMask - 1;
5691 z = a;
a2f2d288 5692 switch (status->float_rounding_mode) {
dc355b76 5693 case float_round_nearest_even:
158142c2 5694 z.low += lastBitMask>>1;
dc355b76
PM
5695 if ((z.low & roundBitsMask) == 0) {
5696 z.low &= ~lastBitMask;
5697 }
5698 break;
f9288a76
PM
5699 case float_round_ties_away:
5700 z.low += lastBitMask >> 1;
5701 break;
dc355b76
PM
5702 case float_round_to_zero:
5703 break;
5704 case float_round_up:
5705 if (!extractFloatx80Sign(z)) {
5706 z.low += roundBitsMask;
5707 }
5708 break;
5709 case float_round_down:
5710 if (extractFloatx80Sign(z)) {
158142c2
FB
5711 z.low += roundBitsMask;
5712 }
dc355b76
PM
5713 break;
5714 default:
5715 abort();
158142c2
FB
5716 }
5717 z.low &= ~ roundBitsMask;
5718 if ( z.low == 0 ) {
5719 ++z.high;
e9321124 5720 z.low = UINT64_C(0x8000000000000000);
158142c2 5721 }
a2f2d288 5722 if (z.low != a.low) {
d82f3b2d 5723 float_raise(float_flag_inexact, status);
a2f2d288 5724 }
158142c2
FB
5725 return z;
5726
5727}
5728
5729/*----------------------------------------------------------------------------
5730| Returns the result of adding the absolute values of the extended double-
5731| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5732| negated before being returned. `zSign' is ignored if the result is a NaN.
5733| The addition is performed according to the IEC/IEEE Standard for Binary
5734| Floating-Point Arithmetic.
5735*----------------------------------------------------------------------------*/
5736
c120391c 5737static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5738 float_status *status)
158142c2 5739{
f4014512 5740 int32_t aExp, bExp, zExp;
bb98fe42 5741 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5742 int32_t expDiff;
158142c2
FB
5743
5744 aSig = extractFloatx80Frac( a );
5745 aExp = extractFloatx80Exp( a );
5746 bSig = extractFloatx80Frac( b );
5747 bExp = extractFloatx80Exp( b );
5748 expDiff = aExp - bExp;
5749 if ( 0 < expDiff ) {
5750 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5751 if ((uint64_t)(aSig << 1)) {
5752 return propagateFloatx80NaN(a, b, status);
5753 }
158142c2
FB
5754 return a;
5755 }
5756 if ( bExp == 0 ) --expDiff;
5757 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5758 zExp = aExp;
5759 }
5760 else if ( expDiff < 0 ) {
5761 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5762 if ((uint64_t)(bSig << 1)) {
5763 return propagateFloatx80NaN(a, b, status);
5764 }
0f605c88
LV
5765 return packFloatx80(zSign,
5766 floatx80_infinity_high,
5767 floatx80_infinity_low);
158142c2
FB
5768 }
5769 if ( aExp == 0 ) ++expDiff;
5770 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5771 zExp = bExp;
5772 }
5773 else {
5774 if ( aExp == 0x7FFF ) {
bb98fe42 5775 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5776 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5777 }
5778 return a;
5779 }
5780 zSig1 = 0;
5781 zSig0 = aSig + bSig;
5782 if ( aExp == 0 ) {
41602807
JM
5783 if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
5784 /* At least one of the values is a pseudo-denormal,
5785 * and there is a carry out of the result. */
5786 zExp = 1;
5787 goto shiftRight1;
5788 }
2f311075
RH
5789 if (zSig0 == 0) {
5790 return packFloatx80(zSign, 0, 0);
5791 }
158142c2
FB
5792 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5793 goto roundAndPack;
5794 }
5795 zExp = aExp;
5796 goto shiftRight1;
5797 }
5798 zSig0 = aSig + bSig;
bb98fe42 5799 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5800 shiftRight1:
5801 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
e9321124 5802 zSig0 |= UINT64_C(0x8000000000000000);
158142c2
FB
5803 ++zExp;
5804 roundAndPack:
a2f2d288 5805 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5806 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5807}
5808
5809/*----------------------------------------------------------------------------
5810| Returns the result of subtracting the absolute values of the extended
5811| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5812| difference is negated before being returned. `zSign' is ignored if the
5813| result is a NaN. The subtraction is performed according to the IEC/IEEE
5814| Standard for Binary Floating-Point Arithmetic.
5815*----------------------------------------------------------------------------*/
5816
c120391c 5817static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5818 float_status *status)
158142c2 5819{
f4014512 5820 int32_t aExp, bExp, zExp;
bb98fe42 5821 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5822 int32_t expDiff;
158142c2
FB
5823
5824 aSig = extractFloatx80Frac( a );
5825 aExp = extractFloatx80Exp( a );
5826 bSig = extractFloatx80Frac( b );
5827 bExp = extractFloatx80Exp( b );
5828 expDiff = aExp - bExp;
5829 if ( 0 < expDiff ) goto aExpBigger;
5830 if ( expDiff < 0 ) goto bExpBigger;
5831 if ( aExp == 0x7FFF ) {
bb98fe42 5832 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5833 return propagateFloatx80NaN(a, b, status);
158142c2 5834 }
ff32e16e 5835 float_raise(float_flag_invalid, status);
af39bc8c 5836 return floatx80_default_nan(status);
158142c2
FB
5837 }
5838 if ( aExp == 0 ) {
5839 aExp = 1;
5840 bExp = 1;
5841 }
5842 zSig1 = 0;
5843 if ( bSig < aSig ) goto aBigger;
5844 if ( aSig < bSig ) goto bBigger;
a2f2d288 5845 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5846 bExpBigger:
5847 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5848 if ((uint64_t)(bSig << 1)) {
5849 return propagateFloatx80NaN(a, b, status);
5850 }
0f605c88
LV
5851 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5852 floatx80_infinity_low);
158142c2
FB
5853 }
5854 if ( aExp == 0 ) ++expDiff;
5855 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5856 bBigger:
5857 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5858 zExp = bExp;
5859 zSign ^= 1;
5860 goto normalizeRoundAndPack;
5861 aExpBigger:
5862 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5863 if ((uint64_t)(aSig << 1)) {
5864 return propagateFloatx80NaN(a, b, status);
5865 }
158142c2
FB
5866 return a;
5867 }
5868 if ( bExp == 0 ) --expDiff;
5869 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5870 aBigger:
5871 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5872 zExp = aExp;
5873 normalizeRoundAndPack:
a2f2d288 5874 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5875 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5876}
5877
5878/*----------------------------------------------------------------------------
5879| Returns the result of adding the extended double-precision floating-point
5880| values `a' and `b'. The operation is performed according to the IEC/IEEE
5881| Standard for Binary Floating-Point Arithmetic.
5882*----------------------------------------------------------------------------*/
5883
e5a41ffa 5884floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2 5885{
c120391c 5886 bool aSign, bSign;
158142c2 5887
d1eb8f2a
AD
5888 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5889 float_raise(float_flag_invalid, status);
5890 return floatx80_default_nan(status);
5891 }
158142c2
FB
5892 aSign = extractFloatx80Sign( a );
5893 bSign = extractFloatx80Sign( b );
5894 if ( aSign == bSign ) {
ff32e16e 5895 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5896 }
5897 else {
ff32e16e 5898 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5899 }
5900
5901}
5902
5903/*----------------------------------------------------------------------------
5904| Returns the result of subtracting the extended double-precision floating-
5905| point values `a' and `b'. The operation is performed according to the
5906| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5907*----------------------------------------------------------------------------*/
5908
e5a41ffa 5909floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2 5910{
c120391c 5911 bool aSign, bSign;
158142c2 5912
d1eb8f2a
AD
5913 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5914 float_raise(float_flag_invalid, status);
5915 return floatx80_default_nan(status);
5916 }
158142c2
FB
5917 aSign = extractFloatx80Sign( a );
5918 bSign = extractFloatx80Sign( b );
5919 if ( aSign == bSign ) {
ff32e16e 5920 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5921 }
5922 else {
ff32e16e 5923 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5924 }
5925
5926}
5927
5928/*----------------------------------------------------------------------------
5929| Returns the result of multiplying the extended double-precision floating-
5930| point values `a' and `b'. The operation is performed according to the
5931| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5932*----------------------------------------------------------------------------*/
5933
e5a41ffa 5934floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2 5935{
c120391c 5936 bool aSign, bSign, zSign;
f4014512 5937 int32_t aExp, bExp, zExp;
bb98fe42 5938 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 5939
d1eb8f2a
AD
5940 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5941 float_raise(float_flag_invalid, status);
5942 return floatx80_default_nan(status);
5943 }
158142c2
FB
5944 aSig = extractFloatx80Frac( a );
5945 aExp = extractFloatx80Exp( a );
5946 aSign = extractFloatx80Sign( a );
5947 bSig = extractFloatx80Frac( b );
5948 bExp = extractFloatx80Exp( b );
5949 bSign = extractFloatx80Sign( b );
5950 zSign = aSign ^ bSign;
5951 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5952 if ( (uint64_t) ( aSig<<1 )
5953 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5954 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5955 }
5956 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
5957 return packFloatx80(zSign, floatx80_infinity_high,
5958 floatx80_infinity_low);
158142c2
FB
5959 }
5960 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5961 if ((uint64_t)(bSig << 1)) {
5962 return propagateFloatx80NaN(a, b, status);
5963 }
158142c2
FB
5964 if ( ( aExp | aSig ) == 0 ) {
5965 invalid:
ff32e16e 5966 float_raise(float_flag_invalid, status);
af39bc8c 5967 return floatx80_default_nan(status);
158142c2 5968 }
0f605c88
LV
5969 return packFloatx80(zSign, floatx80_infinity_high,
5970 floatx80_infinity_low);
158142c2
FB
5971 }
5972 if ( aExp == 0 ) {
5973 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5974 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5975 }
5976 if ( bExp == 0 ) {
5977 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5978 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5979 }
5980 zExp = aExp + bExp - 0x3FFE;
5981 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 5982 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
5983 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5984 --zExp;
5985 }
a2f2d288 5986 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5987 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5988}
5989
5990/*----------------------------------------------------------------------------
5991| Returns the result of dividing the extended double-precision floating-point
5992| value `a' by the corresponding value `b'. The operation is performed
5993| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5994*----------------------------------------------------------------------------*/
5995
e5a41ffa 5996floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2 5997{
c120391c 5998 bool aSign, bSign, zSign;
f4014512 5999 int32_t aExp, bExp, zExp;
bb98fe42
AF
6000 uint64_t aSig, bSig, zSig0, zSig1;
6001 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6002
d1eb8f2a
AD
6003 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6004 float_raise(float_flag_invalid, status);
6005 return floatx80_default_nan(status);
6006 }
158142c2
FB
6007 aSig = extractFloatx80Frac( a );
6008 aExp = extractFloatx80Exp( a );
6009 aSign = extractFloatx80Sign( a );
6010 bSig = extractFloatx80Frac( b );
6011 bExp = extractFloatx80Exp( b );
6012 bSign = extractFloatx80Sign( b );
6013 zSign = aSign ^ bSign;
6014 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6015 if ((uint64_t)(aSig << 1)) {
6016 return propagateFloatx80NaN(a, b, status);
6017 }
158142c2 6018 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6019 if ((uint64_t)(bSig << 1)) {
6020 return propagateFloatx80NaN(a, b, status);
6021 }
158142c2
FB
6022 goto invalid;
6023 }
0f605c88
LV
6024 return packFloatx80(zSign, floatx80_infinity_high,
6025 floatx80_infinity_low);
158142c2
FB
6026 }
6027 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6028 if ((uint64_t)(bSig << 1)) {
6029 return propagateFloatx80NaN(a, b, status);
6030 }
158142c2
FB
6031 return packFloatx80( zSign, 0, 0 );
6032 }
6033 if ( bExp == 0 ) {
6034 if ( bSig == 0 ) {
6035 if ( ( aExp | aSig ) == 0 ) {
6036 invalid:
ff32e16e 6037 float_raise(float_flag_invalid, status);
af39bc8c 6038 return floatx80_default_nan(status);
158142c2 6039 }
ff32e16e 6040 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6041 return packFloatx80(zSign, floatx80_infinity_high,
6042 floatx80_infinity_low);
158142c2
FB
6043 }
6044 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6045 }
6046 if ( aExp == 0 ) {
6047 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6048 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6049 }
6050 zExp = aExp - bExp + 0x3FFE;
6051 rem1 = 0;
6052 if ( bSig <= aSig ) {
6053 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6054 ++zExp;
6055 }
6056 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6057 mul64To128( bSig, zSig0, &term0, &term1 );
6058 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 6059 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6060 --zSig0;
6061 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6062 }
6063 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 6064 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6065 mul64To128( bSig, zSig1, &term1, &term2 );
6066 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6067 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6068 --zSig1;
6069 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6070 }
6071 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6072 }
a2f2d288 6073 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6074 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6075}
6076
6077/*----------------------------------------------------------------------------
6078| Returns the remainder of the extended double-precision floating-point value
6079| `a' with respect to the corresponding value `b'. The operation is performed
6b8b0136
JM
6080| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6081| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
445810ec
JM
6082| the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6083| the absolute value of the integer quotient.
158142c2
FB
6084*----------------------------------------------------------------------------*/
6085
445810ec 6086floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6b8b0136 6087 float_status *status)
158142c2 6088{
c120391c 6089 bool aSign, zSign;
b662495d 6090 int32_t aExp, bExp, expDiff, aExpOrig;
bb98fe42
AF
6091 uint64_t aSig0, aSig1, bSig;
6092 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6093
445810ec 6094 *quotient = 0;
d1eb8f2a
AD
6095 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6096 float_raise(float_flag_invalid, status);
6097 return floatx80_default_nan(status);
6098 }
158142c2 6099 aSig0 = extractFloatx80Frac( a );
b662495d 6100 aExpOrig = aExp = extractFloatx80Exp( a );
158142c2
FB
6101 aSign = extractFloatx80Sign( a );
6102 bSig = extractFloatx80Frac( b );
6103 bExp = extractFloatx80Exp( b );
158142c2 6104 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6105 if ( (uint64_t) ( aSig0<<1 )
6106 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6107 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6108 }
6109 goto invalid;
6110 }
6111 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6112 if ((uint64_t)(bSig << 1)) {
6113 return propagateFloatx80NaN(a, b, status);
6114 }
b662495d
JM
6115 if (aExp == 0 && aSig0 >> 63) {
6116 /*
6117 * Pseudo-denormal argument must be returned in normalized
6118 * form.
6119 */
6120 return packFloatx80(aSign, 1, aSig0);
6121 }
158142c2
FB
6122 return a;
6123 }
6124 if ( bExp == 0 ) {
6125 if ( bSig == 0 ) {
6126 invalid:
ff32e16e 6127 float_raise(float_flag_invalid, status);
af39bc8c 6128 return floatx80_default_nan(status);
158142c2
FB
6129 }
6130 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6131 }
6132 if ( aExp == 0 ) {
499a2f7b 6133 if ( aSig0 == 0 ) return a;
158142c2
FB
6134 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6135 }
158142c2
FB
6136 zSign = aSign;
6137 expDiff = aExp - bExp;
6138 aSig1 = 0;
6139 if ( expDiff < 0 ) {
b662495d
JM
6140 if ( mod || expDiff < -1 ) {
6141 if (aExp == 1 && aExpOrig == 0) {
6142 /*
6143 * Pseudo-denormal argument must be returned in
6144 * normalized form.
6145 */
6146 return packFloatx80(aSign, aExp, aSig0);
6147 }
6148 return a;
6149 }
158142c2
FB
6150 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6151 expDiff = 0;
6152 }
445810ec 6153 *quotient = q = ( bSig <= aSig0 );
158142c2
FB
6154 if ( q ) aSig0 -= bSig;
6155 expDiff -= 64;
6156 while ( 0 < expDiff ) {
6157 q = estimateDiv128To64( aSig0, aSig1, bSig );
6158 q = ( 2 < q ) ? q - 2 : 0;
6159 mul64To128( bSig, q, &term0, &term1 );
6160 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6161 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6162 expDiff -= 62;
445810ec
JM
6163 *quotient <<= 62;
6164 *quotient += q;
158142c2
FB
6165 }
6166 expDiff += 64;
6167 if ( 0 < expDiff ) {
6168 q = estimateDiv128To64( aSig0, aSig1, bSig );
6169 q = ( 2 < q ) ? q - 2 : 0;
6170 q >>= 64 - expDiff;
6171 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6172 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6173 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6174 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6175 ++q;
6176 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6177 }
445810ec
JM
6178 if (expDiff < 64) {
6179 *quotient <<= expDiff;
6180 } else {
6181 *quotient = 0;
6182 }
6183 *quotient += q;
158142c2
FB
6184 }
6185 else {
6186 term1 = 0;
6187 term0 = bSig;
6188 }
6b8b0136
JM
6189 if (!mod) {
6190 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6191 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6192 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6193 && ( q & 1 ) )
6194 ) {
6195 aSig0 = alternateASig0;
6196 aSig1 = alternateASig1;
6197 zSign = ! zSign;
445810ec 6198 ++*quotient;
6b8b0136 6199 }
158142c2
FB
6200 }
6201 return
6202 normalizeRoundAndPackFloatx80(
ff32e16e 6203 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6204
6205}
6206
6b8b0136
JM
6207/*----------------------------------------------------------------------------
6208| Returns the remainder of the extended double-precision floating-point value
6209| `a' with respect to the corresponding value `b'. The operation is performed
6210| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6211*----------------------------------------------------------------------------*/
6212
6213floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6214{
445810ec
JM
6215 uint64_t quotient;
6216 return floatx80_modrem(a, b, false, &quotient, status);
6b8b0136
JM
6217}
6218
6219/*----------------------------------------------------------------------------
6220| Returns the remainder of the extended double-precision floating-point value
6221| `a' with respect to the corresponding value `b', with the quotient truncated
6222| toward zero.
6223*----------------------------------------------------------------------------*/
6224
6225floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6226{
445810ec
JM
6227 uint64_t quotient;
6228 return floatx80_modrem(a, b, true, &quotient, status);
6b8b0136
JM
6229}
6230
158142c2
FB
6231/*----------------------------------------------------------------------------
6232| Returns the square root of the extended double-precision floating-point
6233| value `a'. The operation is performed according to the IEC/IEEE Standard
6234| for Binary Floating-Point Arithmetic.
6235*----------------------------------------------------------------------------*/
6236
e5a41ffa 6237floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2 6238{
c120391c 6239 bool aSign;
f4014512 6240 int32_t aExp, zExp;
bb98fe42
AF
6241 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6242 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6243
d1eb8f2a
AD
6244 if (floatx80_invalid_encoding(a)) {
6245 float_raise(float_flag_invalid, status);
6246 return floatx80_default_nan(status);
6247 }
158142c2
FB
6248 aSig0 = extractFloatx80Frac( a );
6249 aExp = extractFloatx80Exp( a );
6250 aSign = extractFloatx80Sign( a );
6251 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6252 if ((uint64_t)(aSig0 << 1)) {
6253 return propagateFloatx80NaN(a, a, status);
6254 }
158142c2
FB
6255 if ( ! aSign ) return a;
6256 goto invalid;
6257 }
6258 if ( aSign ) {
6259 if ( ( aExp | aSig0 ) == 0 ) return a;
6260 invalid:
ff32e16e 6261 float_raise(float_flag_invalid, status);
af39bc8c 6262 return floatx80_default_nan(status);
158142c2
FB
6263 }
6264 if ( aExp == 0 ) {
6265 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6266 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6267 }
6268 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6269 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6270 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6271 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6272 doubleZSig0 = zSig0<<1;
6273 mul64To128( zSig0, zSig0, &term0, &term1 );
6274 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6275 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6276 --zSig0;
6277 doubleZSig0 -= 2;
6278 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6279 }
6280 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
e9321124 6281 if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
158142c2
FB
6282 if ( zSig1 == 0 ) zSig1 = 1;
6283 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6284 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6285 mul64To128( zSig1, zSig1, &term2, &term3 );
6286 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6287 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6288 --zSig1;
6289 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6290 term3 |= 1;
6291 term2 |= doubleZSig0;
6292 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6293 }
6294 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6295 }
6296 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6297 zSig0 |= doubleZSig0;
a2f2d288
PM
6298 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6299 0, zExp, zSig0, zSig1, status);
158142c2
FB
6300}
6301
158142c2
FB
6302/*----------------------------------------------------------------------------
6303| Returns the result of converting the quadruple-precision floating-point
6304| value `a' to the extended double-precision floating-point format. The
6305| conversion is performed according to the IEC/IEEE Standard for Binary
6306| Floating-Point Arithmetic.
6307*----------------------------------------------------------------------------*/
6308
e5a41ffa 6309floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2 6310{
c120391c 6311 bool aSign;
f4014512 6312 int32_t aExp;
bb98fe42 6313 uint64_t aSig0, aSig1;
158142c2
FB
6314
6315 aSig1 = extractFloat128Frac1( a );
6316 aSig0 = extractFloat128Frac0( a );
6317 aExp = extractFloat128Exp( a );
6318 aSign = extractFloat128Sign( a );
6319 if ( aExp == 0x7FFF ) {
6320 if ( aSig0 | aSig1 ) {
7537c2b4
JM
6321 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6322 status);
6323 return floatx80_silence_nan(res, status);
158142c2 6324 }
0f605c88
LV
6325 return packFloatx80(aSign, floatx80_infinity_high,
6326 floatx80_infinity_low);
158142c2
FB
6327 }
6328 if ( aExp == 0 ) {
6329 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6330 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6331 }
6332 else {
e9321124 6333 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6334 }
6335 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6336 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6337
6338}
6339
158142c2
FB
6340/*----------------------------------------------------------------------------
6341| Returns the remainder of the quadruple-precision floating-point value `a'
6342| with respect to the corresponding value `b'. The operation is performed
6343| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6344*----------------------------------------------------------------------------*/
6345
e5a41ffa 6346float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 6347{
c120391c 6348 bool aSign, zSign;
f4014512 6349 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6350 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6351 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6352 int64_t sigMean0;
158142c2
FB
6353
6354 aSig1 = extractFloat128Frac1( a );
6355 aSig0 = extractFloat128Frac0( a );
6356 aExp = extractFloat128Exp( a );
6357 aSign = extractFloat128Sign( a );
6358 bSig1 = extractFloat128Frac1( b );
6359 bSig0 = extractFloat128Frac0( b );
6360 bExp = extractFloat128Exp( b );
158142c2
FB
6361 if ( aExp == 0x7FFF ) {
6362 if ( ( aSig0 | aSig1 )
6363 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6364 return propagateFloat128NaN(a, b, status);
158142c2
FB
6365 }
6366 goto invalid;
6367 }
6368 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6369 if (bSig0 | bSig1) {
6370 return propagateFloat128NaN(a, b, status);
6371 }
158142c2
FB
6372 return a;
6373 }
6374 if ( bExp == 0 ) {
6375 if ( ( bSig0 | bSig1 ) == 0 ) {
6376 invalid:
ff32e16e 6377 float_raise(float_flag_invalid, status);
af39bc8c 6378 return float128_default_nan(status);
158142c2
FB
6379 }
6380 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6381 }
6382 if ( aExp == 0 ) {
6383 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6384 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6385 }
6386 expDiff = aExp - bExp;
6387 if ( expDiff < -1 ) return a;
6388 shortShift128Left(
e9321124 6389 aSig0 | UINT64_C(0x0001000000000000),
158142c2
FB
6390 aSig1,
6391 15 - ( expDiff < 0 ),
6392 &aSig0,
6393 &aSig1
6394 );
6395 shortShift128Left(
e9321124 6396 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
6397 q = le128( bSig0, bSig1, aSig0, aSig1 );
6398 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6399 expDiff -= 64;
6400 while ( 0 < expDiff ) {
6401 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6402 q = ( 4 < q ) ? q - 4 : 0;
6403 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6404 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6405 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6406 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6407 expDiff -= 61;
6408 }
6409 if ( -64 < expDiff ) {
6410 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6411 q = ( 4 < q ) ? q - 4 : 0;
6412 q >>= - expDiff;
6413 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6414 expDiff += 52;
6415 if ( expDiff < 0 ) {
6416 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6417 }
6418 else {
6419 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6420 }
6421 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6422 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6423 }
6424 else {
6425 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6426 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6427 }
6428 do {
6429 alternateASig0 = aSig0;
6430 alternateASig1 = aSig1;
6431 ++q;
6432 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 6433 } while ( 0 <= (int64_t) aSig0 );
158142c2 6434 add128(
bb98fe42 6435 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
6436 if ( ( sigMean0 < 0 )
6437 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6438 aSig0 = alternateASig0;
6439 aSig1 = alternateASig1;
6440 }
bb98fe42 6441 zSign = ( (int64_t) aSig0 < 0 );
158142c2 6442 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
6443 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6444 status);
158142c2
FB
6445}
6446
71bfd65c
RH
6447static inline FloatRelation
6448floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
6449 float_status *status)
f6714d36 6450{
c120391c 6451 bool aSign, bSign;
f6714d36 6452
d1eb8f2a
AD
6453 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6454 float_raise(float_flag_invalid, status);
6455 return float_relation_unordered;
6456 }
f6714d36
AJ
6457 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6458 ( extractFloatx80Frac( a )<<1 ) ) ||
6459 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6460 ( extractFloatx80Frac( b )<<1 ) )) {
6461 if (!is_quiet ||
af39bc8c
AM
6462 floatx80_is_signaling_nan(a, status) ||
6463 floatx80_is_signaling_nan(b, status)) {
ff32e16e 6464 float_raise(float_flag_invalid, status);
f6714d36
AJ
6465 }
6466 return float_relation_unordered;
6467 }
6468 aSign = extractFloatx80Sign( a );
6469 bSign = extractFloatx80Sign( b );
6470 if ( aSign != bSign ) {
6471
6472 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6473 ( ( a.low | b.low ) == 0 ) ) {
6474 /* zero case */
6475 return float_relation_equal;
6476 } else {
6477 return 1 - (2 * aSign);
6478 }
6479 } else {
be53fa78
JM
6480 /* Normalize pseudo-denormals before comparison. */
6481 if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
6482 ++a.high;
6483 }
6484 if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
6485 ++b.high;
6486 }
f6714d36
AJ
6487 if (a.low == b.low && a.high == b.high) {
6488 return float_relation_equal;
6489 } else {
6490 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6491 }
6492 }
6493}
6494
71bfd65c 6495FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 6496{
ff32e16e 6497 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
6498}
6499
71bfd65c
RH
6500FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6501 float_status *status)
f6714d36 6502{
ff32e16e 6503 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
6504}
6505
e5a41ffa 6506floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb 6507{
c120391c 6508 bool aSign;
326b9e98 6509 int32_t aExp;
bb98fe42 6510 uint64_t aSig;
9ee6e8bb 6511
d1eb8f2a
AD
6512 if (floatx80_invalid_encoding(a)) {
6513 float_raise(float_flag_invalid, status);
6514 return floatx80_default_nan(status);
6515 }
9ee6e8bb
PB
6516 aSig = extractFloatx80Frac( a );
6517 aExp = extractFloatx80Exp( a );
6518 aSign = extractFloatx80Sign( a );
6519
326b9e98
AJ
6520 if ( aExp == 0x7FFF ) {
6521 if ( aSig<<1 ) {
ff32e16e 6522 return propagateFloatx80NaN(a, a, status);
326b9e98 6523 }
9ee6e8bb
PB
6524 return a;
6525 }
326b9e98 6526
3c85c37f
PM
6527 if (aExp == 0) {
6528 if (aSig == 0) {
6529 return a;
6530 }
6531 aExp++;
6532 }
69397542 6533
326b9e98
AJ
6534 if (n > 0x10000) {
6535 n = 0x10000;
6536 } else if (n < -0x10000) {
6537 n = -0x10000;
6538 }
6539
9ee6e8bb 6540 aExp += n;
a2f2d288
PM
6541 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6542 aSign, aExp, aSig, 0, status);
9ee6e8bb 6543}
9ee6e8bb 6544
f6b3b108
EC
6545static void __attribute__((constructor)) softfloat_init(void)
6546{
6547 union_float64 ua, ub, uc, ur;
6548
6549 if (QEMU_NO_HARDFLOAT) {
6550 return;
6551 }
6552 /*
6553 * Test that the host's FMA is not obviously broken. For example,
6554 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6555 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6556 */
6557 ua.s = 0x0020000000000001ULL;
6558 ub.s = 0x3ca0000000000000ULL;
6559 uc.s = 0x0020000000000000ULL;
6560 ur.h = fma(ua.h, ub.h, uc.h);
6561 if (ur.s != 0x0020000000000001ULL) {
6562 force_soft_fma = true;
6563 }
6564}