]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
softfloat: Split out parts_uncanon_normal
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
d82f3b2d 135 float_raise(float_flag_input_denormal, s); \
a94b7839
EC
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
185/*
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
189 */
/* The float32 flavors never use fpclassify, whatever the host is. */
#define QEMU_HARDFLOAT_1F32_USE_FP 0
#define QEMU_HARDFLOAT_2F32_USE_FP 0
#define QEMU_HARDFLOAT_3F32_USE_FP 0

/* The float64 flavors use fpclassify on x86_64 hosts only. */
#ifdef __x86_64__
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205
206/*
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211 */
/* isinf() is selected on x86_64 and aarch64 hosts only. */
#if !defined(__x86_64__) && !defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF 0
#else
# define QEMU_HARDFLOAT_USE_ISINF 1
#endif
217
218/*
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
221 * already set.
222 */
/* Tell the user why hardfloat is being switched off under -ffast-math. */
#ifdef __FAST_MATH__
# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
#endif

/*
 * Hardfloat stays enabled unless building for TARGET_PPC or with
 * -ffast-math.  When it is enabled, the softfloat entry points are also
 * marked noinline.
 */
#if !defined(TARGET_PPC) && !defined(__FAST_MATH__)
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#else
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
a94b7839
EC
342static inline float32
343float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
b240c9c4 345 f32_check_fn pre, f32_check_fn post)
a94b7839
EC
346{
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
a94b7839
EC
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 363 float_raise(float_flag_overflow, s);
b240c9c4
RH
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
a94b7839
EC
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371}
372
373static inline float64
374float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
b240c9c4 376 f64_check_fn pre, f64_check_fn post)
a94b7839
EC
377{
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
a94b7839
EC
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 394 float_raise(float_flag_overflow, s);
b240c9c4
RH
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
a94b7839
EC
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402}
403
d97544c9
AB
404/*----------------------------------------------------------------------------
405| Returns the fraction bits of the single-precision floating-point value `a'.
406*----------------------------------------------------------------------------*/
407
408static inline uint32_t extractFloat32Frac(float32 a)
409{
410 return float32_val(a) & 0x007FFFFF;
411}
412
413/*----------------------------------------------------------------------------
414| Returns the exponent bits of the single-precision floating-point value `a'.
415*----------------------------------------------------------------------------*/
416
417static inline int extractFloat32Exp(float32 a)
418{
419 return (float32_val(a) >> 23) & 0xFF;
420}
421
422/*----------------------------------------------------------------------------
423| Returns the sign bit of the single-precision floating-point value `a'.
424*----------------------------------------------------------------------------*/
425
c120391c 426static inline bool extractFloat32Sign(float32 a)
d97544c9
AB
427{
428 return float32_val(a) >> 31;
429}
430
431/*----------------------------------------------------------------------------
432| Returns the fraction bits of the double-precision floating-point value `a'.
433*----------------------------------------------------------------------------*/
434
435static inline uint64_t extractFloat64Frac(float64 a)
436{
e9321124 437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
d97544c9
AB
438}
439
440/*----------------------------------------------------------------------------
441| Returns the exponent bits of the double-precision floating-point value `a'.
442*----------------------------------------------------------------------------*/
443
444static inline int extractFloat64Exp(float64 a)
445{
446 return (float64_val(a) >> 52) & 0x7FF;
447}
448
449/*----------------------------------------------------------------------------
450| Returns the sign bit of the double-precision floating-point value `a'.
451*----------------------------------------------------------------------------*/
452
c120391c 453static inline bool extractFloat64Sign(float64 a)
d97544c9
AB
454{
455 return float64_val(a) >> 63;
456}
457
a90119b5
AB
/*
 * Classification of a floating point number.  The NaN classes come last,
 * so any value cls >= float_class_qnan denotes a NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified = 0,
    float_class_zero         = 1,
    float_class_normal       = 2,
    float_class_inf          = 3,
    float_class_qnan         = 4,  /* all NaNs from here */
    float_class_snan         = 5,
} FloatClass;
471
134eda00
RH
472#define float_cmask(bit) (1u << (bit))
473
474enum {
475 float_cmask_zero = float_cmask(float_class_zero),
476 float_cmask_normal = float_cmask(float_class_normal),
477 float_cmask_inf = float_cmask(float_class_inf),
478 float_cmask_qnan = float_cmask(float_class_qnan),
479 float_cmask_snan = float_cmask(float_class_snan),
480
481 float_cmask_infzero = float_cmask_zero | float_cmask_inf,
482 float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
483};
484
e1c4667a
RH
/* Flags for parts_minmax; the values are independent bits. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1 << 0,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 1 << 1,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 1 << 2,
};
134eda00 494
247d1f21
RH
495/* Simple helpers for checking if, or what kind of, NaN we have */
496static inline __attribute__((unused)) bool is_nan(FloatClass c)
497{
498 return unlikely(c >= float_class_qnan);
499}
500
501static inline __attribute__((unused)) bool is_snan(FloatClass c)
502{
503 return c == float_class_snan;
504}
505
506static inline __attribute__((unused)) bool is_qnan(FloatClass c)
507{
508 return c == float_class_qnan;
509}
510
a90119b5 511/*
0018b1f4
RH
512 * Structure holding all of the decomposed parts of a float.
513 * The exponent is unbiased and the fraction is normalized.
a90119b5 514 *
0018b1f4
RH
515 * The fraction words are stored in big-endian word ordering,
516 * so that truncation from a larger format to a smaller format
517 * can be done simply by ignoring subsequent elements.
a90119b5
AB
518 */
519
520typedef struct {
a90119b5
AB
521 FloatClass cls;
522 bool sign;
4109b9ea
RH
523 int32_t exp;
524 union {
525 /* Routines that know the structure may reference the singular name. */
526 uint64_t frac;
527 /*
528 * Routines expanded with multiple structures reference "hi" and "lo"
529 * depending on the operation. In FloatParts64, "hi" and "lo" are
530 * both the same word and aliased here.
531 */
532 uint64_t frac_hi;
533 uint64_t frac_lo;
534 };
f8155c1d 535} FloatParts64;
a90119b5 536
0018b1f4
RH
537typedef struct {
538 FloatClass cls;
539 bool sign;
540 int32_t exp;
541 uint64_t frac_hi;
542 uint64_t frac_lo;
543} FloatParts128;
544
aca84527
RH
545typedef struct {
546 FloatClass cls;
547 bool sign;
548 int32_t exp;
549 uint64_t frac_hi;
550 uint64_t frac_hm; /* high-middle */
551 uint64_t frac_lm; /* low-middle */
552 uint64_t frac_lo;
553} FloatParts256;
554
/* Both values refer to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
a90119b5
AB
558
/*
 * Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction:
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/*
 * Expand the FloatFmt fields derived from the exponent size E and the
 * fraction size F, for use inside a designated initializer.
 *
 * Both arguments are parenthesized at every use, so passing an expression
 * such as "20 + 3" gives the same result as passing its value.
 */
#define FLOAT_PARAMS(E, F)                                      \
    .exp_size       = (E),                                      \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                    \
    .exp_max        = (1 << (E)) - 1,                           \
    .frac_size      = (F),                                      \
    .frac_shift     = (-(F) - 1) & 63,                          \
    .frac_lsb       = 1ull << ((-(F) - 1) & 63),                \
    .frac_lsbm1     = 1ull << ((-(F) - 2) & 63),                \
    .round_mask     = (1ull << ((-(F) - 1) & 63)) - 1,          \
    .roundeven_mask = (2ull << ((-(F) - 1) & 63)) - 1
a90119b5
AB
596
597static const FloatFmt float16_params = {
598 FLOAT_PARAMS(5, 10)
599};
600
6fed16b2
AB
601static const FloatFmt float16_params_ahp = {
602 FLOAT_PARAMS(5, 10),
603 .arm_althp = true
604};
605
8282310d
LZ
606static const FloatFmt bfloat16_params = {
607 FLOAT_PARAMS(8, 7)
608};
609
a90119b5
AB
610static const FloatFmt float32_params = {
611 FLOAT_PARAMS(8, 23)
612};
613
614static const FloatFmt float64_params = {
615 FLOAT_PARAMS(11, 52)
616};
617
0018b1f4
RH
618static const FloatFmt float128_params = {
619 FLOAT_PARAMS(15, 112)
620};
621
6fff2167 622/* Unpack a float to parts, but do not canonicalize. */
d8fdd172 623static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
6fff2167 624{
d8fdd172
RH
625 const int f_size = fmt->frac_size;
626 const int e_size = fmt->exp_size;
6fff2167 627
d8fdd172 628 *r = (FloatParts64) {
6fff2167 629 .cls = float_class_unclassified,
d8fdd172
RH
630 .sign = extract64(raw, f_size + e_size, 1),
631 .exp = extract64(raw, f_size, e_size),
632 .frac = extract64(raw, 0, f_size)
6fff2167
AB
633 };
634}
635
3dddb203 636static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
6fff2167 637{
3dddb203 638 unpack_raw64(p, &float16_params, f);
6fff2167
AB
639}
640
3dddb203 641static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
8282310d 642{
3dddb203 643 unpack_raw64(p, &bfloat16_params, f);
8282310d
LZ
644}
645
3dddb203 646static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
6fff2167 647{
3dddb203 648 unpack_raw64(p, &float32_params, f);
6fff2167
AB
649}
650
3dddb203 651static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
6fff2167 652{
3dddb203 653 unpack_raw64(p, &float64_params, f);
6fff2167
AB
654}
655
0018b1f4
RH
656static void float128_unpack_raw(FloatParts128 *p, float128 f)
657{
658 const int f_size = float128_params.frac_size - 64;
659 const int e_size = float128_params.exp_size;
660
661 *p = (FloatParts128) {
662 .cls = float_class_unclassified,
663 .sign = extract64(f.high, f_size + e_size, 1),
664 .exp = extract64(f.high, f_size, e_size),
665 .frac_hi = extract64(f.high, 0, f_size),
666 .frac_lo = f.low,
667 };
668}
669
6fff2167 670/* Pack a float from parts, but do not canonicalize. */
9e4af58c 671static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
6fff2167 672{
9e4af58c
RH
673 const int f_size = fmt->frac_size;
674 const int e_size = fmt->exp_size;
675 uint64_t ret;
676
677 ret = (uint64_t)p->sign << (f_size + e_size);
678 ret = deposit64(ret, f_size, e_size, p->exp);
679 ret = deposit64(ret, 0, f_size, p->frac);
680 return ret;
6fff2167
AB
681}
682
71fd178e 683static inline float16 float16_pack_raw(const FloatParts64 *p)
6fff2167 684{
71fd178e 685 return make_float16(pack_raw64(p, &float16_params));
6fff2167
AB
686}
687
71fd178e 688static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
8282310d 689{
71fd178e 690 return pack_raw64(p, &bfloat16_params);
8282310d
LZ
691}
692
71fd178e 693static inline float32 float32_pack_raw(const FloatParts64 *p)
6fff2167 694{
71fd178e 695 return make_float32(pack_raw64(p, &float32_params));
6fff2167
AB
696}
697
71fd178e 698static inline float64 float64_pack_raw(const FloatParts64 *p)
6fff2167 699{
71fd178e 700 return make_float64(pack_raw64(p, &float64_params));
6fff2167
AB
701}
702
0018b1f4
RH
703static float128 float128_pack_raw(const FloatParts128 *p)
704{
705 const int f_size = float128_params.frac_size - 64;
706 const int e_size = float128_params.exp_size;
707 uint64_t hi;
708
709 hi = (uint64_t)p->sign << (f_size + e_size);
710 hi = deposit64(hi, f_size, e_size, p->exp);
711 hi = deposit64(hi, 0, f_size, p->frac_hi);
712 return make_float128(hi, p->frac_lo);
713}
714
0664335a
RH
715/*----------------------------------------------------------------------------
716| Functions and definitions to determine: (1) whether tininess for underflow
717| is detected before or after rounding by default, (2) what (if anything)
718| happens when exceptions are raised, (3) how signaling NaNs are distinguished
719| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
720| are propagated from function inputs to output. These details are target-
721| specific.
722*----------------------------------------------------------------------------*/
139c1837 723#include "softfloat-specialize.c.inc"
0664335a 724
0018b1f4
RH
/*
 * Select the partsN_NAME implementation from the type of P through
 * QEMU_GENERIC: FloatParts128 * is listed explicitly and parts64_NAME is
 * the trailing argument.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

/* As above, with FloatParts256 * additionally mapped to parts256_NAME. */
#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define parts_default_nan(P, S) \
    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S) \
    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
734
7c45bad8
RH
735static void parts64_return_nan(FloatParts64 *a, float_status *s);
736static void parts128_return_nan(FloatParts128 *a, float_status *s);
737
738#define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
0018b1f4 739
22c355f4
RH
740static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
741 float_status *s);
742static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
743 float_status *s);
744
745#define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
746
979582d0
RH
747static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
748 FloatParts64 *c, float_status *s,
749 int ab_mask, int abc_mask);
750static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
751 FloatParts128 *b,
752 FloatParts128 *c,
753 float_status *s,
754 int ab_mask, int abc_mask);
755
756#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
757 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
758
d46975bc
RH
759static void parts64_canonicalize(FloatParts64 *p, float_status *status,
760 const FloatFmt *fmt);
761static void parts128_canonicalize(FloatParts128 *p, float_status *status,
762 const FloatFmt *fmt);
763
764#define parts_canonicalize(A, S, F) \
765 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
766
25fdedf0
RH
767static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
768 const FloatFmt *fmt);
769static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
770 const FloatFmt *fmt);
771
772#define parts_uncanon_normal(A, S, F) \
773 PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
774
ee6959f2
RH
775static void parts64_uncanon(FloatParts64 *p, float_status *status,
776 const FloatFmt *fmt);
777static void parts128_uncanon(FloatParts128 *p, float_status *status,
778 const FloatFmt *fmt);
779
780#define parts_uncanon(A, S, F) \
781 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
782
da10a907
RH
783static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
784static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 785static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
786
787#define parts_add_normal(A, B) \
dedd123c 788 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
da10a907
RH
789
790static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
791static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 792static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
793
794#define parts_sub_normal(A, B) \
dedd123c 795 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
da10a907
RH
796
797static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
798 float_status *s, bool subtract);
799static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
800 float_status *s, bool subtract);
801
802#define parts_addsub(A, B, S, Z) \
803 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
804
aca84527
RH
805static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
806 float_status *s);
807static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
808 float_status *s);
809
810#define parts_mul(A, B, S) \
811 PARTS_GENERIC_64_128(mul, A)(A, B, S)
812
dedd123c
RH
813static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
814 FloatParts64 *c, int flags,
815 float_status *s);
816static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
817 FloatParts128 *c, int flags,
818 float_status *s);
819
820#define parts_muladd(A, B, C, Z, S) \
821 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
822
ec961b81
RH
823static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
824 float_status *s);
825static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
826 float_status *s);
827
828#define parts_div(A, B, S) \
829 PARTS_GENERIC_64_128(div, A)(A, B, S)
830
9261b245
RH
831static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
832static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
833
834#define parts_sqrt(A, S, F) \
835 PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
836
afc34931
RH
837static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
838 int scale, int frac_size);
839static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
840 int scale, int frac_size);
841
842#define parts_round_to_int_normal(A, R, C, F) \
843 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
844
845static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
846 int scale, float_status *s,
847 const FloatFmt *fmt);
848static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
849 int scale, float_status *s,
850 const FloatFmt *fmt);
851
852#define parts_round_to_int(A, R, C, S, F) \
853 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
854
463b3f0d
RH
855static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
856 int scale, int64_t min, int64_t max,
857 float_status *s);
858static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
859 int scale, int64_t min, int64_t max,
860 float_status *s);
861
862#define parts_float_to_sint(P, R, Z, MN, MX, S) \
863 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
864
4ab4aef0
RH
865static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
866 int scale, uint64_t max,
867 float_status *s);
868static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
869 int scale, uint64_t max,
870 float_status *s);
871
872#define parts_float_to_uint(P, R, Z, M, S) \
873 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
874
e3689519
RH
875static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
876 int scale, float_status *s);
877static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
878 int scale, float_status *s);
879
880#define parts_sint_to_float(P, I, Z, S) \
881 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
882
37c954a1
RH
883static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
884 int scale, float_status *s);
885static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
886 int scale, float_status *s);
887
888#define parts_uint_to_float(P, I, Z, S) \
889 PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
890
e1c4667a
RH
891static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
892 float_status *s, int flags);
893static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
894 float_status *s, int flags);
895
896#define parts_minmax(A, B, S, F) \
897 PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
898
6eb169b8
RH
899static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
900 float_status *s, bool q);
901static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
902 float_status *s, bool q);
903
904#define parts_compare(A, B, S, Q) \
905 PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
906
39626b0c
RH
907static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
908static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
909
910#define parts_scalbn(A, N, S) \
911 PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
912
0018b1f4
RH
913/*
914 * Helper functions for softfloat-parts.c.inc, per-size operations.
915 */
916
22c355f4
RH
/*
 * Type-directed dispatch for the frac* helpers: a FloatParts128 pointer
 * selects frac128_<OP>; frac64_<OP> is the last, untyped entry handed to
 * QEMU_GENERIC.
 */
#define FRAC_GENERIC_64_128(OP, PTR) \
    QEMU_GENERIC(PTR, (FloatParts128 *, frac128_##OP), frac64_##OP)

/* Same as above with an additional FloatParts256 entry. */
#define FRAC_GENERIC_64_128_256(OP, PTR) \
    QEMU_GENERIC(PTR, (FloatParts256 *, frac256_##OP), \
                 (FloatParts128 *, frac128_##OP), frac64_##OP)
923
da10a907
RH
924static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
925{
926 return uadd64_overflow(a->frac, b->frac, &r->frac);
927}
928
929static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
930{
931 bool c = 0;
932 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
933 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
934 return c;
935}
936
dedd123c
RH
937static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
938{
939 bool c = 0;
940 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
941 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
942 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
943 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
944 return c;
945}
946
947#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
da10a907 948
ee6959f2
RH
949static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
950{
951 return uadd64_overflow(a->frac, c, &r->frac);
952}
953
954static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
955{
956 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
957 return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
958}
959
960#define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
961
962static void frac64_allones(FloatParts64 *a)
963{
964 a->frac = -1;
965}
966
967static void frac128_allones(FloatParts128 *a)
968{
969 a->frac_hi = a->frac_lo = -1;
970}
971
972#define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
973
22c355f4
RH
974static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
975{
976 return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
977}
978
979static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
980{
981 uint64_t ta = a->frac_hi, tb = b->frac_hi;
982 if (ta == tb) {
983 ta = a->frac_lo, tb = b->frac_lo;
984 if (ta == tb) {
985 return 0;
986 }
987 }
988 return ta < tb ? -1 : 1;
989}
990
991#define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
992
d46975bc 993static void frac64_clear(FloatParts64 *a)
0018b1f4 994{
d46975bc
RH
995 a->frac = 0;
996}
997
998static void frac128_clear(FloatParts128 *a)
999{
1000 a->frac_hi = a->frac_lo = 0;
0018b1f4
RH
1001}
1002
d46975bc 1003#define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
0018b1f4 1004
ec961b81
RH
1005static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
1006{
1007 uint64_t n1, n0, r, q;
1008 bool ret;
1009
1010 /*
1011 * We want a 2*N / N-bit division to produce exactly an N-bit
1012 * result, so that we do not lose any precision and so that we
1013 * do not have to renormalize afterward. If A.frac < B.frac,
1014 * then division would produce an (N-1)-bit result; shift A left
1015 * by one to produce the an N-bit result, and return true to
1016 * decrement the exponent to match.
1017 *
1018 * The udiv_qrnnd algorithm that we're using requires normalization,
1019 * i.e. the msb of the denominator must be set, which is already true.
1020 */
1021 ret = a->frac < b->frac;
1022 if (ret) {
1023 n0 = a->frac;
1024 n1 = 0;
1025 } else {
1026 n0 = a->frac >> 1;
1027 n1 = a->frac << 63;
1028 }
1029 q = udiv_qrnnd(&r, n0, n1, b->frac);
1030
1031 /* Set lsb if there is a remainder, to set inexact. */
1032 a->frac = q | (r != 0);
1033
1034 return ret;
1035}
1036
1037static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1038{
1039 uint64_t q0, q1, a0, a1, b0, b1;
1040 uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1041 bool ret = false;
1042
1043 a0 = a->frac_hi, a1 = a->frac_lo;
1044 b0 = b->frac_hi, b1 = b->frac_lo;
1045
1046 ret = lt128(a0, a1, b0, b1);
1047 if (!ret) {
1048 a1 = shr_double(a0, a1, 1);
1049 a0 = a0 >> 1;
1050 }
1051
1052 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1053 q0 = estimateDiv128To64(a0, a1, b0);
1054
1055 /*
1056 * Estimate is high because B1 was not included (unless B1 == 0).
1057 * Reduce quotient and increase remainder until remainder is non-negative.
1058 * This loop will execute 0 to 2 times.
1059 */
1060 mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1061 sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1062 while (r0 != 0) {
1063 q0--;
1064 add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1065 }
1066
1067 /* Repeat using the remainder, producing a second word of quotient. */
1068 q1 = estimateDiv128To64(r1, r2, b0);
1069 mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1070 sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1071 while (r1 != 0) {
1072 q1--;
1073 add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1074 }
1075
1076 /* Any remainder indicates inexact; set sticky bit. */
1077 q1 |= (r2 | r3) != 0;
1078
1079 a->frac_hi = q0;
1080 a->frac_lo = q1;
1081 return ret;
1082}
1083
/* Size-generic division of NUM's fraction by DEN's; dispatches on NUM. */
#define frac_div(NUM, DEN) FRAC_GENERIC_64_128(div, NUM)(NUM, DEN)
1085
d46975bc 1086static bool frac64_eqz(FloatParts64 *a)
0018b1f4 1087{
d46975bc
RH
1088 return a->frac == 0;
1089}
1090
1091static bool frac128_eqz(FloatParts128 *a)
1092{
1093 return (a->frac_hi | a->frac_lo) == 0;
0018b1f4
RH
1094}
1095
d46975bc 1096#define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
0fc07cad 1097
aca84527
RH
1098static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1099{
1100 mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1101}
1102
1103static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1104{
1105 mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1106 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1107}
1108
1109#define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1110
da10a907
RH
1111static void frac64_neg(FloatParts64 *a)
1112{
1113 a->frac = -a->frac;
1114}
1115
1116static void frac128_neg(FloatParts128 *a)
1117{
1118 bool c = 0;
1119 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1120 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1121}
1122
dedd123c
RH
1123static void frac256_neg(FloatParts256 *a)
1124{
1125 bool c = 0;
1126 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1127 a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1128 a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1129 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1130}
1131
1132#define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
da10a907 1133
d46975bc 1134static int frac64_normalize(FloatParts64 *a)
6fff2167 1135{
d46975bc
RH
1136 if (a->frac) {
1137 int shift = clz64(a->frac);
1138 a->frac <<= shift;
1139 return shift;
1140 }
1141 return 64;
1142}
1143
1144static int frac128_normalize(FloatParts128 *a)
1145{
1146 if (a->frac_hi) {
1147 int shl = clz64(a->frac_hi);
463e45dc
RH
1148 a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1149 a->frac_lo <<= shl;
d46975bc
RH
1150 return shl;
1151 } else if (a->frac_lo) {
1152 int shl = clz64(a->frac_lo);
463e45dc 1153 a->frac_hi = a->frac_lo << shl;
d46975bc
RH
1154 a->frac_lo = 0;
1155 return shl + 64;
6fff2167 1156 }
d46975bc 1157 return 128;
6fff2167
AB
1158}
1159
dedd123c
RH
1160static int frac256_normalize(FloatParts256 *a)
1161{
1162 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1163 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
463e45dc 1164 int ret, shl;
dedd123c
RH
1165
1166 if (likely(a0)) {
1167 shl = clz64(a0);
1168 if (shl == 0) {
1169 return 0;
1170 }
1171 ret = shl;
1172 } else {
1173 if (a1) {
1174 ret = 64;
1175 a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1176 } else if (a2) {
1177 ret = 128;
1178 a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1179 } else if (a3) {
1180 ret = 192;
1181 a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1182 } else {
1183 ret = 256;
1184 a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1185 goto done;
1186 }
1187 shl = clz64(a0);
1188 if (shl == 0) {
1189 goto done;
1190 }
1191 ret += shl;
1192 }
1193
463e45dc
RH
1194 a0 = shl_double(a0, a1, shl);
1195 a1 = shl_double(a1, a2, shl);
1196 a2 = shl_double(a2, a3, shl);
1197 a3 <<= shl;
dedd123c
RH
1198
1199 done:
1200 a->frac_hi = a0;
1201 a->frac_hm = a1;
1202 a->frac_lm = a2;
1203 a->frac_lo = a3;
1204 return ret;
1205}
1206
/* Size-generic normalize; yields the shift count returned by the variant. */
#define frac_normalize(PARTS) FRAC_GENERIC_64_128_256(normalize, PARTS)(PARTS)
d46975bc
RH
1208
1209static void frac64_shl(FloatParts64 *a, int c)
1210{
1211 a->frac <<= c;
1212}
1213
1214static void frac128_shl(FloatParts128 *a, int c)
1215{
463e45dc
RH
1216 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1217
1218 if (c & 64) {
1219 a0 = a1, a1 = 0;
1220 }
1221
1222 c &= 63;
1223 if (c) {
1224 a0 = shl_double(a0, a1, c);
1225 a1 = a1 << c;
1226 }
1227
1228 a->frac_hi = a0;
1229 a->frac_lo = a1;
d46975bc
RH
1230}
1231
1232#define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1233
1234static void frac64_shr(FloatParts64 *a, int c)
1235{
1236 a->frac >>= c;
1237}
1238
1239static void frac128_shr(FloatParts128 *a, int c)
1240{
463e45dc
RH
1241 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1242
1243 if (c & 64) {
1244 a1 = a0, a0 = 0;
1245 }
1246
1247 c &= 63;
1248 if (c) {
1249 a1 = shr_double(a0, a1, c);
1250 a0 = a0 >> c;
1251 }
1252
1253 a->frac_hi = a0;
1254 a->frac_lo = a1;
d46975bc
RH
1255}
1256
1257#define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1258
ee6959f2 1259static void frac64_shrjam(FloatParts64 *a, int c)
6fff2167 1260{
463e45dc
RH
1261 uint64_t a0 = a->frac;
1262
1263 if (likely(c != 0)) {
1264 if (likely(c < 64)) {
1265 a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1266 } else {
1267 a0 = a0 != 0;
1268 }
1269 a->frac = a0;
1270 }
ee6959f2 1271}
6fff2167 1272
ee6959f2
RH
1273static void frac128_shrjam(FloatParts128 *a, int c)
1274{
463e45dc
RH
1275 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1276 uint64_t sticky = 0;
1277
1278 if (unlikely(c == 0)) {
1279 return;
1280 } else if (likely(c < 64)) {
1281 /* nothing */
1282 } else if (likely(c < 128)) {
1283 sticky = a1;
1284 a1 = a0;
1285 a0 = 0;
1286 c &= 63;
1287 if (c == 0) {
1288 goto done;
1289 }
1290 } else {
1291 sticky = a0 | a1;
1292 a0 = a1 = 0;
1293 goto done;
1294 }
1295
1296 sticky |= shr_double(a1, 0, c);
1297 a1 = shr_double(a0, a1, c);
1298 a0 = a0 >> c;
1299
1300 done:
1301 a->frac_lo = a1 | (sticky != 0);
1302 a->frac_hi = a0;
6fff2167
AB
1303}
1304
dedd123c
RH
1305static void frac256_shrjam(FloatParts256 *a, int c)
1306{
1307 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1308 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1309 uint64_t sticky = 0;
dedd123c
RH
1310
1311 if (unlikely(c == 0)) {
1312 return;
1313 } else if (likely(c < 64)) {
1314 /* nothing */
1315 } else if (likely(c < 256)) {
1316 if (unlikely(c & 128)) {
1317 sticky |= a2 | a3;
1318 a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1319 }
1320 if (unlikely(c & 64)) {
1321 sticky |= a3;
1322 a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1323 }
1324 c &= 63;
1325 if (c == 0) {
1326 goto done;
1327 }
1328 } else {
1329 sticky = a0 | a1 | a2 | a3;
1330 a0 = a1 = a2 = a3 = 0;
1331 goto done;
1332 }
1333
463e45dc
RH
1334 sticky |= shr_double(a3, 0, c);
1335 a3 = shr_double(a2, a3, c);
1336 a2 = shr_double(a1, a2, c);
1337 a1 = shr_double(a0, a1, c);
1338 a0 = a0 >> c;
dedd123c
RH
1339
1340 done:
1341 a->frac_lo = a3 | (sticky != 0);
1342 a->frac_lm = a2;
1343 a->frac_hm = a1;
1344 a->frac_hi = a0;
1345}
1346
/* Size-generic shift-right-and-jam; dispatches on the type of PARTS. */
#define frac_shrjam(PARTS, COUNT) \
    FRAC_GENERIC_64_128_256(shrjam, PARTS)(PARTS, COUNT)
d446830a 1348
da10a907
RH
1349static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1350{
1351 return usub64_overflow(a->frac, b->frac, &r->frac);
1352}
7c45bad8 1353
da10a907
RH
1354static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1355{
1356 bool c = 0;
1357 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1358 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1359 return c;
1360}
1361
dedd123c
RH
1362static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1363{
1364 bool c = 0;
1365 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1366 r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1367 r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1368 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1369 return c;
1370}
1371
1372#define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
da10a907 1373
aca84527
RH
1374static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1375{
1376 r->frac = a->frac_hi | (a->frac_lo != 0);
1377}
1378
1379static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1380{
1381 r->frac_hi = a->frac_hi;
1382 r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1383}
1384
1385#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1386
dedd123c
RH
1387static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1388{
1389 r->frac_hi = a->frac;
1390 r->frac_lo = 0;
1391}
1392
1393static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1394{
1395 r->frac_hi = a->frac_hi;
1396 r->frac_hm = a->frac_lo;
1397 r->frac_lm = 0;
1398 r->frac_lo = 0;
1399}
1400
1401#define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1402
9261b245
RH
/*
 * Reciprocal square root table, 128 entries of 16 bits each:
 * 1 bit of exponent and 6 bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licensed.
 */
static const uint16_t rsqrt_tab[128] = {
    /* entries 0..63 */
    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
    /* entries 64..127 */
    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
};
1426
da10a907
RH
/*
 * Template instantiation.  The .c.inc files are included several times,
 * each time with N (and, where defined, W) set to a different width, so
 * that partsN(NAME) expands to parts64_NAME / parts128_NAME / parts256_NAME
 * and FloatPartsN / FloatPartsW expand to the matching FloatParts<width>
 * type names.  The order of the #define / #include / #undef lines below
 * is significant.
 */
1427#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1428#define FloatPartsN glue(FloatParts,N)
aca84527 1429#define FloatPartsW glue(FloatParts,W)
da10a907
RH
1430
/* First pass: N = 64, W = 128; both template files are included. */
1431#define N 64
aca84527 1432#define W 128
da10a907
RH
1433
1434#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1435#include "softfloat-parts.c.inc"
1436
/* Second pass: N = 128, W = 256; both template files are included. */
da10a907 1437#undef N
aca84527 1438#undef W
da10a907 1439#define N 128
aca84527 1440#define W 256
7c45bad8 1441
da10a907 1442#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1443#include "softfloat-parts.c.inc"
1444
/* Third pass: N = 256 with W undefined; only the addsub template is included. */
dedd123c
RH
1445#undef N
1446#undef W
1447#define N 256
1448
1449#include "softfloat-parts-addsub.c.inc"
1450
/* Drop the template parameters and helper macros again. */
da10a907 1451#undef N
aca84527 1452#undef W
7c45bad8
RH
1453#undef partsN
1454#undef FloatPartsN
aca84527 1455#undef FloatPartsW
7c45bad8 1456
aaffb7bf
RH
1457/*
1458 * Pack/unpack routines with a specific FloatFmt.
1459 */
1460
98e256fc
RH
1461static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1462 float_status *s, const FloatFmt *params)
aaffb7bf 1463{
98e256fc 1464 float16_unpack_raw(p, f);
d46975bc 1465 parts_canonicalize(p, s, params);
aaffb7bf
RH
1466}
1467
98e256fc
RH
1468static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1469 float_status *s)
aaffb7bf 1470{
98e256fc 1471 float16a_unpack_canonical(p, f, s, &float16_params);
aaffb7bf
RH
1472}
1473
98e256fc
RH
1474static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1475 float_status *s)
aaffb7bf 1476{
98e256fc 1477 bfloat16_unpack_raw(p, f);
d46975bc 1478 parts_canonicalize(p, s, &bfloat16_params);
aaffb7bf
RH
1479}
1480
e293e927
RH
1481static float16 float16a_round_pack_canonical(FloatParts64 *p,
1482 float_status *s,
aaffb7bf
RH
1483 const FloatFmt *params)
1484{
ee6959f2 1485 parts_uncanon(p, s, params);
e293e927 1486 return float16_pack_raw(p);
aaffb7bf
RH
1487}
1488
e293e927
RH
1489static float16 float16_round_pack_canonical(FloatParts64 *p,
1490 float_status *s)
aaffb7bf
RH
1491{
1492 return float16a_round_pack_canonical(p, s, &float16_params);
1493}
1494
e293e927
RH
1495static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1496 float_status *s)
aaffb7bf 1497{
ee6959f2 1498 parts_uncanon(p, s, &bfloat16_params);
e293e927 1499 return bfloat16_pack_raw(p);
aaffb7bf
RH
1500}
1501
98e256fc
RH
1502static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1503 float_status *s)
aaffb7bf 1504{
98e256fc 1505 float32_unpack_raw(p, f);
d46975bc 1506 parts_canonicalize(p, s, &float32_params);
aaffb7bf
RH
1507}
1508
e293e927
RH
1509static float32 float32_round_pack_canonical(FloatParts64 *p,
1510 float_status *s)
aaffb7bf 1511{
ee6959f2 1512 parts_uncanon(p, s, &float32_params);
e293e927 1513 return float32_pack_raw(p);
aaffb7bf
RH
1514}
1515
98e256fc
RH
1516static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1517 float_status *s)
aaffb7bf 1518{
98e256fc 1519 float64_unpack_raw(p, f);
d46975bc 1520 parts_canonicalize(p, s, &float64_params);
aaffb7bf
RH
1521}
1522
e293e927
RH
1523static float64 float64_round_pack_canonical(FloatParts64 *p,
1524 float_status *s)
aaffb7bf 1525{
ee6959f2 1526 parts_uncanon(p, s, &float64_params);
e293e927 1527 return float64_pack_raw(p);
aaffb7bf
RH
1528}
1529
3ff49e56
RH
1530static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1531 float_status *s)
1532{
1533 float128_unpack_raw(p, f);
1534 parts_canonicalize(p, s, &float128_params);
1535}
1536
1537static float128 float128_round_pack_canonical(FloatParts128 *p,
1538 float_status *s)
1539{
1540 parts_uncanon(p, s, &float128_params);
1541 return float128_pack_raw(p);
1542}
1543
6fff2167 1544/*
da10a907 1545 * Addition and subtraction
6fff2167
AB
1546 */
1547
da10a907
RH
1548static float16 QEMU_FLATTEN
1549float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
6fff2167 1550{
da10a907 1551 FloatParts64 pa, pb, *pr;
98e256fc
RH
1552
1553 float16_unpack_canonical(&pa, a, status);
1554 float16_unpack_canonical(&pb, b, status);
da10a907 1555 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1556
da10a907 1557 return float16_round_pack_canonical(pr, status);
6fff2167
AB
1558}
1559
da10a907 1560float16 float16_add(float16 a, float16 b, float_status *status)
1b615d48 1561{
da10a907
RH
1562 return float16_addsub(a, b, status, false);
1563}
1b615d48 1564
da10a907
RH
1565float16 float16_sub(float16 a, float16 b, float_status *status)
1566{
1567 return float16_addsub(a, b, status, true);
1b615d48
EC
1568}
1569
1570static float32 QEMU_SOFTFLOAT_ATTR
da10a907 1571soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
6fff2167 1572{
da10a907 1573 FloatParts64 pa, pb, *pr;
98e256fc
RH
1574
1575 float32_unpack_canonical(&pa, a, status);
1576 float32_unpack_canonical(&pb, b, status);
da10a907 1577 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1578
da10a907 1579 return float32_round_pack_canonical(pr, status);
6fff2167
AB
1580}
1581
da10a907 1582static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1b615d48 1583{
da10a907 1584 return soft_f32_addsub(a, b, status, false);
1b615d48
EC
1585}
1586
da10a907 1587static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1b615d48 1588{
da10a907 1589 return soft_f32_addsub(a, b, status, true);
1b615d48
EC
1590}
1591
1592static float64 QEMU_SOFTFLOAT_ATTR
da10a907 1593soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
6fff2167 1594{
da10a907 1595 FloatParts64 pa, pb, *pr;
98e256fc
RH
1596
1597 float64_unpack_canonical(&pa, a, status);
1598 float64_unpack_canonical(&pb, b, status);
da10a907 1599 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1600
da10a907 1601 return float64_round_pack_canonical(pr, status);
6fff2167
AB
1602}
1603
da10a907 1604static float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1605{
da10a907 1606 return soft_f64_addsub(a, b, status, false);
1b615d48 1607}
6fff2167 1608
da10a907 1609static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1b615d48 1610{
da10a907 1611 return soft_f64_addsub(a, b, status, true);
6fff2167
AB
1612}
1613
/* Host floating-point add/subtract, used as the "hard" callbacks. */
static float hard_f32_add(float a, float b)
{
    const float sum = a + b;

    return sum;
}

static float hard_f32_sub(float a, float b)
{
    const float diff = a - b;

    return diff;
}

static double hard_f64_add(double a, double b)
{
    const double sum = a + b;

    return sum;
}

static double hard_f64_sub(double a, double b)
{
    const double diff = a - b;

    return diff;
}
1633
b240c9c4 1634static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1b615d48
EC
1635{
1636 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1637 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1638 }
1639 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1640}
1641
b240c9c4 1642static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1b615d48
EC
1643{
1644 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1645 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1646 } else {
1647 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1648 }
1649}
1650
1651static float32 float32_addsub(float32 a, float32 b, float_status *s,
1652 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1653{
1654 return float32_gen2(a, b, s, hard, soft,
b240c9c4 1655 f32_is_zon2, f32_addsubmul_post);
1b615d48
EC
1656}
1657
1658static float64 float64_addsub(float64 a, float64 b, float_status *s,
1659 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1660{
1661 return float64_gen2(a, b, s, hard, soft,
b240c9c4 1662 f64_is_zon2, f64_addsubmul_post);
1b615d48
EC
1663}
1664
1665float32 QEMU_FLATTEN
1666float32_add(float32 a, float32 b, float_status *s)
1667{
1668 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1669}
1670
1671float32 QEMU_FLATTEN
1672float32_sub(float32 a, float32 b, float_status *s)
1673{
1674 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1675}
1676
1677float64 QEMU_FLATTEN
1678float64_add(float64 a, float64 b, float_status *s)
1679{
1680 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1681}
1682
1683float64 QEMU_FLATTEN
1684float64_sub(float64 a, float64 b, float_status *s)
1685{
1686 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1687}
1688
da10a907
RH
1689static bfloat16 QEMU_FLATTEN
1690bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
8282310d 1691{
da10a907 1692 FloatParts64 pa, pb, *pr;
98e256fc
RH
1693
1694 bfloat16_unpack_canonical(&pa, a, status);
1695 bfloat16_unpack_canonical(&pb, b, status);
da10a907 1696 pr = parts_addsub(&pa, &pb, status, subtract);
8282310d 1697
da10a907 1698 return bfloat16_round_pack_canonical(pr, status);
8282310d
LZ
1699}
1700
da10a907 1701bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1702{
da10a907
RH
1703 return bfloat16_addsub(a, b, status, false);
1704}
8282310d 1705
da10a907
RH
1706bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1707{
1708 return bfloat16_addsub(a, b, status, true);
8282310d
LZ
1709}
1710
3ff49e56
RH
1711static float128 QEMU_FLATTEN
1712float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1713{
1714 FloatParts128 pa, pb, *pr;
1715
1716 float128_unpack_canonical(&pa, a, status);
1717 float128_unpack_canonical(&pb, b, status);
1718 pr = parts_addsub(&pa, &pb, status, subtract);
1719
1720 return float128_round_pack_canonical(pr, status);
1721}
1722
1723float128 float128_add(float128 a, float128 b, float_status *status)
1724{
1725 return float128_addsub(a, b, status, false);
1726}
1727
1728float128 float128_sub(float128 a, float128 b, float_status *status)
1729{
1730 return float128_addsub(a, b, status, true);
1731}
1732
74d707e2 1733/*
aca84527 1734 * Multiplication
74d707e2
AB
1735 */
1736
97ff87c0 1737float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2 1738{
aca84527 1739 FloatParts64 pa, pb, *pr;
98e256fc
RH
1740
1741 float16_unpack_canonical(&pa, a, status);
1742 float16_unpack_canonical(&pb, b, status);
aca84527 1743 pr = parts_mul(&pa, &pb, status);
74d707e2 1744
aca84527 1745 return float16_round_pack_canonical(pr, status);
74d707e2
AB
1746}
1747
2dfabc86
EC
1748static float32 QEMU_SOFTFLOAT_ATTR
1749soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2 1750{
aca84527 1751 FloatParts64 pa, pb, *pr;
98e256fc
RH
1752
1753 float32_unpack_canonical(&pa, a, status);
1754 float32_unpack_canonical(&pb, b, status);
aca84527 1755 pr = parts_mul(&pa, &pb, status);
74d707e2 1756
aca84527 1757 return float32_round_pack_canonical(pr, status);
74d707e2
AB
1758}
1759
2dfabc86
EC
1760static float64 QEMU_SOFTFLOAT_ATTR
1761soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2 1762{
aca84527 1763 FloatParts64 pa, pb, *pr;
98e256fc
RH
1764
1765 float64_unpack_canonical(&pa, a, status);
1766 float64_unpack_canonical(&pb, b, status);
aca84527 1767 pr = parts_mul(&pa, &pb, status);
74d707e2 1768
aca84527 1769 return float64_round_pack_canonical(pr, status);
74d707e2
AB
1770}
1771
2dfabc86
EC
/* Host floating-point multiply, used as the "hard" callbacks. */
static float hard_f32_mul(float a, float b)
{
    const float product = a * b;

    return product;
}

static double hard_f64_mul(double a, double b)
{
    const double product = a * b;

    return product;
}
1781
2dfabc86
EC
1782float32 QEMU_FLATTEN
1783float32_mul(float32 a, float32 b, float_status *s)
1784{
1785 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
b240c9c4 1786 f32_is_zon2, f32_addsubmul_post);
2dfabc86
EC
1787}
1788
1789float64 QEMU_FLATTEN
1790float64_mul(float64 a, float64 b, float_status *s)
1791{
1792 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
b240c9c4 1793 f64_is_zon2, f64_addsubmul_post);
2dfabc86
EC
1794}
1795
aca84527
RH
1796bfloat16 QEMU_FLATTEN
1797bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1798{
aca84527 1799 FloatParts64 pa, pb, *pr;
98e256fc
RH
1800
1801 bfloat16_unpack_canonical(&pa, a, status);
1802 bfloat16_unpack_canonical(&pb, b, status);
aca84527 1803 pr = parts_mul(&pa, &pb, status);
8282310d 1804
aca84527
RH
1805 return bfloat16_round_pack_canonical(pr, status);
1806}
1807
1808float128 QEMU_FLATTEN
1809float128_mul(float128 a, float128 b, float_status *status)
1810{
1811 FloatParts128 pa, pb, *pr;
1812
1813 float128_unpack_canonical(&pa, a, status);
1814 float128_unpack_canonical(&pb, b, status);
1815 pr = parts_mul(&pa, &pb, status);
1816
1817 return float128_round_pack_canonical(pr, status);
8282310d
LZ
1818}
1819
d446830a 1820/*
dedd123c 1821 * Fused multiply-add
d446830a
AB
1822 */
1823
97ff87c0 1824float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
dedd123c 1825 int flags, float_status *status)
d446830a 1826{
dedd123c 1827 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1828
1829 float16_unpack_canonical(&pa, a, status);
1830 float16_unpack_canonical(&pb, b, status);
1831 float16_unpack_canonical(&pc, c, status);
dedd123c 1832 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1833
dedd123c 1834 return float16_round_pack_canonical(pr, status);
d446830a
AB
1835}
1836
ccf770ba
EC
1837static float32 QEMU_SOFTFLOAT_ATTR
1838soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1839 float_status *status)
d446830a 1840{
dedd123c 1841 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1842
1843 float32_unpack_canonical(&pa, a, status);
1844 float32_unpack_canonical(&pb, b, status);
1845 float32_unpack_canonical(&pc, c, status);
dedd123c 1846 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1847
dedd123c 1848 return float32_round_pack_canonical(pr, status);
d446830a
AB
1849}
1850
ccf770ba
EC
1851static float64 QEMU_SOFTFLOAT_ATTR
1852soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1853 float_status *status)
d446830a 1854{
dedd123c 1855 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1856
1857 float64_unpack_canonical(&pa, a, status);
1858 float64_unpack_canonical(&pb, b, status);
1859 float64_unpack_canonical(&pc, c, status);
dedd123c 1860 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1861
dedd123c 1862 return float64_round_pack_canonical(pr, status);
d446830a
AB
1863}
1864
f6b3b108
EC
/*
 * When true, float32_muladd() and float64_muladd() skip the host-FPU
 * fast path and use the soft implementation.
 */
1865static bool force_soft_fma;
1866
ccf770ba
EC
1867float32 QEMU_FLATTEN
1868float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1869{
1870 union_float32 ua, ub, uc, ur;
1871
1872 ua.s = xa;
1873 ub.s = xb;
1874 uc.s = xc;
1875
1876 if (unlikely(!can_use_fpu(s))) {
1877 goto soft;
1878 }
1879 if (unlikely(flags & float_muladd_halve_result)) {
1880 goto soft;
1881 }
1882
1883 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1884 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1885 goto soft;
1886 }
f6b3b108
EC
1887
1888 if (unlikely(force_soft_fma)) {
1889 goto soft;
1890 }
1891
ccf770ba
EC
1892 /*
1893 * When (a || b) == 0, there's no need to check for under/over flow,
1894 * since we know the addend is (normal || 0) and the product is 0.
1895 */
1896 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1897 union_float32 up;
1898 bool prod_sign;
1899
1900 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1901 prod_sign ^= !!(flags & float_muladd_negate_product);
1902 up.s = float32_set_sign(float32_zero, prod_sign);
1903
1904 if (flags & float_muladd_negate_c) {
1905 uc.h = -uc.h;
1906 }
1907 ur.h = up.h + uc.h;
1908 } else {
896f51fb
KC
1909 union_float32 ua_orig = ua;
1910 union_float32 uc_orig = uc;
1911
ccf770ba
EC
1912 if (flags & float_muladd_negate_product) {
1913 ua.h = -ua.h;
1914 }
1915 if (flags & float_muladd_negate_c) {
1916 uc.h = -uc.h;
1917 }
1918
1919 ur.h = fmaf(ua.h, ub.h, uc.h);
1920
1921 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 1922 float_raise(float_flag_overflow, s);
ccf770ba 1923 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
1924 ua = ua_orig;
1925 uc = uc_orig;
ccf770ba
EC
1926 goto soft;
1927 }
1928 }
1929 if (flags & float_muladd_negate_result) {
1930 return float32_chs(ur.s);
1931 }
1932 return ur.s;
1933
1934 soft:
1935 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1936}
1937
1938float64 QEMU_FLATTEN
1939float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1940{
1941 union_float64 ua, ub, uc, ur;
1942
1943 ua.s = xa;
1944 ub.s = xb;
1945 uc.s = xc;
1946
1947 if (unlikely(!can_use_fpu(s))) {
1948 goto soft;
1949 }
1950 if (unlikely(flags & float_muladd_halve_result)) {
1951 goto soft;
1952 }
1953
1954 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1955 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1956 goto soft;
1957 }
f6b3b108
EC
1958
1959 if (unlikely(force_soft_fma)) {
1960 goto soft;
1961 }
1962
ccf770ba
EC
1963 /*
1964 * When (a || b) == 0, there's no need to check for under/over flow,
1965 * since we know the addend is (normal || 0) and the product is 0.
1966 */
1967 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1968 union_float64 up;
1969 bool prod_sign;
1970
1971 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1972 prod_sign ^= !!(flags & float_muladd_negate_product);
1973 up.s = float64_set_sign(float64_zero, prod_sign);
1974
1975 if (flags & float_muladd_negate_c) {
1976 uc.h = -uc.h;
1977 }
1978 ur.h = up.h + uc.h;
1979 } else {
896f51fb
KC
1980 union_float64 ua_orig = ua;
1981 union_float64 uc_orig = uc;
1982
ccf770ba
EC
1983 if (flags & float_muladd_negate_product) {
1984 ua.h = -ua.h;
1985 }
1986 if (flags & float_muladd_negate_c) {
1987 uc.h = -uc.h;
1988 }
1989
1990 ur.h = fma(ua.h, ub.h, uc.h);
1991
1992 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 1993 float_raise(float_flag_overflow, s);
ccf770ba 1994 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
1995 ua = ua_orig;
1996 uc = uc_orig;
ccf770ba
EC
1997 goto soft;
1998 }
1999 }
2000 if (flags & float_muladd_negate_result) {
2001 return float64_chs(ur.s);
2002 }
2003 return ur.s;
2004
2005 soft:
2006 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2007}
2008
8282310d
LZ
2009bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2010 int flags, float_status *status)
2011{
dedd123c 2012 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
2013
2014 bfloat16_unpack_canonical(&pa, a, status);
2015 bfloat16_unpack_canonical(&pb, b, status);
2016 bfloat16_unpack_canonical(&pc, c, status);
dedd123c
RH
2017 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2018
2019 return bfloat16_round_pack_canonical(pr, status);
2020}
8282310d 2021
dedd123c
RH
2022float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2023 int flags, float_status *status)
2024{
2025 FloatParts128 pa, pb, pc, *pr;
2026
2027 float128_unpack_canonical(&pa, a, status);
2028 float128_unpack_canonical(&pb, b, status);
2029 float128_unpack_canonical(&pc, c, status);
2030 pr = parts_muladd(&pa, &pb, &pc, flags, status);
2031
2032 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2033}
2034
cf07323d 2035/*
ec961b81 2036 * Division
cf07323d
AB
2037 */
2038
cf07323d
AB
2039float16 float16_div(float16 a, float16 b, float_status *status)
2040{
ec961b81 2041 FloatParts64 pa, pb, *pr;
98e256fc
RH
2042
2043 float16_unpack_canonical(&pa, a, status);
2044 float16_unpack_canonical(&pb, b, status);
ec961b81 2045 pr = parts_div(&pa, &pb, status);
cf07323d 2046
ec961b81 2047 return float16_round_pack_canonical(pr, status);
cf07323d
AB
2048}
2049
4a629561
EC
2050static float32 QEMU_SOFTFLOAT_ATTR
2051soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d 2052{
ec961b81 2053 FloatParts64 pa, pb, *pr;
98e256fc
RH
2054
2055 float32_unpack_canonical(&pa, a, status);
2056 float32_unpack_canonical(&pb, b, status);
ec961b81 2057 pr = parts_div(&pa, &pb, status);
cf07323d 2058
ec961b81 2059 return float32_round_pack_canonical(pr, status);
cf07323d
AB
2060}
2061
4a629561
EC
2062static float64 QEMU_SOFTFLOAT_ATTR
2063soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d 2064{
ec961b81 2065 FloatParts64 pa, pb, *pr;
98e256fc
RH
2066
2067 float64_unpack_canonical(&pa, a, status);
2068 float64_unpack_canonical(&pb, b, status);
ec961b81 2069 pr = parts_div(&pa, &pb, status);
cf07323d 2070
ec961b81 2071 return float64_round_pack_canonical(pr, status);
cf07323d
AB
2072}
2073
4a629561
EC
/* Host single-precision division: returns @a / @b. */
static float hard_f32_div(float a, float b)
{
    const float quotient = a / b;

    return quotient;
}
2078
/* Host double-precision division: returns @a / @b. */
static double hard_f64_div(double a, double b)
{
    const double quotient = a / b;

    return quotient;
}
2083
2084static bool f32_div_pre(union_float32 a, union_float32 b)
2085{
2086 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2087 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2088 fpclassify(b.h) == FP_NORMAL;
2089 }
2090 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2091}
2092
2093static bool f64_div_pre(union_float64 a, union_float64 b)
2094{
2095 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2096 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2097 fpclassify(b.h) == FP_NORMAL;
2098 }
2099 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2100}
2101
2102static bool f32_div_post(union_float32 a, union_float32 b)
2103{
2104 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2105 return fpclassify(a.h) != FP_ZERO;
2106 }
2107 return !float32_is_zero(a.s);
2108}
2109
2110static bool f64_div_post(union_float64 a, union_float64 b)
2111{
2112 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2113 return fpclassify(a.h) != FP_ZERO;
2114 }
2115 return !float64_is_zero(a.s);
2116}
2117
2118float32 QEMU_FLATTEN
2119float32_div(float32 a, float32 b, float_status *s)
2120{
2121 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
b240c9c4 2122 f32_div_pre, f32_div_post);
4a629561
EC
2123}
2124
2125float64 QEMU_FLATTEN
2126float64_div(float64 a, float64 b, float_status *s)
2127{
2128 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
b240c9c4 2129 f64_div_pre, f64_div_post);
4a629561
EC
2130}
2131
ec961b81
RH
2132bfloat16 QEMU_FLATTEN
2133bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
8282310d 2134{
ec961b81 2135 FloatParts64 pa, pb, *pr;
98e256fc
RH
2136
2137 bfloat16_unpack_canonical(&pa, a, status);
2138 bfloat16_unpack_canonical(&pb, b, status);
ec961b81 2139 pr = parts_div(&pa, &pb, status);
8282310d 2140
ec961b81
RH
2141 return bfloat16_round_pack_canonical(pr, status);
2142}
2143
2144float128 QEMU_FLATTEN
2145float128_div(float128 a, float128 b, float_status *status)
2146{
2147 FloatParts128 pa, pb, *pr;
2148
2149 float128_unpack_canonical(&pa, a, status);
2150 float128_unpack_canonical(&pb, b, status);
2151 pr = parts_div(&pa, &pb, status);
2152
2153 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2154}
2155
6fed16b2
AB
2156/*
2157 * Float to Float conversions
2158 *
2159 * Returns the result of converting one float format to another. The
2160 * conversion is performed according to the IEC/IEEE Standard for
2161 * Binary Floating-Point Arithmetic.
2162 *
c3f1875e
RH
2163 * Usually this only needs to take care of raising invalid exceptions
2164 * and handling the conversion on NaNs.
6fed16b2
AB
2165 */
2166
c3f1875e
RH
2167static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2168{
2169 switch (a->cls) {
2170 case float_class_qnan:
2171 case float_class_snan:
2172 /*
2173 * There is no NaN in the destination format. Raise Invalid
2174 * and return a zero with the sign of the input NaN.
2175 */
2176 float_raise(float_flag_invalid, s);
2177 a->cls = float_class_zero;
2178 break;
2179
2180 case float_class_inf:
2181 /*
2182 * There is no Inf in the destination format. Raise Invalid
2183 * and return the maximum normal with the correct sign.
2184 */
2185 float_raise(float_flag_invalid, s);
2186 a->cls = float_class_normal;
2187 a->exp = float16_params_ahp.exp_max;
2188 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2189 float16_params_ahp.frac_size + 1);
2190 break;
2191
2192 case float_class_normal:
2193 case float_class_zero:
2194 break;
2195
2196 default:
2197 g_assert_not_reached();
2198 }
2199}
2200
2201static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2202{
2203 if (is_nan(a->cls)) {
2204 parts_return_nan(a, s);
6fed16b2 2205 }
6fed16b2
AB
2206}
2207
c3f1875e
RH
2208static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2209{
2210 if (is_nan(a->cls)) {
2211 parts_return_nan(a, s);
2212 }
2213}
2214
/*
 * Call the float_to_float helper that PARTS_GENERIC_64_128 selects for P
 * (presumably parts64_/parts128_float_to_float, keyed on P's type).
 */
#define parts_float_to_float(P, S) \
    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2217
9882ccaf
RH
2218static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2219 float_status *s)
2220{
2221 a->cls = b->cls;
2222 a->sign = b->sign;
2223 a->exp = b->exp;
2224
2225 if (a->cls == float_class_normal) {
2226 frac_truncjam(a, b);
2227 } else if (is_nan(a->cls)) {
2228 /* Discard the low bits of the NaN. */
2229 a->frac = b->frac_hi;
2230 parts_return_nan(a, s);
2231 }
2232}
2233
2234static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2235 float_status *s)
2236{
2237 a->cls = b->cls;
2238 a->sign = b->sign;
2239 a->exp = b->exp;
2240 frac_widen(a, b);
2241
2242 if (is_nan(a->cls)) {
2243 parts_return_nan(a, s);
2244 }
2245}
2246
6fed16b2
AB
2247float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2248{
2249 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2250 FloatParts64 p;
98e256fc 2251
c3f1875e
RH
2252 float16a_unpack_canonical(&p, a, s, fmt16);
2253 parts_float_to_float(&p, s);
2254 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2255}
2256
2257float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2258{
2259 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2260 FloatParts64 p;
98e256fc 2261
c3f1875e
RH
2262 float16a_unpack_canonical(&p, a, s, fmt16);
2263 parts_float_to_float(&p, s);
2264 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2265}
2266
2267float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2268{
c3f1875e
RH
2269 FloatParts64 p;
2270 const FloatFmt *fmt;
98e256fc 2271
c3f1875e
RH
2272 float32_unpack_canonical(&p, a, s);
2273 if (ieee) {
2274 parts_float_to_float(&p, s);
2275 fmt = &float16_params;
2276 } else {
2277 parts_float_to_ahp(&p, s);
2278 fmt = &float16_params_ahp;
2279 }
2280 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2281}
2282
21381dcf
MK
2283static float64 QEMU_SOFTFLOAT_ATTR
2284soft_float32_to_float64(float32 a, float_status *s)
6fed16b2 2285{
c3f1875e 2286 FloatParts64 p;
98e256fc 2287
c3f1875e
RH
2288 float32_unpack_canonical(&p, a, s);
2289 parts_float_to_float(&p, s);
2290 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2291}
2292
21381dcf
MK
2293float64 float32_to_float64(float32 a, float_status *s)
2294{
2295 if (likely(float32_is_normal(a))) {
2296 /* Widening conversion can never produce inexact results. */
2297 union_float32 uf;
2298 union_float64 ud;
2299 uf.s = a;
2300 ud.h = uf.h;
2301 return ud.s;
2302 } else if (float32_is_zero(a)) {
2303 return float64_set_sign(float64_zero, float32_is_neg(a));
2304 } else {
2305 return soft_float32_to_float64(a, s);
2306 }
2307}
2308
6fed16b2
AB
2309float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2310{
c3f1875e
RH
2311 FloatParts64 p;
2312 const FloatFmt *fmt;
98e256fc 2313
c3f1875e
RH
2314 float64_unpack_canonical(&p, a, s);
2315 if (ieee) {
2316 parts_float_to_float(&p, s);
2317 fmt = &float16_params;
2318 } else {
2319 parts_float_to_ahp(&p, s);
2320 fmt = &float16_params_ahp;
2321 }
2322 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2323}
2324
2325float32 float64_to_float32(float64 a, float_status *s)
2326{
c3f1875e 2327 FloatParts64 p;
98e256fc 2328
c3f1875e
RH
2329 float64_unpack_canonical(&p, a, s);
2330 parts_float_to_float(&p, s);
2331 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2332}
2333
34f0c0a9
LZ
2334float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2335{
c3f1875e 2336 FloatParts64 p;
98e256fc 2337
c3f1875e
RH
2338 bfloat16_unpack_canonical(&p, a, s);
2339 parts_float_to_float(&p, s);
2340 return float32_round_pack_canonical(&p, s);
34f0c0a9
LZ
2341}
2342
2343float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2344{
c3f1875e 2345 FloatParts64 p;
98e256fc 2346
c3f1875e
RH
2347 bfloat16_unpack_canonical(&p, a, s);
2348 parts_float_to_float(&p, s);
2349 return float64_round_pack_canonical(&p, s);
34f0c0a9
LZ
2350}
2351
2352bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2353{
c3f1875e 2354 FloatParts64 p;
98e256fc 2355
c3f1875e
RH
2356 float32_unpack_canonical(&p, a, s);
2357 parts_float_to_float(&p, s);
2358 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2359}
2360
2361bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2362{
c3f1875e 2363 FloatParts64 p;
98e256fc 2364
c3f1875e
RH
2365 float64_unpack_canonical(&p, a, s);
2366 parts_float_to_float(&p, s);
2367 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2368}
2369
9882ccaf
RH
2370float32 float128_to_float32(float128 a, float_status *s)
2371{
2372 FloatParts64 p64;
2373 FloatParts128 p128;
2374
2375 float128_unpack_canonical(&p128, a, s);
2376 parts_float_to_float_narrow(&p64, &p128, s);
2377 return float32_round_pack_canonical(&p64, s);
2378}
2379
2380float64 float128_to_float64(float128 a, float_status *s)
2381{
2382 FloatParts64 p64;
2383 FloatParts128 p128;
2384
2385 float128_unpack_canonical(&p128, a, s);
2386 parts_float_to_float_narrow(&p64, &p128, s);
2387 return float64_round_pack_canonical(&p64, s);
2388}
2389
2390float128 float32_to_float128(float32 a, float_status *s)
2391{
2392 FloatParts64 p64;
2393 FloatParts128 p128;
2394
2395 float32_unpack_canonical(&p64, a, s);
2396 parts_float_to_float_widen(&p128, &p64, s);
2397 return float128_round_pack_canonical(&p128, s);
2398}
2399
2400float128 float64_to_float128(float64 a, float_status *s)
2401{
2402 FloatParts64 p64;
2403 FloatParts128 p128;
2404
2405 float64_unpack_canonical(&p64, a, s);
2406 parts_float_to_float_widen(&p128, &p64, s);
2407 return float128_round_pack_canonical(&p128, s);
2408}
2409
dbe4d53a 2410/*
afc34931 2411 * Round to integral value
dbe4d53a
AB
2412 */
2413
dbe4d53a
AB
2414float16 float16_round_to_int(float16 a, float_status *s)
2415{
afc34931 2416 FloatParts64 p;
98e256fc 2417
afc34931
RH
2418 float16_unpack_canonical(&p, a, s);
2419 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2420 return float16_round_pack_canonical(&p, s);
dbe4d53a
AB
2421}
2422
2423float32 float32_round_to_int(float32 a, float_status *s)
2424{
afc34931 2425 FloatParts64 p;
98e256fc 2426
afc34931
RH
2427 float32_unpack_canonical(&p, a, s);
2428 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2429 return float32_round_pack_canonical(&p, s);
dbe4d53a
AB
2430}
2431
2432float64 float64_round_to_int(float64 a, float_status *s)
2433{
afc34931 2434 FloatParts64 p;
98e256fc 2435
afc34931
RH
2436 float64_unpack_canonical(&p, a, s);
2437 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2438 return float64_round_pack_canonical(&p, s);
dbe4d53a
AB
2439}
2440
34f0c0a9
LZ
2441bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2442{
afc34931 2443 FloatParts64 p;
98e256fc 2444
afc34931
RH
2445 bfloat16_unpack_canonical(&p, a, s);
2446 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2447 return bfloat16_round_pack_canonical(&p, s);
2448}
2449
2450float128 float128_round_to_int(float128 a, float_status *s)
2451{
2452 FloatParts128 p;
2453
2454 float128_unpack_canonical(&p, a, s);
2455 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2456 return float128_round_pack_canonical(&p, s);
34f0c0a9
LZ
2457}
2458
ab52f973 2459/*
463b3f0d
RH
2460 * Floating-point to signed integer conversions
2461 */
ab52f973 2462
0d93d8ec
FC
2463int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2464 float_status *s)
2465{
98e256fc
RH
2466 FloatParts64 p;
2467
2468 float16_unpack_canonical(&p, a, s);
463b3f0d 2469 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
0d93d8ec
FC
2470}
2471
3dede407 2472int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2473 float_status *s)
2474{
98e256fc
RH
2475 FloatParts64 p;
2476
2477 float16_unpack_canonical(&p, a, s);
463b3f0d 2478 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2479}
2480
3dede407 2481int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2482 float_status *s)
2483{
98e256fc
RH
2484 FloatParts64 p;
2485
2486 float16_unpack_canonical(&p, a, s);
463b3f0d 2487 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2488}
2489
3dede407 2490int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2491 float_status *s)
2492{
98e256fc
RH
2493 FloatParts64 p;
2494
2495 float16_unpack_canonical(&p, a, s);
463b3f0d 2496 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2497}
2498
3dede407 2499int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2500 float_status *s)
2501{
98e256fc
RH
2502 FloatParts64 p;
2503
2504 float32_unpack_canonical(&p, a, s);
463b3f0d 2505 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2506}
2507
3dede407 2508int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2509 float_status *s)
2510{
98e256fc
RH
2511 FloatParts64 p;
2512
2513 float32_unpack_canonical(&p, a, s);
463b3f0d 2514 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2515}
2516
3dede407 2517int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2518 float_status *s)
2519{
98e256fc
RH
2520 FloatParts64 p;
2521
2522 float32_unpack_canonical(&p, a, s);
463b3f0d 2523 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2524}
2525
3dede407 2526int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2527 float_status *s)
2528{
98e256fc
RH
2529 FloatParts64 p;
2530
2531 float64_unpack_canonical(&p, a, s);
463b3f0d 2532 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2533}
2534
3dede407 2535int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2536 float_status *s)
2537{
98e256fc
RH
2538 FloatParts64 p;
2539
2540 float64_unpack_canonical(&p, a, s);
463b3f0d 2541 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2542}
2543
3dede407 2544int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2545 float_status *s)
2546{
98e256fc
RH
2547 FloatParts64 p;
2548
2549 float64_unpack_canonical(&p, a, s);
463b3f0d
RH
2550 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2551}
2552
2553int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2554 float_status *s)
2555{
2556 FloatParts64 p;
2557
2558 bfloat16_unpack_canonical(&p, a, s);
2559 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2560}
2561
2562int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2563 float_status *s)
2564{
2565 FloatParts64 p;
2566
2567 bfloat16_unpack_canonical(&p, a, s);
2568 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2569}
2570
2571int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2572 float_status *s)
2573{
2574 FloatParts64 p;
2575
2576 bfloat16_unpack_canonical(&p, a, s);
2577 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2578}
2579
2580static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
2581 int scale, float_status *s)
2582{
2583 FloatParts128 p;
2584
2585 float128_unpack_canonical(&p, a, s);
2586 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2587}
2588
2589static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
2590 int scale, float_status *s)
2591{
2592 FloatParts128 p;
2593
2594 float128_unpack_canonical(&p, a, s);
2595 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2596}
2597
0d93d8ec
FC
2598int8_t float16_to_int8(float16 a, float_status *s)
2599{
2600 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2601}
2602
2f6c74be
RH
2603int16_t float16_to_int16(float16 a, float_status *s)
2604{
2605 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2606}
2607
2608int32_t float16_to_int32(float16 a, float_status *s)
2609{
2610 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2611}
2612
2613int64_t float16_to_int64(float16 a, float_status *s)
2614{
2615 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2616}
2617
2618int16_t float32_to_int16(float32 a, float_status *s)
2619{
2620 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2621}
2622
2623int32_t float32_to_int32(float32 a, float_status *s)
2624{
2625 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2626}
2627
2628int64_t float32_to_int64(float32 a, float_status *s)
2629{
2630 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2631}
2632
2633int16_t float64_to_int16(float64 a, float_status *s)
2634{
2635 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2636}
2637
2638int32_t float64_to_int32(float64 a, float_status *s)
2639{
2640 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2641}
2642
2643int64_t float64_to_int64(float64 a, float_status *s)
2644{
2645 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2646}
2647
463b3f0d
RH
2648int32_t float128_to_int32(float128 a, float_status *s)
2649{
2650 return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2651}
2652
2653int64_t float128_to_int64(float128 a, float_status *s)
2654{
2655 return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2656}
2657
2f6c74be
RH
2658int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2659{
2660 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2661}
2662
2663int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2664{
2665 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2666}
2667
2668int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2669{
2670 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2671}
2672
2f6c74be
RH
2673int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2674{
2675 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2676}
ab52f973 2677
2f6c74be
RH
2678int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2679{
2680 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2681}
2682
2683int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2684{
2685 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2686}
2687
2688int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2689{
2690 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2691}
ab52f973 2692
2f6c74be
RH
2693int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2694{
2695 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2696}
ab52f973 2697
2f6c74be
RH
2698int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2699{
2700 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2701}
ab52f973 2702
463b3f0d 2703int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
34f0c0a9 2704{
463b3f0d 2705 return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2706}
2707
463b3f0d 2708int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
34f0c0a9 2709{
463b3f0d 2710 return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2711}
2712
2713int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2714{
2715 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2716}
2717
2718int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2719{
2720 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2721}
2722
2723int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2724{
2725 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2726}
2727
2728int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2729{
2730 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2731}
2732
2733int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2734{
2735 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2736}
2737
2738int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2739{
2740 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2741}
2742
ab52f973 2743/*
4ab4aef0 2744 * Floating-point to unsigned integer conversions
ab52f973
AB
2745 */
2746
0d93d8ec
FC
2747uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2748 float_status *s)
2749{
98e256fc
RH
2750 FloatParts64 p;
2751
2752 float16_unpack_canonical(&p, a, s);
4ab4aef0 2753 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
0d93d8ec
FC
2754}
2755
3dede407 2756uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2757 float_status *s)
2758{
98e256fc
RH
2759 FloatParts64 p;
2760
2761 float16_unpack_canonical(&p, a, s);
4ab4aef0 2762 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2763}
2764
3dede407 2765uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2766 float_status *s)
2767{
98e256fc
RH
2768 FloatParts64 p;
2769
2770 float16_unpack_canonical(&p, a, s);
4ab4aef0 2771 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2772}
2773
3dede407 2774uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2775 float_status *s)
2776{
98e256fc
RH
2777 FloatParts64 p;
2778
2779 float16_unpack_canonical(&p, a, s);
4ab4aef0 2780 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2781}
2782
3dede407 2783uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2784 float_status *s)
2785{
98e256fc
RH
2786 FloatParts64 p;
2787
2788 float32_unpack_canonical(&p, a, s);
4ab4aef0 2789 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2790}
2791
3dede407 2792uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2793 float_status *s)
2794{
98e256fc
RH
2795 FloatParts64 p;
2796
2797 float32_unpack_canonical(&p, a, s);
4ab4aef0 2798 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2799}
2800
3dede407 2801uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2802 float_status *s)
2803{
98e256fc
RH
2804 FloatParts64 p;
2805
2806 float32_unpack_canonical(&p, a, s);
4ab4aef0 2807 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2808}
2809
3dede407 2810uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2811 float_status *s)
2812{
98e256fc
RH
2813 FloatParts64 p;
2814
2815 float64_unpack_canonical(&p, a, s);
4ab4aef0 2816 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2817}
2818
3dede407 2819uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2820 float_status *s)
2821{
98e256fc
RH
2822 FloatParts64 p;
2823
2824 float64_unpack_canonical(&p, a, s);
4ab4aef0 2825 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2826}
2827
3dede407 2828uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2829 float_status *s)
2830{
98e256fc
RH
2831 FloatParts64 p;
2832
2833 float64_unpack_canonical(&p, a, s);
4ab4aef0
RH
2834 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2835}
2836
2837uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2838 int scale, float_status *s)
2839{
2840 FloatParts64 p;
2841
2842 bfloat16_unpack_canonical(&p, a, s);
2843 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2844}
2845
2846uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2847 int scale, float_status *s)
2848{
2849 FloatParts64 p;
2850
2851 bfloat16_unpack_canonical(&p, a, s);
2852 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2853}
2854
2855uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
2856 int scale, float_status *s)
2857{
2858 FloatParts64 p;
2859
2860 bfloat16_unpack_canonical(&p, a, s);
2861 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2862}
2863
2864static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
2865 int scale, float_status *s)
2866{
2867 FloatParts128 p;
2868
2869 float128_unpack_canonical(&p, a, s);
2870 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2871}
2872
2873static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
2874 int scale, float_status *s)
2875{
2876 FloatParts128 p;
2877
2878 float128_unpack_canonical(&p, a, s);
2879 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2880}
2881
0d93d8ec
FC
2882uint8_t float16_to_uint8(float16 a, float_status *s)
2883{
2884 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2885}
2886
2f6c74be
RH
2887uint16_t float16_to_uint16(float16 a, float_status *s)
2888{
2889 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2890}
2891
2892uint32_t float16_to_uint32(float16 a, float_status *s)
2893{
2894 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2895}
2896
2897uint64_t float16_to_uint64(float16 a, float_status *s)
2898{
2899 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2900}
2901
2902uint16_t float32_to_uint16(float32 a, float_status *s)
2903{
2904 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2905}
2906
2907uint32_t float32_to_uint32(float32 a, float_status *s)
2908{
2909 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2910}
2911
2912uint64_t float32_to_uint64(float32 a, float_status *s)
2913{
2914 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2915}
2916
2917uint16_t float64_to_uint16(float64 a, float_status *s)
2918{
2919 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2920}
2921
2922uint32_t float64_to_uint32(float64 a, float_status *s)
2923{
2924 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2925}
2926
2927uint64_t float64_to_uint64(float64 a, float_status *s)
2928{
2929 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2930}
2931
4ab4aef0
RH
2932uint32_t float128_to_uint32(float128 a, float_status *s)
2933{
2934 return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2935}
2936
2937uint64_t float128_to_uint64(float128 a, float_status *s)
2938{
2939 return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2940}
2941
2f6c74be
RH
2942uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2943{
2944 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2945}
2946
2947uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2948{
2949 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2950}
2951
/*
 * Convert float16 to uint64_t, passing float_round_to_zero explicitly
 * instead of s->float_rounding_mode; the scale argument is 0.
 */
uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
{
    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2956
/*
 * Convert float32 to uint16_t, passing float_round_to_zero explicitly
 * instead of s->float_rounding_mode; the scale argument is 0.
 */
uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}
2961
/*
 * Convert float32 to uint32_t, passing float_round_to_zero explicitly
 * instead of s->float_rounding_mode; the scale argument is 0.
 */
uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}
2966
/*
 * Convert float32 to uint64_t, passing float_round_to_zero explicitly
 * instead of s->float_rounding_mode; the scale argument is 0.
 */
uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
{
    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2971
/*
 * Convert float64 to uint16_t, passing float_round_to_zero explicitly
 * instead of s->float_rounding_mode; the scale argument is 0.
 */
uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}
2976
/*
 * Convert float64 to uint32_t, passing float_round_to_zero explicitly
 * instead of s->float_rounding_mode; the scale argument is 0.
 */
uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}
2981
/*
 * Convert float64 to uint64_t, passing float_round_to_zero explicitly
 * instead of s->float_rounding_mode; the scale argument is 0.
 */
uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
{
    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
ab52f973 2986
/*
 * Convert float128 to uint32_t, passing float_round_to_zero explicitly
 * instead of s->float_rounding_mode; the scale argument is 0.
 */
uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}
2991
/*
 * Convert float128 to uint64_t, passing float_round_to_zero explicitly
 * instead of s->float_rounding_mode; the scale argument is 0.
 */
uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
{
    return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
2996
/*
 * Convert bfloat16 to uint16_t: forwards to bfloat16_to_uint16_scalbn with
 * the rounding mode currently stored in *s and a scale argument of 0.
 */
uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
}
3001
/*
 * Convert bfloat16 to uint32_t: forwards to bfloat16_to_uint32_scalbn with
 * the rounding mode currently stored in *s and a scale argument of 0.
 */
uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
}
3006
/*
 * Convert bfloat16 to uint64_t: forwards to bfloat16_to_uint64_scalbn with
 * the rounding mode currently stored in *s and a scale argument of 0.
 */
uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
}
3011
/*
 * Convert bfloat16 to uint16_t, passing float_round_to_zero explicitly
 * instead of s->float_rounding_mode; the scale argument is 0.
 */
uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
}
3016
/*
 * Convert bfloat16 to uint32_t, passing float_round_to_zero explicitly
 * instead of s->float_rounding_mode; the scale argument is 0.
 */
uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
}
3021
/*
 * Convert bfloat16 to uint64_t, passing float_round_to_zero explicitly
 * instead of s->float_rounding_mode; the scale argument is 0.
 */
uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
{
    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
}
3026
c02e1fb8 3027/*
e3689519 3028 * Signed integer to floating-point conversions
c02e1fb8
AB
3029 */
3030
2abdfe24 3031float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3032{
e3689519
RH
3033 FloatParts64 p;
3034
3035 parts_sint_to_float(&p, a, scale, status);
3036 return float16_round_pack_canonical(&p, status);
c02e1fb8
AB
3037}
3038
2abdfe24
RH
/* int32_t variant: `a' is widened to int64_t by the call below. */
float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}
3043
/* int16_t variant: `a' is widened to int64_t by the call below. */
float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float16_scalbn(a, scale, status);
}
3048
/* Convert int64_t to float16 with a scale argument of 0. */
float16 int64_to_float16(int64_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
3053
c02e1fb8
AB
/* Convert int32_t to float16 via the 64-bit routine, scale 0. */
float16 int32_to_float16(int32_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
3058
/* Convert int16_t to float16 via the 64-bit routine, scale 0. */
float16 int16_to_float16(int16_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
3063
0d93d8ec
FC
/* Convert int8_t to float16 via the 64-bit routine, scale 0. */
float16 int8_to_float16(int8_t a, float_status *status)
{
    return int64_to_float16_scalbn(a, 0, status);
}
3068
2abdfe24 3069float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3070{
e3689519
RH
3071 FloatParts64 p;
3072
3073 parts64_sint_to_float(&p, a, scale, status);
3074 return float32_round_pack_canonical(&p, status);
c02e1fb8
AB
3075}
3076
2abdfe24
RH
/* int32_t variant: `a' is widened to int64_t by the call below. */
float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}
3081
/* int16_t variant: `a' is widened to int64_t by the call below. */
float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float32_scalbn(a, scale, status);
}
3086
/* Convert int64_t to float32 with a scale argument of 0. */
float32 int64_to_float32(int64_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}
3091
c02e1fb8
AB
/* Convert int32_t to float32 via the 64-bit routine, scale 0. */
float32 int32_to_float32(int32_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}
3096
/* Convert int16_t to float32 via the 64-bit routine, scale 0. */
float32 int16_to_float32(int16_t a, float_status *status)
{
    return int64_to_float32_scalbn(a, 0, status);
}
3101
2abdfe24 3102float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3103{
e3689519
RH
3104 FloatParts64 p;
3105
3106 parts_sint_to_float(&p, a, scale, status);
3107 return float64_round_pack_canonical(&p, status);
c02e1fb8
AB
3108}
3109
2abdfe24
RH
/* int32_t variant: `a' is widened to int64_t by the call below. */
float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}
3114
/* int16_t variant: `a' is widened to int64_t by the call below. */
float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_float64_scalbn(a, scale, status);
}
3119
/* Convert int64_t to float64 with a scale argument of 0. */
float64 int64_to_float64(int64_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}
3124
c02e1fb8
AB
/* Convert int32_t to float64 via the 64-bit routine, scale 0. */
float64 int32_to_float64(int32_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}
3129
/* Convert int16_t to float64 via the 64-bit routine, scale 0. */
float64 int16_to_float64(int16_t a, float_status *status)
{
    return int64_to_float64_scalbn(a, 0, status);
}
3134
34f0c0a9
LZ
3135bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3136{
e3689519
RH
3137 FloatParts64 p;
3138
3139 parts_sint_to_float(&p, a, scale, status);
3140 return bfloat16_round_pack_canonical(&p, status);
34f0c0a9
LZ
3141}
3142
/* int32_t variant: `a' is widened to int64_t by the call below. */
bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}
3147
/* int16_t variant: `a' is widened to int64_t by the call below. */
bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, scale, status);
}
3152
/* Convert int64_t to bfloat16 with a scale argument of 0. */
bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}
3157
/* Convert int32_t to bfloat16 via the 64-bit routine, scale 0. */
bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}
3162
/* Convert int16_t to bfloat16 via the 64-bit routine, scale 0. */
bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
{
    return int64_to_bfloat16_scalbn(a, 0, status);
}
c02e1fb8 3167
e3689519
RH
3168float128 int64_to_float128(int64_t a, float_status *status)
3169{
3170 FloatParts128 p;
3171
3172 parts_sint_to_float(&p, a, 0, status);
3173 return float128_round_pack_canonical(&p, status);
3174}
3175
/* int32_t variant: `a' is widened to int64_t by the call below. */
float128 int32_to_float128(int32_t a, float_status *status)
{
    return int64_to_float128(a, status);
}
3180
c02e1fb8 3181/*
37c954a1 3182 * Unsigned Integer to floating-point conversions
c02e1fb8
AB
3183 */
3184
2abdfe24 3185float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3186{
37c954a1
RH
3187 FloatParts64 p;
3188
3189 parts_uint_to_float(&p, a, scale, status);
3190 return float16_round_pack_canonical(&p, status);
c02e1fb8
AB
3191}
3192
2abdfe24
RH
/* uint32_t variant: `a' is widened to uint64_t by the call below. */
float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}
3197
/* uint16_t variant: `a' is widened to uint64_t by the call below. */
float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float16_scalbn(a, scale, status);
}
3202
/* Convert uint64_t to float16 with a scale argument of 0. */
float16 uint64_to_float16(uint64_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
3207
c02e1fb8
AB
/* Convert uint32_t to float16 via the 64-bit routine, scale 0. */
float16 uint32_to_float16(uint32_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
3212
/* Convert uint16_t to float16 via the 64-bit routine, scale 0. */
float16 uint16_to_float16(uint16_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
3217
0d93d8ec
FC
/* Convert uint8_t to float16 via the 64-bit routine, scale 0. */
float16 uint8_to_float16(uint8_t a, float_status *status)
{
    return uint64_to_float16_scalbn(a, 0, status);
}
3222
2abdfe24 3223float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3224{
37c954a1
RH
3225 FloatParts64 p;
3226
3227 parts_uint_to_float(&p, a, scale, status);
3228 return float32_round_pack_canonical(&p, status);
c02e1fb8
AB
3229}
3230
2abdfe24
RH
/* uint32_t variant: `a' is widened to uint64_t by the call below. */
float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}
3235
/* uint16_t variant: `a' is widened to uint64_t by the call below. */
float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float32_scalbn(a, scale, status);
}
3240
/* Convert uint64_t to float32 with a scale argument of 0. */
float32 uint64_to_float32(uint64_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}
3245
c02e1fb8
AB
/* Convert uint32_t to float32 via the 64-bit routine, scale 0. */
float32 uint32_to_float32(uint32_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}
3250
/* Convert uint16_t to float32 via the 64-bit routine, scale 0. */
float32 uint16_to_float32(uint16_t a, float_status *status)
{
    return uint64_to_float32_scalbn(a, 0, status);
}
3255
2abdfe24 3256float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3257{
37c954a1
RH
3258 FloatParts64 p;
3259
3260 parts_uint_to_float(&p, a, scale, status);
3261 return float64_round_pack_canonical(&p, status);
c02e1fb8
AB
3262}
3263
2abdfe24
RH
/* uint32_t variant: `a' is widened to uint64_t by the call below. */
float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}
3268
/* uint16_t variant: `a' is widened to uint64_t by the call below. */
float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_float64_scalbn(a, scale, status);
}
3273
/* Convert uint64_t to float64 with a scale argument of 0. */
float64 uint64_to_float64(uint64_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}
3278
c02e1fb8
AB
/* Convert uint32_t to float64 via the 64-bit routine, scale 0. */
float64 uint32_to_float64(uint32_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}
3283
/* Convert uint16_t to float64 via the 64-bit routine, scale 0. */
float64 uint16_to_float64(uint16_t a, float_status *status)
{
    return uint64_to_float64_scalbn(a, 0, status);
}
3288
34f0c0a9
LZ
3289bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3290{
37c954a1
RH
3291 FloatParts64 p;
3292
3293 parts_uint_to_float(&p, a, scale, status);
3294 return bfloat16_round_pack_canonical(&p, status);
34f0c0a9
LZ
3295}
3296
/* uint32_t variant: `a' is widened to uint64_t by the call below. */
bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}
3301
/* uint16_t variant: `a' is widened to uint64_t by the call below. */
bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, scale, status);
}
3306
/* Convert uint64_t to bfloat16 with a scale argument of 0. */
bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}
3311
/* Convert uint32_t to bfloat16 via the 64-bit routine, scale 0. */
bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}
3316
/* Convert uint16_t to bfloat16 via the 64-bit routine, scale 0. */
bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
{
    return uint64_to_bfloat16_scalbn(a, 0, status);
}
3321
37c954a1
RH
3322float128 uint64_to_float128(uint64_t a, float_status *status)
3323{
3324 FloatParts128 p;
3325
3326 parts_uint_to_float(&p, a, 0, status);
3327 return float128_round_pack_canonical(&p, status);
3328}
3329
e1c4667a
RH
3330/*
3331 * Minimum and maximum
89360067 3332 */
89360067 3333
e1c4667a
RH
3334static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3335{
3336 FloatParts64 pa, pb, *pr;
89360067 3337
e1c4667a
RH
3338 float16_unpack_canonical(&pa, a, s);
3339 float16_unpack_canonical(&pb, b, s);
3340 pr = parts_minmax(&pa, &pb, s, flags);
3341
3342 return float16_round_pack_canonical(pr, s);
89360067
AB
3343}
3344
e1c4667a
RH
3345static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3346 float_status *s, int flags)
3347{
3348 FloatParts64 pa, pb, *pr;
3349
3350 bfloat16_unpack_canonical(&pa, a, s);
3351 bfloat16_unpack_canonical(&pb, b, s);
3352 pr = parts_minmax(&pa, &pb, s, flags);
3353
3354 return bfloat16_round_pack_canonical(pr, s);
3355}
3356
3357static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
3358{
3359 FloatParts64 pa, pb, *pr;
3360
3361 float32_unpack_canonical(&pa, a, s);
3362 float32_unpack_canonical(&pb, b, s);
3363 pr = parts_minmax(&pa, &pb, s, flags);
3364
3365 return float32_round_pack_canonical(pr, s);
3366}
3367
3368static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
3369{
3370 FloatParts64 pa, pb, *pr;
3371
3372 float64_unpack_canonical(&pa, a, s);
3373 float64_unpack_canonical(&pb, b, s);
3374 pr = parts_minmax(&pa, &pb, s, flags);
3375
3376 return float64_round_pack_canonical(pr, s);
3377}
3378
ceebc129
DH
3379static float128 float128_minmax(float128 a, float128 b,
3380 float_status *s, int flags)
3381{
3382 FloatParts128 pa, pb, *pr;
3383
3384 float128_unpack_canonical(&pa, a, s);
3385 float128_unpack_canonical(&pb, b, s);
3386 pr = parts_minmax(&pa, &pb, s, flags);
3387
3388 return float128_round_pack_canonical(pr, s);
3389}
3390
e1c4667a
RH
/*
 * MINMAX_1 defines one public function <type>_<name>(a, b, s) that calls
 * <type>_minmax with a fixed flag combination.
 */
#define MINMAX_1(type, name, flags) \
    type type##_##name(type a, type b, float_status *s) \
    { return type##_minmax(a, b, s, flags); }

/*
 * MINMAX_2 instantiates the six public variants for one type: max, maxnum,
 * maxnummag, min, minnum and minnummag, each built from the minmax_ismin,
 * minmax_isnum and minmax_ismag flag bits.
 */
#define MINMAX_2(type) \
    MINMAX_1(type, max, 0)                                      \
    MINMAX_1(type, maxnum, minmax_isnum)                        \
    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)      \
    MINMAX_1(type, min, minmax_ismin)                           \
    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)         \
    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag)

MINMAX_2(float16)
MINMAX_2(bfloat16)
MINMAX_2(float32)
MINMAX_2(float64)
MINMAX_2(float128)

/* The generator macros are local to this section. */
#undef MINMAX_1
#undef MINMAX_2
8282310d 3411
6eb169b8
RH
3412/*
3413 * Floating point compare
3414 */
0c4c9092 3415
6eb169b8
RH
3416static FloatRelation QEMU_FLATTEN
3417float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
3418{
3419 FloatParts64 pa, pb;
0c4c9092 3420
6eb169b8
RH
3421 float16_unpack_canonical(&pa, a, s);
3422 float16_unpack_canonical(&pb, b, s);
3423 return parts_compare(&pa, &pb, s, is_quiet);
0c4c9092
AB
3424}
3425
/* Compare with is_quiet == false. */
FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, false);
}
3430
/* Compare with is_quiet == true. */
FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
    return float16_do_compare(a, b, s, true);
}
3435
3436static FloatRelation QEMU_SOFTFLOAT_ATTR
3437float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
3438{
3439 FloatParts64 pa, pb;
3440
3441 float32_unpack_canonical(&pa, a, s);
3442 float32_unpack_canonical(&pb, b, s);
3443 return parts_compare(&pa, &pb, s, is_quiet);
d9fe9db9
EC
3444}
3445
71bfd65c 3446static FloatRelation QEMU_FLATTEN
6eb169b8 3447float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
d9fe9db9
EC
3448{
3449 union_float32 ua, ub;
3450
3451 ua.s = xa;
3452 ub.s = xb;
3453
3454 if (QEMU_NO_HARDFLOAT) {
3455 goto soft;
3456 }
3457
3458 float32_input_flush2(&ua.s, &ub.s, s);
3459 if (isgreaterequal(ua.h, ub.h)) {
3460 if (isgreater(ua.h, ub.h)) {
3461 return float_relation_greater;
3462 }
3463 return float_relation_equal;
3464 }
3465 if (likely(isless(ua.h, ub.h))) {
3466 return float_relation_less;
3467 }
6eb169b8
RH
3468 /*
3469 * The only condition remaining is unordered.
d9fe9db9
EC
3470 * Fall through to set flags.
3471 */
3472 soft:
6eb169b8 3473 return float32_do_compare(ua.s, ub.s, s, is_quiet);
d9fe9db9
EC
3474}
3475
/* Compare with is_quiet == false. */
FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, false);
}
3480
/* Compare with is_quiet == true. */
FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
    return float32_hs_compare(a, b, s, true);
}
3485
3486static FloatRelation QEMU_SOFTFLOAT_ATTR
3487float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
3488{
3489 FloatParts64 pa, pb;
3490
3491 float64_unpack_canonical(&pa, a, s);
3492 float64_unpack_canonical(&pb, b, s);
3493 return parts_compare(&pa, &pb, s, is_quiet);
d9fe9db9
EC
3494}
3495
71bfd65c 3496static FloatRelation QEMU_FLATTEN
6eb169b8 3497float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
d9fe9db9
EC
3498{
3499 union_float64 ua, ub;
3500
3501 ua.s = xa;
3502 ub.s = xb;
3503
3504 if (QEMU_NO_HARDFLOAT) {
3505 goto soft;
3506 }
3507
3508 float64_input_flush2(&ua.s, &ub.s, s);
3509 if (isgreaterequal(ua.h, ub.h)) {
3510 if (isgreater(ua.h, ub.h)) {
3511 return float_relation_greater;
3512 }
3513 return float_relation_equal;
3514 }
3515 if (likely(isless(ua.h, ub.h))) {
3516 return float_relation_less;
3517 }
6eb169b8
RH
3518 /*
3519 * The only condition remaining is unordered.
d9fe9db9
EC
3520 * Fall through to set flags.
3521 */
3522 soft:
6eb169b8 3523 return float64_do_compare(ua.s, ub.s, s, is_quiet);
d9fe9db9
EC
3524}
3525
/* Compare with is_quiet == false. */
FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, false);
}
3530
/* Compare with is_quiet == true. */
FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
    return float64_hs_compare(a, b, s, true);
}
3535
8282310d 3536static FloatRelation QEMU_FLATTEN
6eb169b8 3537bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
8282310d 3538{
98e256fc
RH
3539 FloatParts64 pa, pb;
3540
3541 bfloat16_unpack_canonical(&pa, a, s);
3542 bfloat16_unpack_canonical(&pb, b, s);
6eb169b8 3543 return parts_compare(&pa, &pb, s, is_quiet);
8282310d
LZ
3544}
3545
/* Compare with is_quiet == false. */
FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, false);
}
3550
/* Compare with is_quiet == true. */
FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
{
    return bfloat16_do_compare(a, b, s, true);
}
3555
3556static FloatRelation QEMU_FLATTEN
3557float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
3558{
3559 FloatParts128 pa, pb;
3560
3561 float128_unpack_canonical(&pa, a, s);
3562 float128_unpack_canonical(&pb, b, s);
3563 return parts_compare(&pa, &pb, s, is_quiet);
3564}
3565
/* Compare with is_quiet == false. */
FloatRelation float128_compare(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, false);
}
3570
/* Compare with is_quiet == true. */
FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
{
    return float128_do_compare(a, b, s, true);
}
3575
39626b0c
RH
3576/*
3577 * Scale by 2**N
3578 */
0bfc9f19
AB
3579
3580float16 float16_scalbn(float16 a, int n, float_status *status)
3581{
39626b0c 3582 FloatParts64 p;
98e256fc 3583
39626b0c
RH
3584 float16_unpack_canonical(&p, a, status);
3585 parts_scalbn(&p, n, status);
3586 return float16_round_pack_canonical(&p, status);
0bfc9f19
AB
3587}
3588
3589float32 float32_scalbn(float32 a, int n, float_status *status)
3590{
39626b0c 3591 FloatParts64 p;
98e256fc 3592
39626b0c
RH
3593 float32_unpack_canonical(&p, a, status);
3594 parts_scalbn(&p, n, status);
3595 return float32_round_pack_canonical(&p, status);
0bfc9f19
AB
3596}
3597
3598float64 float64_scalbn(float64 a, int n, float_status *status)
3599{
39626b0c 3600 FloatParts64 p;
98e256fc 3601
39626b0c
RH
3602 float64_unpack_canonical(&p, a, status);
3603 parts_scalbn(&p, n, status);
3604 return float64_round_pack_canonical(&p, status);
0bfc9f19
AB
3605}
3606
8282310d
LZ
3607bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3608{
39626b0c 3609 FloatParts64 p;
98e256fc 3610
39626b0c
RH
3611 bfloat16_unpack_canonical(&p, a, status);
3612 parts_scalbn(&p, n, status);
3613 return bfloat16_round_pack_canonical(&p, status);
3614}
3615
3616float128 float128_scalbn(float128 a, int n, float_status *status)
3617{
3618 FloatParts128 p;
3619
3620 float128_unpack_canonical(&p, a, status);
3621 parts_scalbn(&p, n, status);
3622 return float128_round_pack_canonical(&p, status);
8282310d
LZ
3623}
3624
c13bb2da
AB
3625/*
3626 * Square Root
c13bb2da
AB
3627 */
3628
97ff87c0 3629float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da 3630{
9261b245 3631 FloatParts64 p;
98e256fc 3632
9261b245
RH
3633 float16_unpack_canonical(&p, a, status);
3634 parts_sqrt(&p, status, &float16_params);
3635 return float16_round_pack_canonical(&p, status);
c13bb2da
AB
3636}
3637
f131bae8
EC
3638static float32 QEMU_SOFTFLOAT_ATTR
3639soft_f32_sqrt(float32 a, float_status *status)
c13bb2da 3640{
9261b245 3641 FloatParts64 p;
98e256fc 3642
9261b245
RH
3643 float32_unpack_canonical(&p, a, status);
3644 parts_sqrt(&p, status, &float32_params);
3645 return float32_round_pack_canonical(&p, status);
c13bb2da
AB
3646}
3647
f131bae8
EC
3648static float64 QEMU_SOFTFLOAT_ATTR
3649soft_f64_sqrt(float64 a, float_status *status)
c13bb2da 3650{
9261b245 3651 FloatParts64 p;
98e256fc 3652
9261b245
RH
3653 float64_unpack_canonical(&p, a, status);
3654 parts_sqrt(&p, status, &float64_params);
3655 return float64_round_pack_canonical(&p, status);
c13bb2da
AB
3656}
3657
f131bae8
EC
3658float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3659{
3660 union_float32 ua, ur;
3661
3662 ua.s = xa;
3663 if (unlikely(!can_use_fpu(s))) {
3664 goto soft;
3665 }
3666
3667 float32_input_flush1(&ua.s, s);
3668 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3669 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3670 fpclassify(ua.h) == FP_ZERO) ||
3671 signbit(ua.h))) {
3672 goto soft;
3673 }
3674 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3675 float32_is_neg(ua.s))) {
3676 goto soft;
3677 }
3678 ur.h = sqrtf(ua.h);
3679 return ur.s;
3680
3681 soft:
3682 return soft_f32_sqrt(ua.s, s);
3683}
3684
3685float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3686{
3687 union_float64 ua, ur;
3688
3689 ua.s = xa;
3690 if (unlikely(!can_use_fpu(s))) {
3691 goto soft;
3692 }
3693
3694 float64_input_flush1(&ua.s, s);
3695 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3696 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3697 fpclassify(ua.h) == FP_ZERO) ||
3698 signbit(ua.h))) {
3699 goto soft;
3700 }
3701 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3702 float64_is_neg(ua.s))) {
3703 goto soft;
3704 }
3705 ur.h = sqrt(ua.h);
3706 return ur.s;
3707
3708 soft:
3709 return soft_f64_sqrt(ua.s, s);
3710}
3711
8282310d
LZ
3712bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3713{
9261b245 3714 FloatParts64 p;
98e256fc 3715
9261b245
RH
3716 bfloat16_unpack_canonical(&p, a, status);
3717 parts_sqrt(&p, status, &bfloat16_params);
3718 return bfloat16_round_pack_canonical(&p, status);
3719}
3720
3721float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
3722{
3723 FloatParts128 p;
3724
3725 float128_unpack_canonical(&p, a, status);
3726 parts_sqrt(&p, status, &float128_params);
3727 return float128_round_pack_canonical(&p, status);
8282310d
LZ
3728}
3729
0218a16e
RH
3730/*----------------------------------------------------------------------------
3731| The pattern for a default generated NaN.
3732*----------------------------------------------------------------------------*/
3733
3734float16 float16_default_nan(float_status *status)
3735{
0fc07cad
RH
3736 FloatParts64 p;
3737
3738 parts_default_nan(&p, status);
0218a16e 3739 p.frac >>= float16_params.frac_shift;
71fd178e 3740 return float16_pack_raw(&p);
0218a16e
RH
3741}
3742
3743float32 float32_default_nan(float_status *status)
3744{
0fc07cad
RH
3745 FloatParts64 p;
3746
3747 parts_default_nan(&p, status);
0218a16e 3748 p.frac >>= float32_params.frac_shift;
71fd178e 3749 return float32_pack_raw(&p);
0218a16e
RH
3750}
3751
3752float64 float64_default_nan(float_status *status)
3753{
0fc07cad
RH
3754 FloatParts64 p;
3755
3756 parts_default_nan(&p, status);
0218a16e 3757 p.frac >>= float64_params.frac_shift;
71fd178e 3758 return float64_pack_raw(&p);
0218a16e
RH
3759}
3760
3761float128 float128_default_nan(float_status *status)
3762{
e9034ea8 3763 FloatParts128 p;
0218a16e 3764
0fc07cad 3765 parts_default_nan(&p, status);
e9034ea8
RH
3766 frac_shr(&p, float128_params.frac_shift);
3767 return float128_pack_raw(&p);
0218a16e 3768}
c13bb2da 3769
8282310d
LZ
3770bfloat16 bfloat16_default_nan(float_status *status)
3771{
0fc07cad
RH
3772 FloatParts64 p;
3773
3774 parts_default_nan(&p, status);
8282310d 3775 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3776 return bfloat16_pack_raw(&p);
8282310d
LZ
3777}
3778
158142c2 3779/*----------------------------------------------------------------------------
377ed926
RH
3780| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3781*----------------------------------------------------------------------------*/
3782
3783float16 float16_silence_nan(float16 a, float_status *status)
3784{
3dddb203
RH
3785 FloatParts64 p;
3786
3787 float16_unpack_raw(&p, a);
377ed926 3788 p.frac <<= float16_params.frac_shift;
92ff426d 3789 parts_silence_nan(&p, status);
377ed926 3790 p.frac >>= float16_params.frac_shift;
71fd178e 3791 return float16_pack_raw(&p);
377ed926
RH
3792}
3793
3794float32 float32_silence_nan(float32 a, float_status *status)
3795{
3dddb203
RH
3796 FloatParts64 p;
3797
3798 float32_unpack_raw(&p, a);
377ed926 3799 p.frac <<= float32_params.frac_shift;
92ff426d 3800 parts_silence_nan(&p, status);
377ed926 3801 p.frac >>= float32_params.frac_shift;
71fd178e 3802 return float32_pack_raw(&p);
377ed926
RH
3803}
3804
3805float64 float64_silence_nan(float64 a, float_status *status)
3806{
3dddb203
RH
3807 FloatParts64 p;
3808
3809 float64_unpack_raw(&p, a);
377ed926 3810 p.frac <<= float64_params.frac_shift;
92ff426d 3811 parts_silence_nan(&p, status);
377ed926 3812 p.frac >>= float64_params.frac_shift;
71fd178e 3813 return float64_pack_raw(&p);
377ed926
RH
3814}
3815
8282310d
LZ
3816bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3817{
3dddb203
RH
3818 FloatParts64 p;
3819
3820 bfloat16_unpack_raw(&p, a);
8282310d 3821 p.frac <<= bfloat16_params.frac_shift;
92ff426d 3822 parts_silence_nan(&p, status);
8282310d 3823 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3824 return bfloat16_pack_raw(&p);
8282310d 3825}
e6b405fe 3826
0018b1f4
RH
3827float128 float128_silence_nan(float128 a, float_status *status)
3828{
3829 FloatParts128 p;
3830
3831 float128_unpack_raw(&p, a);
3832 frac_shl(&p, float128_params.frac_shift);
3833 parts_silence_nan(&p, status);
3834 frac_shr(&p, float128_params.frac_shift);
3835 return float128_pack_raw(&p);
3836}
3837
e6b405fe
AB
3838/*----------------------------------------------------------------------------
3839| If `a' is denormal and we are in flush-to-zero mode then set the
3840| input-denormal exception and return zero. Otherwise just return the value.
3841*----------------------------------------------------------------------------*/
3842
f8155c1d 3843static bool parts_squash_denormal(FloatParts64 p, float_status *status)
e6b405fe
AB
3844{
3845 if (p.exp == 0 && p.frac != 0) {
3846 float_raise(float_flag_input_denormal, status);
3847 return true;
3848 }
3849
3850 return false;
3851}
3852
3853float16 float16_squash_input_denormal(float16 a, float_status *status)
3854{
3855 if (status->flush_inputs_to_zero) {
3dddb203
RH
3856 FloatParts64 p;
3857
3858 float16_unpack_raw(&p, a);
e6b405fe
AB
3859 if (parts_squash_denormal(p, status)) {
3860 return float16_set_sign(float16_zero, p.sign);
3861 }
3862 }
3863 return a;
3864}
3865
3866float32 float32_squash_input_denormal(float32 a, float_status *status)
3867{
3868 if (status->flush_inputs_to_zero) {
3dddb203
RH
3869 FloatParts64 p;
3870
3871 float32_unpack_raw(&p, a);
e6b405fe
AB
3872 if (parts_squash_denormal(p, status)) {
3873 return float32_set_sign(float32_zero, p.sign);
3874 }
3875 }
3876 return a;
3877}
3878
3879float64 float64_squash_input_denormal(float64 a, float_status *status)
3880{
3881 if (status->flush_inputs_to_zero) {
3dddb203
RH
3882 FloatParts64 p;
3883
3884 float64_unpack_raw(&p, a);
e6b405fe
AB
3885 if (parts_squash_denormal(p, status)) {
3886 return float64_set_sign(float64_zero, p.sign);
3887 }
3888 }
3889 return a;
3890}
3891
8282310d
LZ
3892bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3893{
3894 if (status->flush_inputs_to_zero) {
3dddb203
RH
3895 FloatParts64 p;
3896
3897 bfloat16_unpack_raw(&p, a);
8282310d
LZ
3898 if (parts_squash_denormal(p, status)) {
3899 return bfloat16_set_sign(bfloat16_zero, p.sign);
3900 }
3901 }
3902 return a;
3903}
3904
377ed926 3905/*----------------------------------------------------------------------------
158142c2
FB
3906| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3907| and 7, and returns the properly rounded 32-bit integer corresponding to the
3908| input. If `zSign' is 1, the input is negated before being converted to an
3909| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3910| is simply rounded to an integer, with the inexact exception raised if the
3911| input cannot be represented exactly as an integer. However, if the fixed-
3912| point input is too large, the invalid exception is raised and the largest
3913| positive or negative integer is returned.
3914*----------------------------------------------------------------------------*/
3915
c120391c
RH
/*
 * Round the fixed-point magnitude `absZ' (7 fraction bits) to an integer
 * using the rounding mode in `status', apply the sign `zSign', and return
 * it as int32_t.  Raises float_flag_invalid and returns INT32_MIN/INT32_MAX
 * when the result does not fit; raises float_flag_inexact when any fraction
 * bit was discarded.  Aborts on an unrecognised rounding mode.
 */
static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
                                 float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven;
    int8_t roundIncrement, roundBits;
    int32_t z;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Pick the value added below the binary point before truncating. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Half of one integer unit (bit 6 of the 7 fraction bits). */
        roundIncrement = 0x40;
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        /* Any non-zero fraction carries, but only for positive values. */
        roundIncrement = zSign ? 0 : 0x7f;
        break;
    case float_round_down:
        /* Any non-zero fraction carries, but only for negative values. */
        roundIncrement = zSign ? 0x7f : 0;
        break;
    case float_round_to_odd:
        /* Bit 7 is the integer LSB: truncate if already odd, else carry. */
        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
        break;
    default:
        abort();
    }
    /* Remember the discarded fraction for the tie and inexact checks. */
    roundBits = absZ & 0x7F;
    absZ = ( absZ + roundIncrement )>>7;
    /* Exact tie (fraction == 0x40) in nearest-even mode: force an even LSB. */
    if (!(roundBits ^ 0x40) && roundNearestEven) {
        absZ &= ~1;
    }
    /* Narrow to 32 bits and apply the sign; overflow is detected below. */
    z = absZ;
    if ( zSign ) z = - z;
    /*
     * Out of range if the magnitude needs more than 32 bits, or if the
     * sign of the non-zero 32-bit result disagrees with the requested sign.
     */
    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
        float_raise(float_flag_invalid, status);
        return zSign ? INT32_MIN : INT32_MAX;
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    return z;

}
3963
3964/*----------------------------------------------------------------------------
3965| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3966| `absZ1', with binary point between bits 63 and 64 (between the input words),
3967| and returns the properly rounded 64-bit integer corresponding to the input.
3968| If `zSign' is 1, the input is negated before being converted to an integer.
3969| Ordinarily, the fixed-point input is simply rounded to an integer, with
3970| the inexact exception raised if the input cannot be represented exactly as
3971| an integer. However, if the fixed-point input is too large, the invalid
3972| exception is raised and the largest positive or negative integer is
3973| returned.
3974*----------------------------------------------------------------------------*/
3975
c120391c 3976static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 3977 float_status *status)
158142c2 3978{
8f506c70 3979 int8_t roundingMode;
c120391c 3980 bool roundNearestEven, increment;
760e1416 3981 int64_t z;
158142c2 3982
a2f2d288 3983 roundingMode = status->float_rounding_mode;
158142c2 3984 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3985 switch (roundingMode) {
3986 case float_round_nearest_even:
f9288a76 3987 case float_round_ties_away:
dc355b76
PM
3988 increment = ((int64_t) absZ1 < 0);
3989 break;
3990 case float_round_to_zero:
3991 increment = 0;
3992 break;
3993 case float_round_up:
3994 increment = !zSign && absZ1;
3995 break;
3996 case float_round_down:
3997 increment = zSign && absZ1;
3998 break;
5d64abb3
RH
3999 case float_round_to_odd:
4000 increment = !(absZ0 & 1) && absZ1;
4001 break;
dc355b76
PM
4002 default:
4003 abort();
158142c2
FB
4004 }
4005 if ( increment ) {
4006 ++absZ0;
4007 if ( absZ0 == 0 ) goto overflow;
40662886
PMD
4008 if (!(absZ1 << 1) && roundNearestEven) {
4009 absZ0 &= ~1;
4010 }
158142c2
FB
4011 }
4012 z = absZ0;
4013 if ( zSign ) z = - z;
4014 if ( z && ( ( z < 0 ) ^ zSign ) ) {
4015 overflow:
ff32e16e 4016 float_raise(float_flag_invalid, status);
2c217da0 4017 return zSign ? INT64_MIN : INT64_MAX;
158142c2 4018 }
a2f2d288 4019 if (absZ1) {
d82f3b2d 4020 float_raise(float_flag_inexact, status);
a2f2d288 4021 }
158142c2
FB
4022 return z;
4023
4024}
4025
158142c2
FB
4026/*----------------------------------------------------------------------------
4027| Normalizes the subnormal single-precision floating-point value represented
4028| by the denormalized significand `aSig'. The normalized exponent and
4029| significand are stored at the locations pointed to by `zExpPtr' and
4030| `zSigPtr', respectively.
4031*----------------------------------------------------------------------------*/
4032
static void
normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /*
     * Left-shift distance: eight less than the value returned by
     * clz32(aSig).  The exponent drops by one for every position shifted.
     */
    const int8_t lshift = clz32(aSig) - 8;

    *zSigPtr = aSig << lshift;
    *zExpPtr = 1 - lshift;
}
4043
158142c2
FB
4044/*----------------------------------------------------------------------------
4045| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4046| and significand `zSig', and returns the proper single-precision floating-
4047| point value corresponding to the abstract input. Ordinarily, the abstract
4048| value is simply rounded and packed into the single-precision format, with
4049| the inexact exception raised if the abstract input cannot be represented
4050| exactly. However, if the abstract value is too large, the overflow and
4051| inexact exceptions are raised and an infinity or maximal finite value is
4052| returned. If the abstract value is too small, the input value is rounded to
4053| a subnormal number, and the underflow and inexact exceptions are raised if
4054| the abstract input cannot be represented exactly as a subnormal single-
4055| precision floating-point number.
4056| The input significand `zSig' has its binary point between bits 30
4057| and 29, which is 7 bits to the left of the usual location. This shifted
4058| significand must be normalized or smaller. If `zSig' is not normalized,
4059| `zExp' must be 0; in that case, the result returned is a subnormal number,
4060| and it must not require rounding. In the usual case that `zSig' is
4061| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4062| The handling of underflow and overflow follows the IEC/IEEE Standard for
4063| Binary Floating-Point Arithmetic.
4064*----------------------------------------------------------------------------*/
4065
c120391c 4066static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4067 float_status *status)
158142c2 4068{
8f506c70 4069 int8_t roundingMode;
c120391c 4070 bool roundNearestEven;
8f506c70 4071 int8_t roundIncrement, roundBits;
c120391c 4072 bool isTiny;
158142c2 4073
a2f2d288 4074 roundingMode = status->float_rounding_mode;
158142c2 4075 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4076 switch (roundingMode) {
4077 case float_round_nearest_even:
f9288a76 4078 case float_round_ties_away:
dc355b76
PM
4079 roundIncrement = 0x40;
4080 break;
4081 case float_round_to_zero:
4082 roundIncrement = 0;
4083 break;
4084 case float_round_up:
4085 roundIncrement = zSign ? 0 : 0x7f;
4086 break;
4087 case float_round_down:
4088 roundIncrement = zSign ? 0x7f : 0;
4089 break;
5d64abb3
RH
4090 case float_round_to_odd:
4091 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4092 break;
dc355b76
PM
4093 default:
4094 abort();
4095 break;
158142c2
FB
4096 }
4097 roundBits = zSig & 0x7F;
bb98fe42 4098 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
4099 if ( ( 0xFD < zExp )
4100 || ( ( zExp == 0xFD )
bb98fe42 4101 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4102 ) {
5d64abb3
RH
4103 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4104 roundIncrement != 0;
ff32e16e 4105 float_raise(float_flag_overflow | float_flag_inexact, status);
5d64abb3 4106 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
158142c2
FB
4107 }
4108 if ( zExp < 0 ) {
a2f2d288 4109 if (status->flush_to_zero) {
ff32e16e 4110 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4111 return packFloat32(zSign, 0, 0);
4112 }
a828b373
RH
4113 isTiny = status->tininess_before_rounding
4114 || (zExp < -1)
4115 || (zSig + roundIncrement < 0x80000000);
158142c2
FB
4116 shift32RightJamming( zSig, - zExp, &zSig );
4117 zExp = 0;
4118 roundBits = zSig & 0x7F;
ff32e16e
PM
4119 if (isTiny && roundBits) {
4120 float_raise(float_flag_underflow, status);
4121 }
5d64abb3
RH
4122 if (roundingMode == float_round_to_odd) {
4123 /*
4124 * For round-to-odd case, the roundIncrement depends on
4125 * zSig which just changed.
4126 */
4127 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4128 }
158142c2
FB
4129 }
4130 }
a2f2d288 4131 if (roundBits) {
d82f3b2d 4132 float_raise(float_flag_inexact, status);
a2f2d288 4133 }
158142c2 4134 zSig = ( zSig + roundIncrement )>>7;
40662886
PMD
4135 if (!(roundBits ^ 0x40) && roundNearestEven) {
4136 zSig &= ~1;
4137 }
158142c2
FB
4138 if ( zSig == 0 ) zExp = 0;
4139 return packFloat32( zSign, zExp, zSig );
4140
4141}
4142
4143/*----------------------------------------------------------------------------
4144| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4145| and significand `zSig', and returns the proper single-precision floating-
4146| point value corresponding to the abstract input. This routine is just like
4147| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4148| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4149| floating-point exponent.
4150*----------------------------------------------------------------------------*/
4151
4152static float32
c120391c 4153 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4154 float_status *status)
158142c2 4155{
8f506c70 4156 int8_t shiftCount;
158142c2 4157
0019d5c3 4158 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
4159 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4160 status);
158142c2
FB
4161
4162}
4163
158142c2
FB
4164/*----------------------------------------------------------------------------
4165| Normalizes the subnormal double-precision floating-point value represented
4166| by the denormalized significand `aSig'. The normalized exponent and
4167| significand are stored at the locations pointed to by `zExpPtr' and
4168| `zSigPtr', respectively.
4169*----------------------------------------------------------------------------*/
4170
static void
normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /*
     * Left-shift distance: eleven less than the value returned by
     * clz64(aSig).  The exponent drops by one for every position shifted.
     */
    const int8_t lshift = clz64(aSig) - 11;

    *zSigPtr = aSig << lshift;
    *zExpPtr = 1 - lshift;
}
4181
4182/*----------------------------------------------------------------------------
4183| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4184| double-precision floating-point value, returning the result. After being
4185| shifted into the proper positions, the three fields are simply added
4186| together to form the result. This means that any integer portion of `zSig'
4187| will be added into the exponent. Since a properly normalized significand
4188| will have an integer portion equal to 1, the `zExp' input should be 1 less
4189| than the desired result exponent whenever `zSig' is a complete, normalized
4190| significand.
4191*----------------------------------------------------------------------------*/
4192
c120391c 4193static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
158142c2
FB
4194{
4195
f090c9d4 4196 return make_float64(
bb98fe42 4197 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
4198
4199}
4200
4201/*----------------------------------------------------------------------------
4202| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4203| and significand `zSig', and returns the proper double-precision floating-
4204| point value corresponding to the abstract input. Ordinarily, the abstract
4205| value is simply rounded and packed into the double-precision format, with
4206| the inexact exception raised if the abstract input cannot be represented
4207| exactly. However, if the abstract value is too large, the overflow and
4208| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
4209| returned. If the abstract value is too small, the input value is rounded to
4210| a subnormal number, and the underflow and inexact exceptions are raised if
4211| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
4212| precision floating-point number.
4213| The input significand `zSig' has its binary point between bits 62
4214| and 61, which is 10 bits to the left of the usual location. This shifted
4215| significand must be normalized or smaller. If `zSig' is not normalized,
4216| `zExp' must be 0; in that case, the result returned is a subnormal number,
4217| and it must not require rounding. In the usual case that `zSig' is
4218| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4219| The handling of underflow and overflow follows the IEC/IEEE Standard for
4220| Binary Floating-Point Arithmetic.
4221*----------------------------------------------------------------------------*/
4222
c120391c 4223static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4224 float_status *status)
158142c2 4225{
8f506c70 4226 int8_t roundingMode;
c120391c 4227 bool roundNearestEven;
0c48262d 4228 int roundIncrement, roundBits;
c120391c 4229 bool isTiny;
158142c2 4230
a2f2d288 4231 roundingMode = status->float_rounding_mode;
158142c2 4232 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4233 switch (roundingMode) {
4234 case float_round_nearest_even:
f9288a76 4235 case float_round_ties_away:
dc355b76
PM
4236 roundIncrement = 0x200;
4237 break;
4238 case float_round_to_zero:
4239 roundIncrement = 0;
4240 break;
4241 case float_round_up:
4242 roundIncrement = zSign ? 0 : 0x3ff;
4243 break;
4244 case float_round_down:
4245 roundIncrement = zSign ? 0x3ff : 0;
4246 break;
9ee6f678
BR
4247 case float_round_to_odd:
4248 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4249 break;
dc355b76
PM
4250 default:
4251 abort();
158142c2
FB
4252 }
4253 roundBits = zSig & 0x3FF;
bb98fe42 4254 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
4255 if ( ( 0x7FD < zExp )
4256 || ( ( zExp == 0x7FD )
bb98fe42 4257 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4258 ) {
9ee6f678
BR
4259 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4260 roundIncrement != 0;
ff32e16e 4261 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 4262 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
4263 }
4264 if ( zExp < 0 ) {
a2f2d288 4265 if (status->flush_to_zero) {
ff32e16e 4266 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4267 return packFloat64(zSign, 0, 0);
4268 }
a828b373
RH
4269 isTiny = status->tininess_before_rounding
4270 || (zExp < -1)
4271 || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
158142c2
FB
4272 shift64RightJamming( zSig, - zExp, &zSig );
4273 zExp = 0;
4274 roundBits = zSig & 0x3FF;
ff32e16e
PM
4275 if (isTiny && roundBits) {
4276 float_raise(float_flag_underflow, status);
4277 }
9ee6f678
BR
4278 if (roundingMode == float_round_to_odd) {
4279 /*
4280 * For round-to-odd case, the roundIncrement depends on
4281 * zSig which just changed.
4282 */
4283 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4284 }
158142c2
FB
4285 }
4286 }
a2f2d288 4287 if (roundBits) {
d82f3b2d 4288 float_raise(float_flag_inexact, status);
a2f2d288 4289 }
158142c2 4290 zSig = ( zSig + roundIncrement )>>10;
40662886
PMD
4291 if (!(roundBits ^ 0x200) && roundNearestEven) {
4292 zSig &= ~1;
4293 }
158142c2
FB
4294 if ( zSig == 0 ) zExp = 0;
4295 return packFloat64( zSign, zExp, zSig );
4296
4297}
4298
4299/*----------------------------------------------------------------------------
4300| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4301| and significand `zSig', and returns the proper double-precision floating-
4302| point value corresponding to the abstract input. This routine is just like
4303| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4304| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4305| floating-point exponent.
4306*----------------------------------------------------------------------------*/
4307
4308static float64
c120391c 4309 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4310 float_status *status)
158142c2 4311{
8f506c70 4312 int8_t shiftCount;
158142c2 4313
0019d5c3 4314 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
4315 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4316 status);
158142c2
FB
4317
4318}
4319
158142c2
FB
4320/*----------------------------------------------------------------------------
4321| Normalizes the subnormal extended double-precision floating-point value
4322| represented by the denormalized significand `aSig'. The normalized exponent
4323| and significand are stored at the locations pointed to by `zExpPtr' and
4324| `zSigPtr', respectively.
4325*----------------------------------------------------------------------------*/
4326
88857aca
LV
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /*
     * Shift left by the full value returned by clz64(aSig); the exponent
     * drops by one for every position shifted.
     */
    const int8_t lshift = clz64(aSig);

    *zSigPtr = aSig << lshift;
    *zExpPtr = 1 - lshift;
}
4336
4337/*----------------------------------------------------------------------------
4338| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4339| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4340| and returns the proper extended double-precision floating-point value
4341| corresponding to the abstract input. Ordinarily, the abstract value is
4342| rounded and packed into the extended double-precision format, with the
4343| inexact exception raised if the abstract input cannot be represented
4344| exactly. However, if the abstract value is too large, the overflow and
4345| inexact exceptions are raised and an infinity or maximal finite value is
4346| returned. If the abstract value is too small, the input value is rounded to
4347| a subnormal number, and the underflow and inexact exceptions are raised if
4348| the abstract input cannot be represented exactly as a subnormal extended
4349| double-precision floating-point number.
4350| If `roundingPrecision' is 32 or 64, the result is rounded to the same
4351| number of bits as single or double precision, respectively. Otherwise, the
4352| result is rounded to the full precision of the extended double-precision
4353| format.
4354| The input significand must be normalized or smaller. If the input
4355| significand is not normalized, `zExp' must be 0; in that case, the result
4356| returned is a subnormal number, and it must not require rounding. The
4357| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4358| Floating-Point Arithmetic.
4359*----------------------------------------------------------------------------*/
4360
c120391c 4361floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
88857aca
LV
4362 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4363 float_status *status)
158142c2 4364{
    /*
     * Rounds the significand zSig0:zSig1 according to
     * status->float_rounding_mode and packs it with zSign and zExp.
     * Two separate paths follow: a reduced-precision path taken when
     * roundingPrecision is 64 or 32, and the `precision80' path taken for
     * every other value.  Exception flags are reported via float_raise().
     */
8f506c70 4365 int8_t roundingMode;
c120391c 4366 bool roundNearestEven, increment, isTiny;
f42c2224 4367 int64_t roundIncrement, roundMask, roundBits;
158142c2 4368
a2f2d288 4369 roundingMode = status->float_rounding_mode;
158142c2
FB
4370 roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Select the rounding position.  roundMask covers the bits of zSig0
     * that will be discarded; roundIncrement starts as half of
     * (roundMask + 1).  Any precision other than 64 or 32 goes to the
     * full-width path.
     */
4371 if ( roundingPrecision == 80 ) goto precision80;
4372 if ( roundingPrecision == 64 ) {
e9321124
AB
4373 roundIncrement = UINT64_C(0x0000000000000400);
4374 roundMask = UINT64_C(0x00000000000007FF);
158142c2
FB
4375 }
4376 else if ( roundingPrecision == 32 ) {
e9321124
AB
4377 roundIncrement = UINT64_C(0x0000008000000000);
4378 roundMask = UINT64_C(0x000000FFFFFFFFFF);
158142c2
FB
4379 }
4380 else {
4381 goto precision80;
4382 }
    /*
     * Reduced-precision path.  zSig1 is not read again on this path, so
     * fold "zSig1 is non-zero" into bit 0 of zSig0 as a sticky bit.
     */
4383 zSig0 |= ( zSig1 != 0 );
    /*
     * Nearest modes keep the half-unit increment chosen above; directed
     * modes replace it with 0 or the full mask depending on the sign.
     * Modes not listed here (float_round_to_odd, used elsewhere in this
     * file, is one) reach the default case and abort().
     */
dc355b76
PM
4384 switch (roundingMode) {
4385 case float_round_nearest_even:
f9288a76 4386 case float_round_ties_away:
dc355b76
PM
4387 break;
4388 case float_round_to_zero:
4389 roundIncrement = 0;
4390 break;
4391 case float_round_up:
4392 roundIncrement = zSign ? 0 : roundMask;
4393 break;
4394 case float_round_down:
4395 roundIncrement = zSign ? roundMask : 0;
4396 break;
4397 default:
4398 abort();
158142c2
FB
4399 }
4400 roundBits = zSig0 & roundMask;
    /*
     * The unsigned comparison is true when zExp <= 0 or zExp >= 0x7FFE,
     * i.e. whenever overflow or a subnormal result is possible.
     */
bb98fe42 4401 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
    /*
     * Overflow: exponent too large, or exponent 0x7FFE and adding the
     * increment wraps zSig0 past 64 bits.  The jump lands inside the
     * precision80 section below with roundMask still holding this path's
     * mask, which the code at that label complements to build the
     * largest finite significand.
     */
4402 if ( ( 0x7FFE < zExp )
4403 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4404 ) {
4405 goto overflow;
4406 }
4407 if ( zExp <= 0 ) {
a2f2d288 4408 if (status->flush_to_zero) {
ff32e16e 4409 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4410 return packFloatx80(zSign, 0, 0);
4411 }
    /* isTiny is computed from zSig0 before the right shift below. */
a828b373
RH
4412 isTiny = status->tininess_before_rounding
4413 || (zExp < 0 )
4414 || (zSig0 <= zSig0 + roundIncrement);
158142c2
FB
4415 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4416 zExp = 0;
4417 roundBits = zSig0 & roundMask;
ff32e16e
PM
4418 if (isTiny && roundBits) {
4419 float_raise(float_flag_underflow, status);
4420 }
a2f2d288 4421 if (roundBits) {
d82f3b2d 4422 float_raise(float_flag_inexact, status);
a2f2d288 4423 }
158142c2 4424 zSig0 += roundIncrement;
    /* If the addition set bit 63, the exponent field becomes 1. */
bb98fe42 4425 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
    /*
     * roundIncrement is reused as one unit in the last kept place.  On an
     * exact tie under nearest-even, widen the mask so the lowest kept bit
     * is cleared together with the discarded bits.
     */
4426 roundIncrement = roundMask + 1;
4427 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4428 roundMask |= roundIncrement;
4429 }
4430 zSig0 &= ~ roundMask;
4431 return packFloatx80( zSign, zExp, zSig0 );
4432 }
4433 }
    /* Reduced-precision path, exponent in the normal range. */
a2f2d288 4434 if (roundBits) {
d82f3b2d 4435 float_raise(float_flag_inexact, status);
a2f2d288 4436 }
158142c2
FB
4437 zSig0 += roundIncrement;
    /*
     * A sum smaller than the addend means the addition wrapped past 64
     * bits: bump the exponent and set the significand to bit 63 only.
     */
4438 if ( zSig0 < roundIncrement ) {
4439 ++zExp;
e9321124 4440 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4441 }
    /* Same tie-to-even mask widening as in the subnormal branch above. */
4442 roundIncrement = roundMask + 1;
4443 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4444 roundMask |= roundIncrement;
4445 }
4446 zSig0 &= ~ roundMask;
4447 if ( zSig0 == 0 ) zExp = 0;
4448 return packFloatx80( zSign, zExp, zSig0 );
    /*
     * Full-precision path: all 64 bits of zSig0 are kept and zSig1 holds
     * the discarded bits.  `increment' says whether zSig0 must be bumped
     * by one.  As above, unlisted rounding modes abort().
     */
4449 precision80:
dc355b76
PM
4450 switch (roundingMode) {
4451 case float_round_nearest_even:
f9288a76 4452 case float_round_ties_away:
dc355b76
PM
4453 increment = ((int64_t)zSig1 < 0);
4454 break;
4455 case float_round_to_zero:
4456 increment = 0;
4457 break;
4458 case float_round_up:
4459 increment = !zSign && zSig1;
4460 break;
4461 case float_round_down:
4462 increment = zSign && zSig1;
4463 break;
4464 default:
4465 abort();
158142c2 4466 }
bb98fe42 4467 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4468 if ( ( 0x7FFE < zExp )
4469 || ( ( zExp == 0x7FFE )
e9321124 4470 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
4471 && increment
4472 )
4473 ) {
    /*
     * roundMask = 0 makes ~roundMask all ones for this path.  The
     * `overflow' label is also the target of the goto in the
     * reduced-precision path, which arrives with its own roundMask.
     */
4474 roundMask = 0;
4475 overflow:
ff32e16e 4476 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
    /*
     * Modes that round toward zero for this sign return exponent 0x7FFE
     * with significand ~roundMask; every other mode returns infinity.
     */
4477 if ( ( roundingMode == float_round_to_zero )
4478 || ( zSign && ( roundingMode == float_round_up ) )
4479 || ( ! zSign && ( roundingMode == float_round_down ) )
4480 ) {
4481 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4482 }
0f605c88
LV
4483 return packFloatx80(zSign,
4484 floatx80_infinity_high,
4485 floatx80_infinity_low);
158142c2
FB
4486 }
    /*
     * NOTE(review): unlike the reduced-precision path, this branch does
     * not consult status->flush_to_zero -- confirm that is intended.
     */
4487 if ( zExp <= 0 ) {
a828b373
RH
4488 isTiny = status->tininess_before_rounding
4489 || (zExp < 0)
4490 || !increment
4491 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4492 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4493 zExp = 0;
ff32e16e
PM
4494 if (isTiny && zSig1) {
4495 float_raise(float_flag_underflow, status);
4496 }
a2f2d288 4497 if (zSig1) {
d82f3b2d 4498 float_raise(float_flag_inexact, status);
a2f2d288 4499 }
    /* Recompute `increment': the shift above changed zSig1. */
dc355b76
PM
4500 switch (roundingMode) {
4501 case float_round_nearest_even:
f9288a76 4502 case float_round_ties_away:
dc355b76
PM
4503 increment = ((int64_t)zSig1 < 0);
4504 break;
4505 case float_round_to_zero:
4506 increment = 0;
4507 break;
4508 case float_round_up:
4509 increment = !zSign && zSig1;
4510 break;
4511 case float_round_down:
4512 increment = zSign && zSig1;
4513 break;
4514 default:
4515 abort();
158142c2
FB
4516 }
4517 if ( increment ) {
4518 ++zSig0;
    /* Exact tie (only the top bit of zSig1 set) under nearest-even. */
40662886
PMD
4519 if (!(zSig1 << 1) && roundNearestEven) {
4520 zSig0 &= ~1;
4521 }
    /* If the increment set bit 63, the exponent field becomes 1. */
bb98fe42 4522 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4523 }
4524 return packFloatx80( zSign, zExp, zSig0 );
4525 }
4526 }
    /* Full-precision path, exponent in the normal range. */
a2f2d288 4527 if (zSig1) {
d82f3b2d 4528 float_raise(float_flag_inexact, status);
a2f2d288 4529 }
158142c2
FB
4530 if ( increment ) {
4531 ++zSig0;
    /*
     * zSig0 wrapping to zero means the increment carried out of 64 bits:
     * bump the exponent and set the significand to bit 63 only.
     */
4532 if ( zSig0 == 0 ) {
4533 ++zExp;
e9321124 4534 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4535 }
4536 else {
40662886
PMD
4537 if (!(zSig1 << 1) && roundNearestEven) {
4538 zSig0 &= ~1;
4539 }
158142c2
FB
4540 }
4541 }
4542 else {
    /* A zero significand is packed with a zero exponent. */
4543 if ( zSig0 == 0 ) zExp = 0;
4544 }
4545 return packFloatx80( zSign, zExp, zSig0 );
4546
4547}
4548
4549/*----------------------------------------------------------------------------
4550| Takes an abstract floating-point value having sign `zSign', exponent
4551| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4552| and returns the proper extended double-precision floating-point value
4553| corresponding to the abstract input. This routine is just like
4554| `roundAndPackFloatx80' except that the input significand does not have to be
4555| normalized.
4556*----------------------------------------------------------------------------*/
4557
88857aca 4558floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
c120391c 4559 bool zSign, int32_t zExp,
88857aca
LV
4560 uint64_t zSig0, uint64_t zSig1,
4561 float_status *status)
158142c2 4562{
8f506c70 4563 int8_t shiftCount;
158142c2
FB
4564
4565 if ( zSig0 == 0 ) {
4566 zSig0 = zSig1;
4567 zSig1 = 0;
4568 zExp -= 64;
4569 }
0019d5c3 4570 shiftCount = clz64(zSig0);
158142c2
FB
4571 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4572 zExp -= shiftCount;
ff32e16e
PM
4573 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4574 zSig0, zSig1, status);
158142c2
FB
4575
4576}
4577
158142c2
FB
4578/*----------------------------------------------------------------------------
4579| Returns the least-significant 64 fraction bits of the quadruple-precision
4580| floating-point value `a'.
4581*----------------------------------------------------------------------------*/
4582
a49db98d 4583static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4584{
4585
4586 return a.low;
4587
4588}
4589
4590/*----------------------------------------------------------------------------
4591| Returns the most-significant 48 fraction bits of the quadruple-precision
4592| floating-point value `a'.
4593*----------------------------------------------------------------------------*/
4594
a49db98d 4595static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4596{
4597
e9321124 4598 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
158142c2
FB
4599
4600}
4601
4602/*----------------------------------------------------------------------------
4603| Returns the exponent bits of the quadruple-precision floating-point value
4604| `a'.
4605*----------------------------------------------------------------------------*/
4606
f4014512 4607static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4608{
4609
4610 return ( a.high>>48 ) & 0x7FFF;
4611
4612}
4613
4614/*----------------------------------------------------------------------------
4615| Returns the sign bit of the quadruple-precision floating-point value `a'.
4616*----------------------------------------------------------------------------*/
4617
c120391c 4618static inline bool extractFloat128Sign(float128 a)
158142c2 4619{
c120391c 4620 return a.high >> 63;
158142c2
FB
4621}
4622
4623/*----------------------------------------------------------------------------
4624| Normalizes the subnormal quadruple-precision floating-point value
4625| represented by the denormalized significand formed by the concatenation of
4626| `aSig0' and `aSig1'. The normalized exponent is stored at the location
4627| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4628| significand are stored at the location pointed to by `zSig0Ptr', and the
4629| least significant 64 bits of the normalized significand are stored at the
4630| location pointed to by `zSig1Ptr'.
4631*----------------------------------------------------------------------------*/
4632
static void
normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                           int32_t *zExpPtr,
                           uint64_t *zSig0Ptr, uint64_t *zSig1Ptr)
{
    int8_t lshift;

    /* High word non-zero: one short 128-bit left shift does the job. */
    if (aSig0 != 0) {
        lshift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, lshift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - lshift;
        return;
    }

    /*
     * High word zero: the output is built from aSig1 alone.  lshift is
     * the shift relative to placing aSig1 directly in the high word.
     */
    lshift = clz64(aSig1) - 15;
    if (lshift >= 0) {
        *zSig0Ptr = aSig1 << lshift;
        *zSig1Ptr = 0;
    } else {
        /* Negative distance: split aSig1 across both output words. */
        *zSig0Ptr = aSig1 >> (-lshift);
        *zSig1Ptr = aSig1 << (lshift & 63);
    }
    *zExpPtr = -lshift - 63;
}
4663
4664/*----------------------------------------------------------------------------
4665| Packs the sign `zSign', the exponent `zExp', and the significand formed
4666| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4667| floating-point value, returning the result. After being shifted into the
4668| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4669| added together to form the most significant 64 bits of the result. This
4670| means that any integer portion of `zSig0' will be added into the exponent.
4671| Since a properly normalized significand will have an integer portion equal
4672| to 1, the `zExp' input should be 1 less than the desired result exponent
4673| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4674| significand.
4675*----------------------------------------------------------------------------*/
4676
a49db98d 4677static inline float128
c120391c 4678packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
158142c2
FB
4679{
4680 float128 z;
4681
4682 z.low = zSig1;
c120391c 4683 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
158142c2 4684 return z;
158142c2
FB
4685}
4686
4687/*----------------------------------------------------------------------------
4688| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4689| and extended significand formed by the concatenation of `zSig0', `zSig1',
4690| and `zSig2', and returns the proper quadruple-precision floating-point value
4691| corresponding to the abstract input. Ordinarily, the abstract value is
4692| simply rounded and packed into the quadruple-precision format, with the
4693| inexact exception raised if the abstract input cannot be represented
4694| exactly. However, if the abstract value is too large, the overflow and
4695| inexact exceptions are raised and an infinity or maximal finite value is
4696| returned. If the abstract value is too small, the input value is rounded to
4697| a subnormal number, and the underflow and inexact exceptions are raised if
4698| the abstract input cannot be represented exactly as a subnormal quadruple-
4699| precision floating-point number.
4700| The input significand must be normalized or smaller. If the input
4701| significand is not normalized, `zExp' must be 0; in that case, the result
4702| returned is a subnormal number, and it must not require rounding. In the
4703| usual case that the input significand is normalized, `zExp' must be 1 less
4704| than the ``true'' floating-point exponent. The handling of underflow and
4705| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4706*----------------------------------------------------------------------------*/
4707
c120391c 4708static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4709 uint64_t zSig0, uint64_t zSig1,
4710 uint64_t zSig2, float_status *status)
158142c2 4711{
8f506c70 4712 int8_t roundingMode;
c120391c 4713 bool roundNearestEven, increment, isTiny;
158142c2 4714
a2f2d288 4715 roundingMode = status->float_rounding_mode;
158142c2 4716 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4717 switch (roundingMode) {
4718 case float_round_nearest_even:
f9288a76 4719 case float_round_ties_away:
dc355b76
PM
4720 increment = ((int64_t)zSig2 < 0);
4721 break;
4722 case float_round_to_zero:
4723 increment = 0;
4724 break;
4725 case float_round_up:
4726 increment = !zSign && zSig2;
4727 break;
4728 case float_round_down:
4729 increment = zSign && zSig2;
4730 break;
9ee6f678
BR
4731 case float_round_to_odd:
4732 increment = !(zSig1 & 0x1) && zSig2;
4733 break;
dc355b76
PM
4734 default:
4735 abort();
158142c2 4736 }
bb98fe42 4737 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
4738 if ( ( 0x7FFD < zExp )
4739 || ( ( zExp == 0x7FFD )
4740 && eq128(
e9321124
AB
4741 UINT64_C(0x0001FFFFFFFFFFFF),
4742 UINT64_C(0xFFFFFFFFFFFFFFFF),
158142c2
FB
4743 zSig0,
4744 zSig1
4745 )
4746 && increment
4747 )
4748 ) {
ff32e16e 4749 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4750 if ( ( roundingMode == float_round_to_zero )
4751 || ( zSign && ( roundingMode == float_round_up ) )
4752 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4753 || (roundingMode == float_round_to_odd)
158142c2
FB
4754 ) {
4755 return
4756 packFloat128(
4757 zSign,
4758 0x7FFE,
e9321124
AB
4759 UINT64_C(0x0000FFFFFFFFFFFF),
4760 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
4761 );
4762 }
4763 return packFloat128( zSign, 0x7FFF, 0, 0 );
4764 }
4765 if ( zExp < 0 ) {
a2f2d288 4766 if (status->flush_to_zero) {
ff32e16e 4767 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4768 return packFloat128(zSign, 0, 0, 0);
4769 }
a828b373
RH
4770 isTiny = status->tininess_before_rounding
4771 || (zExp < -1)
4772 || !increment
4773 || lt128(zSig0, zSig1,
4774 UINT64_C(0x0001FFFFFFFFFFFF),
4775 UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4776 shift128ExtraRightJamming(
4777 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4778 zExp = 0;
ff32e16e
PM
4779 if (isTiny && zSig2) {
4780 float_raise(float_flag_underflow, status);
4781 }
dc355b76
PM
4782 switch (roundingMode) {
4783 case float_round_nearest_even:
f9288a76 4784 case float_round_ties_away:
dc355b76
PM
4785 increment = ((int64_t)zSig2 < 0);
4786 break;
4787 case float_round_to_zero:
4788 increment = 0;
4789 break;
4790 case float_round_up:
4791 increment = !zSign && zSig2;
4792 break;
4793 case float_round_down:
4794 increment = zSign && zSig2;
4795 break;
9ee6f678
BR
4796 case float_round_to_odd:
4797 increment = !(zSig1 & 0x1) && zSig2;
4798 break;
dc355b76
PM
4799 default:
4800 abort();
158142c2
FB
4801 }
4802 }
4803 }
a2f2d288 4804 if (zSig2) {
d82f3b2d 4805 float_raise(float_flag_inexact, status);
a2f2d288 4806 }
158142c2
FB
4807 if ( increment ) {
4808 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
40662886
PMD
4809 if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4810 zSig1 &= ~1;
4811 }
158142c2
FB
4812 }
4813 else {
4814 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4815 }
4816 return packFloat128( zSign, zExp, zSig0, zSig1 );
4817
4818}
4819
4820/*----------------------------------------------------------------------------
4821| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4822| and significand formed by the concatenation of `zSig0' and `zSig1', and
4823| returns the proper quadruple-precision floating-point value corresponding
4824| to the abstract input. This routine is just like `roundAndPackFloat128'
4825| except that the input significand has fewer bits and does not have to be
4826| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4827| point exponent.
4828*----------------------------------------------------------------------------*/
4829
c120391c 4830static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4831 uint64_t zSig0, uint64_t zSig1,
4832 float_status *status)
158142c2 4833{
8f506c70 4834 int8_t shiftCount;
bb98fe42 4835 uint64_t zSig2;
158142c2
FB
4836
4837 if ( zSig0 == 0 ) {
4838 zSig0 = zSig1;
4839 zSig1 = 0;
4840 zExp -= 64;
4841 }
0019d5c3 4842 shiftCount = clz64(zSig0) - 15;
158142c2
FB
4843 if ( 0 <= shiftCount ) {
4844 zSig2 = 0;
4845 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4846 }
4847 else {
4848 shift128ExtraRightJamming(
4849 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4850 }
4851 zExp -= shiftCount;
ff32e16e 4852 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
4853
4854}
4855
158142c2 4856
158142c2
FB
4857/*----------------------------------------------------------------------------
4858| Returns the result of converting the 32-bit two's complement integer `a'
4859| to the extended double-precision floating-point format. The conversion
4860| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4861| Arithmetic.
4862*----------------------------------------------------------------------------*/
4863
e5a41ffa 4864floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2 4865{
c120391c 4866 bool zSign;
3a87d009 4867 uint32_t absA;
8f506c70 4868 int8_t shiftCount;
bb98fe42 4869 uint64_t zSig;
158142c2
FB
4870
4871 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4872 zSign = ( a < 0 );
4873 absA = zSign ? - a : a;
0019d5c3 4874 shiftCount = clz32(absA) + 32;
158142c2
FB
4875 zSig = absA;
4876 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4877
4878}
4879
158142c2
FB
4880/*----------------------------------------------------------------------------
4881| Returns the result of converting the 64-bit two's complement integer `a'
4882| to the extended double-precision floating-point format. The conversion
4883| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4884| Arithmetic.
4885*----------------------------------------------------------------------------*/
4886
e5a41ffa 4887floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2 4888{
c120391c 4889 bool zSign;
182f42fd 4890 uint64_t absA;
8f506c70 4891 int8_t shiftCount;
158142c2
FB
4892
4893 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4894 zSign = ( a < 0 );
4895 absA = zSign ? - a : a;
0019d5c3 4896 shiftCount = clz64(absA);
158142c2
FB
4897 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4898
4899}
4900
158142c2
FB
4901/*----------------------------------------------------------------------------
4902| Returns the result of converting the single-precision floating-point value
4903| `a' to the extended double-precision floating-point format. The conversion
4904| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4905| Arithmetic.
4906*----------------------------------------------------------------------------*/
4907
e5a41ffa 4908floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2 4909{
c120391c 4910 bool aSign;
0c48262d 4911 int aExp;
bb98fe42 4912 uint32_t aSig;
158142c2 4913
ff32e16e 4914 a = float32_squash_input_denormal(a, status);
158142c2
FB
4915 aSig = extractFloat32Frac( a );
4916 aExp = extractFloat32Exp( a );
4917 aSign = extractFloat32Sign( a );
4918 if ( aExp == 0xFF ) {
ff32e16e 4919 if (aSig) {
7537c2b4
JM
4920 floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
4921 status);
4922 return floatx80_silence_nan(res, status);
ff32e16e 4923 }
0f605c88
LV
4924 return packFloatx80(aSign,
4925 floatx80_infinity_high,
4926 floatx80_infinity_low);
158142c2
FB
4927 }
4928 if ( aExp == 0 ) {
4929 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4930 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4931 }
4932 aSig |= 0x00800000;
bb98fe42 4933 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
4934
4935}
4936
158142c2
FB
4937/*----------------------------------------------------------------------------
4938| Returns the remainder of the single-precision floating-point value `a'
4939| with respect to the corresponding value `b'. The operation is performed
4940| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4941*----------------------------------------------------------------------------*/
4942
e5a41ffa 4943float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 4944{
c120391c 4945 bool aSign, zSign;
0c48262d 4946 int aExp, bExp, expDiff;
bb98fe42
AF
4947 uint32_t aSig, bSig;
4948 uint32_t q;
4949 uint64_t aSig64, bSig64, q64;
4950 uint32_t alternateASig;
4951 int32_t sigMean;
ff32e16e
PM
4952 a = float32_squash_input_denormal(a, status);
4953 b = float32_squash_input_denormal(b, status);
158142c2
FB
4954
4955 aSig = extractFloat32Frac( a );
4956 aExp = extractFloat32Exp( a );
4957 aSign = extractFloat32Sign( a );
4958 bSig = extractFloat32Frac( b );
4959 bExp = extractFloat32Exp( b );
158142c2
FB
4960 if ( aExp == 0xFF ) {
4961 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 4962 return propagateFloat32NaN(a, b, status);
158142c2 4963 }
ff32e16e 4964 float_raise(float_flag_invalid, status);
af39bc8c 4965 return float32_default_nan(status);
158142c2
FB
4966 }
4967 if ( bExp == 0xFF ) {
ff32e16e
PM
4968 if (bSig) {
4969 return propagateFloat32NaN(a, b, status);
4970 }
158142c2
FB
4971 return a;
4972 }
4973 if ( bExp == 0 ) {
4974 if ( bSig == 0 ) {
ff32e16e 4975 float_raise(float_flag_invalid, status);
af39bc8c 4976 return float32_default_nan(status);
158142c2
FB
4977 }
4978 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
4979 }
4980 if ( aExp == 0 ) {
4981 if ( aSig == 0 ) return a;
4982 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4983 }
4984 expDiff = aExp - bExp;
4985 aSig |= 0x00800000;
4986 bSig |= 0x00800000;
4987 if ( expDiff < 32 ) {
4988 aSig <<= 8;
4989 bSig <<= 8;
4990 if ( expDiff < 0 ) {
4991 if ( expDiff < -1 ) return a;
4992 aSig >>= 1;
4993 }
4994 q = ( bSig <= aSig );
4995 if ( q ) aSig -= bSig;
4996 if ( 0 < expDiff ) {
bb98fe42 4997 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
4998 q >>= 32 - expDiff;
4999 bSig >>= 2;
5000 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5001 }
5002 else {
5003 aSig >>= 2;
5004 bSig >>= 2;
5005 }
5006 }
5007 else {
5008 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
5009 aSig64 = ( (uint64_t) aSig )<<40;
5010 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
5011 expDiff -= 64;
5012 while ( 0 < expDiff ) {
5013 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5014 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5015 aSig64 = - ( ( bSig * q64 )<<38 );
5016 expDiff -= 62;
5017 }
5018 expDiff += 64;
5019 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5020 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5021 q = q64>>( 64 - expDiff );
5022 bSig <<= 6;
5023 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5024 }
5025 do {
5026 alternateASig = aSig;
5027 ++q;
5028 aSig -= bSig;
bb98fe42 5029 } while ( 0 <= (int32_t) aSig );
158142c2
FB
5030 sigMean = aSig + alternateASig;
5031 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5032 aSig = alternateASig;
5033 }
bb98fe42 5034 zSign = ( (int32_t) aSig < 0 );
158142c2 5035 if ( zSign ) aSig = - aSig;
ff32e16e 5036 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5037}
5038
369be8f6 5039
158142c2 5040
8229c991
AJ
5041/*----------------------------------------------------------------------------
5042| Returns the binary exponential of the single-precision floating-point value
5043| `a'. The operation is performed according to the IEC/IEEE Standard for
5044| Binary Floating-Point Arithmetic.
5045|
5046| Uses the following identities:
5047|
5048| 1. -------------------------------------------------------------------------
5049| x x*ln(2)
5050| 2 = e
5051|
5052| 2. -------------------------------------------------------------------------
5053| 2 3 4 5 n
5054| x x x x x x x
5055| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5056| 1! 2! 3! 4! 5! n!
5057*----------------------------------------------------------------------------*/
5058
5059static const float64 float32_exp2_coefficients[15] =
5060{
d5138cf4
PM
5061 const_float64( 0x3ff0000000000000ll ), /* 1 */
5062 const_float64( 0x3fe0000000000000ll ), /* 2 */
5063 const_float64( 0x3fc5555555555555ll ), /* 3 */
5064 const_float64( 0x3fa5555555555555ll ), /* 4 */
5065 const_float64( 0x3f81111111111111ll ), /* 5 */
5066 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5067 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5068 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5069 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5070 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5071 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5072 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5073 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5074 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5075 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
5076};
5077
e5a41ffa 5078float32 float32_exp2(float32 a, float_status *status)
8229c991 5079{
c120391c 5080 bool aSign;
0c48262d 5081 int aExp;
bb98fe42 5082 uint32_t aSig;
8229c991
AJ
5083 float64 r, x, xn;
5084 int i;
ff32e16e 5085 a = float32_squash_input_denormal(a, status);
8229c991
AJ
5086
5087 aSig = extractFloat32Frac( a );
5088 aExp = extractFloat32Exp( a );
5089 aSign = extractFloat32Sign( a );
5090
5091 if ( aExp == 0xFF) {
ff32e16e
PM
5092 if (aSig) {
5093 return propagateFloat32NaN(a, float32_zero, status);
5094 }
8229c991
AJ
5095 return (aSign) ? float32_zero : a;
5096 }
5097 if (aExp == 0) {
5098 if (aSig == 0) return float32_one;
5099 }
5100
ff32e16e 5101 float_raise(float_flag_inexact, status);
8229c991
AJ
5102
5103 /* ******************************* */
5104 /* using float64 for approximation */
5105 /* ******************************* */
ff32e16e
PM
5106 x = float32_to_float64(a, status);
5107 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
5108
5109 xn = x;
5110 r = float64_one;
5111 for (i = 0 ; i < 15 ; i++) {
5112 float64 f;
5113
ff32e16e
PM
5114 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5115 r = float64_add(r, f, status);
8229c991 5116
ff32e16e 5117 xn = float64_mul(xn, x, status);
8229c991
AJ
5118 }
5119
5120 return float64_to_float32(r, status);
5121}
5122
374dfc33
AJ
5123/*----------------------------------------------------------------------------
5124| Returns the binary log of the single-precision floating-point value `a'.
5125| The operation is performed according to the IEC/IEEE Standard for Binary
5126| Floating-Point Arithmetic.
5127*----------------------------------------------------------------------------*/
e5a41ffa 5128float32 float32_log2(float32 a, float_status *status)
374dfc33 5129{
c120391c 5130 bool aSign, zSign;
0c48262d 5131 int aExp;
bb98fe42 5132 uint32_t aSig, zSig, i;
374dfc33 5133
ff32e16e 5134 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
5135 aSig = extractFloat32Frac( a );
5136 aExp = extractFloat32Exp( a );
5137 aSign = extractFloat32Sign( a );
5138
5139 if ( aExp == 0 ) {
5140 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5141 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5142 }
5143 if ( aSign ) {
ff32e16e 5144 float_raise(float_flag_invalid, status);
af39bc8c 5145 return float32_default_nan(status);
374dfc33
AJ
5146 }
5147 if ( aExp == 0xFF ) {
ff32e16e
PM
5148 if (aSig) {
5149 return propagateFloat32NaN(a, float32_zero, status);
5150 }
374dfc33
AJ
5151 return a;
5152 }
5153
5154 aExp -= 0x7F;
5155 aSig |= 0x00800000;
5156 zSign = aExp < 0;
5157 zSig = aExp << 23;
5158
5159 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 5160 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
5161 if ( aSig & 0x01000000 ) {
5162 aSig >>= 1;
5163 zSig |= i;
5164 }
5165 }
5166
5167 if ( zSign )
5168 zSig = -zSig;
5169
ff32e16e 5170 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
5171}
5172
158142c2 5173/*----------------------------------------------------------------------------
158142c2
FB
5174| Returns the result of converting the double-precision floating-point value
5175| `a' to the extended double-precision floating-point format. The conversion
5176| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5177| Arithmetic.
5178*----------------------------------------------------------------------------*/
5179
e5a41ffa 5180floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2 5181{
c120391c 5182 bool aSign;
0c48262d 5183 int aExp;
bb98fe42 5184 uint64_t aSig;
158142c2 5185
ff32e16e 5186 a = float64_squash_input_denormal(a, status);
158142c2
FB
5187 aSig = extractFloat64Frac( a );
5188 aExp = extractFloat64Exp( a );
5189 aSign = extractFloat64Sign( a );
5190 if ( aExp == 0x7FF ) {
ff32e16e 5191 if (aSig) {
7537c2b4
JM
5192 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5193 status);
5194 return floatx80_silence_nan(res, status);
ff32e16e 5195 }
0f605c88
LV
5196 return packFloatx80(aSign,
5197 floatx80_infinity_high,
5198 floatx80_infinity_low);
158142c2
FB
5199 }
5200 if ( aExp == 0 ) {
5201 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5202 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5203 }
5204 return
5205 packFloatx80(
e9321124 5206 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
158142c2
FB
5207
5208}
5209
158142c2
FB
5210/*----------------------------------------------------------------------------
5211| Returns the remainder of the double-precision floating-point value `a'
5212| with respect to the corresponding value `b'. The operation is performed
5213| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5214*----------------------------------------------------------------------------*/
5215
e5a41ffa 5216float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5217{
c120391c 5218 bool aSign, zSign;
0c48262d 5219 int aExp, bExp, expDiff;
bb98fe42
AF
5220 uint64_t aSig, bSig;
5221 uint64_t q, alternateASig;
5222 int64_t sigMean;
158142c2 5223
ff32e16e
PM
5224 a = float64_squash_input_denormal(a, status);
5225 b = float64_squash_input_denormal(b, status);
158142c2
FB
5226 aSig = extractFloat64Frac( a );
5227 aExp = extractFloat64Exp( a );
5228 aSign = extractFloat64Sign( a );
5229 bSig = extractFloat64Frac( b );
5230 bExp = extractFloat64Exp( b );
158142c2
FB
5231 if ( aExp == 0x7FF ) {
5232 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5233 return propagateFloat64NaN(a, b, status);
158142c2 5234 }
ff32e16e 5235 float_raise(float_flag_invalid, status);
af39bc8c 5236 return float64_default_nan(status);
158142c2
FB
5237 }
5238 if ( bExp == 0x7FF ) {
ff32e16e
PM
5239 if (bSig) {
5240 return propagateFloat64NaN(a, b, status);
5241 }
158142c2
FB
5242 return a;
5243 }
5244 if ( bExp == 0 ) {
5245 if ( bSig == 0 ) {
ff32e16e 5246 float_raise(float_flag_invalid, status);
af39bc8c 5247 return float64_default_nan(status);
158142c2
FB
5248 }
5249 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5250 }
5251 if ( aExp == 0 ) {
5252 if ( aSig == 0 ) return a;
5253 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5254 }
5255 expDiff = aExp - bExp;
e9321124
AB
5256 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5257 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
158142c2
FB
5258 if ( expDiff < 0 ) {
5259 if ( expDiff < -1 ) return a;
5260 aSig >>= 1;
5261 }
5262 q = ( bSig <= aSig );
5263 if ( q ) aSig -= bSig;
5264 expDiff -= 64;
5265 while ( 0 < expDiff ) {
5266 q = estimateDiv128To64( aSig, 0, bSig );
5267 q = ( 2 < q ) ? q - 2 : 0;
5268 aSig = - ( ( bSig>>2 ) * q );
5269 expDiff -= 62;
5270 }
5271 expDiff += 64;
5272 if ( 0 < expDiff ) {
5273 q = estimateDiv128To64( aSig, 0, bSig );
5274 q = ( 2 < q ) ? q - 2 : 0;
5275 q >>= 64 - expDiff;
5276 bSig >>= 2;
5277 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5278 }
5279 else {
5280 aSig >>= 2;
5281 bSig >>= 2;
5282 }
5283 do {
5284 alternateASig = aSig;
5285 ++q;
5286 aSig -= bSig;
bb98fe42 5287 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5288 sigMean = aSig + alternateASig;
5289 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5290 aSig = alternateASig;
5291 }
bb98fe42 5292 zSign = ( (int64_t) aSig < 0 );
158142c2 5293 if ( zSign ) aSig = - aSig;
ff32e16e 5294 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5295
5296}
5297
374dfc33
AJ
5298/*----------------------------------------------------------------------------
5299| Returns the binary log of the double-precision floating-point value `a'.
5300| The operation is performed according to the IEC/IEEE Standard for Binary
5301| Floating-Point Arithmetic.
5302*----------------------------------------------------------------------------*/
e5a41ffa 5303float64 float64_log2(float64 a, float_status *status)
374dfc33 5304{
c120391c 5305 bool aSign, zSign;
0c48262d 5306 int aExp;
bb98fe42 5307 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5308 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5309
5310 aSig = extractFloat64Frac( a );
5311 aExp = extractFloat64Exp( a );
5312 aSign = extractFloat64Sign( a );
5313
5314 if ( aExp == 0 ) {
5315 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5316 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5317 }
5318 if ( aSign ) {
ff32e16e 5319 float_raise(float_flag_invalid, status);
af39bc8c 5320 return float64_default_nan(status);
374dfc33
AJ
5321 }
5322 if ( aExp == 0x7FF ) {
ff32e16e
PM
5323 if (aSig) {
5324 return propagateFloat64NaN(a, float64_zero, status);
5325 }
374dfc33
AJ
5326 return a;
5327 }
5328
5329 aExp -= 0x3FF;
e9321124 5330 aSig |= UINT64_C(0x0010000000000000);
374dfc33 5331 zSign = aExp < 0;
bb98fe42 5332 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5333 for (i = 1LL << 51; i > 0; i >>= 1) {
5334 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5335 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
e9321124 5336 if ( aSig & UINT64_C(0x0020000000000000) ) {
374dfc33
AJ
5337 aSig >>= 1;
5338 zSig |= i;
5339 }
5340 }
5341
5342 if ( zSign )
5343 zSig = -zSig;
ff32e16e 5344 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5345}
5346
158142c2
FB
5347/*----------------------------------------------------------------------------
5348| Returns the result of converting the extended double-precision floating-
5349| point value `a' to the 32-bit two's complement integer format. The
5350| conversion is performed according to the IEC/IEEE Standard for Binary
5351| Floating-Point Arithmetic---which means in particular that the conversion
5352| is rounded according to the current rounding mode. If `a' is a NaN, the
5353| largest positive integer is returned. Otherwise, if the conversion
5354| overflows, the largest integer with the same sign as `a' is returned.
5355*----------------------------------------------------------------------------*/
5356
f4014512 5357int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2 5358{
c120391c 5359 bool aSign;
f4014512 5360 int32_t aExp, shiftCount;
bb98fe42 5361 uint64_t aSig;
158142c2 5362
d1eb8f2a
AD
5363 if (floatx80_invalid_encoding(a)) {
5364 float_raise(float_flag_invalid, status);
5365 return 1 << 31;
5366 }
158142c2
FB
5367 aSig = extractFloatx80Frac( a );
5368 aExp = extractFloatx80Exp( a );
5369 aSign = extractFloatx80Sign( a );
bb98fe42 5370 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5371 shiftCount = 0x4037 - aExp;
5372 if ( shiftCount <= 0 ) shiftCount = 1;
5373 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5374 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5375
5376}
5377
5378/*----------------------------------------------------------------------------
5379| Returns the result of converting the extended double-precision floating-
5380| point value `a' to the 32-bit two's complement integer format. The
5381| conversion is performed according to the IEC/IEEE Standard for Binary
5382| Floating-Point Arithmetic, except that the conversion is always rounded
5383| toward zero. If `a' is a NaN, the largest positive integer is returned.
5384| Otherwise, if the conversion overflows, the largest integer with the same
5385| sign as `a' is returned.
5386*----------------------------------------------------------------------------*/
5387
f4014512 5388int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2 5389{
c120391c 5390 bool aSign;
f4014512 5391 int32_t aExp, shiftCount;
bb98fe42 5392 uint64_t aSig, savedASig;
b3a6a2e0 5393 int32_t z;
158142c2 5394
d1eb8f2a
AD
5395 if (floatx80_invalid_encoding(a)) {
5396 float_raise(float_flag_invalid, status);
5397 return 1 << 31;
5398 }
158142c2
FB
5399 aSig = extractFloatx80Frac( a );
5400 aExp = extractFloatx80Exp( a );
5401 aSign = extractFloatx80Sign( a );
5402 if ( 0x401E < aExp ) {
bb98fe42 5403 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5404 goto invalid;
5405 }
5406 else if ( aExp < 0x3FFF ) {
a2f2d288 5407 if (aExp || aSig) {
d82f3b2d 5408 float_raise(float_flag_inexact, status);
a2f2d288 5409 }
158142c2
FB
5410 return 0;
5411 }
5412 shiftCount = 0x403E - aExp;
5413 savedASig = aSig;
5414 aSig >>= shiftCount;
5415 z = aSig;
5416 if ( aSign ) z = - z;
5417 if ( ( z < 0 ) ^ aSign ) {
5418 invalid:
ff32e16e 5419 float_raise(float_flag_invalid, status);
bb98fe42 5420 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5421 }
5422 if ( ( aSig<<shiftCount ) != savedASig ) {
d82f3b2d 5423 float_raise(float_flag_inexact, status);
158142c2
FB
5424 }
5425 return z;
5426
5427}
5428
5429/*----------------------------------------------------------------------------
5430| Returns the result of converting the extended double-precision floating-
5431| point value `a' to the 64-bit two's complement integer format. The
5432| conversion is performed according to the IEC/IEEE Standard for Binary
5433| Floating-Point Arithmetic---which means in particular that the conversion
5434| is rounded according to the current rounding mode. If `a' is a NaN,
5435| the largest positive integer is returned. Otherwise, if the conversion
5436| overflows, the largest integer with the same sign as `a' is returned.
5437*----------------------------------------------------------------------------*/
5438
f42c2224 5439int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2 5440{
c120391c 5441 bool aSign;
f4014512 5442 int32_t aExp, shiftCount;
bb98fe42 5443 uint64_t aSig, aSigExtra;
158142c2 5444
d1eb8f2a
AD
5445 if (floatx80_invalid_encoding(a)) {
5446 float_raise(float_flag_invalid, status);
5447 return 1ULL << 63;
5448 }
158142c2
FB
5449 aSig = extractFloatx80Frac( a );
5450 aExp = extractFloatx80Exp( a );
5451 aSign = extractFloatx80Sign( a );
5452 shiftCount = 0x403E - aExp;
5453 if ( shiftCount <= 0 ) {
5454 if ( shiftCount ) {
ff32e16e 5455 float_raise(float_flag_invalid, status);
0f605c88 5456 if (!aSign || floatx80_is_any_nan(a)) {
2c217da0 5457 return INT64_MAX;
158142c2 5458 }
2c217da0 5459 return INT64_MIN;
158142c2
FB
5460 }
5461 aSigExtra = 0;
5462 }
5463 else {
5464 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5465 }
ff32e16e 5466 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5467
5468}
5469
5470/*----------------------------------------------------------------------------
5471| Returns the result of converting the extended double-precision floating-
5472| point value `a' to the 64-bit two's complement integer format. The
5473| conversion is performed according to the IEC/IEEE Standard for Binary
5474| Floating-Point Arithmetic, except that the conversion is always rounded
5475| toward zero. If `a' is a NaN, the largest positive integer is returned.
5476| Otherwise, if the conversion overflows, the largest integer with the same
5477| sign as `a' is returned.
5478*----------------------------------------------------------------------------*/
5479
f42c2224 5480int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2 5481{
c120391c 5482 bool aSign;
f4014512 5483 int32_t aExp, shiftCount;
bb98fe42 5484 uint64_t aSig;
f42c2224 5485 int64_t z;
158142c2 5486
d1eb8f2a
AD
5487 if (floatx80_invalid_encoding(a)) {
5488 float_raise(float_flag_invalid, status);
5489 return 1ULL << 63;
5490 }
158142c2
FB
5491 aSig = extractFloatx80Frac( a );
5492 aExp = extractFloatx80Exp( a );
5493 aSign = extractFloatx80Sign( a );
5494 shiftCount = aExp - 0x403E;
5495 if ( 0 <= shiftCount ) {
e9321124 5496 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
158142c2 5497 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5498 float_raise(float_flag_invalid, status);
158142c2 5499 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
2c217da0 5500 return INT64_MAX;
158142c2
FB
5501 }
5502 }
2c217da0 5503 return INT64_MIN;
158142c2
FB
5504 }
5505 else if ( aExp < 0x3FFF ) {
a2f2d288 5506 if (aExp | aSig) {
d82f3b2d 5507 float_raise(float_flag_inexact, status);
a2f2d288 5508 }
158142c2
FB
5509 return 0;
5510 }
5511 z = aSig>>( - shiftCount );
bb98fe42 5512 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
d82f3b2d 5513 float_raise(float_flag_inexact, status);
158142c2
FB
5514 }
5515 if ( aSign ) z = - z;
5516 return z;
5517
5518}
5519
5520/*----------------------------------------------------------------------------
5521| Returns the result of converting the extended double-precision floating-
5522| point value `a' to the single-precision floating-point format. The
5523| conversion is performed according to the IEC/IEEE Standard for Binary
5524| Floating-Point Arithmetic.
5525*----------------------------------------------------------------------------*/
5526
e5a41ffa 5527float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2 5528{
c120391c 5529 bool aSign;
f4014512 5530 int32_t aExp;
bb98fe42 5531 uint64_t aSig;
158142c2 5532
d1eb8f2a
AD
5533 if (floatx80_invalid_encoding(a)) {
5534 float_raise(float_flag_invalid, status);
5535 return float32_default_nan(status);
5536 }
158142c2
FB
5537 aSig = extractFloatx80Frac( a );
5538 aExp = extractFloatx80Exp( a );
5539 aSign = extractFloatx80Sign( a );
5540 if ( aExp == 0x7FFF ) {
bb98fe42 5541 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5542 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5543 status);
5544 return float32_silence_nan(res, status);
158142c2
FB
5545 }
5546 return packFloat32( aSign, 0xFF, 0 );
5547 }
5548 shift64RightJamming( aSig, 33, &aSig );
5549 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5550 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5551
5552}
5553
5554/*----------------------------------------------------------------------------
5555| Returns the result of converting the extended double-precision floating-
5556| point value `a' to the double-precision floating-point format. The
5557| conversion is performed according to the IEC/IEEE Standard for Binary
5558| Floating-Point Arithmetic.
5559*----------------------------------------------------------------------------*/
5560
e5a41ffa 5561float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2 5562{
c120391c 5563 bool aSign;
f4014512 5564 int32_t aExp;
bb98fe42 5565 uint64_t aSig, zSig;
158142c2 5566
d1eb8f2a
AD
5567 if (floatx80_invalid_encoding(a)) {
5568 float_raise(float_flag_invalid, status);
5569 return float64_default_nan(status);
5570 }
158142c2
FB
5571 aSig = extractFloatx80Frac( a );
5572 aExp = extractFloatx80Exp( a );
5573 aSign = extractFloatx80Sign( a );
5574 if ( aExp == 0x7FFF ) {
bb98fe42 5575 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5576 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5577 status);
5578 return float64_silence_nan(res, status);
158142c2
FB
5579 }
5580 return packFloat64( aSign, 0x7FF, 0 );
5581 }
5582 shift64RightJamming( aSig, 1, &zSig );
5583 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5584 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5585
5586}
5587
158142c2
FB
5588/*----------------------------------------------------------------------------
5589| Returns the result of converting the extended double-precision floating-
5590| point value `a' to the quadruple-precision floating-point format. The
5591| conversion is performed according to the IEC/IEEE Standard for Binary
5592| Floating-Point Arithmetic.
5593*----------------------------------------------------------------------------*/
5594
e5a41ffa 5595float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2 5596{
c120391c 5597 bool aSign;
0c48262d 5598 int aExp;
bb98fe42 5599 uint64_t aSig, zSig0, zSig1;
158142c2 5600
d1eb8f2a
AD
5601 if (floatx80_invalid_encoding(a)) {
5602 float_raise(float_flag_invalid, status);
5603 return float128_default_nan(status);
5604 }
158142c2
FB
5605 aSig = extractFloatx80Frac( a );
5606 aExp = extractFloatx80Exp( a );
5607 aSign = extractFloatx80Sign( a );
bb98fe42 5608 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5609 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5610 status);
5611 return float128_silence_nan(res, status);
158142c2
FB
5612 }
5613 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5614 return packFloat128( aSign, aExp, zSig0, zSig1 );
5615
5616}
5617
0f721292
LV
5618/*----------------------------------------------------------------------------
5619| Rounds the extended double-precision floating-point value `a'
5620| to the precision provided by floatx80_rounding_precision and returns the
5621| result as an extended double-precision floating-point value.
5622| The operation is performed according to the IEC/IEEE Standard for Binary
5623| Floating-Point Arithmetic.
5624*----------------------------------------------------------------------------*/
5625
5626floatx80 floatx80_round(floatx80 a, float_status *status)
5627{
5628 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5629 extractFloatx80Sign(a),
5630 extractFloatx80Exp(a),
5631 extractFloatx80Frac(a), 0, status);
5632}
5633
158142c2
FB
5634/*----------------------------------------------------------------------------
5635| Rounds the extended double-precision floating-point value `a' to an integer,
5636| and returns the result as an extended quadruple-precision floating-point
5637| value. The operation is performed according to the IEC/IEEE Standard for
5638| Binary Floating-Point Arithmetic.
5639*----------------------------------------------------------------------------*/
5640
e5a41ffa 5641floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2 5642{
c120391c 5643 bool aSign;
f4014512 5644 int32_t aExp;
bb98fe42 5645 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5646 floatx80 z;
5647
d1eb8f2a
AD
5648 if (floatx80_invalid_encoding(a)) {
5649 float_raise(float_flag_invalid, status);
5650 return floatx80_default_nan(status);
5651 }
158142c2
FB
5652 aExp = extractFloatx80Exp( a );
5653 if ( 0x403E <= aExp ) {
bb98fe42 5654 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5655 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5656 }
5657 return a;
5658 }
5659 if ( aExp < 0x3FFF ) {
5660 if ( ( aExp == 0 )
9ecaf5cc 5661 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
158142c2
FB
5662 return a;
5663 }
d82f3b2d 5664 float_raise(float_flag_inexact, status);
158142c2 5665 aSign = extractFloatx80Sign( a );
a2f2d288 5666 switch (status->float_rounding_mode) {
158142c2 5667 case float_round_nearest_even:
bb98fe42 5668 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5669 ) {
5670 return
e9321124 5671 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5672 }
5673 break;
f9288a76
PM
5674 case float_round_ties_away:
5675 if (aExp == 0x3FFE) {
e9321124 5676 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
f9288a76
PM
5677 }
5678 break;
158142c2
FB
5679 case float_round_down:
5680 return
5681 aSign ?
e9321124 5682 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
158142c2
FB
5683 : packFloatx80( 0, 0, 0 );
5684 case float_round_up:
5685 return
5686 aSign ? packFloatx80( 1, 0, 0 )
e9321124 5687 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
3dede407
RH
5688
5689 case float_round_to_zero:
5690 break;
5691 default:
5692 g_assert_not_reached();
158142c2
FB
5693 }
5694 return packFloatx80( aSign, 0, 0 );
5695 }
5696 lastBitMask = 1;
5697 lastBitMask <<= 0x403E - aExp;
5698 roundBitsMask = lastBitMask - 1;
5699 z = a;
a2f2d288 5700 switch (status->float_rounding_mode) {
dc355b76 5701 case float_round_nearest_even:
158142c2 5702 z.low += lastBitMask>>1;
dc355b76
PM
5703 if ((z.low & roundBitsMask) == 0) {
5704 z.low &= ~lastBitMask;
5705 }
5706 break;
f9288a76
PM
5707 case float_round_ties_away:
5708 z.low += lastBitMask >> 1;
5709 break;
dc355b76
PM
5710 case float_round_to_zero:
5711 break;
5712 case float_round_up:
5713 if (!extractFloatx80Sign(z)) {
5714 z.low += roundBitsMask;
5715 }
5716 break;
5717 case float_round_down:
5718 if (extractFloatx80Sign(z)) {
158142c2
FB
5719 z.low += roundBitsMask;
5720 }
dc355b76
PM
5721 break;
5722 default:
5723 abort();
158142c2
FB
5724 }
5725 z.low &= ~ roundBitsMask;
5726 if ( z.low == 0 ) {
5727 ++z.high;
e9321124 5728 z.low = UINT64_C(0x8000000000000000);
158142c2 5729 }
a2f2d288 5730 if (z.low != a.low) {
d82f3b2d 5731 float_raise(float_flag_inexact, status);
a2f2d288 5732 }
158142c2
FB
5733 return z;
5734
5735}
5736
5737/*----------------------------------------------------------------------------
5738| Returns the result of adding the absolute values of the extended double-
5739| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5740| negated before being returned. `zSign' is ignored if the result is a NaN.
5741| The addition is performed according to the IEC/IEEE Standard for Binary
5742| Floating-Point Arithmetic.
5743*----------------------------------------------------------------------------*/
5744
c120391c 5745static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5746 float_status *status)
158142c2 5747{
f4014512 5748 int32_t aExp, bExp, zExp;
bb98fe42 5749 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5750 int32_t expDiff;
158142c2
FB
5751
5752 aSig = extractFloatx80Frac( a );
5753 aExp = extractFloatx80Exp( a );
5754 bSig = extractFloatx80Frac( b );
5755 bExp = extractFloatx80Exp( b );
5756 expDiff = aExp - bExp;
5757 if ( 0 < expDiff ) {
5758 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5759 if ((uint64_t)(aSig << 1)) {
5760 return propagateFloatx80NaN(a, b, status);
5761 }
158142c2
FB
5762 return a;
5763 }
5764 if ( bExp == 0 ) --expDiff;
5765 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5766 zExp = aExp;
5767 }
5768 else if ( expDiff < 0 ) {
5769 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5770 if ((uint64_t)(bSig << 1)) {
5771 return propagateFloatx80NaN(a, b, status);
5772 }
0f605c88
LV
5773 return packFloatx80(zSign,
5774 floatx80_infinity_high,
5775 floatx80_infinity_low);
158142c2
FB
5776 }
5777 if ( aExp == 0 ) ++expDiff;
5778 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5779 zExp = bExp;
5780 }
5781 else {
5782 if ( aExp == 0x7FFF ) {
bb98fe42 5783 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5784 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5785 }
5786 return a;
5787 }
5788 zSig1 = 0;
5789 zSig0 = aSig + bSig;
5790 if ( aExp == 0 ) {
41602807
JM
5791 if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
5792 /* At least one of the values is a pseudo-denormal,
5793 * and there is a carry out of the result. */
5794 zExp = 1;
5795 goto shiftRight1;
5796 }
2f311075
RH
5797 if (zSig0 == 0) {
5798 return packFloatx80(zSign, 0, 0);
5799 }
158142c2
FB
5800 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5801 goto roundAndPack;
5802 }
5803 zExp = aExp;
5804 goto shiftRight1;
5805 }
5806 zSig0 = aSig + bSig;
bb98fe42 5807 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5808 shiftRight1:
5809 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
e9321124 5810 zSig0 |= UINT64_C(0x8000000000000000);
158142c2
FB
5811 ++zExp;
5812 roundAndPack:
a2f2d288 5813 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5814 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5815}
5816
5817/*----------------------------------------------------------------------------
5818| Returns the result of subtracting the absolute values of the extended
5819| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5820| difference is negated before being returned. `zSign' is ignored if the
5821| result is a NaN. The subtraction is performed according to the IEC/IEEE
5822| Standard for Binary Floating-Point Arithmetic.
5823*----------------------------------------------------------------------------*/
5824
c120391c 5825static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5826 float_status *status)
158142c2 5827{
f4014512 5828 int32_t aExp, bExp, zExp;
bb98fe42 5829 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5830 int32_t expDiff;
158142c2
FB
5831
5832 aSig = extractFloatx80Frac( a );
5833 aExp = extractFloatx80Exp( a );
5834 bSig = extractFloatx80Frac( b );
5835 bExp = extractFloatx80Exp( b );
5836 expDiff = aExp - bExp;
5837 if ( 0 < expDiff ) goto aExpBigger;
5838 if ( expDiff < 0 ) goto bExpBigger;
5839 if ( aExp == 0x7FFF ) {
bb98fe42 5840 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5841 return propagateFloatx80NaN(a, b, status);
158142c2 5842 }
ff32e16e 5843 float_raise(float_flag_invalid, status);
af39bc8c 5844 return floatx80_default_nan(status);
158142c2
FB
5845 }
5846 if ( aExp == 0 ) {
5847 aExp = 1;
5848 bExp = 1;
5849 }
5850 zSig1 = 0;
5851 if ( bSig < aSig ) goto aBigger;
5852 if ( aSig < bSig ) goto bBigger;
a2f2d288 5853 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5854 bExpBigger:
5855 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5856 if ((uint64_t)(bSig << 1)) {
5857 return propagateFloatx80NaN(a, b, status);
5858 }
0f605c88
LV
5859 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5860 floatx80_infinity_low);
158142c2
FB
5861 }
5862 if ( aExp == 0 ) ++expDiff;
5863 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5864 bBigger:
5865 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5866 zExp = bExp;
5867 zSign ^= 1;
5868 goto normalizeRoundAndPack;
5869 aExpBigger:
5870 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5871 if ((uint64_t)(aSig << 1)) {
5872 return propagateFloatx80NaN(a, b, status);
5873 }
158142c2
FB
5874 return a;
5875 }
5876 if ( bExp == 0 ) --expDiff;
5877 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5878 aBigger:
5879 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5880 zExp = aExp;
5881 normalizeRoundAndPack:
a2f2d288 5882 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5883 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5884}
5885
5886/*----------------------------------------------------------------------------
5887| Returns the result of adding the extended double-precision floating-point
5888| values `a' and `b'. The operation is performed according to the IEC/IEEE
5889| Standard for Binary Floating-Point Arithmetic.
5890*----------------------------------------------------------------------------*/
5891
e5a41ffa 5892floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2 5893{
c120391c 5894 bool aSign, bSign;
158142c2 5895
d1eb8f2a
AD
5896 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5897 float_raise(float_flag_invalid, status);
5898 return floatx80_default_nan(status);
5899 }
158142c2
FB
5900 aSign = extractFloatx80Sign( a );
5901 bSign = extractFloatx80Sign( b );
5902 if ( aSign == bSign ) {
ff32e16e 5903 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5904 }
5905 else {
ff32e16e 5906 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5907 }
5908
5909}
5910
5911/*----------------------------------------------------------------------------
5912| Returns the result of subtracting the extended double-precision floating-
5913| point values `a' and `b'. The operation is performed according to the
5914| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5915*----------------------------------------------------------------------------*/
5916
e5a41ffa 5917floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2 5918{
c120391c 5919 bool aSign, bSign;
158142c2 5920
d1eb8f2a
AD
5921 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5922 float_raise(float_flag_invalid, status);
5923 return floatx80_default_nan(status);
5924 }
158142c2
FB
5925 aSign = extractFloatx80Sign( a );
5926 bSign = extractFloatx80Sign( b );
5927 if ( aSign == bSign ) {
ff32e16e 5928 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5929 }
5930 else {
ff32e16e 5931 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5932 }
5933
5934}
5935
5936/*----------------------------------------------------------------------------
5937| Returns the result of multiplying the extended double-precision floating-
5938| point values `a' and `b'. The operation is performed according to the
5939| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5940*----------------------------------------------------------------------------*/
5941
e5a41ffa 5942floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2 5943{
c120391c 5944 bool aSign, bSign, zSign;
f4014512 5945 int32_t aExp, bExp, zExp;
bb98fe42 5946 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 5947
d1eb8f2a
AD
5948 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5949 float_raise(float_flag_invalid, status);
5950 return floatx80_default_nan(status);
5951 }
158142c2
FB
5952 aSig = extractFloatx80Frac( a );
5953 aExp = extractFloatx80Exp( a );
5954 aSign = extractFloatx80Sign( a );
5955 bSig = extractFloatx80Frac( b );
5956 bExp = extractFloatx80Exp( b );
5957 bSign = extractFloatx80Sign( b );
5958 zSign = aSign ^ bSign;
5959 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5960 if ( (uint64_t) ( aSig<<1 )
5961 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5962 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5963 }
5964 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
5965 return packFloatx80(zSign, floatx80_infinity_high,
5966 floatx80_infinity_low);
158142c2
FB
5967 }
5968 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5969 if ((uint64_t)(bSig << 1)) {
5970 return propagateFloatx80NaN(a, b, status);
5971 }
158142c2
FB
5972 if ( ( aExp | aSig ) == 0 ) {
5973 invalid:
ff32e16e 5974 float_raise(float_flag_invalid, status);
af39bc8c 5975 return floatx80_default_nan(status);
158142c2 5976 }
0f605c88
LV
5977 return packFloatx80(zSign, floatx80_infinity_high,
5978 floatx80_infinity_low);
158142c2
FB
5979 }
5980 if ( aExp == 0 ) {
5981 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5982 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5983 }
5984 if ( bExp == 0 ) {
5985 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5986 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5987 }
5988 zExp = aExp + bExp - 0x3FFE;
5989 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 5990 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
5991 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5992 --zExp;
5993 }
a2f2d288 5994 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5995 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5996}
5997
5998/*----------------------------------------------------------------------------
5999| Returns the result of dividing the extended double-precision floating-point
6000| value `a' by the corresponding value `b'. The operation is performed
6001| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
| An invalid encoding in either operand, infinity divided by infinity, and
| zero divided by zero raise the invalid exception and return the default
| NaN. A nonzero finite value divided by zero raises the divide-by-zero
| exception and returns a signed infinity.
6002*----------------------------------------------------------------------------*/
6003
e5a41ffa 6004floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2 6005{
c120391c 6006 bool aSign, bSign, zSign;
f4014512 6007 int32_t aExp, bExp, zExp;
bb98fe42
AF
6008 uint64_t aSig, bSig, zSig0, zSig1;
6009 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6010
d1eb8f2a
AD
6011 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6012 float_raise(float_flag_invalid, status);
6013 return floatx80_default_nan(status);
6014 }
158142c2
FB
6015 aSig = extractFloatx80Frac( a );
6016 aExp = extractFloatx80Exp( a );
6017 aSign = extractFloatx80Sign( a );
6018 bSig = extractFloatx80Frac( b );
6019 bExp = extractFloatx80Exp( b );
6020 bSign = extractFloatx80Sign( b );
 /* The quotient's sign is the exclusive-or of the operand signs. */
6021 zSign = aSign ^ bSign;
 /* a is a NaN or an infinity. */
6022 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6023 if ((uint64_t)(aSig << 1)) {
6024 return propagateFloatx80NaN(a, b, status);
6025 }
158142c2 6026 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6027 if ((uint64_t)(bSig << 1)) {
6028 return propagateFloatx80NaN(a, b, status);
6029 }
158142c2
FB
 /* infinity / infinity */
6030 goto invalid;
6031 }
0f605c88
LV
6032 return packFloatx80(zSign, floatx80_infinity_high,
6033 floatx80_infinity_low);
158142c2
FB
6034 }
 /* b is a NaN or an infinity; a finite value over infinity is a signed zero. */
6035 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6036 if ((uint64_t)(bSig << 1)) {
6037 return propagateFloatx80NaN(a, b, status);
6038 }
158142c2
FB
6039 return packFloatx80( zSign, 0, 0 );
6040 }
 /* b has a zero exponent field: either a zero divisor or a value to normalize. */
6041 if ( bExp == 0 ) {
6042 if ( bSig == 0 ) {
6043 if ( ( aExp | aSig ) == 0 ) {
6044 invalid:
ff32e16e 6045 float_raise(float_flag_invalid, status);
af39bc8c 6046 return floatx80_default_nan(status);
158142c2 6047 }
ff32e16e 6048 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6049 return packFloatx80(zSign, floatx80_infinity_high,
6050 floatx80_infinity_low);
158142c2
FB
6051 }
6052 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6053 }
6054 if ( aExp == 0 ) {
6055 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6056 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6057 }
6058 zExp = aExp - bExp + 0x3FFE;
6059 rem1 = 0;
 /* When the dividend significand is not below the divisor's, halve it
    (low bit goes into rem1) and raise the result exponent to compensate;
    presumably this keeps the 64-bit quotient estimate in range. */
6060 if ( bSig <= aSig ) {
6061 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6062 ++zExp;
6063 }
 /* High quotient word: estimate, then compute the remainder it implies. */
6064 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6065 mul64To128( bSig, zSig0, &term0, &term1 );
6066 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
 /* Step the estimate down, adding the divisor back, until the remainder
    is no longer negative. */
bb98fe42 6067 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6068 --zSig0;
6069 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6070 }
 /* Low quotient word, estimated from the remaining remainder. */
6071 zSig1 = estimateDiv128To64( rem1, 0, bSig );
 /* Only when the low 63 bits of the estimate are at most 4 is the exact
    remainder computed; any nonzero remainder is then recorded in the
    lowest bit of zSig1. */
bb98fe42 6072 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6073 mul64To128( bSig, zSig1, &term1, &term2 );
6074 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6075 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6076 --zSig1;
6077 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6078 }
6079 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6080 }
a2f2d288 6081 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6082 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6083}
6084
6085/*----------------------------------------------------------------------------
6086| Returns the remainder of the extended double-precision floating-point value
6087| `a' with respect to the corresponding value `b'. The operation is performed
6b8b0136
JM
6088| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6089| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
445810ec
JM
6090| the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6091| the absolute value of the integer quotient.
| '*quotient' is set to 0 on every path that returns before the division
| loop. An invalid encoding, an infinite `a', or a zero `b' raises the
| invalid exception and returns the default NaN.
158142c2
FB
6092*----------------------------------------------------------------------------*/
6093
445810ec 6094floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6b8b0136 6095 float_status *status)
158142c2 6096{
c120391c 6097 bool aSign, zSign;
b662495d 6098 int32_t aExp, bExp, expDiff, aExpOrig;
bb98fe42
AF
6099 uint64_t aSig0, aSig1, bSig;
6100 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6101
 /* Quotient reported by all early-return paths below. */
445810ec 6102 *quotient = 0;
d1eb8f2a
AD
6103 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6104 float_raise(float_flag_invalid, status);
6105 return floatx80_default_nan(status);
6106 }
158142c2 6107 aSig0 = extractFloatx80Frac( a );
 /* aExpOrig keeps the exponent field as encoded, before any normalization. */
b662495d 6108 aExpOrig = aExp = extractFloatx80Exp( a );
158142c2
FB
6109 aSign = extractFloatx80Sign( a );
6110 bSig = extractFloatx80Frac( b );
6111 bExp = extractFloatx80Exp( b );
 /* a is a NaN or an infinity: NaNs propagate, an infinite a is invalid. */
158142c2 6112 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6113 if ( (uint64_t) ( aSig0<<1 )
6114 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6115 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6116 }
6117 goto invalid;
6118 }
 /* b is a NaN or an infinity: with an infinite b the result is a itself. */
6119 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6120 if ((uint64_t)(bSig << 1)) {
6121 return propagateFloatx80NaN(a, b, status);
6122 }
b662495d
JM
6123 if (aExp == 0 && aSig0 >> 63) {
6124 /*
6125 * Pseudo-denormal argument must be returned in normalized
6126 * form.
6127 */
6128 return packFloatx80(aSign, 1, aSig0);
6129 }
158142c2
FB
6130 return a;
6131 }
6132 if ( bExp == 0 ) {
6133 if ( bSig == 0 ) {
6134 invalid:
ff32e16e 6135 float_raise(float_flag_invalid, status);
af39bc8c 6136 return floatx80_default_nan(status);
158142c2
FB
6137 }
6138 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6139 }
6140 if ( aExp == 0 ) {
499a2f7b 6141 if ( aSig0 == 0 ) return a;
158142c2
FB
6142 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6143 }
158142c2
FB
6144 zSign = aSign;
6145 expDiff = aExp - bExp;
6146 aSig1 = 0;
 /* a has the smaller exponent. In 'mod' mode, or when the exponents differ
    by more than one, a is returned as the result; otherwise (difference of
    exactly one, remainder mode) a is halved and the exponents are treated
    as equal. */
6147 if ( expDiff < 0 ) {
b662495d
JM
6148 if ( mod || expDiff < -1 ) {
6149 if (aExp == 1 && aExpOrig == 0) {
6150 /*
6151 * Pseudo-denormal argument must be returned in
6152 * normalized form.
6153 */
6154 return packFloatx80(aSign, aExp, aSig0);
6155 }
6156 return a;
6157 }
158142c2
FB
6158 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6159 expDiff = 0;
6160 }
 /* Leading quotient bit: whether the divisor significand fits once. */
445810ec 6161 *quotient = q = ( bSig <= aSig0 );
158142c2
FB
6162 if ( q ) aSig0 -= bSig;
6163 expDiff -= 64;
 /* Each pass handles 62 exponent steps: the estimate is lowered by 2
    (clamped at 0), its product with the divisor is subtracted, and both
    the remainder and the accumulated quotient are shifted left by 62. */
6164 while ( 0 < expDiff ) {
6165 q = estimateDiv128To64( aSig0, aSig1, bSig );
6166 q = ( 2 < q ) ? q - 2 : 0;
6167 mul64To128( bSig, q, &term0, &term1 );
6168 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6169 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6170 expDiff -= 62;
445810ec
JM
6171 *quotient <<= 62;
6172 *quotient += q;
158142c2
FB
6173 }
6174 expDiff += 64;
 /* Final partial step covering the remaining expDiff quotient bits. */
6175 if ( 0 < expDiff ) {
6176 q = estimateDiv128To64( aSig0, aSig1, bSig );
6177 q = ( 2 < q ) ? q - 2 : 0;
6178 q >>= 64 - expDiff;
6179 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6180 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6181 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
 /* Raise q while the shifted divisor still fits in the remainder. */
6182 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6183 ++q;
6184 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6185 }
445810ec
JM
 /* A 64-bit shift by 64 is undefined in C, so that case clears the
    previously accumulated bits explicitly. */
6186 if (expDiff < 64) {
6187 *quotient <<= expDiff;
6188 } else {
6189 *quotient = 0;
6190 }
6191 *quotient += q;
158142c2
FB
6192 }
6193 else {
6194 term1 = 0;
6195 term0 = bSig;
6196 }
6b8b0136
JM
 /* Remainder mode: compare the remainder with (divisor - remainder) and
    switch to the latter, flipping the sign and incrementing the quotient,
    when it is smaller, or when both are equal and q is odd. */
6197 if (!mod) {
6198 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6199 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6200 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6201 && ( q & 1 ) )
6202 ) {
6203 aSig0 = alternateASig0;
6204 aSig1 = alternateASig1;
6205 zSign = ! zSign;
445810ec 6206 ++*quotient;
6b8b0136 6207 }
158142c2
FB
6208 }
6209 return
6210 normalizeRoundAndPackFloatx80(
ff32e16e 6211 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6212
6213}
6214
6b8b0136
JM
6215/*----------------------------------------------------------------------------
6216| Returns the remainder of the extended double-precision floating-point value
6217| `a' with respect to the corresponding value `b'. The operation is performed
6218| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6219*----------------------------------------------------------------------------*/
6220
6221floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6222{
445810ec
JM
6223 uint64_t quotient;
6224 return floatx80_modrem(a, b, false, &quotient, status);
6b8b0136
JM
6225}
6226
6227/*----------------------------------------------------------------------------
6228| Returns the remainder of the extended double-precision floating-point value
6229| `a' with respect to the corresponding value `b', with the quotient truncated
6230| toward zero.
6231*----------------------------------------------------------------------------*/
6232
6233floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6234{
445810ec
JM
6235 uint64_t quotient;
6236 return floatx80_modrem(a, b, true, &quotient, status);
6b8b0136
JM
6237}
6238
158142c2
FB
6239/*----------------------------------------------------------------------------
6240| Returns the square root of the extended double-precision floating-point
6241| value `a'. The operation is performed according to the IEC/IEEE Standard
6242| for Binary Floating-Point Arithmetic.
| An invalid encoding or a negative nonzero operand (including negative
| infinity) raises the invalid exception and returns the default NaN. A
| zero of either sign and positive infinity are returned unchanged.
6243*----------------------------------------------------------------------------*/
6244
e5a41ffa 6245floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2 6246{
c120391c 6247 bool aSign;
f4014512 6248 int32_t aExp, zExp;
bb98fe42
AF
6249 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6250 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6251
d1eb8f2a
AD
6252 if (floatx80_invalid_encoding(a)) {
6253 float_raise(float_flag_invalid, status);
6254 return floatx80_default_nan(status);
6255 }
158142c2
FB
6256 aSig0 = extractFloatx80Frac( a );
6257 aExp = extractFloatx80Exp( a );
6258 aSign = extractFloatx80Sign( a );
 /* NaN or infinity: NaNs propagate, +infinity is returned, -infinity is invalid. */
6259 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6260 if ((uint64_t)(aSig0 << 1)) {
6261 return propagateFloatx80NaN(a, a, status);
6262 }
158142c2
FB
6263 if ( ! aSign ) return a;
6264 goto invalid;
6265 }
 /* Negative operand: only a negative zero is accepted (returned as is). */
6266 if ( aSign ) {
6267 if ( ( aExp | aSig0 ) == 0 ) return a;
6268 invalid:
ff32e16e 6269 float_raise(float_flag_invalid, status);
af39bc8c 6270 return floatx80_default_nan(status);
158142c2
FB
6271 }
6272 if ( aExp == 0 ) {
6273 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6274 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6275 }
 /* Result exponent: half of the operand exponent's offset from 0x3FFF. */
6276 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
 /* Initial estimate from the upper 32 significand bits. */
6277 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
 /* Shift the significand right by 2, or by 3 when aExp is odd. */
6278 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
 /* Widen the estimate to 64 bits using one division step. */
6279 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6280 doubleZSig0 = zSig0<<1;
6281 mul64To128( zSig0, zSig0, &term0, &term1 );
6282 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
 /* Lower the root estimate until operand minus its square is non-negative. */
bb98fe42 6283 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6284 --zSig0;
6285 doubleZSig0 -= 2;
6286 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6287 }
 /* Low word of the root, estimated from the remaining remainder. */
6288 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
 /* Only when the low 62 bits of the estimate are at most 5 is the exact
    remainder computed; any nonzero remainder is then recorded in the
    lowest bit of zSig1. */
e9321124 6289 if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
158142c2
FB
6290 if ( zSig1 == 0 ) zSig1 = 1;
6291 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6292 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6293 mul64To128( zSig1, zSig1, &term2, &term3 );
6294 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6295 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6296 --zSig1;
6297 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6298 term3 |= 1;
6299 term2 |= doubleZSig0;
6300 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6301 }
6302 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6303 }
 /* Combine the doubled high word with the low word shifted left by one. */
6304 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6305 zSig0 |= doubleZSig0;
a2f2d288
PM
6306 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6307 0, zExp, zSig0, zSig1, status);
158142c2
FB
6308}
6309
158142c2
FB
6310/*----------------------------------------------------------------------------
6311| Returns the result of converting the quadruple-precision floating-point
6312| value `a' to the extended double-precision floating-point format. The
6313| conversion is performed according to the IEC/IEEE Standard for Binary
6314| Floating-Point Arithmetic.
6315*----------------------------------------------------------------------------*/
6316
e5a41ffa 6317floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2 6318{
c120391c 6319 bool aSign;
f4014512 6320 int32_t aExp;
bb98fe42 6321 uint64_t aSig0, aSig1;
158142c2
FB
6322
6323 aSig1 = extractFloat128Frac1( a );
6324 aSig0 = extractFloat128Frac0( a );
6325 aExp = extractFloat128Exp( a );
6326 aSign = extractFloat128Sign( a );
6327 if ( aExp == 0x7FFF ) {
6328 if ( aSig0 | aSig1 ) {
7537c2b4
JM
6329 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6330 status);
6331 return floatx80_silence_nan(res, status);
158142c2 6332 }
0f605c88
LV
6333 return packFloatx80(aSign, floatx80_infinity_high,
6334 floatx80_infinity_low);
158142c2
FB
6335 }
6336 if ( aExp == 0 ) {
6337 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6338 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6339 }
6340 else {
e9321124 6341 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6342 }
6343 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6344 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6345
6346}
6347
158142c2
FB
6348/*----------------------------------------------------------------------------
6349| Returns the remainder of the quadruple-precision floating-point value `a'
6350| with respect to the corresponding value `b'. The operation is performed
6351| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
| An infinite `a' or a zero `b' raises the invalid exception and returns the
| default NaN. `a' is returned unchanged when `b' is infinite, when `a' is
| zero, or when the exponent of `a' is more than one below that of `b'.
6352*----------------------------------------------------------------------------*/
6353
e5a41ffa 6354float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 6355{
c120391c 6356 bool aSign, zSign;
f4014512 6357 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6358 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6359 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6360 int64_t sigMean0;
158142c2
FB
6361
6362 aSig1 = extractFloat128Frac1( a );
6363 aSig0 = extractFloat128Frac0( a );
6364 aExp = extractFloat128Exp( a );
6365 aSign = extractFloat128Sign( a );
6366 bSig1 = extractFloat128Frac1( b );
6367 bSig0 = extractFloat128Frac0( b );
6368 bExp = extractFloat128Exp( b );
158142c2
FB
 /* a is a NaN or an infinity: NaNs propagate, an infinite a is invalid. */
6369 if ( aExp == 0x7FFF ) {
6370 if ( ( aSig0 | aSig1 )
6371 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6372 return propagateFloat128NaN(a, b, status);
158142c2
FB
6373 }
6374 goto invalid;
6375 }
 /* b is a NaN or an infinity: with an infinite b the result is a itself. */
6376 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6377 if (bSig0 | bSig1) {
6378 return propagateFloat128NaN(a, b, status);
6379 }
158142c2
FB
6380 return a;
6381 }
6382 if ( bExp == 0 ) {
6383 if ( ( bSig0 | bSig1 ) == 0 ) {
6384 invalid:
ff32e16e 6385 float_raise(float_flag_invalid, status);
af39bc8c 6386 return float128_default_nan(status);
158142c2
FB
6387 }
6388 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6389 }
6390 if ( aExp == 0 ) {
6391 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6392 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6393 }
6394 expDiff = aExp - bExp;
6395 if ( expDiff < -1 ) return a;
 /* Set bit 48 of each high word and left-align both significands; a is
    shifted one bit less when its exponent is one below b's. */
6396 shortShift128Left(
e9321124 6397 aSig0 | UINT64_C(0x0001000000000000),
158142c2
FB
6398 aSig1,
6399 15 - ( expDiff < 0 ),
6400 &aSig0,
6401 &aSig1
6402 );
6403 shortShift128Left(
e9321124 6404 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
 /* Leading quotient bit: whether the divisor significand fits once. */
6405 q = le128( bSig0, bSig1, aSig0, aSig1 );
6406 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6407 expDiff -= 64;
 /* Each pass handles 61 exponent steps, using a quotient estimate
    lowered by 4 (clamped at 0). */
6408 while ( 0 < expDiff ) {
6409 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6410 q = ( 4 < q ) ? q - 4 : 0;
6411 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6412 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6413 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6414 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6415 expDiff -= 61;
6416 }
 /* Final partial step; in both branches a and b end up shifted right by
    12 relative to their left-aligned form. */
6417 if ( -64 < expDiff ) {
6418 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6419 q = ( 4 < q ) ? q - 4 : 0;
6420 q >>= - expDiff;
6421 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6422 expDiff += 52;
6423 if ( expDiff < 0 ) {
6424 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6425 }
6426 else {
6427 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6428 }
6429 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6430 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6431 }
6432 else {
6433 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6434 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6435 }
 /* Keep subtracting the divisor until the remainder turns negative; the
    last non-negative remainder is kept in alternateASig0/1. */
6436 do {
6437 alternateASig0 = aSig0;
6438 alternateASig1 = aSig1;
6439 ++q;
6440 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 6441 } while ( 0 <= (int64_t) aSig0 );
 /* The sign of (negative remainder + last non-negative remainder) tells
    which of the two has the smaller magnitude; on an exact tie the low
    bit of q decides. */
158142c2 6442 add128(
bb98fe42 6443 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
6444 if ( ( sigMean0 < 0 )
6445 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6446 aSig0 = alternateASig0;
6447 aSig1 = alternateASig1;
6448 }
 /* A negative chosen remainder is negated and flips the result sign. */
bb98fe42 6449 zSign = ( (int64_t) aSig0 < 0 );
158142c2 6450 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
6451 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6452 status);
158142c2
FB
6453}
6454
71bfd65c
RH
6455static inline FloatRelation
6456floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
6457 float_status *status)
f6714d36 6458{
c120391c 6459 bool aSign, bSign;
f6714d36 6460
d1eb8f2a
AD
6461 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6462 float_raise(float_flag_invalid, status);
6463 return float_relation_unordered;
6464 }
f6714d36
AJ
6465 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6466 ( extractFloatx80Frac( a )<<1 ) ) ||
6467 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6468 ( extractFloatx80Frac( b )<<1 ) )) {
6469 if (!is_quiet ||
af39bc8c
AM
6470 floatx80_is_signaling_nan(a, status) ||
6471 floatx80_is_signaling_nan(b, status)) {
ff32e16e 6472 float_raise(float_flag_invalid, status);
f6714d36
AJ
6473 }
6474 return float_relation_unordered;
6475 }
6476 aSign = extractFloatx80Sign( a );
6477 bSign = extractFloatx80Sign( b );
6478 if ( aSign != bSign ) {
6479
6480 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6481 ( ( a.low | b.low ) == 0 ) ) {
6482 /* zero case */
6483 return float_relation_equal;
6484 } else {
6485 return 1 - (2 * aSign);
6486 }
6487 } else {
be53fa78
JM
6488 /* Normalize pseudo-denormals before comparison. */
6489 if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
6490 ++a.high;
6491 }
6492 if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
6493 ++b.high;
6494 }
f6714d36
AJ
6495 if (a.low == b.low && a.high == b.high) {
6496 return float_relation_equal;
6497 } else {
6498 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6499 }
6500 }
6501}
6502
71bfd65c 6503FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 6504{
ff32e16e 6505 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
6506}
6507
71bfd65c
RH
6508FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6509 float_status *status)
f6714d36 6510{
ff32e16e 6511 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
6512}
6513
e5a41ffa 6514floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb 6515{
c120391c 6516 bool aSign;
326b9e98 6517 int32_t aExp;
bb98fe42 6518 uint64_t aSig;
9ee6e8bb 6519
d1eb8f2a
AD
6520 if (floatx80_invalid_encoding(a)) {
6521 float_raise(float_flag_invalid, status);
6522 return floatx80_default_nan(status);
6523 }
9ee6e8bb
PB
6524 aSig = extractFloatx80Frac( a );
6525 aExp = extractFloatx80Exp( a );
6526 aSign = extractFloatx80Sign( a );
6527
326b9e98
AJ
6528 if ( aExp == 0x7FFF ) {
6529 if ( aSig<<1 ) {
ff32e16e 6530 return propagateFloatx80NaN(a, a, status);
326b9e98 6531 }
9ee6e8bb
PB
6532 return a;
6533 }
326b9e98 6534
3c85c37f
PM
6535 if (aExp == 0) {
6536 if (aSig == 0) {
6537 return a;
6538 }
6539 aExp++;
6540 }
69397542 6541
326b9e98
AJ
6542 if (n > 0x10000) {
6543 n = 0x10000;
6544 } else if (n < -0x10000) {
6545 n = -0x10000;
6546 }
6547
9ee6e8bb 6548 aExp += n;
a2f2d288
PM
6549 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6550 aSign, aExp, aSig, 0, status);
9ee6e8bb 6551}
9ee6e8bb 6552
f6b3b108
EC
6553static void __attribute__((constructor)) softfloat_init(void)
6554{
6555 union_float64 ua, ub, uc, ur;
6556
6557 if (QEMU_NO_HARDFLOAT) {
6558 return;
6559 }
6560 /*
6561 * Test that the host's FMA is not obviously broken. For example,
6562 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6563 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6564 */
6565 ua.s = 0x0020000000000001ULL;
6566 ub.s = 0x3ca0000000000000ULL;
6567 uc.s = 0x0020000000000000ULL;
6568 ur.h = fma(ua.h, ub.h, uc.h);
6569 if (ur.s != 0x0020000000000001ULL) {
6570 force_soft_fma = true;
6571 }
6572}