]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
softfloat: Move int_to_float to softfloat-parts.c.inc
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is neither fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
d82f3b2d 135 float_raise(float_flag_input_denormal, s); \
a94b7839
EC
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
/*
 * Select, per (number of inputs, float width) combination, whether the
 * generated hardfloat helpers classify operands with fpclassify() (value 1)
 * or with the float32/64_* softfloat primitives (value 0).
 *
 * Only the float64 variants depend on the host: they use fpclassify() on
 * x86_64 and the softfloat primitives everywhere else.
 */
#ifdef __x86_64__
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/* The float32 variants use the softfloat primitives on every host. */
#define QEMU_HARDFLOAT_1F32_USE_FP 0
#define QEMU_HARDFLOAT_2F32_USE_FP 0
#define QEMU_HARDFLOAT_3F32_USE_FP 0
205
/*
 * QEMU_HARDFLOAT_USE_ISINF: when non-zero, infinity checks in the hardfloat
 * helpers call isinf() instead of float{32,64}_is_infinity (this only
 * matters when !USE_FP).
 *
 * On x86_64/aarch64, isinf() can yield a ~6% speedup.  On power64, however,
 * isinf() reduces fp-bench performance by up to 50%, so every other host
 * keeps the softfloat primitive.
 */
#if !defined(__x86_64__) && !defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   0
#else
# define QEMU_HARDFLOAT_USE_ISINF   1
#endif
217
/*
 * Decide whether hardfloat is compiled out.
 *
 * Some targets clear the FP flags before most FP operations.  That defeats
 * hardfloat, which relies on the inexact flag already being set, so it is
 * disabled for TARGET_PPC.  It is also disabled (with a build warning) under
 * -ffast-math, because hardfloat needs an exact IEEE host implementation.
 *
 * QEMU_SOFTFLOAT_ATTR additionally carries noinline only when hardfloat
 * is enabled.
 */
#ifdef __FAST_MATH__
# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
#endif

#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
a94b7839
EC
342static inline float32
343float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
b240c9c4 345 f32_check_fn pre, f32_check_fn post)
a94b7839
EC
346{
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
a94b7839
EC
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 363 float_raise(float_flag_overflow, s);
b240c9c4
RH
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
a94b7839
EC
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371}
372
373static inline float64
374float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
b240c9c4 376 f64_check_fn pre, f64_check_fn post)
a94b7839
EC
377{
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
a94b7839
EC
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 394 float_raise(float_flag_overflow, s);
b240c9c4
RH
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
a94b7839
EC
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402}
403
d97544c9
AB
404/*----------------------------------------------------------------------------
405| Returns the fraction bits of the single-precision floating-point value `a'.
406*----------------------------------------------------------------------------*/
407
408static inline uint32_t extractFloat32Frac(float32 a)
409{
410 return float32_val(a) & 0x007FFFFF;
411}
412
413/*----------------------------------------------------------------------------
414| Returns the exponent bits of the single-precision floating-point value `a'.
415*----------------------------------------------------------------------------*/
416
417static inline int extractFloat32Exp(float32 a)
418{
419 return (float32_val(a) >> 23) & 0xFF;
420}
421
422/*----------------------------------------------------------------------------
423| Returns the sign bit of the single-precision floating-point value `a'.
424*----------------------------------------------------------------------------*/
425
c120391c 426static inline bool extractFloat32Sign(float32 a)
d97544c9
AB
427{
428 return float32_val(a) >> 31;
429}
430
431/*----------------------------------------------------------------------------
432| Returns the fraction bits of the double-precision floating-point value `a'.
433*----------------------------------------------------------------------------*/
434
435static inline uint64_t extractFloat64Frac(float64 a)
436{
e9321124 437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
d97544c9
AB
438}
439
440/*----------------------------------------------------------------------------
441| Returns the exponent bits of the double-precision floating-point value `a'.
442*----------------------------------------------------------------------------*/
443
444static inline int extractFloat64Exp(float64 a)
445{
446 return (float64_val(a) >> 52) & 0x7FF;
447}
448
449/*----------------------------------------------------------------------------
450| Returns the sign bit of the double-precision floating-point value `a'.
451*----------------------------------------------------------------------------*/
452
c120391c 453static inline bool extractFloat64Sign(float64 a)
d97544c9
AB
454{
455 return float64_val(a) >> 63;
456}
457
a90119b5
AB
/*
 * Classification of a floating point number.
 *
 * The NaN classes are deliberately the highest-valued enumerators, so
 * cls >= float_class_qnan is true for any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified = 0,
    float_class_zero         = 1,
    float_class_normal       = 2,
    float_class_inf          = 3,
    float_class_qnan         = 4,  /* all NaNs from here */
    float_class_snan         = 5,
} FloatClass;
471
134eda00
RH
472#define float_cmask(bit) (1u << (bit))
473
474enum {
475 float_cmask_zero = float_cmask(float_class_zero),
476 float_cmask_normal = float_cmask(float_class_normal),
477 float_cmask_inf = float_cmask(float_class_inf),
478 float_cmask_qnan = float_cmask(float_class_qnan),
479 float_cmask_snan = float_cmask(float_class_snan),
480
481 float_cmask_infzero = float_cmask_zero | float_cmask_inf,
482 float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
483};
484
485
247d1f21
RH
486/* Simple helpers for checking if, or what kind of, NaN we have */
487static inline __attribute__((unused)) bool is_nan(FloatClass c)
488{
489 return unlikely(c >= float_class_qnan);
490}
491
492static inline __attribute__((unused)) bool is_snan(FloatClass c)
493{
494 return c == float_class_snan;
495}
496
497static inline __attribute__((unused)) bool is_qnan(FloatClass c)
498{
499 return c == float_class_qnan;
500}
501
a90119b5 502/*
0018b1f4
RH
503 * Structure holding all of the decomposed parts of a float.
504 * The exponent is unbiased and the fraction is normalized.
a90119b5 505 *
0018b1f4
RH
506 * The fraction words are stored in big-endian word ordering,
507 * so that truncation from a larger format to a smaller format
508 * can be done simply by ignoring subsequent elements.
a90119b5
AB
509 */
510
511typedef struct {
a90119b5
AB
512 FloatClass cls;
513 bool sign;
4109b9ea
RH
514 int32_t exp;
515 union {
516 /* Routines that know the structure may reference the singular name. */
517 uint64_t frac;
518 /*
519 * Routines expanded with multiple structures reference "hi" and "lo"
520 * depending on the operation. In FloatParts64, "hi" and "lo" are
521 * both the same word and aliased here.
522 */
523 uint64_t frac_hi;
524 uint64_t frac_lo;
525 };
f8155c1d 526} FloatParts64;
a90119b5 527
0018b1f4
RH
528typedef struct {
529 FloatClass cls;
530 bool sign;
531 int32_t exp;
532 uint64_t frac_hi;
533 uint64_t frac_lo;
534} FloatParts128;
535
aca84527
RH
536typedef struct {
537 FloatClass cls;
538 bool sign;
539 int32_t exp;
540 uint64_t frac_hi;
541 uint64_t frac_hm; /* high-middle */
542 uint64_t frac_lm; /* low-middle */
543 uint64_t frac_lo;
544} FloatParts256;
545
/*
 * These apply to the most significant word of each FloatPartsN: the binary
 * point sits at bit 63, so the implicit leading one is the top bit.
 */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
a90119b5
AB
549
/*
 * Structure holding all of the relevant parameters for a format.
 *
 * Given directly:
 *   exp_size:   the size of the exponent field
 *   exp_bias:   the offset applied to the exponent field
 *   exp_max:    the maximum normalised exponent
 *   frac_size:  the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 *
 * Computed based on the size of the fraction:
 *   frac_lsb:   least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 *
 * Optional modifiers:
 *   arm_althp:  handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;
575
/*
 * Expand to the designated initializers of a FloatFmt for a format with
 * E exponent bits and F fraction bits.
 *
 *   exp_bias   = 2^(E-1) - 1
 *   exp_max    = 2^E - 1
 *   frac_shift = (-F - 1) mod 64, i.e. the left shift that places the
 *                fraction's top bit just below bit 63 of the most
 *                significant fraction word
 *
 * The rounding fields are all derived from frac_shift.
 *
 * Both arguments are parenthesized at every use so that expression
 * arguments (e.g. FLOAT_PARAMS(8, 20 + 3)) expand correctly.  arm_althp is
 * not set here.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = (E),                                           \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                         \
    .exp_max        = (1 << (E)) - 1,                                \
    .frac_size      = (F),                                           \
    .frac_shift     = (-(F) - 1) & 63,                               \
    .frac_lsb       = 1ull << ((-(F) - 1) & 63),                     \
    .frac_lsbm1     = 1ull << ((-(F) - 2) & 63),                     \
    .round_mask     = (1ull << ((-(F) - 1) & 63)) - 1,               \
    .roundeven_mask = (2ull << ((-(F) - 1) & 63)) - 1
a90119b5
AB
587
588static const FloatFmt float16_params = {
589 FLOAT_PARAMS(5, 10)
590};
591
6fed16b2
AB
592static const FloatFmt float16_params_ahp = {
593 FLOAT_PARAMS(5, 10),
594 .arm_althp = true
595};
596
8282310d
LZ
597static const FloatFmt bfloat16_params = {
598 FLOAT_PARAMS(8, 7)
599};
600
a90119b5
AB
601static const FloatFmt float32_params = {
602 FLOAT_PARAMS(8, 23)
603};
604
605static const FloatFmt float64_params = {
606 FLOAT_PARAMS(11, 52)
607};
608
0018b1f4
RH
609static const FloatFmt float128_params = {
610 FLOAT_PARAMS(15, 112)
611};
612
6fff2167 613/* Unpack a float to parts, but do not canonicalize. */
d8fdd172 614static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
6fff2167 615{
d8fdd172
RH
616 const int f_size = fmt->frac_size;
617 const int e_size = fmt->exp_size;
6fff2167 618
d8fdd172 619 *r = (FloatParts64) {
6fff2167 620 .cls = float_class_unclassified,
d8fdd172
RH
621 .sign = extract64(raw, f_size + e_size, 1),
622 .exp = extract64(raw, f_size, e_size),
623 .frac = extract64(raw, 0, f_size)
6fff2167
AB
624 };
625}
626
3dddb203 627static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
6fff2167 628{
3dddb203 629 unpack_raw64(p, &float16_params, f);
6fff2167
AB
630}
631
3dddb203 632static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
8282310d 633{
3dddb203 634 unpack_raw64(p, &bfloat16_params, f);
8282310d
LZ
635}
636
3dddb203 637static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
6fff2167 638{
3dddb203 639 unpack_raw64(p, &float32_params, f);
6fff2167
AB
640}
641
3dddb203 642static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
6fff2167 643{
3dddb203 644 unpack_raw64(p, &float64_params, f);
6fff2167
AB
645}
646
0018b1f4
RH
647static void float128_unpack_raw(FloatParts128 *p, float128 f)
648{
649 const int f_size = float128_params.frac_size - 64;
650 const int e_size = float128_params.exp_size;
651
652 *p = (FloatParts128) {
653 .cls = float_class_unclassified,
654 .sign = extract64(f.high, f_size + e_size, 1),
655 .exp = extract64(f.high, f_size, e_size),
656 .frac_hi = extract64(f.high, 0, f_size),
657 .frac_lo = f.low,
658 };
659}
660
6fff2167 661/* Pack a float from parts, but do not canonicalize. */
9e4af58c 662static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
6fff2167 663{
9e4af58c
RH
664 const int f_size = fmt->frac_size;
665 const int e_size = fmt->exp_size;
666 uint64_t ret;
667
668 ret = (uint64_t)p->sign << (f_size + e_size);
669 ret = deposit64(ret, f_size, e_size, p->exp);
670 ret = deposit64(ret, 0, f_size, p->frac);
671 return ret;
6fff2167
AB
672}
673
71fd178e 674static inline float16 float16_pack_raw(const FloatParts64 *p)
6fff2167 675{
71fd178e 676 return make_float16(pack_raw64(p, &float16_params));
6fff2167
AB
677}
678
71fd178e 679static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
8282310d 680{
71fd178e 681 return pack_raw64(p, &bfloat16_params);
8282310d
LZ
682}
683
71fd178e 684static inline float32 float32_pack_raw(const FloatParts64 *p)
6fff2167 685{
71fd178e 686 return make_float32(pack_raw64(p, &float32_params));
6fff2167
AB
687}
688
71fd178e 689static inline float64 float64_pack_raw(const FloatParts64 *p)
6fff2167 690{
71fd178e 691 return make_float64(pack_raw64(p, &float64_params));
6fff2167
AB
692}
693
0018b1f4
RH
694static float128 float128_pack_raw(const FloatParts128 *p)
695{
696 const int f_size = float128_params.frac_size - 64;
697 const int e_size = float128_params.exp_size;
698 uint64_t hi;
699
700 hi = (uint64_t)p->sign << (f_size + e_size);
701 hi = deposit64(hi, f_size, e_size, p->exp);
702 hi = deposit64(hi, 0, f_size, p->frac_hi);
703 return make_float128(hi, p->frac_lo);
704}
705
0664335a
RH
706/*----------------------------------------------------------------------------
707| Functions and definitions to determine: (1) whether tininess for underflow
708| is detected before or after rounding by default, (2) what (if anything)
709| happens when exceptions are raised, (3) how signaling NaNs are distinguished
710| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
711| are propagated from function inputs to output. These details are target-
712| specific.
713*----------------------------------------------------------------------------*/
139c1837 714#include "softfloat-specialize.c.inc"
0664335a 715
0018b1f4
RH
/*
 * Selectors for the per-size parts<N>_NAME implementations, keyed on the
 * pointer type of P through QEMU_GENERIC (defined elsewhere): a
 * FloatParts128 * (or FloatParts256 *) argument is paired with the
 * parts128_/parts256_ function, and parts64_NAME is the remaining choice.
 */
#define PARTS_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
                 (FloatParts128 *, parts128_##NAME), parts64_##NAME)

/* Size-generic entry points for default_nan and silence_nan. */
#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
725
7c45bad8
RH
726static void parts64_return_nan(FloatParts64 *a, float_status *s);
727static void parts128_return_nan(FloatParts128 *a, float_status *s);
728
729#define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
0018b1f4 730
22c355f4
RH
731static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
732 float_status *s);
733static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
734 float_status *s);
735
736#define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
737
979582d0
RH
738static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
739 FloatParts64 *c, float_status *s,
740 int ab_mask, int abc_mask);
741static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
742 FloatParts128 *b,
743 FloatParts128 *c,
744 float_status *s,
745 int ab_mask, int abc_mask);
746
747#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
748 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
749
d46975bc
RH
750static void parts64_canonicalize(FloatParts64 *p, float_status *status,
751 const FloatFmt *fmt);
752static void parts128_canonicalize(FloatParts128 *p, float_status *status,
753 const FloatFmt *fmt);
754
755#define parts_canonicalize(A, S, F) \
756 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
757
ee6959f2
RH
758static void parts64_uncanon(FloatParts64 *p, float_status *status,
759 const FloatFmt *fmt);
760static void parts128_uncanon(FloatParts128 *p, float_status *status,
761 const FloatFmt *fmt);
762
763#define parts_uncanon(A, S, F) \
764 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
765
da10a907
RH
766static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
767static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 768static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
769
770#define parts_add_normal(A, B) \
dedd123c 771 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
da10a907
RH
772
773static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
774static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 775static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
776
777#define parts_sub_normal(A, B) \
dedd123c 778 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
da10a907
RH
779
780static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
781 float_status *s, bool subtract);
782static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
783 float_status *s, bool subtract);
784
785#define parts_addsub(A, B, S, Z) \
786 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
787
aca84527
RH
788static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
789 float_status *s);
790static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
791 float_status *s);
792
793#define parts_mul(A, B, S) \
794 PARTS_GENERIC_64_128(mul, A)(A, B, S)
795
dedd123c
RH
796static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
797 FloatParts64 *c, int flags,
798 float_status *s);
799static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
800 FloatParts128 *c, int flags,
801 float_status *s);
802
803#define parts_muladd(A, B, C, Z, S) \
804 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
805
ec961b81
RH
806static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
807 float_status *s);
808static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
809 float_status *s);
810
811#define parts_div(A, B, S) \
812 PARTS_GENERIC_64_128(div, A)(A, B, S)
813
afc34931
RH
814static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
815 int scale, int frac_size);
816static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
817 int scale, int frac_size);
818
819#define parts_round_to_int_normal(A, R, C, F) \
820 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
821
822static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
823 int scale, float_status *s,
824 const FloatFmt *fmt);
825static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
826 int scale, float_status *s,
827 const FloatFmt *fmt);
828
829#define parts_round_to_int(A, R, C, S, F) \
830 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
831
463b3f0d
RH
832static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
833 int scale, int64_t min, int64_t max,
834 float_status *s);
835static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
836 int scale, int64_t min, int64_t max,
837 float_status *s);
838
839#define parts_float_to_sint(P, R, Z, MN, MX, S) \
840 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
841
4ab4aef0
RH
842static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
843 int scale, uint64_t max,
844 float_status *s);
845static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
846 int scale, uint64_t max,
847 float_status *s);
848
849#define parts_float_to_uint(P, R, Z, M, S) \
850 PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
851
e3689519
RH
852static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
853 int scale, float_status *s);
854static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
855 int scale, float_status *s);
856
857#define parts_sint_to_float(P, I, Z, S) \
858 PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
859
0018b1f4
RH
860/*
861 * Helper functions for softfloat-parts.c.inc, per-size operations.
862 */
863
22c355f4
RH
/*
 * Type-directed selection of a frac<width>_NAME helper from the pointer
 * type of P: FloatParts128 * picks frac128_NAME, anything else falls back
 * to frac64_NAME.
 */
#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

/* As above, with an additional FloatParts256 * -> frac256_NAME case. */
#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
870
da10a907
RH
871static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
872{
873 return uadd64_overflow(a->frac, b->frac, &r->frac);
874}
875
876static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
877{
878 bool c = 0;
879 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
880 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
881 return c;
882}
883
dedd123c
RH
884static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
885{
886 bool c = 0;
887 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
888 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
889 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
890 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
891 return c;
892}
893
894#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
da10a907 895
ee6959f2
RH
896static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
897{
898 return uadd64_overflow(a->frac, c, &r->frac);
899}
900
901static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
902{
903 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
904 return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
905}
906
907#define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
908
909static void frac64_allones(FloatParts64 *a)
910{
911 a->frac = -1;
912}
913
914static void frac128_allones(FloatParts128 *a)
915{
916 a->frac_hi = a->frac_lo = -1;
917}
918
919#define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
920
22c355f4
RH
921static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
922{
923 return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
924}
925
926static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
927{
928 uint64_t ta = a->frac_hi, tb = b->frac_hi;
929 if (ta == tb) {
930 ta = a->frac_lo, tb = b->frac_lo;
931 if (ta == tb) {
932 return 0;
933 }
934 }
935 return ta < tb ? -1 : 1;
936}
937
938#define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
939
d46975bc 940static void frac64_clear(FloatParts64 *a)
0018b1f4 941{
d46975bc
RH
942 a->frac = 0;
943}
944
945static void frac128_clear(FloatParts128 *a)
946{
947 a->frac_hi = a->frac_lo = 0;
0018b1f4
RH
948}
949
d46975bc 950#define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
0018b1f4 951
ec961b81
RH
952static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
953{
954 uint64_t n1, n0, r, q;
955 bool ret;
956
957 /*
958 * We want a 2*N / N-bit division to produce exactly an N-bit
959 * result, so that we do not lose any precision and so that we
960 * do not have to renormalize afterward. If A.frac < B.frac,
961 * then division would produce an (N-1)-bit result; shift A left
962 * by one to produce the an N-bit result, and return true to
963 * decrement the exponent to match.
964 *
965 * The udiv_qrnnd algorithm that we're using requires normalization,
966 * i.e. the msb of the denominator must be set, which is already true.
967 */
968 ret = a->frac < b->frac;
969 if (ret) {
970 n0 = a->frac;
971 n1 = 0;
972 } else {
973 n0 = a->frac >> 1;
974 n1 = a->frac << 63;
975 }
976 q = udiv_qrnnd(&r, n0, n1, b->frac);
977
978 /* Set lsb if there is a remainder, to set inexact. */
979 a->frac = q | (r != 0);
980
981 return ret;
982}
983
984static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
985{
986 uint64_t q0, q1, a0, a1, b0, b1;
987 uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
988 bool ret = false;
989
990 a0 = a->frac_hi, a1 = a->frac_lo;
991 b0 = b->frac_hi, b1 = b->frac_lo;
992
993 ret = lt128(a0, a1, b0, b1);
994 if (!ret) {
995 a1 = shr_double(a0, a1, 1);
996 a0 = a0 >> 1;
997 }
998
999 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
1000 q0 = estimateDiv128To64(a0, a1, b0);
1001
1002 /*
1003 * Estimate is high because B1 was not included (unless B1 == 0).
1004 * Reduce quotient and increase remainder until remainder is non-negative.
1005 * This loop will execute 0 to 2 times.
1006 */
1007 mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1008 sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1009 while (r0 != 0) {
1010 q0--;
1011 add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1012 }
1013
1014 /* Repeat using the remainder, producing a second word of quotient. */
1015 q1 = estimateDiv128To64(r1, r2, b0);
1016 mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1017 sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1018 while (r1 != 0) {
1019 q1--;
1020 add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1021 }
1022
1023 /* Any remainder indicates inexact; set sticky bit. */
1024 q1 |= (r2 | r3) != 0;
1025
1026 a->frac_hi = q0;
1027 a->frac_lo = q1;
1028 return ret;
1029}
1030
1031#define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1032
d46975bc 1033static bool frac64_eqz(FloatParts64 *a)
0018b1f4 1034{
d46975bc
RH
1035 return a->frac == 0;
1036}
1037
1038static bool frac128_eqz(FloatParts128 *a)
1039{
1040 return (a->frac_hi | a->frac_lo) == 0;
0018b1f4
RH
1041}
1042
d46975bc 1043#define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
0fc07cad 1044
aca84527
RH
1045static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1046{
1047 mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1048}
1049
1050static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1051{
1052 mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1053 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1054}
1055
1056#define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1057
da10a907
RH
1058static void frac64_neg(FloatParts64 *a)
1059{
1060 a->frac = -a->frac;
1061}
1062
1063static void frac128_neg(FloatParts128 *a)
1064{
1065 bool c = 0;
1066 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1067 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1068}
1069
dedd123c
RH
1070static void frac256_neg(FloatParts256 *a)
1071{
1072 bool c = 0;
1073 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1074 a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1075 a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1076 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1077}
1078
1079#define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
da10a907 1080
d46975bc 1081static int frac64_normalize(FloatParts64 *a)
6fff2167 1082{
d46975bc
RH
1083 if (a->frac) {
1084 int shift = clz64(a->frac);
1085 a->frac <<= shift;
1086 return shift;
1087 }
1088 return 64;
1089}
1090
1091static int frac128_normalize(FloatParts128 *a)
1092{
1093 if (a->frac_hi) {
1094 int shl = clz64(a->frac_hi);
463e45dc
RH
1095 a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1096 a->frac_lo <<= shl;
d46975bc
RH
1097 return shl;
1098 } else if (a->frac_lo) {
1099 int shl = clz64(a->frac_lo);
463e45dc 1100 a->frac_hi = a->frac_lo << shl;
d46975bc
RH
1101 a->frac_lo = 0;
1102 return shl + 64;
6fff2167 1103 }
d46975bc 1104 return 128;
6fff2167
AB
1105}
1106
dedd123c
RH
1107static int frac256_normalize(FloatParts256 *a)
1108{
1109 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1110 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
463e45dc 1111 int ret, shl;
dedd123c
RH
1112
1113 if (likely(a0)) {
1114 shl = clz64(a0);
1115 if (shl == 0) {
1116 return 0;
1117 }
1118 ret = shl;
1119 } else {
1120 if (a1) {
1121 ret = 64;
1122 a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1123 } else if (a2) {
1124 ret = 128;
1125 a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1126 } else if (a3) {
1127 ret = 192;
1128 a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1129 } else {
1130 ret = 256;
1131 a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1132 goto done;
1133 }
1134 shl = clz64(a0);
1135 if (shl == 0) {
1136 goto done;
1137 }
1138 ret += shl;
1139 }
1140
463e45dc
RH
1141 a0 = shl_double(a0, a1, shl);
1142 a1 = shl_double(a1, a2, shl);
1143 a2 = shl_double(a2, a3, shl);
1144 a3 <<= shl;
dedd123c
RH
1145
1146 done:
1147 a->frac_hi = a0;
1148 a->frac_hm = a1;
1149 a->frac_lm = a2;
1150 a->frac_lo = a3;
1151 return ret;
1152}
1153
1154#define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
d46975bc
RH
1155
1156static void frac64_shl(FloatParts64 *a, int c)
1157{
1158 a->frac <<= c;
1159}
1160
1161static void frac128_shl(FloatParts128 *a, int c)
1162{
463e45dc
RH
1163 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1164
1165 if (c & 64) {
1166 a0 = a1, a1 = 0;
1167 }
1168
1169 c &= 63;
1170 if (c) {
1171 a0 = shl_double(a0, a1, c);
1172 a1 = a1 << c;
1173 }
1174
1175 a->frac_hi = a0;
1176 a->frac_lo = a1;
d46975bc
RH
1177}
1178
1179#define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1180
1181static void frac64_shr(FloatParts64 *a, int c)
1182{
1183 a->frac >>= c;
1184}
1185
1186static void frac128_shr(FloatParts128 *a, int c)
1187{
463e45dc
RH
1188 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1189
1190 if (c & 64) {
1191 a1 = a0, a0 = 0;
1192 }
1193
1194 c &= 63;
1195 if (c) {
1196 a1 = shr_double(a0, a1, c);
1197 a0 = a0 >> c;
1198 }
1199
1200 a->frac_hi = a0;
1201 a->frac_lo = a1;
d46975bc
RH
1202}
1203
1204#define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1205
ee6959f2 1206static void frac64_shrjam(FloatParts64 *a, int c)
6fff2167 1207{
463e45dc
RH
1208 uint64_t a0 = a->frac;
1209
1210 if (likely(c != 0)) {
1211 if (likely(c < 64)) {
1212 a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1213 } else {
1214 a0 = a0 != 0;
1215 }
1216 a->frac = a0;
1217 }
ee6959f2 1218}
6fff2167 1219
ee6959f2
RH
1220static void frac128_shrjam(FloatParts128 *a, int c)
1221{
463e45dc
RH
1222 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1223 uint64_t sticky = 0;
1224
1225 if (unlikely(c == 0)) {
1226 return;
1227 } else if (likely(c < 64)) {
1228 /* nothing */
1229 } else if (likely(c < 128)) {
1230 sticky = a1;
1231 a1 = a0;
1232 a0 = 0;
1233 c &= 63;
1234 if (c == 0) {
1235 goto done;
1236 }
1237 } else {
1238 sticky = a0 | a1;
1239 a0 = a1 = 0;
1240 goto done;
1241 }
1242
1243 sticky |= shr_double(a1, 0, c);
1244 a1 = shr_double(a0, a1, c);
1245 a0 = a0 >> c;
1246
1247 done:
1248 a->frac_lo = a1 | (sticky != 0);
1249 a->frac_hi = a0;
6fff2167
AB
1250}
1251
dedd123c
RH
1252static void frac256_shrjam(FloatParts256 *a, int c)
1253{
1254 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1255 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1256 uint64_t sticky = 0;
dedd123c
RH
1257
1258 if (unlikely(c == 0)) {
1259 return;
1260 } else if (likely(c < 64)) {
1261 /* nothing */
1262 } else if (likely(c < 256)) {
1263 if (unlikely(c & 128)) {
1264 sticky |= a2 | a3;
1265 a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1266 }
1267 if (unlikely(c & 64)) {
1268 sticky |= a3;
1269 a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1270 }
1271 c &= 63;
1272 if (c == 0) {
1273 goto done;
1274 }
1275 } else {
1276 sticky = a0 | a1 | a2 | a3;
1277 a0 = a1 = a2 = a3 = 0;
1278 goto done;
1279 }
1280
463e45dc
RH
1281 sticky |= shr_double(a3, 0, c);
1282 a3 = shr_double(a2, a3, c);
1283 a2 = shr_double(a1, a2, c);
1284 a1 = shr_double(a0, a1, c);
1285 a0 = a0 >> c;
dedd123c
RH
1286
1287 done:
1288 a->frac_lo = a3 | (sticky != 0);
1289 a->frac_lm = a2;
1290 a->frac_hm = a1;
1291 a->frac_hi = a0;
1292}
1293
1294#define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
d446830a 1295
da10a907
RH
1296static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1297{
1298 return usub64_overflow(a->frac, b->frac, &r->frac);
1299}
7c45bad8 1300
da10a907
RH
1301static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1302{
1303 bool c = 0;
1304 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1305 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1306 return c;
1307}
1308
dedd123c
RH
1309static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1310{
1311 bool c = 0;
1312 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1313 r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1314 r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1315 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1316 return c;
1317}
1318
1319#define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
da10a907 1320
aca84527
RH
1321static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1322{
1323 r->frac = a->frac_hi | (a->frac_lo != 0);
1324}
1325
1326static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1327{
1328 r->frac_hi = a->frac_hi;
1329 r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1330}
1331
1332#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1333
dedd123c
RH
1334static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1335{
1336 r->frac_hi = a->frac;
1337 r->frac_lo = 0;
1338}
1339
1340static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1341{
1342 r->frac_hi = a->frac_hi;
1343 r->frac_hm = a->frac_lo;
1344 r->frac_lm = 0;
1345 r->frac_lo = 0;
1346}
1347
1348#define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1349
da10a907
RH
1350#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1351#define FloatPartsN glue(FloatParts,N)
aca84527 1352#define FloatPartsW glue(FloatParts,W)
da10a907
RH
1353
1354#define N 64
aca84527 1355#define W 128
da10a907
RH
1356
1357#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1358#include "softfloat-parts.c.inc"
1359
da10a907 1360#undef N
aca84527 1361#undef W
da10a907 1362#define N 128
aca84527 1363#define W 256
7c45bad8 1364
da10a907 1365#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1366#include "softfloat-parts.c.inc"
1367
dedd123c
RH
1368#undef N
1369#undef W
1370#define N 256
1371
1372#include "softfloat-parts-addsub.c.inc"
1373
da10a907 1374#undef N
aca84527 1375#undef W
7c45bad8
RH
1376#undef partsN
1377#undef FloatPartsN
aca84527 1378#undef FloatPartsW
7c45bad8 1379
aaffb7bf
RH
1380/*
1381 * Pack/unpack routines with a specific FloatFmt.
1382 */
1383
98e256fc
RH
1384static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1385 float_status *s, const FloatFmt *params)
aaffb7bf 1386{
98e256fc 1387 float16_unpack_raw(p, f);
d46975bc 1388 parts_canonicalize(p, s, params);
aaffb7bf
RH
1389}
1390
98e256fc
RH
1391static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1392 float_status *s)
aaffb7bf 1393{
98e256fc 1394 float16a_unpack_canonical(p, f, s, &float16_params);
aaffb7bf
RH
1395}
1396
98e256fc
RH
1397static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1398 float_status *s)
aaffb7bf 1399{
98e256fc 1400 bfloat16_unpack_raw(p, f);
d46975bc 1401 parts_canonicalize(p, s, &bfloat16_params);
aaffb7bf
RH
1402}
1403
e293e927
RH
1404static float16 float16a_round_pack_canonical(FloatParts64 *p,
1405 float_status *s,
aaffb7bf
RH
1406 const FloatFmt *params)
1407{
ee6959f2 1408 parts_uncanon(p, s, params);
e293e927 1409 return float16_pack_raw(p);
aaffb7bf
RH
1410}
1411
e293e927
RH
1412static float16 float16_round_pack_canonical(FloatParts64 *p,
1413 float_status *s)
aaffb7bf
RH
1414{
1415 return float16a_round_pack_canonical(p, s, &float16_params);
1416}
1417
e293e927
RH
1418static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1419 float_status *s)
aaffb7bf 1420{
ee6959f2 1421 parts_uncanon(p, s, &bfloat16_params);
e293e927 1422 return bfloat16_pack_raw(p);
aaffb7bf
RH
1423}
1424
98e256fc
RH
1425static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1426 float_status *s)
aaffb7bf 1427{
98e256fc 1428 float32_unpack_raw(p, f);
d46975bc 1429 parts_canonicalize(p, s, &float32_params);
aaffb7bf
RH
1430}
1431
e293e927
RH
1432static float32 float32_round_pack_canonical(FloatParts64 *p,
1433 float_status *s)
aaffb7bf 1434{
ee6959f2 1435 parts_uncanon(p, s, &float32_params);
e293e927 1436 return float32_pack_raw(p);
aaffb7bf
RH
1437}
1438
98e256fc
RH
1439static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1440 float_status *s)
aaffb7bf 1441{
98e256fc 1442 float64_unpack_raw(p, f);
d46975bc 1443 parts_canonicalize(p, s, &float64_params);
aaffb7bf
RH
1444}
1445
e293e927
RH
1446static float64 float64_round_pack_canonical(FloatParts64 *p,
1447 float_status *s)
aaffb7bf 1448{
ee6959f2 1449 parts_uncanon(p, s, &float64_params);
e293e927 1450 return float64_pack_raw(p);
aaffb7bf
RH
1451}
1452
3ff49e56
RH
1453static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1454 float_status *s)
1455{
1456 float128_unpack_raw(p, f);
1457 parts_canonicalize(p, s, &float128_params);
1458}
1459
1460static float128 float128_round_pack_canonical(FloatParts128 *p,
1461 float_status *s)
1462{
1463 parts_uncanon(p, s, &float128_params);
1464 return float128_pack_raw(p);
1465}
1466
6fff2167 1467/*
da10a907 1468 * Addition and subtraction
6fff2167
AB
1469 */
1470
da10a907
RH
1471static float16 QEMU_FLATTEN
1472float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
6fff2167 1473{
da10a907 1474 FloatParts64 pa, pb, *pr;
98e256fc
RH
1475
1476 float16_unpack_canonical(&pa, a, status);
1477 float16_unpack_canonical(&pb, b, status);
da10a907 1478 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1479
da10a907 1480 return float16_round_pack_canonical(pr, status);
6fff2167
AB
1481}
1482
da10a907 1483float16 float16_add(float16 a, float16 b, float_status *status)
1b615d48 1484{
da10a907
RH
1485 return float16_addsub(a, b, status, false);
1486}
1b615d48 1487
da10a907
RH
1488float16 float16_sub(float16 a, float16 b, float_status *status)
1489{
1490 return float16_addsub(a, b, status, true);
1b615d48
EC
1491}
1492
1493static float32 QEMU_SOFTFLOAT_ATTR
da10a907 1494soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
6fff2167 1495{
da10a907 1496 FloatParts64 pa, pb, *pr;
98e256fc
RH
1497
1498 float32_unpack_canonical(&pa, a, status);
1499 float32_unpack_canonical(&pb, b, status);
da10a907 1500 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1501
da10a907 1502 return float32_round_pack_canonical(pr, status);
6fff2167
AB
1503}
1504
da10a907 1505static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1b615d48 1506{
da10a907 1507 return soft_f32_addsub(a, b, status, false);
1b615d48
EC
1508}
1509
da10a907 1510static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1b615d48 1511{
da10a907 1512 return soft_f32_addsub(a, b, status, true);
1b615d48
EC
1513}
1514
1515static float64 QEMU_SOFTFLOAT_ATTR
da10a907 1516soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
6fff2167 1517{
da10a907 1518 FloatParts64 pa, pb, *pr;
98e256fc
RH
1519
1520 float64_unpack_canonical(&pa, a, status);
1521 float64_unpack_canonical(&pb, b, status);
da10a907 1522 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1523
da10a907 1524 return float64_round_pack_canonical(pr, status);
6fff2167
AB
1525}
1526
da10a907 1527static float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1528{
da10a907 1529 return soft_f64_addsub(a, b, status, false);
1b615d48 1530}
6fff2167 1531
da10a907 1532static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1b615d48 1533{
da10a907 1534 return soft_f64_addsub(a, b, status, true);
6fff2167
AB
1535}
1536
/* Host single-precision addition; passed as the `hard` op by float32_add. */
static float hard_f32_add(float a, float b)
{
    const float sum = a + b;

    return sum;
}
6fff2167 1541
1b615d48
EC
/* Host single-precision subtraction; passed as the `hard` op by float32_sub. */
static float hard_f32_sub(float a, float b)
{
    const float diff = a - b;

    return diff;
}
1546
/* Host double-precision addition; passed as the `hard` op by float64_add. */
static double hard_f64_add(double a, double b)
{
    const double sum = a + b;

    return sum;
}
6fff2167 1551
1b615d48
EC
/* Host double-precision subtraction; passed as the `hard` op by float64_sub. */
static double hard_f64_sub(double a, double b)
{
    const double diff = a - b;

    return diff;
}
1556
b240c9c4 1557static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1b615d48
EC
1558{
1559 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1560 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1561 }
1562 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1563}
1564
b240c9c4 1565static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1b615d48
EC
1566{
1567 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1568 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1569 } else {
1570 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1571 }
1572}
1573
1574static float32 float32_addsub(float32 a, float32 b, float_status *s,
1575 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1576{
1577 return float32_gen2(a, b, s, hard, soft,
b240c9c4 1578 f32_is_zon2, f32_addsubmul_post);
1b615d48
EC
1579}
1580
1581static float64 float64_addsub(float64 a, float64 b, float_status *s,
1582 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1583{
1584 return float64_gen2(a, b, s, hard, soft,
b240c9c4 1585 f64_is_zon2, f64_addsubmul_post);
1b615d48
EC
1586}
1587
1588float32 QEMU_FLATTEN
1589float32_add(float32 a, float32 b, float_status *s)
1590{
1591 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1592}
1593
1594float32 QEMU_FLATTEN
1595float32_sub(float32 a, float32 b, float_status *s)
1596{
1597 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1598}
1599
1600float64 QEMU_FLATTEN
1601float64_add(float64 a, float64 b, float_status *s)
1602{
1603 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1604}
1605
1606float64 QEMU_FLATTEN
1607float64_sub(float64 a, float64 b, float_status *s)
1608{
1609 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1610}
1611
da10a907
RH
1612static bfloat16 QEMU_FLATTEN
1613bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
8282310d 1614{
da10a907 1615 FloatParts64 pa, pb, *pr;
98e256fc
RH
1616
1617 bfloat16_unpack_canonical(&pa, a, status);
1618 bfloat16_unpack_canonical(&pb, b, status);
da10a907 1619 pr = parts_addsub(&pa, &pb, status, subtract);
8282310d 1620
da10a907 1621 return bfloat16_round_pack_canonical(pr, status);
8282310d
LZ
1622}
1623
da10a907 1624bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1625{
da10a907
RH
1626 return bfloat16_addsub(a, b, status, false);
1627}
8282310d 1628
da10a907
RH
1629bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1630{
1631 return bfloat16_addsub(a, b, status, true);
8282310d
LZ
1632}
1633
3ff49e56
RH
1634static float128 QEMU_FLATTEN
1635float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1636{
1637 FloatParts128 pa, pb, *pr;
1638
1639 float128_unpack_canonical(&pa, a, status);
1640 float128_unpack_canonical(&pb, b, status);
1641 pr = parts_addsub(&pa, &pb, status, subtract);
1642
1643 return float128_round_pack_canonical(pr, status);
1644}
1645
1646float128 float128_add(float128 a, float128 b, float_status *status)
1647{
1648 return float128_addsub(a, b, status, false);
1649}
1650
1651float128 float128_sub(float128 a, float128 b, float_status *status)
1652{
1653 return float128_addsub(a, b, status, true);
1654}
1655
74d707e2 1656/*
aca84527 1657 * Multiplication
74d707e2
AB
1658 */
1659
97ff87c0 1660float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2 1661{
aca84527 1662 FloatParts64 pa, pb, *pr;
98e256fc
RH
1663
1664 float16_unpack_canonical(&pa, a, status);
1665 float16_unpack_canonical(&pb, b, status);
aca84527 1666 pr = parts_mul(&pa, &pb, status);
74d707e2 1667
aca84527 1668 return float16_round_pack_canonical(pr, status);
74d707e2
AB
1669}
1670
2dfabc86
EC
1671static float32 QEMU_SOFTFLOAT_ATTR
1672soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2 1673{
aca84527 1674 FloatParts64 pa, pb, *pr;
98e256fc
RH
1675
1676 float32_unpack_canonical(&pa, a, status);
1677 float32_unpack_canonical(&pb, b, status);
aca84527 1678 pr = parts_mul(&pa, &pb, status);
74d707e2 1679
aca84527 1680 return float32_round_pack_canonical(pr, status);
74d707e2
AB
1681}
1682
2dfabc86
EC
1683static float64 QEMU_SOFTFLOAT_ATTR
1684soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2 1685{
aca84527 1686 FloatParts64 pa, pb, *pr;
98e256fc
RH
1687
1688 float64_unpack_canonical(&pa, a, status);
1689 float64_unpack_canonical(&pb, b, status);
aca84527 1690 pr = parts_mul(&pa, &pb, status);
74d707e2 1691
aca84527 1692 return float64_round_pack_canonical(pr, status);
74d707e2
AB
1693}
1694
2dfabc86
EC
/* Host single-precision multiply; passed as the `hard` op by float32_mul. */
static float hard_f32_mul(float a, float b)
{
    const float prod = a * b;

    return prod;
}
1699
/* Host double-precision multiply; passed as the `hard` op by float64_mul. */
static double hard_f64_mul(double a, double b)
{
    const double prod = a * b;

    return prod;
}
1704
2dfabc86
EC
1705float32 QEMU_FLATTEN
1706float32_mul(float32 a, float32 b, float_status *s)
1707{
1708 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
b240c9c4 1709 f32_is_zon2, f32_addsubmul_post);
2dfabc86
EC
1710}
1711
1712float64 QEMU_FLATTEN
1713float64_mul(float64 a, float64 b, float_status *s)
1714{
1715 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
b240c9c4 1716 f64_is_zon2, f64_addsubmul_post);
2dfabc86
EC
1717}
1718
aca84527
RH
1719bfloat16 QEMU_FLATTEN
1720bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1721{
aca84527 1722 FloatParts64 pa, pb, *pr;
98e256fc
RH
1723
1724 bfloat16_unpack_canonical(&pa, a, status);
1725 bfloat16_unpack_canonical(&pb, b, status);
aca84527 1726 pr = parts_mul(&pa, &pb, status);
8282310d 1727
aca84527
RH
1728 return bfloat16_round_pack_canonical(pr, status);
1729}
1730
1731float128 QEMU_FLATTEN
1732float128_mul(float128 a, float128 b, float_status *status)
1733{
1734 FloatParts128 pa, pb, *pr;
1735
1736 float128_unpack_canonical(&pa, a, status);
1737 float128_unpack_canonical(&pb, b, status);
1738 pr = parts_mul(&pa, &pb, status);
1739
1740 return float128_round_pack_canonical(pr, status);
8282310d
LZ
1741}
1742
d446830a 1743/*
dedd123c 1744 * Fused multiply-add
d446830a
AB
1745 */
1746
97ff87c0 1747float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
dedd123c 1748 int flags, float_status *status)
d446830a 1749{
dedd123c 1750 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1751
1752 float16_unpack_canonical(&pa, a, status);
1753 float16_unpack_canonical(&pb, b, status);
1754 float16_unpack_canonical(&pc, c, status);
dedd123c 1755 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1756
dedd123c 1757 return float16_round_pack_canonical(pr, status);
d446830a
AB
1758}
1759
ccf770ba
EC
1760static float32 QEMU_SOFTFLOAT_ATTR
1761soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1762 float_status *status)
d446830a 1763{
dedd123c 1764 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1765
1766 float32_unpack_canonical(&pa, a, status);
1767 float32_unpack_canonical(&pb, b, status);
1768 float32_unpack_canonical(&pc, c, status);
dedd123c 1769 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1770
dedd123c 1771 return float32_round_pack_canonical(pr, status);
d446830a
AB
1772}
1773
ccf770ba
EC
1774static float64 QEMU_SOFTFLOAT_ATTR
1775soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1776 float_status *status)
d446830a 1777{
dedd123c 1778 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1779
1780 float64_unpack_canonical(&pa, a, status);
1781 float64_unpack_canonical(&pb, b, status);
1782 float64_unpack_canonical(&pc, c, status);
dedd123c 1783 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1784
dedd123c 1785 return float64_round_pack_canonical(pr, status);
d446830a
AB
1786}
1787
f6b3b108
EC
1788static bool force_soft_fma;
1789
ccf770ba
EC
1790float32 QEMU_FLATTEN
1791float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1792{
1793 union_float32 ua, ub, uc, ur;
1794
1795 ua.s = xa;
1796 ub.s = xb;
1797 uc.s = xc;
1798
1799 if (unlikely(!can_use_fpu(s))) {
1800 goto soft;
1801 }
1802 if (unlikely(flags & float_muladd_halve_result)) {
1803 goto soft;
1804 }
1805
1806 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1807 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1808 goto soft;
1809 }
f6b3b108
EC
1810
1811 if (unlikely(force_soft_fma)) {
1812 goto soft;
1813 }
1814
ccf770ba
EC
1815 /*
1816 * When (a || b) == 0, there's no need to check for under/over flow,
1817 * since we know the addend is (normal || 0) and the product is 0.
1818 */
1819 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1820 union_float32 up;
1821 bool prod_sign;
1822
1823 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1824 prod_sign ^= !!(flags & float_muladd_negate_product);
1825 up.s = float32_set_sign(float32_zero, prod_sign);
1826
1827 if (flags & float_muladd_negate_c) {
1828 uc.h = -uc.h;
1829 }
1830 ur.h = up.h + uc.h;
1831 } else {
896f51fb
KC
1832 union_float32 ua_orig = ua;
1833 union_float32 uc_orig = uc;
1834
ccf770ba
EC
1835 if (flags & float_muladd_negate_product) {
1836 ua.h = -ua.h;
1837 }
1838 if (flags & float_muladd_negate_c) {
1839 uc.h = -uc.h;
1840 }
1841
1842 ur.h = fmaf(ua.h, ub.h, uc.h);
1843
1844 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 1845 float_raise(float_flag_overflow, s);
ccf770ba 1846 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
1847 ua = ua_orig;
1848 uc = uc_orig;
ccf770ba
EC
1849 goto soft;
1850 }
1851 }
1852 if (flags & float_muladd_negate_result) {
1853 return float32_chs(ur.s);
1854 }
1855 return ur.s;
1856
1857 soft:
1858 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1859}
1860
1861float64 QEMU_FLATTEN
1862float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1863{
1864 union_float64 ua, ub, uc, ur;
1865
1866 ua.s = xa;
1867 ub.s = xb;
1868 uc.s = xc;
1869
1870 if (unlikely(!can_use_fpu(s))) {
1871 goto soft;
1872 }
1873 if (unlikely(flags & float_muladd_halve_result)) {
1874 goto soft;
1875 }
1876
1877 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1878 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1879 goto soft;
1880 }
f6b3b108
EC
1881
1882 if (unlikely(force_soft_fma)) {
1883 goto soft;
1884 }
1885
ccf770ba
EC
1886 /*
1887 * When (a || b) == 0, there's no need to check for under/over flow,
1888 * since we know the addend is (normal || 0) and the product is 0.
1889 */
1890 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1891 union_float64 up;
1892 bool prod_sign;
1893
1894 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1895 prod_sign ^= !!(flags & float_muladd_negate_product);
1896 up.s = float64_set_sign(float64_zero, prod_sign);
1897
1898 if (flags & float_muladd_negate_c) {
1899 uc.h = -uc.h;
1900 }
1901 ur.h = up.h + uc.h;
1902 } else {
896f51fb
KC
1903 union_float64 ua_orig = ua;
1904 union_float64 uc_orig = uc;
1905
ccf770ba
EC
1906 if (flags & float_muladd_negate_product) {
1907 ua.h = -ua.h;
1908 }
1909 if (flags & float_muladd_negate_c) {
1910 uc.h = -uc.h;
1911 }
1912
1913 ur.h = fma(ua.h, ub.h, uc.h);
1914
1915 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 1916 float_raise(float_flag_overflow, s);
ccf770ba 1917 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
1918 ua = ua_orig;
1919 uc = uc_orig;
ccf770ba
EC
1920 goto soft;
1921 }
1922 }
1923 if (flags & float_muladd_negate_result) {
1924 return float64_chs(ur.s);
1925 }
1926 return ur.s;
1927
1928 soft:
1929 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1930}
1931
8282310d
LZ
1932bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1933 int flags, float_status *status)
1934{
dedd123c 1935 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1936
1937 bfloat16_unpack_canonical(&pa, a, status);
1938 bfloat16_unpack_canonical(&pb, b, status);
1939 bfloat16_unpack_canonical(&pc, c, status);
dedd123c
RH
1940 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1941
1942 return bfloat16_round_pack_canonical(pr, status);
1943}
8282310d 1944
dedd123c
RH
1945float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1946 int flags, float_status *status)
1947{
1948 FloatParts128 pa, pb, pc, *pr;
1949
1950 float128_unpack_canonical(&pa, a, status);
1951 float128_unpack_canonical(&pb, b, status);
1952 float128_unpack_canonical(&pc, c, status);
1953 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1954
1955 return float128_round_pack_canonical(pr, status);
8282310d
LZ
1956}
1957
cf07323d 1958/*
ec961b81 1959 * Division
cf07323d
AB
1960 */
1961
cf07323d
AB
1962float16 float16_div(float16 a, float16 b, float_status *status)
1963{
ec961b81 1964 FloatParts64 pa, pb, *pr;
98e256fc
RH
1965
1966 float16_unpack_canonical(&pa, a, status);
1967 float16_unpack_canonical(&pb, b, status);
ec961b81 1968 pr = parts_div(&pa, &pb, status);
cf07323d 1969
ec961b81 1970 return float16_round_pack_canonical(pr, status);
cf07323d
AB
1971}
1972
4a629561
EC
1973static float32 QEMU_SOFTFLOAT_ATTR
1974soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d 1975{
ec961b81 1976 FloatParts64 pa, pb, *pr;
98e256fc
RH
1977
1978 float32_unpack_canonical(&pa, a, status);
1979 float32_unpack_canonical(&pb, b, status);
ec961b81 1980 pr = parts_div(&pa, &pb, status);
cf07323d 1981
ec961b81 1982 return float32_round_pack_canonical(pr, status);
cf07323d
AB
1983}
1984
4a629561
EC
1985static float64 QEMU_SOFTFLOAT_ATTR
1986soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d 1987{
ec961b81 1988 FloatParts64 pa, pb, *pr;
98e256fc
RH
1989
1990 float64_unpack_canonical(&pa, a, status);
1991 float64_unpack_canonical(&pb, b, status);
ec961b81 1992 pr = parts_div(&pa, &pb, status);
cf07323d 1993
ec961b81 1994 return float64_round_pack_canonical(pr, status);
cf07323d
AB
1995}
1996
4a629561
EC
/* Host single-precision division; returns a / b. */
static float hard_f32_div(float a, float b)
{
    const float quotient = a / b;

    return quotient;
}
2001
/* Host double-precision division; returns a / b. */
static double hard_f64_div(double a, double b)
{
    const double quotient = a / b;

    return quotient;
}
2006
2007static bool f32_div_pre(union_float32 a, union_float32 b)
2008{
2009 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2010 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2011 fpclassify(b.h) == FP_NORMAL;
2012 }
2013 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2014}
2015
2016static bool f64_div_pre(union_float64 a, union_float64 b)
2017{
2018 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2019 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2020 fpclassify(b.h) == FP_NORMAL;
2021 }
2022 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2023}
2024
2025static bool f32_div_post(union_float32 a, union_float32 b)
2026{
2027 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2028 return fpclassify(a.h) != FP_ZERO;
2029 }
2030 return !float32_is_zero(a.s);
2031}
2032
2033static bool f64_div_post(union_float64 a, union_float64 b)
2034{
2035 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2036 return fpclassify(a.h) != FP_ZERO;
2037 }
2038 return !float64_is_zero(a.s);
2039}
2040
2041float32 QEMU_FLATTEN
2042float32_div(float32 a, float32 b, float_status *s)
2043{
2044 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
b240c9c4 2045 f32_div_pre, f32_div_post);
4a629561
EC
2046}
2047
2048float64 QEMU_FLATTEN
2049float64_div(float64 a, float64 b, float_status *s)
2050{
2051 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
b240c9c4 2052 f64_div_pre, f64_div_post);
4a629561
EC
2053}
2054
ec961b81
RH
2055bfloat16 QEMU_FLATTEN
2056bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
8282310d 2057{
ec961b81 2058 FloatParts64 pa, pb, *pr;
98e256fc
RH
2059
2060 bfloat16_unpack_canonical(&pa, a, status);
2061 bfloat16_unpack_canonical(&pb, b, status);
ec961b81 2062 pr = parts_div(&pa, &pb, status);
8282310d 2063
ec961b81
RH
2064 return bfloat16_round_pack_canonical(pr, status);
2065}
2066
2067float128 QEMU_FLATTEN
2068float128_div(float128 a, float128 b, float_status *status)
2069{
2070 FloatParts128 pa, pb, *pr;
2071
2072 float128_unpack_canonical(&pa, a, status);
2073 float128_unpack_canonical(&pb, b, status);
2074 pr = parts_div(&pa, &pb, status);
2075
2076 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2077}
2078
6fed16b2
AB
/*
 * Float to Float conversions
 *
 * Returns the result of converting one float format to another. The
 * conversion is performed according to the IEC/IEEE Standard for
 * Binary Floating-Point Arithmetic.
 *
 * Usually this only needs to take care of raising invalid exceptions
 * and handling the conversion on NaNs.
 */
2089
c3f1875e
RH
2090static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2091{
2092 switch (a->cls) {
2093 case float_class_qnan:
2094 case float_class_snan:
2095 /*
2096 * There is no NaN in the destination format. Raise Invalid
2097 * and return a zero with the sign of the input NaN.
2098 */
2099 float_raise(float_flag_invalid, s);
2100 a->cls = float_class_zero;
2101 break;
2102
2103 case float_class_inf:
2104 /*
2105 * There is no Inf in the destination format. Raise Invalid
2106 * and return the maximum normal with the correct sign.
2107 */
2108 float_raise(float_flag_invalid, s);
2109 a->cls = float_class_normal;
2110 a->exp = float16_params_ahp.exp_max;
2111 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2112 float16_params_ahp.frac_size + 1);
2113 break;
2114
2115 case float_class_normal:
2116 case float_class_zero:
2117 break;
2118
2119 default:
2120 g_assert_not_reached();
2121 }
2122}
2123
2124static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2125{
2126 if (is_nan(a->cls)) {
2127 parts_return_nan(a, s);
6fed16b2 2128 }
6fed16b2
AB
2129}
2130
c3f1875e
RH
2131static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2132{
2133 if (is_nan(a->cls)) {
2134 parts_return_nan(a, s);
2135 }
2136}
2137
/*
 * Generic front end for the float_to_float helpers; PARTS_GENERIC_64_128
 * presumably selects the parts64_/parts128_ variant from the type of
 * PARTS -- confirm at its definition.
 */
#define parts_float_to_float(PARTS, STATUS) \
    PARTS_GENERIC_64_128(float_to_float, PARTS)(PARTS, STATUS)
2140
9882ccaf
RH
2141static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2142 float_status *s)
2143{
2144 a->cls = b->cls;
2145 a->sign = b->sign;
2146 a->exp = b->exp;
2147
2148 if (a->cls == float_class_normal) {
2149 frac_truncjam(a, b);
2150 } else if (is_nan(a->cls)) {
2151 /* Discard the low bits of the NaN. */
2152 a->frac = b->frac_hi;
2153 parts_return_nan(a, s);
2154 }
2155}
2156
2157static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2158 float_status *s)
2159{
2160 a->cls = b->cls;
2161 a->sign = b->sign;
2162 a->exp = b->exp;
2163 frac_widen(a, b);
2164
2165 if (is_nan(a->cls)) {
2166 parts_return_nan(a, s);
2167 }
2168}
2169
6fed16b2
AB
2170float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2171{
2172 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2173 FloatParts64 p;
98e256fc 2174
c3f1875e
RH
2175 float16a_unpack_canonical(&p, a, s, fmt16);
2176 parts_float_to_float(&p, s);
2177 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2178}
2179
2180float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2181{
2182 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2183 FloatParts64 p;
98e256fc 2184
c3f1875e
RH
2185 float16a_unpack_canonical(&p, a, s, fmt16);
2186 parts_float_to_float(&p, s);
2187 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2188}
2189
2190float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2191{
c3f1875e
RH
2192 FloatParts64 p;
2193 const FloatFmt *fmt;
98e256fc 2194
c3f1875e
RH
2195 float32_unpack_canonical(&p, a, s);
2196 if (ieee) {
2197 parts_float_to_float(&p, s);
2198 fmt = &float16_params;
2199 } else {
2200 parts_float_to_ahp(&p, s);
2201 fmt = &float16_params_ahp;
2202 }
2203 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2204}
2205
21381dcf
MK
2206static float64 QEMU_SOFTFLOAT_ATTR
2207soft_float32_to_float64(float32 a, float_status *s)
6fed16b2 2208{
c3f1875e 2209 FloatParts64 p;
98e256fc 2210
c3f1875e
RH
2211 float32_unpack_canonical(&p, a, s);
2212 parts_float_to_float(&p, s);
2213 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2214}
2215
21381dcf
MK
2216float64 float32_to_float64(float32 a, float_status *s)
2217{
2218 if (likely(float32_is_normal(a))) {
2219 /* Widening conversion can never produce inexact results. */
2220 union_float32 uf;
2221 union_float64 ud;
2222 uf.s = a;
2223 ud.h = uf.h;
2224 return ud.s;
2225 } else if (float32_is_zero(a)) {
2226 return float64_set_sign(float64_zero, float32_is_neg(a));
2227 } else {
2228 return soft_float32_to_float64(a, s);
2229 }
2230}
2231
6fed16b2
AB
2232float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2233{
c3f1875e
RH
2234 FloatParts64 p;
2235 const FloatFmt *fmt;
98e256fc 2236
c3f1875e
RH
2237 float64_unpack_canonical(&p, a, s);
2238 if (ieee) {
2239 parts_float_to_float(&p, s);
2240 fmt = &float16_params;
2241 } else {
2242 parts_float_to_ahp(&p, s);
2243 fmt = &float16_params_ahp;
2244 }
2245 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2246}
2247
2248float32 float64_to_float32(float64 a, float_status *s)
2249{
c3f1875e 2250 FloatParts64 p;
98e256fc 2251
c3f1875e
RH
2252 float64_unpack_canonical(&p, a, s);
2253 parts_float_to_float(&p, s);
2254 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2255}
2256
34f0c0a9
LZ
2257float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2258{
c3f1875e 2259 FloatParts64 p;
98e256fc 2260
c3f1875e
RH
2261 bfloat16_unpack_canonical(&p, a, s);
2262 parts_float_to_float(&p, s);
2263 return float32_round_pack_canonical(&p, s);
34f0c0a9
LZ
2264}
2265
2266float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2267{
c3f1875e 2268 FloatParts64 p;
98e256fc 2269
c3f1875e
RH
2270 bfloat16_unpack_canonical(&p, a, s);
2271 parts_float_to_float(&p, s);
2272 return float64_round_pack_canonical(&p, s);
34f0c0a9
LZ
2273}
2274
2275bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2276{
c3f1875e 2277 FloatParts64 p;
98e256fc 2278
c3f1875e
RH
2279 float32_unpack_canonical(&p, a, s);
2280 parts_float_to_float(&p, s);
2281 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2282}
2283
2284bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2285{
c3f1875e 2286 FloatParts64 p;
98e256fc 2287
c3f1875e
RH
2288 float64_unpack_canonical(&p, a, s);
2289 parts_float_to_float(&p, s);
2290 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2291}
2292
9882ccaf
RH
2293float32 float128_to_float32(float128 a, float_status *s)
2294{
2295 FloatParts64 p64;
2296 FloatParts128 p128;
2297
2298 float128_unpack_canonical(&p128, a, s);
2299 parts_float_to_float_narrow(&p64, &p128, s);
2300 return float32_round_pack_canonical(&p64, s);
2301}
2302
2303float64 float128_to_float64(float128 a, float_status *s)
2304{
2305 FloatParts64 p64;
2306 FloatParts128 p128;
2307
2308 float128_unpack_canonical(&p128, a, s);
2309 parts_float_to_float_narrow(&p64, &p128, s);
2310 return float64_round_pack_canonical(&p64, s);
2311}
2312
2313float128 float32_to_float128(float32 a, float_status *s)
2314{
2315 FloatParts64 p64;
2316 FloatParts128 p128;
2317
2318 float32_unpack_canonical(&p64, a, s);
2319 parts_float_to_float_widen(&p128, &p64, s);
2320 return float128_round_pack_canonical(&p128, s);
2321}
2322
2323float128 float64_to_float128(float64 a, float_status *s)
2324{
2325 FloatParts64 p64;
2326 FloatParts128 p128;
2327
2328 float64_unpack_canonical(&p64, a, s);
2329 parts_float_to_float_widen(&p128, &p64, s);
2330 return float128_round_pack_canonical(&p128, s);
2331}
2332
/*
 * Round to integral value
 */
2336
dbe4d53a
AB
2337float16 float16_round_to_int(float16 a, float_status *s)
2338{
afc34931 2339 FloatParts64 p;
98e256fc 2340
afc34931
RH
2341 float16_unpack_canonical(&p, a, s);
2342 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2343 return float16_round_pack_canonical(&p, s);
dbe4d53a
AB
2344}
2345
2346float32 float32_round_to_int(float32 a, float_status *s)
2347{
afc34931 2348 FloatParts64 p;
98e256fc 2349
afc34931
RH
2350 float32_unpack_canonical(&p, a, s);
2351 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2352 return float32_round_pack_canonical(&p, s);
dbe4d53a
AB
2353}
2354
2355float64 float64_round_to_int(float64 a, float_status *s)
2356{
afc34931 2357 FloatParts64 p;
98e256fc 2358
afc34931
RH
2359 float64_unpack_canonical(&p, a, s);
2360 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2361 return float64_round_pack_canonical(&p, s);
dbe4d53a
AB
2362}
2363
34f0c0a9
LZ
2364bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2365{
afc34931 2366 FloatParts64 p;
98e256fc 2367
afc34931
RH
2368 bfloat16_unpack_canonical(&p, a, s);
2369 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2370 return bfloat16_round_pack_canonical(&p, s);
2371}
2372
2373float128 float128_round_to_int(float128 a, float_status *s)
2374{
2375 FloatParts128 p;
2376
2377 float128_unpack_canonical(&p, a, s);
2378 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2379 return float128_round_pack_canonical(&p, s);
34f0c0a9
LZ
2380}
2381
/*
 * Floating-point to signed integer conversions
 */
ab52f973 2385
0d93d8ec
FC
2386int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2387 float_status *s)
2388{
98e256fc
RH
2389 FloatParts64 p;
2390
2391 float16_unpack_canonical(&p, a, s);
463b3f0d 2392 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
0d93d8ec
FC
2393}
2394
3dede407 2395int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2396 float_status *s)
2397{
98e256fc
RH
2398 FloatParts64 p;
2399
2400 float16_unpack_canonical(&p, a, s);
463b3f0d 2401 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2402}
2403
3dede407 2404int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2405 float_status *s)
2406{
98e256fc
RH
2407 FloatParts64 p;
2408
2409 float16_unpack_canonical(&p, a, s);
463b3f0d 2410 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2411}
2412
3dede407 2413int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2414 float_status *s)
2415{
98e256fc
RH
2416 FloatParts64 p;
2417
2418 float16_unpack_canonical(&p, a, s);
463b3f0d 2419 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2420}
2421
3dede407 2422int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2423 float_status *s)
2424{
98e256fc
RH
2425 FloatParts64 p;
2426
2427 float32_unpack_canonical(&p, a, s);
463b3f0d 2428 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2429}
2430
3dede407 2431int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2432 float_status *s)
2433{
98e256fc
RH
2434 FloatParts64 p;
2435
2436 float32_unpack_canonical(&p, a, s);
463b3f0d 2437 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2438}
2439
3dede407 2440int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2441 float_status *s)
2442{
98e256fc
RH
2443 FloatParts64 p;
2444
2445 float32_unpack_canonical(&p, a, s);
463b3f0d 2446 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2447}
2448
3dede407 2449int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2450 float_status *s)
2451{
98e256fc
RH
2452 FloatParts64 p;
2453
2454 float64_unpack_canonical(&p, a, s);
463b3f0d 2455 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2456}
2457
3dede407 2458int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2459 float_status *s)
2460{
98e256fc
RH
2461 FloatParts64 p;
2462
2463 float64_unpack_canonical(&p, a, s);
463b3f0d 2464 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2465}
2466
3dede407 2467int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2468 float_status *s)
2469{
98e256fc
RH
2470 FloatParts64 p;
2471
2472 float64_unpack_canonical(&p, a, s);
463b3f0d
RH
2473 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2474}
2475
2476int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2477 float_status *s)
2478{
2479 FloatParts64 p;
2480
2481 bfloat16_unpack_canonical(&p, a, s);
2482 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2483}
2484
2485int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2486 float_status *s)
2487{
2488 FloatParts64 p;
2489
2490 bfloat16_unpack_canonical(&p, a, s);
2491 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2492}
2493
2494int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2495 float_status *s)
2496{
2497 FloatParts64 p;
2498
2499 bfloat16_unpack_canonical(&p, a, s);
2500 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2501}
2502
2503static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
2504 int scale, float_status *s)
2505{
2506 FloatParts128 p;
2507
2508 float128_unpack_canonical(&p, a, s);
2509 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2510}
2511
2512static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
2513 int scale, float_status *s)
2514{
2515 FloatParts128 p;
2516
2517 float128_unpack_canonical(&p, a, s);
2518 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2519}
2520
0d93d8ec
FC
2521int8_t float16_to_int8(float16 a, float_status *s)
2522{
2523 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2524}
2525
2f6c74be
RH
2526int16_t float16_to_int16(float16 a, float_status *s)
2527{
2528 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2529}
2530
2531int32_t float16_to_int32(float16 a, float_status *s)
2532{
2533 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2534}
2535
2536int64_t float16_to_int64(float16 a, float_status *s)
2537{
2538 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2539}
2540
2541int16_t float32_to_int16(float32 a, float_status *s)
2542{
2543 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2544}
2545
2546int32_t float32_to_int32(float32 a, float_status *s)
2547{
2548 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2549}
2550
2551int64_t float32_to_int64(float32 a, float_status *s)
2552{
2553 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2554}
2555
2556int16_t float64_to_int16(float64 a, float_status *s)
2557{
2558 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2559}
2560
2561int32_t float64_to_int32(float64 a, float_status *s)
2562{
2563 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2564}
2565
2566int64_t float64_to_int64(float64 a, float_status *s)
2567{
2568 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2569}
2570
463b3f0d
RH
2571int32_t float128_to_int32(float128 a, float_status *s)
2572{
2573 return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2574}
2575
2576int64_t float128_to_int64(float128 a, float_status *s)
2577{
2578 return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2579}
2580
2f6c74be
RH
2581int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2582{
2583 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2584}
2585
2586int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2587{
2588 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2589}
2590
2591int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2592{
2593 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2594}
2595
2f6c74be
RH
2596int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2597{
2598 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2599}
ab52f973 2600
2f6c74be
RH
2601int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2602{
2603 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2604}
2605
2606int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2607{
2608 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2609}
2610
2611int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2612{
2613 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2614}
ab52f973 2615
2f6c74be
RH
2616int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2617{
2618 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2619}
ab52f973 2620
2f6c74be
RH
2621int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2622{
2623 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2624}
ab52f973 2625
463b3f0d 2626int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
34f0c0a9 2627{
463b3f0d 2628 return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2629}
2630
463b3f0d 2631int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
34f0c0a9 2632{
463b3f0d 2633 return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2634}
2635
2636int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2637{
2638 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2639}
2640
2641int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2642{
2643 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2644}
2645
2646int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2647{
2648 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2649}
2650
2651int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2652{
2653 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2654}
2655
2656int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2657{
2658 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2659}
2660
2661int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2662{
2663 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2664}
2665
/*
 * Floating-point to unsigned integer conversions
 */
2669
0d93d8ec
FC
2670uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2671 float_status *s)
2672{
98e256fc
RH
2673 FloatParts64 p;
2674
2675 float16_unpack_canonical(&p, a, s);
4ab4aef0 2676 return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
0d93d8ec
FC
2677}
2678
3dede407 2679uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2680 float_status *s)
2681{
98e256fc
RH
2682 FloatParts64 p;
2683
2684 float16_unpack_canonical(&p, a, s);
4ab4aef0 2685 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2686}
2687
3dede407 2688uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2689 float_status *s)
2690{
98e256fc
RH
2691 FloatParts64 p;
2692
2693 float16_unpack_canonical(&p, a, s);
4ab4aef0 2694 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2695}
2696
3dede407 2697uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2698 float_status *s)
2699{
98e256fc
RH
2700 FloatParts64 p;
2701
2702 float16_unpack_canonical(&p, a, s);
4ab4aef0 2703 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2704}
2705
3dede407 2706uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2707 float_status *s)
2708{
98e256fc
RH
2709 FloatParts64 p;
2710
2711 float32_unpack_canonical(&p, a, s);
4ab4aef0 2712 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2713}
2714
3dede407 2715uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2716 float_status *s)
2717{
98e256fc
RH
2718 FloatParts64 p;
2719
2720 float32_unpack_canonical(&p, a, s);
4ab4aef0 2721 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2722}
2723
3dede407 2724uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2725 float_status *s)
2726{
98e256fc
RH
2727 FloatParts64 p;
2728
2729 float32_unpack_canonical(&p, a, s);
4ab4aef0 2730 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2731}
2732
3dede407 2733uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2734 float_status *s)
2735{
98e256fc
RH
2736 FloatParts64 p;
2737
2738 float64_unpack_canonical(&p, a, s);
4ab4aef0 2739 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2740}
2741
3dede407 2742uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2743 float_status *s)
2744{
98e256fc
RH
2745 FloatParts64 p;
2746
2747 float64_unpack_canonical(&p, a, s);
4ab4aef0 2748 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2749}
2750
3dede407 2751uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2752 float_status *s)
2753{
98e256fc
RH
2754 FloatParts64 p;
2755
2756 float64_unpack_canonical(&p, a, s);
4ab4aef0
RH
2757 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2758}
2759
2760uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2761 int scale, float_status *s)
2762{
2763 FloatParts64 p;
2764
2765 bfloat16_unpack_canonical(&p, a, s);
2766 return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
2767}
2768
2769uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2770 int scale, float_status *s)
2771{
2772 FloatParts64 p;
2773
2774 bfloat16_unpack_canonical(&p, a, s);
2775 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2776}
2777
2778uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
2779 int scale, float_status *s)
2780{
2781 FloatParts64 p;
2782
2783 bfloat16_unpack_canonical(&p, a, s);
2784 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2785}
2786
2787static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
2788 int scale, float_status *s)
2789{
2790 FloatParts128 p;
2791
2792 float128_unpack_canonical(&p, a, s);
2793 return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
2794}
2795
2796static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
2797 int scale, float_status *s)
2798{
2799 FloatParts128 p;
2800
2801 float128_unpack_canonical(&p, a, s);
2802 return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2803}
2804
0d93d8ec
FC
2805uint8_t float16_to_uint8(float16 a, float_status *s)
2806{
2807 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2808}
2809
2f6c74be
RH
2810uint16_t float16_to_uint16(float16 a, float_status *s)
2811{
2812 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2813}
2814
2815uint32_t float16_to_uint32(float16 a, float_status *s)
2816{
2817 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2818}
2819
2820uint64_t float16_to_uint64(float16 a, float_status *s)
2821{
2822 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2823}
2824
2825uint16_t float32_to_uint16(float32 a, float_status *s)
2826{
2827 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2828}
2829
2830uint32_t float32_to_uint32(float32 a, float_status *s)
2831{
2832 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2833}
2834
2835uint64_t float32_to_uint64(float32 a, float_status *s)
2836{
2837 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2838}
2839
2840uint16_t float64_to_uint16(float64 a, float_status *s)
2841{
2842 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2843}
2844
2845uint32_t float64_to_uint32(float64 a, float_status *s)
2846{
2847 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2848}
2849
2850uint64_t float64_to_uint64(float64 a, float_status *s)
2851{
2852 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2853}
2854
4ab4aef0
RH
2855uint32_t float128_to_uint32(float128 a, float_status *s)
2856{
2857 return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2858}
2859
2860uint64_t float128_to_uint64(float128 a, float_status *s)
2861{
2862 return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2863}
2864
2f6c74be
RH
2865uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2866{
2867 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2868}
2869
2870uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2871{
2872 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2873}
2874
2875uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2876{
2877 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2878}
2879
2880uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2881{
2882 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2883}
2884
2885uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2886{
2887 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2888}
2889
2890uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2891{
2892 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2893}
2894
2895uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2896{
2897 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2898}
2899
2900uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2901{
2902 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2903}
2904
2905uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2906{
2907 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2908}
ab52f973 2909
4ab4aef0 2910uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
34f0c0a9 2911{
4ab4aef0 2912 return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2913}
2914
4ab4aef0 2915uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
34f0c0a9 2916{
4ab4aef0 2917 return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2918}
2919
2920uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
2921{
2922 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2923}
2924
2925uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
2926{
2927 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2928}
2929
2930uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
2931{
2932 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2933}
2934
2935uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
2936{
2937 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2938}
2939
2940uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
2941{
2942 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2943}
2944
2945uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
2946{
2947 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2948}
2949
/*
 * Signed integer to floating-point conversions
 */
2953
2abdfe24 2954float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2955{
e3689519
RH
2956 FloatParts64 p;
2957
2958 parts_sint_to_float(&p, a, scale, status);
2959 return float16_round_pack_canonical(&p, status);
c02e1fb8
AB
2960}
2961
2abdfe24
RH
2962float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2963{
2964 return int64_to_float16_scalbn(a, scale, status);
2965}
2966
2967float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2968{
2969 return int64_to_float16_scalbn(a, scale, status);
2970}
2971
2972float16 int64_to_float16(int64_t a, float_status *status)
2973{
2974 return int64_to_float16_scalbn(a, 0, status);
2975}
2976
c02e1fb8
AB
2977float16 int32_to_float16(int32_t a, float_status *status)
2978{
2abdfe24 2979 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2980}
2981
2982float16 int16_to_float16(int16_t a, float_status *status)
2983{
2abdfe24 2984 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2985}
2986
0d93d8ec
FC
2987float16 int8_to_float16(int8_t a, float_status *status)
2988{
2989 return int64_to_float16_scalbn(a, 0, status);
2990}
2991
2abdfe24 2992float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2993{
e3689519
RH
2994 FloatParts64 p;
2995
2996 parts64_sint_to_float(&p, a, scale, status);
2997 return float32_round_pack_canonical(&p, status);
c02e1fb8
AB
2998}
2999
2abdfe24
RH
3000float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3001{
3002 return int64_to_float32_scalbn(a, scale, status);
3003}
3004
3005float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3006{
3007 return int64_to_float32_scalbn(a, scale, status);
3008}
3009
3010float32 int64_to_float32(int64_t a, float_status *status)
3011{
3012 return int64_to_float32_scalbn(a, 0, status);
3013}
3014
c02e1fb8
AB
3015float32 int32_to_float32(int32_t a, float_status *status)
3016{
2abdfe24 3017 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3018}
3019
3020float32 int16_to_float32(int16_t a, float_status *status)
3021{
2abdfe24 3022 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3023}
3024
2abdfe24 3025float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3026{
e3689519
RH
3027 FloatParts64 p;
3028
3029 parts_sint_to_float(&p, a, scale, status);
3030 return float64_round_pack_canonical(&p, status);
c02e1fb8
AB
3031}
3032
2abdfe24
RH
3033float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3034{
3035 return int64_to_float64_scalbn(a, scale, status);
3036}
3037
3038float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3039{
3040 return int64_to_float64_scalbn(a, scale, status);
3041}
3042
3043float64 int64_to_float64(int64_t a, float_status *status)
3044{
3045 return int64_to_float64_scalbn(a, 0, status);
3046}
3047
c02e1fb8
AB
3048float64 int32_to_float64(int32_t a, float_status *status)
3049{
2abdfe24 3050 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3051}
3052
3053float64 int16_to_float64(int16_t a, float_status *status)
3054{
2abdfe24 3055 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3056}
3057
34f0c0a9
LZ
3058bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3059{
e3689519
RH
3060 FloatParts64 p;
3061
3062 parts_sint_to_float(&p, a, scale, status);
3063 return bfloat16_round_pack_canonical(&p, status);
34f0c0a9
LZ
3064}
3065
3066bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3067{
3068 return int64_to_bfloat16_scalbn(a, scale, status);
3069}
3070
3071bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3072{
3073 return int64_to_bfloat16_scalbn(a, scale, status);
3074}
3075
3076bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3077{
3078 return int64_to_bfloat16_scalbn(a, 0, status);
3079}
3080
3081bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3082{
3083 return int64_to_bfloat16_scalbn(a, 0, status);
3084}
3085
3086bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3087{
3088 return int64_to_bfloat16_scalbn(a, 0, status);
3089}
c02e1fb8 3090
e3689519
RH
3091float128 int64_to_float128(int64_t a, float_status *status)
3092{
3093 FloatParts128 p;
3094
3095 parts_sint_to_float(&p, a, 0, status);
3096 return float128_round_pack_canonical(&p, status);
3097}
3098
3099float128 int32_to_float128(int32_t a, float_status *status)
3100{
3101 return int64_to_float128(a, status);
3102}
3103
c02e1fb8
AB
3104/*
3105 * Unsigned Integer to float conversions
3106 *
3107 * Returns the result of converting the unsigned integer `a' to the
3108 * floating-point format. The conversion is performed according to the
3109 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3110 */
3111
f8155c1d 3112static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 3113{
f8155c1d 3114 FloatParts64 r = { .sign = false };
e99c4373 3115 int shift;
c02e1fb8
AB
3116
3117 if (a == 0) {
3118 r.cls = float_class_zero;
3119 } else {
2abdfe24 3120 scale = MIN(MAX(scale, -0x10000), 0x10000);
e99c4373 3121 shift = clz64(a);
c02e1fb8 3122 r.cls = float_class_normal;
e99c4373
RH
3123 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
3124 r.frac = a << shift;
c02e1fb8
AB
3125 }
3126
3127 return r;
3128}
3129
2abdfe24 3130float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3131{
f8155c1d 3132 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3133 return float16_round_pack_canonical(&pa, status);
c02e1fb8
AB
3134}
3135
2abdfe24
RH
3136float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3137{
3138 return uint64_to_float16_scalbn(a, scale, status);
3139}
3140
3141float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3142{
3143 return uint64_to_float16_scalbn(a, scale, status);
3144}
3145
3146float16 uint64_to_float16(uint64_t a, float_status *status)
3147{
3148 return uint64_to_float16_scalbn(a, 0, status);
3149}
3150
c02e1fb8
AB
3151float16 uint32_to_float16(uint32_t a, float_status *status)
3152{
2abdfe24 3153 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3154}
3155
3156float16 uint16_to_float16(uint16_t a, float_status *status)
3157{
2abdfe24 3158 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3159}
3160
0d93d8ec
FC
3161float16 uint8_to_float16(uint8_t a, float_status *status)
3162{
3163 return uint64_to_float16_scalbn(a, 0, status);
3164}
3165
2abdfe24 3166float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3167{
f8155c1d 3168 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3169 return float32_round_pack_canonical(&pa, status);
c02e1fb8
AB
3170}
3171
2abdfe24
RH
3172float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3173{
3174 return uint64_to_float32_scalbn(a, scale, status);
3175}
3176
3177float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3178{
3179 return uint64_to_float32_scalbn(a, scale, status);
3180}
3181
3182float32 uint64_to_float32(uint64_t a, float_status *status)
3183{
3184 return uint64_to_float32_scalbn(a, 0, status);
3185}
3186
c02e1fb8
AB
3187float32 uint32_to_float32(uint32_t a, float_status *status)
3188{
2abdfe24 3189 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3190}
3191
3192float32 uint16_to_float32(uint16_t a, float_status *status)
3193{
2abdfe24 3194 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3195}
3196
2abdfe24 3197float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3198{
f8155c1d 3199 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3200 return float64_round_pack_canonical(&pa, status);
c02e1fb8
AB
3201}
3202
2abdfe24
RH
3203float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3204{
3205 return uint64_to_float64_scalbn(a, scale, status);
3206}
3207
3208float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3209{
3210 return uint64_to_float64_scalbn(a, scale, status);
3211}
3212
3213float64 uint64_to_float64(uint64_t a, float_status *status)
3214{
3215 return uint64_to_float64_scalbn(a, 0, status);
3216}
3217
c02e1fb8
AB
3218float64 uint32_to_float64(uint32_t a, float_status *status)
3219{
2abdfe24 3220 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3221}
3222
3223float64 uint16_to_float64(uint16_t a, float_status *status)
3224{
2abdfe24 3225 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3226}
3227
34f0c0a9
LZ
3228/*
3229 * Returns the result of converting the unsigned integer `a' to the
3230 * bfloat16 format.
3231 */
3232
3233bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3234{
f8155c1d 3235 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3236 return bfloat16_round_pack_canonical(&pa, status);
34f0c0a9
LZ
3237}
3238
3239bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3240{
3241 return uint64_to_bfloat16_scalbn(a, scale, status);
3242}
3243
3244bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3245{
3246 return uint64_to_bfloat16_scalbn(a, scale, status);
3247}
3248
3249bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3250{
3251 return uint64_to_bfloat16_scalbn(a, 0, status);
3252}
3253
3254bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3255{
3256 return uint64_to_bfloat16_scalbn(a, 0, status);
3257}
3258
3259bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3260{
3261 return uint64_to_bfloat16_scalbn(a, 0, status);
3262}
3263
89360067
AB
/* Float Min/Max */
/* min() and max() functions. These can't be implemented as
 * 'compare and pick one input' because that would mishandle
 * NaNs and +0 vs -0.
 *
 * minnum() and maxnum() functions. These are similar to the min()
 * and max() functions but if one of the arguments is a QNaN and
 * the other is numerical then the numerical argument is returned.
 * SNaNs will get quietened before being returned.
 * minnum() and maxnum() correspond to the IEEE 754-2008 minNum()
 * and maxNum() operations. min() and max() are the typical min/max
 * semantics provided by many CPUs which predate that specification.
 *
 * minnummag() and maxnummag() functions correspond to minNumMag()
 * and maxNumMag() from IEEE 754-2008.
 */
f8155c1d 3280static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
89360067
AB
3281 bool ieee, bool ismag, float_status *s)
3282{
3283 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
3284 if (ieee) {
3285 /* Takes two floating-point values `a' and `b', one of
3286 * which is a NaN, and returns the appropriate NaN
3287 * result. If either `a' or `b' is a signaling NaN,
3288 * the invalid exception is raised.
3289 */
3290 if (is_snan(a.cls) || is_snan(b.cls)) {
22c355f4 3291 return *parts_pick_nan(&a, &b, s);
89360067
AB
3292 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
3293 return b;
3294 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
3295 return a;
3296 }
3297 }
22c355f4 3298 return *parts_pick_nan(&a, &b, s);
89360067
AB
3299 } else {
3300 int a_exp, b_exp;
89360067
AB
3301
3302 switch (a.cls) {
3303 case float_class_normal:
3304 a_exp = a.exp;
3305 break;
3306 case float_class_inf:
3307 a_exp = INT_MAX;
3308 break;
3309 case float_class_zero:
3310 a_exp = INT_MIN;
3311 break;
3312 default:
3313 g_assert_not_reached();
3314 break;
3315 }
3316 switch (b.cls) {
3317 case float_class_normal:
3318 b_exp = b.exp;
3319 break;
3320 case float_class_inf:
3321 b_exp = INT_MAX;
3322 break;
3323 case float_class_zero:
3324 b_exp = INT_MIN;
3325 break;
3326 default:
3327 g_assert_not_reached();
3328 break;
3329 }
3330
6245327a
EC
3331 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
3332 bool a_less = a_exp < b_exp;
3333 if (a_exp == b_exp) {
3334 a_less = a.frac < b.frac;
3335 }
3336 return a_less ^ ismin ? b : a;
89360067
AB
3337 }
3338
6245327a 3339 if (a.sign == b.sign) {
89360067
AB
3340 bool a_less = a_exp < b_exp;
3341 if (a_exp == b_exp) {
3342 a_less = a.frac < b.frac;
3343 }
6245327a 3344 return a.sign ^ a_less ^ ismin ? b : a;
89360067 3345 } else {
6245327a 3346 return a.sign ^ ismin ? b : a;
89360067
AB
3347 }
3348 }
3349}
3350
3351#define MINMAX(sz, name, ismin, isiee, ismag) \
3352float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
3353 float_status *s) \
3354{ \
98e256fc
RH
3355 FloatParts64 pa, pb, pr; \
3356 float ## sz ## _unpack_canonical(&pa, a, s); \
3357 float ## sz ## _unpack_canonical(&pb, b, s); \
3358 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
e293e927 3359 return float ## sz ## _round_pack_canonical(&pr, s); \
89360067
AB
3360}
3361
3362MINMAX(16, min, true, false, false)
3363MINMAX(16, minnum, true, true, false)
3364MINMAX(16, minnummag, true, true, true)
3365MINMAX(16, max, false, false, false)
3366MINMAX(16, maxnum, false, true, false)
3367MINMAX(16, maxnummag, false, true, true)
3368
3369MINMAX(32, min, true, false, false)
3370MINMAX(32, minnum, true, true, false)
3371MINMAX(32, minnummag, true, true, true)
3372MINMAX(32, max, false, false, false)
3373MINMAX(32, maxnum, false, true, false)
3374MINMAX(32, maxnummag, false, true, true)
3375
3376MINMAX(64, min, true, false, false)
3377MINMAX(64, minnum, true, true, false)
3378MINMAX(64, minnummag, true, true, true)
3379MINMAX(64, max, false, false, false)
3380MINMAX(64, maxnum, false, true, false)
3381MINMAX(64, maxnummag, false, true, true)
3382
3383#undef MINMAX
3384
8282310d
LZ
3385#define BF16_MINMAX(name, ismin, isiee, ismag) \
3386bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
3387{ \
98e256fc
RH
3388 FloatParts64 pa, pb, pr; \
3389 bfloat16_unpack_canonical(&pa, a, s); \
3390 bfloat16_unpack_canonical(&pb, b, s); \
3391 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
e293e927 3392 return bfloat16_round_pack_canonical(&pr, s); \
8282310d
LZ
3393}
3394
3395BF16_MINMAX(min, true, false, false)
3396BF16_MINMAX(minnum, true, true, false)
3397BF16_MINMAX(minnummag, true, true, true)
3398BF16_MINMAX(max, false, false, false)
3399BF16_MINMAX(maxnum, false, true, false)
3400BF16_MINMAX(maxnummag, false, true, true)
3401
3402#undef BF16_MINMAX
3403
0c4c9092 3404/* Floating point compare */
f8155c1d 3405static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
71bfd65c 3406 float_status *s)
0c4c9092
AB
3407{
3408 if (is_nan(a.cls) || is_nan(b.cls)) {
3409 if (!is_quiet ||
3410 a.cls == float_class_snan ||
3411 b.cls == float_class_snan) {
d82f3b2d 3412 float_raise(float_flag_invalid, s);
0c4c9092
AB
3413 }
3414 return float_relation_unordered;
3415 }
3416
3417 if (a.cls == float_class_zero) {
3418 if (b.cls == float_class_zero) {
3419 return float_relation_equal;
3420 }
3421 return b.sign ? float_relation_greater : float_relation_less;
3422 } else if (b.cls == float_class_zero) {
3423 return a.sign ? float_relation_less : float_relation_greater;
3424 }
3425
3426 /* The only really important thing about infinity is its sign. If
3427 * both are infinities the sign marks the smallest of the two.
3428 */
3429 if (a.cls == float_class_inf) {
3430 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3431 return float_relation_equal;
3432 }
3433 return a.sign ? float_relation_less : float_relation_greater;
3434 } else if (b.cls == float_class_inf) {
3435 return b.sign ? float_relation_greater : float_relation_less;
3436 }
3437
3438 if (a.sign != b.sign) {
3439 return a.sign ? float_relation_less : float_relation_greater;
3440 }
3441
3442 if (a.exp == b.exp) {
3443 if (a.frac == b.frac) {
3444 return float_relation_equal;
3445 }
3446 if (a.sign) {
3447 return a.frac > b.frac ?
3448 float_relation_less : float_relation_greater;
3449 } else {
3450 return a.frac > b.frac ?
3451 float_relation_greater : float_relation_less;
3452 }
3453 } else {
3454 if (a.sign) {
3455 return a.exp > b.exp ? float_relation_less : float_relation_greater;
3456 } else {
3457 return a.exp > b.exp ? float_relation_greater : float_relation_less;
3458 }
3459 }
3460}
3461
d9fe9db9
EC
3462#define COMPARE(name, attr, sz) \
3463static int attr \
3464name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
0c4c9092 3465{ \
98e256fc
RH
3466 FloatParts64 pa, pb; \
3467 float ## sz ## _unpack_canonical(&pa, a, s); \
3468 float ## sz ## _unpack_canonical(&pb, b, s); \
d9fe9db9 3469 return compare_floats(pa, pb, is_quiet, s); \
0c4c9092
AB
3470}
3471
d9fe9db9
EC
3472COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
3473COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
3474COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
0c4c9092
AB
3475
3476#undef COMPARE
3477
71bfd65c 3478FloatRelation float16_compare(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3479{
3480 return soft_f16_compare(a, b, false, s);
3481}
3482
71bfd65c 3483FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3484{
3485 return soft_f16_compare(a, b, true, s);
3486}
3487
71bfd65c 3488static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3489f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
3490{
3491 union_float32 ua, ub;
3492
3493 ua.s = xa;
3494 ub.s = xb;
3495
3496 if (QEMU_NO_HARDFLOAT) {
3497 goto soft;
3498 }
3499
3500 float32_input_flush2(&ua.s, &ub.s, s);
3501 if (isgreaterequal(ua.h, ub.h)) {
3502 if (isgreater(ua.h, ub.h)) {
3503 return float_relation_greater;
3504 }
3505 return float_relation_equal;
3506 }
3507 if (likely(isless(ua.h, ub.h))) {
3508 return float_relation_less;
3509 }
3510 /* The only condition remaining is unordered.
3511 * Fall through to set flags.
3512 */
3513 soft:
3514 return soft_f32_compare(ua.s, ub.s, is_quiet, s);
3515}
3516
71bfd65c 3517FloatRelation float32_compare(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3518{
3519 return f32_compare(a, b, false, s);
3520}
3521
71bfd65c 3522FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3523{
3524 return f32_compare(a, b, true, s);
3525}
3526
71bfd65c 3527static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3528f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3529{
3530 union_float64 ua, ub;
3531
3532 ua.s = xa;
3533 ub.s = xb;
3534
3535 if (QEMU_NO_HARDFLOAT) {
3536 goto soft;
3537 }
3538
3539 float64_input_flush2(&ua.s, &ub.s, s);
3540 if (isgreaterequal(ua.h, ub.h)) {
3541 if (isgreater(ua.h, ub.h)) {
3542 return float_relation_greater;
3543 }
3544 return float_relation_equal;
3545 }
3546 if (likely(isless(ua.h, ub.h))) {
3547 return float_relation_less;
3548 }
3549 /* The only condition remaining is unordered.
3550 * Fall through to set flags.
3551 */
3552 soft:
3553 return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3554}
3555
71bfd65c 3556FloatRelation float64_compare(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3557{
3558 return f64_compare(a, b, false, s);
3559}
3560
71bfd65c 3561FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3562{
3563 return f64_compare(a, b, true, s);
3564}
3565
8282310d
LZ
3566static FloatRelation QEMU_FLATTEN
3567soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
3568{
98e256fc
RH
3569 FloatParts64 pa, pb;
3570
3571 bfloat16_unpack_canonical(&pa, a, s);
3572 bfloat16_unpack_canonical(&pb, b, s);
8282310d
LZ
3573 return compare_floats(pa, pb, is_quiet, s);
3574}
3575
3576FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3577{
3578 return soft_bf16_compare(a, b, false, s);
3579}
3580
3581FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3582{
3583 return soft_bf16_compare(a, b, true, s);
3584}
3585
0bfc9f19 3586/* Multiply A by 2 raised to the power N. */
f8155c1d 3587static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
0bfc9f19
AB
3588{
3589 if (unlikely(is_nan(a.cls))) {
7c45bad8 3590 parts_return_nan(&a, s);
0bfc9f19
AB
3591 }
3592 if (a.cls == float_class_normal) {
f8155c1d 3593 /* The largest float type (even though not supported by FloatParts64)
ce8d4082
RH
3594 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3595 * still allows rounding to infinity, without allowing overflow
f8155c1d 3596 * within the int32_t that backs FloatParts64.exp.
ce8d4082
RH
3597 */
3598 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
3599 a.exp += n;
3600 }
3601 return a;
3602}
3603
3604float16 float16_scalbn(float16 a, int n, float_status *status)
3605{
98e256fc
RH
3606 FloatParts64 pa, pr;
3607
3608 float16_unpack_canonical(&pa, a, status);
3609 pr = scalbn_decomposed(pa, n, status);
e293e927 3610 return float16_round_pack_canonical(&pr, status);
0bfc9f19
AB
3611}
3612
3613float32 float32_scalbn(float32 a, int n, float_status *status)
3614{
98e256fc
RH
3615 FloatParts64 pa, pr;
3616
3617 float32_unpack_canonical(&pa, a, status);
3618 pr = scalbn_decomposed(pa, n, status);
e293e927 3619 return float32_round_pack_canonical(&pr, status);
0bfc9f19
AB
3620}
3621
3622float64 float64_scalbn(float64 a, int n, float_status *status)
3623{
98e256fc
RH
3624 FloatParts64 pa, pr;
3625
3626 float64_unpack_canonical(&pa, a, status);
3627 pr = scalbn_decomposed(pa, n, status);
e293e927 3628 return float64_round_pack_canonical(&pr, status);
0bfc9f19
AB
3629}
3630
8282310d
LZ
3631bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3632{
98e256fc
RH
3633 FloatParts64 pa, pr;
3634
3635 bfloat16_unpack_canonical(&pa, a, status);
3636 pr = scalbn_decomposed(pa, n, status);
e293e927 3637 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
3638}
3639
c13bb2da
AB
/*
 * Square Root
 *
 * The old softfloat code did an approximation step before zeroing in
 * on the final result. However, for simplicity we just compute the
 * square root by iterating down from the implicit bit to enough extra
 * bits to ensure we get a correctly rounded result.
 *
 * This does mean, however, that the calculation is slower than before,
 * especially for 64 bit floats.
 */
3651
f8155c1d 3652static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
c13bb2da
AB
3653{
3654 uint64_t a_frac, r_frac, s_frac;
3655 int bit, last_bit;
3656
3657 if (is_nan(a.cls)) {
7c45bad8
RH
3658 parts_return_nan(&a, s);
3659 return a;
c13bb2da
AB
3660 }
3661 if (a.cls == float_class_zero) {
3662 return a; /* sqrt(+-0) = +-0 */
3663 }
3664 if (a.sign) {
d82f3b2d 3665 float_raise(float_flag_invalid, s);
0fc07cad
RH
3666 parts_default_nan(&a, s);
3667 return a;
c13bb2da
AB
3668 }
3669 if (a.cls == float_class_inf) {
3670 return a; /* sqrt(+inf) = +inf */
3671 }
3672
3673 assert(a.cls == float_class_normal);
3674
3675 /* We need two overflow bits at the top. Adding room for that is a
3676 * right shift. If the exponent is odd, we can discard the low bit
3677 * by multiplying the fraction by 2; that's a left shift. Combine
e99c4373 3678 * those and we shift right by 1 if the exponent is odd, otherwise 2.
c13bb2da 3679 */
e99c4373 3680 a_frac = a.frac >> (2 - (a.exp & 1));
c13bb2da
AB
3681 a.exp >>= 1;
3682
3683 /* Bit-by-bit computation of sqrt. */
3684 r_frac = 0;
3685 s_frac = 0;
3686
3687 /* Iterate from implicit bit down to the 3 extra bits to compute a
e99c4373
RH
3688 * properly rounded result. Remember we've inserted two more bits
3689 * at the top, so these positions are two less.
c13bb2da 3690 */
e99c4373 3691 bit = DECOMPOSED_BINARY_POINT - 2;
c13bb2da
AB
3692 last_bit = MAX(p->frac_shift - 4, 0);
3693 do {
3694 uint64_t q = 1ULL << bit;
3695 uint64_t t_frac = s_frac + q;
3696 if (t_frac <= a_frac) {
3697 s_frac = t_frac + q;
3698 a_frac -= t_frac;
3699 r_frac += q;
3700 }
3701 a_frac <<= 1;
3702 } while (--bit >= last_bit);
3703
3704 /* Undo the right shift done above. If there is any remaining
3705 * fraction, the result is inexact. Set the sticky bit.
3706 */
e99c4373 3707 a.frac = (r_frac << 2) + (a_frac != 0);
c13bb2da
AB
3708
3709 return a;
3710}
3711
97ff87c0 3712float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da 3713{
98e256fc
RH
3714 FloatParts64 pa, pr;
3715
3716 float16_unpack_canonical(&pa, a, status);
3717 pr = sqrt_float(pa, status, &float16_params);
e293e927 3718 return float16_round_pack_canonical(&pr, status);
c13bb2da
AB
3719}
3720
f131bae8
EC
3721static float32 QEMU_SOFTFLOAT_ATTR
3722soft_f32_sqrt(float32 a, float_status *status)
c13bb2da 3723{
98e256fc
RH
3724 FloatParts64 pa, pr;
3725
3726 float32_unpack_canonical(&pa, a, status);
3727 pr = sqrt_float(pa, status, &float32_params);
e293e927 3728 return float32_round_pack_canonical(&pr, status);
c13bb2da
AB
3729}
3730
f131bae8
EC
3731static float64 QEMU_SOFTFLOAT_ATTR
3732soft_f64_sqrt(float64 a, float_status *status)
c13bb2da 3733{
98e256fc
RH
3734 FloatParts64 pa, pr;
3735
3736 float64_unpack_canonical(&pa, a, status);
3737 pr = sqrt_float(pa, status, &float64_params);
e293e927 3738 return float64_round_pack_canonical(&pr, status);
c13bb2da
AB
3739}
3740
f131bae8
EC
3741float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3742{
3743 union_float32 ua, ur;
3744
3745 ua.s = xa;
3746 if (unlikely(!can_use_fpu(s))) {
3747 goto soft;
3748 }
3749
3750 float32_input_flush1(&ua.s, s);
3751 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3752 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3753 fpclassify(ua.h) == FP_ZERO) ||
3754 signbit(ua.h))) {
3755 goto soft;
3756 }
3757 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3758 float32_is_neg(ua.s))) {
3759 goto soft;
3760 }
3761 ur.h = sqrtf(ua.h);
3762 return ur.s;
3763
3764 soft:
3765 return soft_f32_sqrt(ua.s, s);
3766}
3767
3768float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3769{
3770 union_float64 ua, ur;
3771
3772 ua.s = xa;
3773 if (unlikely(!can_use_fpu(s))) {
3774 goto soft;
3775 }
3776
3777 float64_input_flush1(&ua.s, s);
3778 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3779 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3780 fpclassify(ua.h) == FP_ZERO) ||
3781 signbit(ua.h))) {
3782 goto soft;
3783 }
3784 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3785 float64_is_neg(ua.s))) {
3786 goto soft;
3787 }
3788 ur.h = sqrt(ua.h);
3789 return ur.s;
3790
3791 soft:
3792 return soft_f64_sqrt(ua.s, s);
3793}
3794
8282310d
LZ
3795bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3796{
98e256fc
RH
3797 FloatParts64 pa, pr;
3798
3799 bfloat16_unpack_canonical(&pa, a, status);
3800 pr = sqrt_float(pa, status, &bfloat16_params);
e293e927 3801 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
3802}
3803
0218a16e
RH
3804/*----------------------------------------------------------------------------
3805| The pattern for a default generated NaN.
3806*----------------------------------------------------------------------------*/
3807
3808float16 float16_default_nan(float_status *status)
3809{
0fc07cad
RH
3810 FloatParts64 p;
3811
3812 parts_default_nan(&p, status);
0218a16e 3813 p.frac >>= float16_params.frac_shift;
71fd178e 3814 return float16_pack_raw(&p);
0218a16e
RH
3815}
3816
3817float32 float32_default_nan(float_status *status)
3818{
0fc07cad
RH
3819 FloatParts64 p;
3820
3821 parts_default_nan(&p, status);
0218a16e 3822 p.frac >>= float32_params.frac_shift;
71fd178e 3823 return float32_pack_raw(&p);
0218a16e
RH
3824}
3825
3826float64 float64_default_nan(float_status *status)
3827{
0fc07cad
RH
3828 FloatParts64 p;
3829
3830 parts_default_nan(&p, status);
0218a16e 3831 p.frac >>= float64_params.frac_shift;
71fd178e 3832 return float64_pack_raw(&p);
0218a16e
RH
3833}
3834
3835float128 float128_default_nan(float_status *status)
3836{
e9034ea8 3837 FloatParts128 p;
0218a16e 3838
0fc07cad 3839 parts_default_nan(&p, status);
e9034ea8
RH
3840 frac_shr(&p, float128_params.frac_shift);
3841 return float128_pack_raw(&p);
0218a16e 3842}
c13bb2da 3843
8282310d
LZ
3844bfloat16 bfloat16_default_nan(float_status *status)
3845{
0fc07cad
RH
3846 FloatParts64 p;
3847
3848 parts_default_nan(&p, status);
8282310d 3849 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3850 return bfloat16_pack_raw(&p);
8282310d
LZ
3851}
3852
158142c2 3853/*----------------------------------------------------------------------------
377ed926
RH
3854| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3855*----------------------------------------------------------------------------*/
3856
3857float16 float16_silence_nan(float16 a, float_status *status)
3858{
3dddb203
RH
3859 FloatParts64 p;
3860
3861 float16_unpack_raw(&p, a);
377ed926 3862 p.frac <<= float16_params.frac_shift;
92ff426d 3863 parts_silence_nan(&p, status);
377ed926 3864 p.frac >>= float16_params.frac_shift;
71fd178e 3865 return float16_pack_raw(&p);
377ed926
RH
3866}
3867
3868float32 float32_silence_nan(float32 a, float_status *status)
3869{
3dddb203
RH
3870 FloatParts64 p;
3871
3872 float32_unpack_raw(&p, a);
377ed926 3873 p.frac <<= float32_params.frac_shift;
92ff426d 3874 parts_silence_nan(&p, status);
377ed926 3875 p.frac >>= float32_params.frac_shift;
71fd178e 3876 return float32_pack_raw(&p);
377ed926
RH
3877}
3878
3879float64 float64_silence_nan(float64 a, float_status *status)
3880{
3dddb203
RH
3881 FloatParts64 p;
3882
3883 float64_unpack_raw(&p, a);
377ed926 3884 p.frac <<= float64_params.frac_shift;
92ff426d 3885 parts_silence_nan(&p, status);
377ed926 3886 p.frac >>= float64_params.frac_shift;
71fd178e 3887 return float64_pack_raw(&p);
377ed926
RH
3888}
3889
8282310d
LZ
3890bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3891{
3dddb203
RH
3892 FloatParts64 p;
3893
3894 bfloat16_unpack_raw(&p, a);
8282310d 3895 p.frac <<= bfloat16_params.frac_shift;
92ff426d 3896 parts_silence_nan(&p, status);
8282310d 3897 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3898 return bfloat16_pack_raw(&p);
8282310d 3899}
e6b405fe 3900
0018b1f4
RH
3901float128 float128_silence_nan(float128 a, float_status *status)
3902{
3903 FloatParts128 p;
3904
3905 float128_unpack_raw(&p, a);
3906 frac_shl(&p, float128_params.frac_shift);
3907 parts_silence_nan(&p, status);
3908 frac_shr(&p, float128_params.frac_shift);
3909 return float128_pack_raw(&p);
3910}
3911
e6b405fe
AB
3912/*----------------------------------------------------------------------------
3913| If `a' is denormal and we are in flush-to-zero mode then set the
3914| input-denormal exception and return zero. Otherwise just return the value.
3915*----------------------------------------------------------------------------*/
3916
f8155c1d 3917static bool parts_squash_denormal(FloatParts64 p, float_status *status)
e6b405fe
AB
3918{
3919 if (p.exp == 0 && p.frac != 0) {
3920 float_raise(float_flag_input_denormal, status);
3921 return true;
3922 }
3923
3924 return false;
3925}
3926
3927float16 float16_squash_input_denormal(float16 a, float_status *status)
3928{
3929 if (status->flush_inputs_to_zero) {
3dddb203
RH
3930 FloatParts64 p;
3931
3932 float16_unpack_raw(&p, a);
e6b405fe
AB
3933 if (parts_squash_denormal(p, status)) {
3934 return float16_set_sign(float16_zero, p.sign);
3935 }
3936 }
3937 return a;
3938}
3939
3940float32 float32_squash_input_denormal(float32 a, float_status *status)
3941{
3942 if (status->flush_inputs_to_zero) {
3dddb203
RH
3943 FloatParts64 p;
3944
3945 float32_unpack_raw(&p, a);
e6b405fe
AB
3946 if (parts_squash_denormal(p, status)) {
3947 return float32_set_sign(float32_zero, p.sign);
3948 }
3949 }
3950 return a;
3951}
3952
3953float64 float64_squash_input_denormal(float64 a, float_status *status)
3954{
3955 if (status->flush_inputs_to_zero) {
3dddb203
RH
3956 FloatParts64 p;
3957
3958 float64_unpack_raw(&p, a);
e6b405fe
AB
3959 if (parts_squash_denormal(p, status)) {
3960 return float64_set_sign(float64_zero, p.sign);
3961 }
3962 }
3963 return a;
3964}
3965
8282310d
LZ
3966bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3967{
3968 if (status->flush_inputs_to_zero) {
3dddb203
RH
3969 FloatParts64 p;
3970
3971 bfloat16_unpack_raw(&p, a);
8282310d
LZ
3972 if (parts_squash_denormal(p, status)) {
3973 return bfloat16_set_sign(bfloat16_zero, p.sign);
3974 }
3975 }
3976 return a;
3977}
3978
377ed926 3979/*----------------------------------------------------------------------------
158142c2
FB
3980| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3981| and 7, and returns the properly rounded 32-bit integer corresponding to the
3982| input. If `zSign' is 1, the input is negated before being converted to an
3983| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3984| is simply rounded to an integer, with the inexact exception raised if the
3985| input cannot be represented exactly as an integer. However, if the fixed-
3986| point input is too large, the invalid exception is raised and the largest
3987| positive or negative integer is returned.
3988*----------------------------------------------------------------------------*/
3989
c120391c
RH
3990static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
3991 float_status *status)
158142c2 3992{
8f506c70 3993 int8_t roundingMode;
c120391c 3994 bool roundNearestEven;
8f506c70 3995 int8_t roundIncrement, roundBits;
760e1416 3996 int32_t z;
158142c2 3997
a2f2d288 3998 roundingMode = status->float_rounding_mode;
158142c2 3999 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4000 switch (roundingMode) {
4001 case float_round_nearest_even:
f9288a76 4002 case float_round_ties_away:
dc355b76
PM
4003 roundIncrement = 0x40;
4004 break;
4005 case float_round_to_zero:
4006 roundIncrement = 0;
4007 break;
4008 case float_round_up:
4009 roundIncrement = zSign ? 0 : 0x7f;
4010 break;
4011 case float_round_down:
4012 roundIncrement = zSign ? 0x7f : 0;
4013 break;
5d64abb3
RH
4014 case float_round_to_odd:
4015 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
4016 break;
dc355b76
PM
4017 default:
4018 abort();
158142c2
FB
4019 }
4020 roundBits = absZ & 0x7F;
4021 absZ = ( absZ + roundIncrement )>>7;
40662886
PMD
4022 if (!(roundBits ^ 0x40) && roundNearestEven) {
4023 absZ &= ~1;
4024 }
158142c2
FB
4025 z = absZ;
4026 if ( zSign ) z = - z;
4027 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 4028 float_raise(float_flag_invalid, status);
2c217da0 4029 return zSign ? INT32_MIN : INT32_MAX;
158142c2 4030 }
a2f2d288 4031 if (roundBits) {
d82f3b2d 4032 float_raise(float_flag_inexact, status);
a2f2d288 4033 }
158142c2
FB
4034 return z;
4035
4036}
4037
4038/*----------------------------------------------------------------------------
4039| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4040| `absZ1', with binary point between bits 63 and 64 (between the input words),
4041| and returns the properly rounded 64-bit integer corresponding to the input.
4042| If `zSign' is 1, the input is negated before being converted to an integer.
4043| Ordinarily, the fixed-point input is simply rounded to an integer, with
4044| the inexact exception raised if the input cannot be represented exactly as
4045| an integer. However, if the fixed-point input is too large, the invalid
4046| exception is raised and the largest positive or negative integer is
4047| returned.
4048*----------------------------------------------------------------------------*/
4049
c120391c 4050static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 4051 float_status *status)
158142c2 4052{
8f506c70 4053 int8_t roundingMode;
c120391c 4054 bool roundNearestEven, increment;
760e1416 4055 int64_t z;
158142c2 4056
a2f2d288 4057 roundingMode = status->float_rounding_mode;
158142c2 4058 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4059 switch (roundingMode) {
4060 case float_round_nearest_even:
f9288a76 4061 case float_round_ties_away:
dc355b76
PM
4062 increment = ((int64_t) absZ1 < 0);
4063 break;
4064 case float_round_to_zero:
4065 increment = 0;
4066 break;
4067 case float_round_up:
4068 increment = !zSign && absZ1;
4069 break;
4070 case float_round_down:
4071 increment = zSign && absZ1;
4072 break;
5d64abb3
RH
4073 case float_round_to_odd:
4074 increment = !(absZ0 & 1) && absZ1;
4075 break;
dc355b76
PM
4076 default:
4077 abort();
158142c2
FB
4078 }
4079 if ( increment ) {
4080 ++absZ0;
4081 if ( absZ0 == 0 ) goto overflow;
40662886
PMD
4082 if (!(absZ1 << 1) && roundNearestEven) {
4083 absZ0 &= ~1;
4084 }
158142c2
FB
4085 }
4086 z = absZ0;
4087 if ( zSign ) z = - z;
4088 if ( z && ( ( z < 0 ) ^ zSign ) ) {
4089 overflow:
ff32e16e 4090 float_raise(float_flag_invalid, status);
2c217da0 4091 return zSign ? INT64_MIN : INT64_MAX;
158142c2 4092 }
a2f2d288 4093 if (absZ1) {
d82f3b2d 4094 float_raise(float_flag_inexact, status);
a2f2d288 4095 }
158142c2
FB
4096 return z;
4097
4098}
4099
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision significand `aSig'.  The
| significand is shifted left by `clz32(aSig) - 8' bits; the shifted value is
| written to `*zSigPtr' and the matching exponent, one minus the shift
| distance, is written to `*zExpPtr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    const int8_t shift = clz32(aSig) - 8;

    *zExpPtr = 1 - shift;
    *zSigPtr = aSig << shift;
}
4117
158142c2
FB
4118/*----------------------------------------------------------------------------
4119| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4120| and significand `zSig', and returns the proper single-precision floating-
4121| point value corresponding to the abstract input. Ordinarily, the abstract
4122| value is simply rounded and packed into the single-precision format, with
4123| the inexact exception raised if the abstract input cannot be represented
4124| exactly. However, if the abstract value is too large, the overflow and
4125| inexact exceptions are raised and an infinity or maximal finite value is
4126| returned. If the abstract value is too small, the input value is rounded to
4127| a subnormal number, and the underflow and inexact exceptions are raised if
4128| the abstract input cannot be represented exactly as a subnormal single-
4129| precision floating-point number.
4130| The input significand `zSig' has its binary point between bits 30
4131| and 29, which is 7 bits to the left of the usual location. This shifted
4132| significand must be normalized or smaller. If `zSig' is not normalized,
4133| `zExp' must be 0; in that case, the result returned is a subnormal number,
4134| and it must not require rounding. In the usual case that `zSig' is
4135| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4136| The handling of underflow and overflow follows the IEC/IEEE Standard for
4137| Binary Floating-Point Arithmetic.
4138*----------------------------------------------------------------------------*/
4139
c120391c 4140static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4141 float_status *status)
158142c2 4142{
8f506c70 4143 int8_t roundingMode;
c120391c 4144 bool roundNearestEven;
8f506c70 4145 int8_t roundIncrement, roundBits;
c120391c 4146 bool isTiny;
158142c2 4147
a2f2d288 4148 roundingMode = status->float_rounding_mode;
158142c2 4149 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4150 switch (roundingMode) {
4151 case float_round_nearest_even:
f9288a76 4152 case float_round_ties_away:
dc355b76
PM
4153 roundIncrement = 0x40;
4154 break;
4155 case float_round_to_zero:
4156 roundIncrement = 0;
4157 break;
4158 case float_round_up:
4159 roundIncrement = zSign ? 0 : 0x7f;
4160 break;
4161 case float_round_down:
4162 roundIncrement = zSign ? 0x7f : 0;
4163 break;
5d64abb3
RH
4164 case float_round_to_odd:
4165 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4166 break;
dc355b76
PM
4167 default:
4168 abort();
4169 break;
158142c2
FB
4170 }
4171 roundBits = zSig & 0x7F;
bb98fe42 4172 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
4173 if ( ( 0xFD < zExp )
4174 || ( ( zExp == 0xFD )
bb98fe42 4175 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4176 ) {
5d64abb3
RH
4177 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4178 roundIncrement != 0;
ff32e16e 4179 float_raise(float_flag_overflow | float_flag_inexact, status);
5d64abb3 4180 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
158142c2
FB
4181 }
4182 if ( zExp < 0 ) {
a2f2d288 4183 if (status->flush_to_zero) {
ff32e16e 4184 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4185 return packFloat32(zSign, 0, 0);
4186 }
a828b373
RH
4187 isTiny = status->tininess_before_rounding
4188 || (zExp < -1)
4189 || (zSig + roundIncrement < 0x80000000);
158142c2
FB
4190 shift32RightJamming( zSig, - zExp, &zSig );
4191 zExp = 0;
4192 roundBits = zSig & 0x7F;
ff32e16e
PM
4193 if (isTiny && roundBits) {
4194 float_raise(float_flag_underflow, status);
4195 }
5d64abb3
RH
4196 if (roundingMode == float_round_to_odd) {
4197 /*
4198 * For round-to-odd case, the roundIncrement depends on
4199 * zSig which just changed.
4200 */
4201 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4202 }
158142c2
FB
4203 }
4204 }
a2f2d288 4205 if (roundBits) {
d82f3b2d 4206 float_raise(float_flag_inexact, status);
a2f2d288 4207 }
158142c2 4208 zSig = ( zSig + roundIncrement )>>7;
40662886
PMD
4209 if (!(roundBits ^ 0x40) && roundNearestEven) {
4210 zSig &= ~1;
4211 }
158142c2
FB
4212 if ( zSig == 0 ) zExp = 0;
4213 return packFloat32( zSign, zExp, zSig );
4214
4215}
4216
4217/*----------------------------------------------------------------------------
4218| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4219| and significand `zSig', and returns the proper single-precision floating-
4220| point value corresponding to the abstract input. This routine is just like
4221| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4222| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4223| floating-point exponent.
4224*----------------------------------------------------------------------------*/
4225
4226static float32
c120391c 4227 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4228 float_status *status)
158142c2 4229{
8f506c70 4230 int8_t shiftCount;
158142c2 4231
0019d5c3 4232 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
4233 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4234 status);
158142c2
FB
4235
4236}
4237
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision significand `aSig'.  The
| significand is shifted left by `clz64(aSig) - 11' bits; the shifted value is
| written to `*zSigPtr' and the matching exponent, one minus the shift
| distance, is written to `*zExpPtr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    const int8_t shift = clz64(aSig) - 11;

    *zExpPtr = 1 - shift;
    *zSigPtr = aSig << shift;
}
4255
4256/*----------------------------------------------------------------------------
4257| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4258| double-precision floating-point value, returning the result. After being
4259| shifted into the proper positions, the three fields are simply added
4260| together to form the result. This means that any integer portion of `zSig'
4261| will be added into the exponent. Since a properly normalized significand
4262| will have an integer portion equal to 1, the `zExp' input should be 1 less
4263| than the desired result exponent whenever `zSig' is a complete, normalized
4264| significand.
4265*----------------------------------------------------------------------------*/
4266
c120391c 4267static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
158142c2
FB
4268{
4269
f090c9d4 4270 return make_float64(
bb98fe42 4271 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
4272
4273}
4274
4275/*----------------------------------------------------------------------------
4276| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4277| and significand `zSig', and returns the proper double-precision floating-
4278| point value corresponding to the abstract input. Ordinarily, the abstract
4279| value is simply rounded and packed into the double-precision format, with
4280| the inexact exception raised if the abstract input cannot be represented
4281| exactly. However, if the abstract value is too large, the overflow and
4282| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
4283| returned. If the abstract value is too small, the input value is rounded to
4284| a subnormal number, and the underflow and inexact exceptions are raised if
4285| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
4286| precision floating-point number.
4287| The input significand `zSig' has its binary point between bits 62
4288| and 61, which is 10 bits to the left of the usual location. This shifted
4289| significand must be normalized or smaller. If `zSig' is not normalized,
4290| `zExp' must be 0; in that case, the result returned is a subnormal number,
4291| and it must not require rounding. In the usual case that `zSig' is
4292| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4293| The handling of underflow and overflow follows the IEC/IEEE Standard for
4294| Binary Floating-Point Arithmetic.
4295*----------------------------------------------------------------------------*/
4296
c120391c 4297static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4298 float_status *status)
158142c2 4299{
8f506c70 4300 int8_t roundingMode;
c120391c 4301 bool roundNearestEven;
0c48262d 4302 int roundIncrement, roundBits;
c120391c 4303 bool isTiny;
158142c2 4304
a2f2d288 4305 roundingMode = status->float_rounding_mode;
158142c2 4306 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4307 switch (roundingMode) {
4308 case float_round_nearest_even:
f9288a76 4309 case float_round_ties_away:
dc355b76
PM
4310 roundIncrement = 0x200;
4311 break;
4312 case float_round_to_zero:
4313 roundIncrement = 0;
4314 break;
4315 case float_round_up:
4316 roundIncrement = zSign ? 0 : 0x3ff;
4317 break;
4318 case float_round_down:
4319 roundIncrement = zSign ? 0x3ff : 0;
4320 break;
9ee6f678
BR
4321 case float_round_to_odd:
4322 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4323 break;
dc355b76
PM
4324 default:
4325 abort();
158142c2
FB
4326 }
4327 roundBits = zSig & 0x3FF;
bb98fe42 4328 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
4329 if ( ( 0x7FD < zExp )
4330 || ( ( zExp == 0x7FD )
bb98fe42 4331 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4332 ) {
9ee6f678
BR
4333 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4334 roundIncrement != 0;
ff32e16e 4335 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 4336 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
4337 }
4338 if ( zExp < 0 ) {
a2f2d288 4339 if (status->flush_to_zero) {
ff32e16e 4340 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4341 return packFloat64(zSign, 0, 0);
4342 }
a828b373
RH
4343 isTiny = status->tininess_before_rounding
4344 || (zExp < -1)
4345 || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
158142c2
FB
4346 shift64RightJamming( zSig, - zExp, &zSig );
4347 zExp = 0;
4348 roundBits = zSig & 0x3FF;
ff32e16e
PM
4349 if (isTiny && roundBits) {
4350 float_raise(float_flag_underflow, status);
4351 }
9ee6f678
BR
4352 if (roundingMode == float_round_to_odd) {
4353 /*
4354 * For round-to-odd case, the roundIncrement depends on
4355 * zSig which just changed.
4356 */
4357 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4358 }
158142c2
FB
4359 }
4360 }
a2f2d288 4361 if (roundBits) {
d82f3b2d 4362 float_raise(float_flag_inexact, status);
a2f2d288 4363 }
158142c2 4364 zSig = ( zSig + roundIncrement )>>10;
40662886
PMD
4365 if (!(roundBits ^ 0x200) && roundNearestEven) {
4366 zSig &= ~1;
4367 }
158142c2
FB
4368 if ( zSig == 0 ) zExp = 0;
4369 return packFloat64( zSign, zExp, zSig );
4370
4371}
4372
4373/*----------------------------------------------------------------------------
4374| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4375| and significand `zSig', and returns the proper double-precision floating-
4376| point value corresponding to the abstract input. This routine is just like
4377| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4378| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4379| floating-point exponent.
4380*----------------------------------------------------------------------------*/
4381
4382static float64
c120391c 4383 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4384 float_status *status)
158142c2 4385{
8f506c70 4386 int8_t shiftCount;
158142c2 4387
0019d5c3 4388 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
4389 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4390 status);
158142c2
FB
4391
4392}
4393
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision significand `aSig'.
| The significand is shifted left by `clz64(aSig)' bits; the shifted value is
| written to `*zSigPtr' and the matching exponent, one minus the shift
| distance, is written to `*zExpPtr'.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    const int8_t shift = clz64(aSig);

    *zExpPtr = 1 - shift;
    *zSigPtr = aSig << shift;
}
4410
4411/*----------------------------------------------------------------------------
4412| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4413| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4414| and returns the proper extended double-precision floating-point value
4415| corresponding to the abstract input. Ordinarily, the abstract value is
4416| rounded and packed into the extended double-precision format, with the
4417| inexact exception raised if the abstract input cannot be represented
4418| exactly. However, if the abstract value is too large, the overflow and
4419| inexact exceptions are raised and an infinity or maximal finite value is
4420| returned. If the abstract value is too small, the input value is rounded to
4421| a subnormal number, and the underflow and inexact exceptions are raised if
4422| the abstract input cannot be represented exactly as a subnormal extended
4423| double-precision floating-point number.
4424| If `roundingPrecision' is 32 or 64, the result is rounded to the same
4425| number of bits as single or double precision, respectively. Otherwise, the
4426| result is rounded to the full precision of the extended double-precision
4427| format.
4428| The input significand must be normalized or smaller. If the input
4429| significand is not normalized, `zExp' must be 0; in that case, the result
4430| returned is a subnormal number, and it must not require rounding. The
4431| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4432| Floating-Point Arithmetic.
4433*----------------------------------------------------------------------------*/
4434
c120391c 4435floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
88857aca
LV
4436 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4437 float_status *status)
158142c2 4438{
8f506c70 4439 int8_t roundingMode;
c120391c 4440 bool roundNearestEven, increment, isTiny;
f42c2224 4441 int64_t roundIncrement, roundMask, roundBits;
158142c2 4442
a2f2d288 4443 roundingMode = status->float_rounding_mode;
158142c2
FB
4444 roundNearestEven = ( roundingMode == float_round_nearest_even );
4445 if ( roundingPrecision == 80 ) goto precision80;
4446 if ( roundingPrecision == 64 ) {
e9321124
AB
4447 roundIncrement = UINT64_C(0x0000000000000400);
4448 roundMask = UINT64_C(0x00000000000007FF);
158142c2
FB
4449 }
4450 else if ( roundingPrecision == 32 ) {
e9321124
AB
4451 roundIncrement = UINT64_C(0x0000008000000000);
4452 roundMask = UINT64_C(0x000000FFFFFFFFFF);
158142c2
FB
4453 }
4454 else {
4455 goto precision80;
4456 }
4457 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
4458 switch (roundingMode) {
4459 case float_round_nearest_even:
f9288a76 4460 case float_round_ties_away:
dc355b76
PM
4461 break;
4462 case float_round_to_zero:
4463 roundIncrement = 0;
4464 break;
4465 case float_round_up:
4466 roundIncrement = zSign ? 0 : roundMask;
4467 break;
4468 case float_round_down:
4469 roundIncrement = zSign ? roundMask : 0;
4470 break;
4471 default:
4472 abort();
158142c2
FB
4473 }
4474 roundBits = zSig0 & roundMask;
bb98fe42 4475 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4476 if ( ( 0x7FFE < zExp )
4477 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4478 ) {
4479 goto overflow;
4480 }
4481 if ( zExp <= 0 ) {
a2f2d288 4482 if (status->flush_to_zero) {
ff32e16e 4483 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4484 return packFloatx80(zSign, 0, 0);
4485 }
a828b373
RH
4486 isTiny = status->tininess_before_rounding
4487 || (zExp < 0 )
4488 || (zSig0 <= zSig0 + roundIncrement);
158142c2
FB
4489 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4490 zExp = 0;
4491 roundBits = zSig0 & roundMask;
ff32e16e
PM
4492 if (isTiny && roundBits) {
4493 float_raise(float_flag_underflow, status);
4494 }
a2f2d288 4495 if (roundBits) {
d82f3b2d 4496 float_raise(float_flag_inexact, status);
a2f2d288 4497 }
158142c2 4498 zSig0 += roundIncrement;
bb98fe42 4499 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4500 roundIncrement = roundMask + 1;
4501 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4502 roundMask |= roundIncrement;
4503 }
4504 zSig0 &= ~ roundMask;
4505 return packFloatx80( zSign, zExp, zSig0 );
4506 }
4507 }
a2f2d288 4508 if (roundBits) {
d82f3b2d 4509 float_raise(float_flag_inexact, status);
a2f2d288 4510 }
158142c2
FB
4511 zSig0 += roundIncrement;
4512 if ( zSig0 < roundIncrement ) {
4513 ++zExp;
e9321124 4514 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4515 }
4516 roundIncrement = roundMask + 1;
4517 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4518 roundMask |= roundIncrement;
4519 }
4520 zSig0 &= ~ roundMask;
4521 if ( zSig0 == 0 ) zExp = 0;
4522 return packFloatx80( zSign, zExp, zSig0 );
4523 precision80:
dc355b76
PM
4524 switch (roundingMode) {
4525 case float_round_nearest_even:
f9288a76 4526 case float_round_ties_away:
dc355b76
PM
4527 increment = ((int64_t)zSig1 < 0);
4528 break;
4529 case float_round_to_zero:
4530 increment = 0;
4531 break;
4532 case float_round_up:
4533 increment = !zSign && zSig1;
4534 break;
4535 case float_round_down:
4536 increment = zSign && zSig1;
4537 break;
4538 default:
4539 abort();
158142c2 4540 }
bb98fe42 4541 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4542 if ( ( 0x7FFE < zExp )
4543 || ( ( zExp == 0x7FFE )
e9321124 4544 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
4545 && increment
4546 )
4547 ) {
4548 roundMask = 0;
4549 overflow:
ff32e16e 4550 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4551 if ( ( roundingMode == float_round_to_zero )
4552 || ( zSign && ( roundingMode == float_round_up ) )
4553 || ( ! zSign && ( roundingMode == float_round_down ) )
4554 ) {
4555 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4556 }
0f605c88
LV
4557 return packFloatx80(zSign,
4558 floatx80_infinity_high,
4559 floatx80_infinity_low);
158142c2
FB
4560 }
4561 if ( zExp <= 0 ) {
a828b373
RH
4562 isTiny = status->tininess_before_rounding
4563 || (zExp < 0)
4564 || !increment
4565 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4566 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4567 zExp = 0;
ff32e16e
PM
4568 if (isTiny && zSig1) {
4569 float_raise(float_flag_underflow, status);
4570 }
a2f2d288 4571 if (zSig1) {
d82f3b2d 4572 float_raise(float_flag_inexact, status);
a2f2d288 4573 }
dc355b76
PM
4574 switch (roundingMode) {
4575 case float_round_nearest_even:
f9288a76 4576 case float_round_ties_away:
dc355b76
PM
4577 increment = ((int64_t)zSig1 < 0);
4578 break;
4579 case float_round_to_zero:
4580 increment = 0;
4581 break;
4582 case float_round_up:
4583 increment = !zSign && zSig1;
4584 break;
4585 case float_round_down:
4586 increment = zSign && zSig1;
4587 break;
4588 default:
4589 abort();
158142c2
FB
4590 }
4591 if ( increment ) {
4592 ++zSig0;
40662886
PMD
4593 if (!(zSig1 << 1) && roundNearestEven) {
4594 zSig0 &= ~1;
4595 }
bb98fe42 4596 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4597 }
4598 return packFloatx80( zSign, zExp, zSig0 );
4599 }
4600 }
a2f2d288 4601 if (zSig1) {
d82f3b2d 4602 float_raise(float_flag_inexact, status);
a2f2d288 4603 }
158142c2
FB
4604 if ( increment ) {
4605 ++zSig0;
4606 if ( zSig0 == 0 ) {
4607 ++zExp;
e9321124 4608 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4609 }
4610 else {
40662886
PMD
4611 if (!(zSig1 << 1) && roundNearestEven) {
4612 zSig0 &= ~1;
4613 }
158142c2
FB
4614 }
4615 }
4616 else {
4617 if ( zSig0 == 0 ) zExp = 0;
4618 }
4619 return packFloatx80( zSign, zExp, zSig0 );
4620
4621}
4622
4623/*----------------------------------------------------------------------------
4624| Takes an abstract floating-point value having sign `zSign', exponent
4625| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4626| and returns the proper extended double-precision floating-point value
4627| corresponding to the abstract input. This routine is just like
4628| `roundAndPackFloatx80' except that the input significand does not have to be
4629| normalized.
4630*----------------------------------------------------------------------------*/
4631
88857aca 4632floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
c120391c 4633 bool zSign, int32_t zExp,
88857aca
LV
4634 uint64_t zSig0, uint64_t zSig1,
4635 float_status *status)
158142c2 4636{
8f506c70 4637 int8_t shiftCount;
158142c2
FB
4638
4639 if ( zSig0 == 0 ) {
4640 zSig0 = zSig1;
4641 zSig1 = 0;
4642 zExp -= 64;
4643 }
0019d5c3 4644 shiftCount = clz64(zSig0);
158142c2
FB
4645 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4646 zExp -= shiftCount;
ff32e16e
PM
4647 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4648 zSig0, zSig1, status);
158142c2
FB
4649
4650}
4651
158142c2
FB
4652/*----------------------------------------------------------------------------
4653| Returns the least-significant 64 fraction bits of the quadruple-precision
4654| floating-point value `a'.
4655*----------------------------------------------------------------------------*/
4656
a49db98d 4657static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4658{
4659
4660 return a.low;
4661
4662}
4663
4664/*----------------------------------------------------------------------------
4665| Returns the most-significant 48 fraction bits of the quadruple-precision
4666| floating-point value `a'.
4667*----------------------------------------------------------------------------*/
4668
a49db98d 4669static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4670{
4671
e9321124 4672 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
158142c2
FB
4673
4674}
4675
4676/*----------------------------------------------------------------------------
4677| Returns the exponent bits of the quadruple-precision floating-point value
4678| `a'.
4679*----------------------------------------------------------------------------*/
4680
f4014512 4681static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4682{
4683
4684 return ( a.high>>48 ) & 0x7FFF;
4685
4686}
4687
4688/*----------------------------------------------------------------------------
4689| Returns the sign bit of the quadruple-precision floating-point value `a'.
4690*----------------------------------------------------------------------------*/
4691
c120391c 4692static inline bool extractFloat128Sign(float128 a)
158142c2 4693{
c120391c 4694 return a.high >> 63;
158142c2
FB
4695}
4696
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision significand formed by the
| concatenation of `aSig0' and `aSig1'.  The normalized exponent is written
| to `*zExpPtr', the most significant 49 bits of the normalized significand
| to `*zSig0Ptr', and the least significant 64 bits to `*zSig1Ptr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shift;

    if (aSig0 != 0) {
        /* The leading bit is in the high word: one short 128-bit shift. */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /* High word empty: the low word supplies the whole significand. */
    shift = clz64(aSig1) - 15;
    if (shift >= 0) {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        /* Leading bit sits above bit 48: split aSig1 across both words. */
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
4737
4738/*----------------------------------------------------------------------------
4739| Packs the sign `zSign', the exponent `zExp', and the significand formed
4740| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4741| floating-point value, returning the result. After being shifted into the
4742| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4743| added together to form the most significant 32 bits of the result. This
4744| means that any integer portion of `zSig0' will be added into the exponent.
4745| Since a properly normalized significand will have an integer portion equal
4746| to 1, the `zExp' input should be 1 less than the desired result exponent
4747| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4748| significand.
4749*----------------------------------------------------------------------------*/
4750
a49db98d 4751static inline float128
c120391c 4752packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
158142c2
FB
4753{
4754 float128 z;
4755
4756 z.low = zSig1;
c120391c 4757 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
158142c2 4758 return z;
158142c2
FB
4759}
4760
4761/*----------------------------------------------------------------------------
4762| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4763| and extended significand formed by the concatenation of `zSig0', `zSig1',
4764| and `zSig2', and returns the proper quadruple-precision floating-point value
4765| corresponding to the abstract input. Ordinarily, the abstract value is
4766| simply rounded and packed into the quadruple-precision format, with the
4767| inexact exception raised if the abstract input cannot be represented
4768| exactly. However, if the abstract value is too large, the overflow and
4769| inexact exceptions are raised and an infinity or maximal finite value is
4770| returned. If the abstract value is too small, the input value is rounded to
4771| a subnormal number, and the underflow and inexact exceptions are raised if
4772| the abstract input cannot be represented exactly as a subnormal quadruple-
4773| precision floating-point number.
4774| The input significand must be normalized or smaller. If the input
4775| significand is not normalized, `zExp' must be 0; in that case, the result
4776| returned is a subnormal number, and it must not require rounding. In the
4777| usual case that the input significand is normalized, `zExp' must be 1 less
4778| than the ``true'' floating-point exponent. The handling of underflow and
4779| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4780*----------------------------------------------------------------------------*/
4781
c120391c 4782static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4783 uint64_t zSig0, uint64_t zSig1,
4784 uint64_t zSig2, float_status *status)
158142c2 4785{
8f506c70 4786 int8_t roundingMode;
c120391c 4787 bool roundNearestEven, increment, isTiny;
158142c2 4788
a2f2d288 4789 roundingMode = status->float_rounding_mode;
158142c2 4790 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4791 switch (roundingMode) {
4792 case float_round_nearest_even:
f9288a76 4793 case float_round_ties_away:
dc355b76
PM
4794 increment = ((int64_t)zSig2 < 0);
4795 break;
4796 case float_round_to_zero:
4797 increment = 0;
4798 break;
4799 case float_round_up:
4800 increment = !zSign && zSig2;
4801 break;
4802 case float_round_down:
4803 increment = zSign && zSig2;
4804 break;
9ee6f678
BR
4805 case float_round_to_odd:
4806 increment = !(zSig1 & 0x1) && zSig2;
4807 break;
dc355b76
PM
4808 default:
4809 abort();
158142c2 4810 }
bb98fe42 4811 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
4812 if ( ( 0x7FFD < zExp )
4813 || ( ( zExp == 0x7FFD )
4814 && eq128(
e9321124
AB
4815 UINT64_C(0x0001FFFFFFFFFFFF),
4816 UINT64_C(0xFFFFFFFFFFFFFFFF),
158142c2
FB
4817 zSig0,
4818 zSig1
4819 )
4820 && increment
4821 )
4822 ) {
ff32e16e 4823 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4824 if ( ( roundingMode == float_round_to_zero )
4825 || ( zSign && ( roundingMode == float_round_up ) )
4826 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4827 || (roundingMode == float_round_to_odd)
158142c2
FB
4828 ) {
4829 return
4830 packFloat128(
4831 zSign,
4832 0x7FFE,
e9321124
AB
4833 UINT64_C(0x0000FFFFFFFFFFFF),
4834 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
4835 );
4836 }
4837 return packFloat128( zSign, 0x7FFF, 0, 0 );
4838 }
4839 if ( zExp < 0 ) {
a2f2d288 4840 if (status->flush_to_zero) {
ff32e16e 4841 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4842 return packFloat128(zSign, 0, 0, 0);
4843 }
a828b373
RH
4844 isTiny = status->tininess_before_rounding
4845 || (zExp < -1)
4846 || !increment
4847 || lt128(zSig0, zSig1,
4848 UINT64_C(0x0001FFFFFFFFFFFF),
4849 UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4850 shift128ExtraRightJamming(
4851 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4852 zExp = 0;
ff32e16e
PM
4853 if (isTiny && zSig2) {
4854 float_raise(float_flag_underflow, status);
4855 }
dc355b76
PM
4856 switch (roundingMode) {
4857 case float_round_nearest_even:
f9288a76 4858 case float_round_ties_away:
dc355b76
PM
4859 increment = ((int64_t)zSig2 < 0);
4860 break;
4861 case float_round_to_zero:
4862 increment = 0;
4863 break;
4864 case float_round_up:
4865 increment = !zSign && zSig2;
4866 break;
4867 case float_round_down:
4868 increment = zSign && zSig2;
4869 break;
9ee6f678
BR
4870 case float_round_to_odd:
4871 increment = !(zSig1 & 0x1) && zSig2;
4872 break;
dc355b76
PM
4873 default:
4874 abort();
158142c2
FB
4875 }
4876 }
4877 }
a2f2d288 4878 if (zSig2) {
d82f3b2d 4879 float_raise(float_flag_inexact, status);
a2f2d288 4880 }
158142c2
FB
4881 if ( increment ) {
4882 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
40662886
PMD
4883 if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4884 zSig1 &= ~1;
4885 }
158142c2
FB
4886 }
4887 else {
4888 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4889 }
4890 return packFloat128( zSign, zExp, zSig0, zSig1 );
4891
4892}
4893
4894/*----------------------------------------------------------------------------
4895| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4896| and significand formed by the concatenation of `zSig0' and `zSig1', and
4897| returns the proper quadruple-precision floating-point value corresponding
4898| to the abstract input. This routine is just like `roundAndPackFloat128'
4899| except that the input significand has fewer bits and does not have to be
4900| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4901| point exponent.
4902*----------------------------------------------------------------------------*/
4903
c120391c 4904static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4905 uint64_t zSig0, uint64_t zSig1,
4906 float_status *status)
158142c2 4907{
8f506c70 4908 int8_t shiftCount;
bb98fe42 4909 uint64_t zSig2;
158142c2
FB
4910
4911 if ( zSig0 == 0 ) {
4912 zSig0 = zSig1;
4913 zSig1 = 0;
4914 zExp -= 64;
4915 }
0019d5c3 4916 shiftCount = clz64(zSig0) - 15;
158142c2
FB
4917 if ( 0 <= shiftCount ) {
4918 zSig2 = 0;
4919 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4920 }
4921 else {
4922 shift128ExtraRightJamming(
4923 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4924 }
4925 zExp -= shiftCount;
ff32e16e 4926 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
4927
4928}
4929
158142c2 4930
158142c2
FB
4931/*----------------------------------------------------------------------------
4932| Returns the result of converting the 32-bit two's complement integer `a'
4933| to the extended double-precision floating-point format. The conversion
4934| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4935| Arithmetic.
4936*----------------------------------------------------------------------------*/
4937
e5a41ffa 4938floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2 4939{
c120391c 4940 bool zSign;
3a87d009 4941 uint32_t absA;
8f506c70 4942 int8_t shiftCount;
bb98fe42 4943 uint64_t zSig;
158142c2
FB
4944
4945 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4946 zSign = ( a < 0 );
4947 absA = zSign ? - a : a;
0019d5c3 4948 shiftCount = clz32(absA) + 32;
158142c2
FB
4949 zSig = absA;
4950 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4951
4952}
4953
158142c2
FB
4954/*----------------------------------------------------------------------------
4955| Returns the result of converting the 64-bit two's complement integer `a'
4956| to the extended double-precision floating-point format. The conversion
4957| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4958| Arithmetic.
4959*----------------------------------------------------------------------------*/
4960
e5a41ffa 4961floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2 4962{
c120391c 4963 bool zSign;
182f42fd 4964 uint64_t absA;
8f506c70 4965 int8_t shiftCount;
158142c2
FB
4966
4967 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4968 zSign = ( a < 0 );
4969 absA = zSign ? - a : a;
0019d5c3 4970 shiftCount = clz64(absA);
158142c2
FB
4971 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4972
4973}
4974
6bb8e0f1
PM
4975/*----------------------------------------------------------------------------
4976| Returns the result of converting the 64-bit unsigned integer `a'
4977| to the quadruple-precision floating-point format. The conversion is performed
4978| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4979*----------------------------------------------------------------------------*/
4980
e5a41ffa 4981float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
4982{
4983 if (a == 0) {
4984 return float128_zero;
4985 }
6603d506 4986 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
4987}
4988
158142c2
FB
4989/*----------------------------------------------------------------------------
4990| Returns the result of converting the single-precision floating-point value
4991| `a' to the extended double-precision floating-point format. The conversion
4992| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4993| Arithmetic.
4994*----------------------------------------------------------------------------*/
4995
e5a41ffa 4996floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2 4997{
c120391c 4998 bool aSign;
0c48262d 4999 int aExp;
bb98fe42 5000 uint32_t aSig;
158142c2 5001
ff32e16e 5002 a = float32_squash_input_denormal(a, status);
158142c2
FB
5003 aSig = extractFloat32Frac( a );
5004 aExp = extractFloat32Exp( a );
5005 aSign = extractFloat32Sign( a );
5006 if ( aExp == 0xFF ) {
ff32e16e 5007 if (aSig) {
7537c2b4
JM
5008 floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
5009 status);
5010 return floatx80_silence_nan(res, status);
ff32e16e 5011 }
0f605c88
LV
5012 return packFloatx80(aSign,
5013 floatx80_infinity_high,
5014 floatx80_infinity_low);
158142c2
FB
5015 }
5016 if ( aExp == 0 ) {
5017 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5018 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5019 }
5020 aSig |= 0x00800000;
bb98fe42 5021 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
5022
5023}
5024
158142c2
FB
5025/*----------------------------------------------------------------------------
5026| Returns the remainder of the single-precision floating-point value `a'
5027| with respect to the corresponding value `b'. The operation is performed
5028| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5029*----------------------------------------------------------------------------*/
5030
e5a41ffa 5031float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 5032{
c120391c 5033 bool aSign, zSign;
0c48262d 5034 int aExp, bExp, expDiff;
bb98fe42
AF
5035 uint32_t aSig, bSig;
5036 uint32_t q;
5037 uint64_t aSig64, bSig64, q64;
5038 uint32_t alternateASig;
5039 int32_t sigMean;
ff32e16e
PM
5040 a = float32_squash_input_denormal(a, status);
5041 b = float32_squash_input_denormal(b, status);
158142c2
FB
5042
5043 aSig = extractFloat32Frac( a );
5044 aExp = extractFloat32Exp( a );
5045 aSign = extractFloat32Sign( a );
5046 bSig = extractFloat32Frac( b );
5047 bExp = extractFloat32Exp( b );
158142c2
FB
5048 if ( aExp == 0xFF ) {
5049 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 5050 return propagateFloat32NaN(a, b, status);
158142c2 5051 }
ff32e16e 5052 float_raise(float_flag_invalid, status);
af39bc8c 5053 return float32_default_nan(status);
158142c2
FB
5054 }
5055 if ( bExp == 0xFF ) {
ff32e16e
PM
5056 if (bSig) {
5057 return propagateFloat32NaN(a, b, status);
5058 }
158142c2
FB
5059 return a;
5060 }
5061 if ( bExp == 0 ) {
5062 if ( bSig == 0 ) {
ff32e16e 5063 float_raise(float_flag_invalid, status);
af39bc8c 5064 return float32_default_nan(status);
158142c2
FB
5065 }
5066 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
5067 }
5068 if ( aExp == 0 ) {
5069 if ( aSig == 0 ) return a;
5070 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5071 }
5072 expDiff = aExp - bExp;
5073 aSig |= 0x00800000;
5074 bSig |= 0x00800000;
5075 if ( expDiff < 32 ) {
5076 aSig <<= 8;
5077 bSig <<= 8;
5078 if ( expDiff < 0 ) {
5079 if ( expDiff < -1 ) return a;
5080 aSig >>= 1;
5081 }
5082 q = ( bSig <= aSig );
5083 if ( q ) aSig -= bSig;
5084 if ( 0 < expDiff ) {
bb98fe42 5085 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
5086 q >>= 32 - expDiff;
5087 bSig >>= 2;
5088 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5089 }
5090 else {
5091 aSig >>= 2;
5092 bSig >>= 2;
5093 }
5094 }
5095 else {
5096 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
5097 aSig64 = ( (uint64_t) aSig )<<40;
5098 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
5099 expDiff -= 64;
5100 while ( 0 < expDiff ) {
5101 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5102 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5103 aSig64 = - ( ( bSig * q64 )<<38 );
5104 expDiff -= 62;
5105 }
5106 expDiff += 64;
5107 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5108 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5109 q = q64>>( 64 - expDiff );
5110 bSig <<= 6;
5111 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5112 }
5113 do {
5114 alternateASig = aSig;
5115 ++q;
5116 aSig -= bSig;
bb98fe42 5117 } while ( 0 <= (int32_t) aSig );
158142c2
FB
5118 sigMean = aSig + alternateASig;
5119 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5120 aSig = alternateASig;
5121 }
bb98fe42 5122 zSign = ( (int32_t) aSig < 0 );
158142c2 5123 if ( zSign ) aSig = - aSig;
ff32e16e 5124 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5125}
5126
369be8f6 5127
158142c2 5128
8229c991
AJ
/*----------------------------------------------------------------------------
| Returns the binary exponential of the single-precision floating-point value
| `a'. The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
|
| Uses the following identities:
|
| 1. 2^x = e^(x*ln(2))
|
| 2. e^x = 1 + x/1! + x^2/2! + x^3/3! + x^4/4! + x^5/5! + ... + x^n/n! + ...
*----------------------------------------------------------------------------*/
5146
5147static const float64 float32_exp2_coefficients[15] =
5148{
d5138cf4
PM
5149 const_float64( 0x3ff0000000000000ll ), /* 1 */
5150 const_float64( 0x3fe0000000000000ll ), /* 2 */
5151 const_float64( 0x3fc5555555555555ll ), /* 3 */
5152 const_float64( 0x3fa5555555555555ll ), /* 4 */
5153 const_float64( 0x3f81111111111111ll ), /* 5 */
5154 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5155 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5156 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5157 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5158 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5159 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5160 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5161 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5162 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5163 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
5164};
5165
e5a41ffa 5166float32 float32_exp2(float32 a, float_status *status)
8229c991 5167{
c120391c 5168 bool aSign;
0c48262d 5169 int aExp;
bb98fe42 5170 uint32_t aSig;
8229c991
AJ
5171 float64 r, x, xn;
5172 int i;
ff32e16e 5173 a = float32_squash_input_denormal(a, status);
8229c991
AJ
5174
5175 aSig = extractFloat32Frac( a );
5176 aExp = extractFloat32Exp( a );
5177 aSign = extractFloat32Sign( a );
5178
5179 if ( aExp == 0xFF) {
ff32e16e
PM
5180 if (aSig) {
5181 return propagateFloat32NaN(a, float32_zero, status);
5182 }
8229c991
AJ
5183 return (aSign) ? float32_zero : a;
5184 }
5185 if (aExp == 0) {
5186 if (aSig == 0) return float32_one;
5187 }
5188
ff32e16e 5189 float_raise(float_flag_inexact, status);
8229c991
AJ
5190
5191 /* ******************************* */
5192 /* using float64 for approximation */
5193 /* ******************************* */
ff32e16e
PM
5194 x = float32_to_float64(a, status);
5195 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
5196
5197 xn = x;
5198 r = float64_one;
5199 for (i = 0 ; i < 15 ; i++) {
5200 float64 f;
5201
ff32e16e
PM
5202 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5203 r = float64_add(r, f, status);
8229c991 5204
ff32e16e 5205 xn = float64_mul(xn, x, status);
8229c991
AJ
5206 }
5207
5208 return float64_to_float32(r, status);
5209}
5210
374dfc33
AJ
5211/*----------------------------------------------------------------------------
5212| Returns the binary log of the single-precision floating-point value `a'.
5213| The operation is performed according to the IEC/IEEE Standard for Binary
5214| Floating-Point Arithmetic.
5215*----------------------------------------------------------------------------*/
e5a41ffa 5216float32 float32_log2(float32 a, float_status *status)
374dfc33 5217{
c120391c 5218 bool aSign, zSign;
0c48262d 5219 int aExp;
bb98fe42 5220 uint32_t aSig, zSig, i;
374dfc33 5221
ff32e16e 5222 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
5223 aSig = extractFloat32Frac( a );
5224 aExp = extractFloat32Exp( a );
5225 aSign = extractFloat32Sign( a );
5226
5227 if ( aExp == 0 ) {
5228 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5229 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5230 }
5231 if ( aSign ) {
ff32e16e 5232 float_raise(float_flag_invalid, status);
af39bc8c 5233 return float32_default_nan(status);
374dfc33
AJ
5234 }
5235 if ( aExp == 0xFF ) {
ff32e16e
PM
5236 if (aSig) {
5237 return propagateFloat32NaN(a, float32_zero, status);
5238 }
374dfc33
AJ
5239 return a;
5240 }
5241
5242 aExp -= 0x7F;
5243 aSig |= 0x00800000;
5244 zSign = aExp < 0;
5245 zSig = aExp << 23;
5246
5247 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 5248 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
5249 if ( aSig & 0x01000000 ) {
5250 aSig >>= 1;
5251 zSig |= i;
5252 }
5253 }
5254
5255 if ( zSign )
5256 zSig = -zSig;
5257
ff32e16e 5258 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
5259}
5260
158142c2 5261/*----------------------------------------------------------------------------
158142c2
FB
5262| Returns the result of converting the double-precision floating-point value
5263| `a' to the extended double-precision floating-point format. The conversion
5264| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5265| Arithmetic.
5266*----------------------------------------------------------------------------*/
5267
e5a41ffa 5268floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2 5269{
c120391c 5270 bool aSign;
0c48262d 5271 int aExp;
bb98fe42 5272 uint64_t aSig;
158142c2 5273
ff32e16e 5274 a = float64_squash_input_denormal(a, status);
158142c2
FB
5275 aSig = extractFloat64Frac( a );
5276 aExp = extractFloat64Exp( a );
5277 aSign = extractFloat64Sign( a );
5278 if ( aExp == 0x7FF ) {
ff32e16e 5279 if (aSig) {
7537c2b4
JM
5280 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5281 status);
5282 return floatx80_silence_nan(res, status);
ff32e16e 5283 }
0f605c88
LV
5284 return packFloatx80(aSign,
5285 floatx80_infinity_high,
5286 floatx80_infinity_low);
158142c2
FB
5287 }
5288 if ( aExp == 0 ) {
5289 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5290 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5291 }
5292 return
5293 packFloatx80(
e9321124 5294 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
158142c2
FB
5295
5296}
5297
158142c2
FB
5298/*----------------------------------------------------------------------------
5299| Returns the remainder of the double-precision floating-point value `a'
5300| with respect to the corresponding value `b'. The operation is performed
5301| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5302*----------------------------------------------------------------------------*/
5303
e5a41ffa 5304float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5305{
c120391c 5306 bool aSign, zSign;
0c48262d 5307 int aExp, bExp, expDiff;
bb98fe42
AF
5308 uint64_t aSig, bSig;
5309 uint64_t q, alternateASig;
5310 int64_t sigMean;
158142c2 5311
ff32e16e
PM
5312 a = float64_squash_input_denormal(a, status);
5313 b = float64_squash_input_denormal(b, status);
158142c2
FB
5314 aSig = extractFloat64Frac( a );
5315 aExp = extractFloat64Exp( a );
5316 aSign = extractFloat64Sign( a );
5317 bSig = extractFloat64Frac( b );
5318 bExp = extractFloat64Exp( b );
158142c2
FB
5319 if ( aExp == 0x7FF ) {
5320 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5321 return propagateFloat64NaN(a, b, status);
158142c2 5322 }
ff32e16e 5323 float_raise(float_flag_invalid, status);
af39bc8c 5324 return float64_default_nan(status);
158142c2
FB
5325 }
5326 if ( bExp == 0x7FF ) {
ff32e16e
PM
5327 if (bSig) {
5328 return propagateFloat64NaN(a, b, status);
5329 }
158142c2
FB
5330 return a;
5331 }
5332 if ( bExp == 0 ) {
5333 if ( bSig == 0 ) {
ff32e16e 5334 float_raise(float_flag_invalid, status);
af39bc8c 5335 return float64_default_nan(status);
158142c2
FB
5336 }
5337 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5338 }
5339 if ( aExp == 0 ) {
5340 if ( aSig == 0 ) return a;
5341 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5342 }
5343 expDiff = aExp - bExp;
e9321124
AB
5344 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5345 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
158142c2
FB
5346 if ( expDiff < 0 ) {
5347 if ( expDiff < -1 ) return a;
5348 aSig >>= 1;
5349 }
5350 q = ( bSig <= aSig );
5351 if ( q ) aSig -= bSig;
5352 expDiff -= 64;
5353 while ( 0 < expDiff ) {
5354 q = estimateDiv128To64( aSig, 0, bSig );
5355 q = ( 2 < q ) ? q - 2 : 0;
5356 aSig = - ( ( bSig>>2 ) * q );
5357 expDiff -= 62;
5358 }
5359 expDiff += 64;
5360 if ( 0 < expDiff ) {
5361 q = estimateDiv128To64( aSig, 0, bSig );
5362 q = ( 2 < q ) ? q - 2 : 0;
5363 q >>= 64 - expDiff;
5364 bSig >>= 2;
5365 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5366 }
5367 else {
5368 aSig >>= 2;
5369 bSig >>= 2;
5370 }
5371 do {
5372 alternateASig = aSig;
5373 ++q;
5374 aSig -= bSig;
bb98fe42 5375 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5376 sigMean = aSig + alternateASig;
5377 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5378 aSig = alternateASig;
5379 }
bb98fe42 5380 zSign = ( (int64_t) aSig < 0 );
158142c2 5381 if ( zSign ) aSig = - aSig;
ff32e16e 5382 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5383
5384}
5385
374dfc33
AJ
5386/*----------------------------------------------------------------------------
5387| Returns the binary log of the double-precision floating-point value `a'.
5388| The operation is performed according to the IEC/IEEE Standard for Binary
5389| Floating-Point Arithmetic.
5390*----------------------------------------------------------------------------*/
e5a41ffa 5391float64 float64_log2(float64 a, float_status *status)
374dfc33 5392{
c120391c 5393 bool aSign, zSign;
0c48262d 5394 int aExp;
bb98fe42 5395 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5396 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5397
5398 aSig = extractFloat64Frac( a );
5399 aExp = extractFloat64Exp( a );
5400 aSign = extractFloat64Sign( a );
5401
5402 if ( aExp == 0 ) {
5403 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5404 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5405 }
5406 if ( aSign ) {
ff32e16e 5407 float_raise(float_flag_invalid, status);
af39bc8c 5408 return float64_default_nan(status);
374dfc33
AJ
5409 }
5410 if ( aExp == 0x7FF ) {
ff32e16e
PM
5411 if (aSig) {
5412 return propagateFloat64NaN(a, float64_zero, status);
5413 }
374dfc33
AJ
5414 return a;
5415 }
5416
5417 aExp -= 0x3FF;
e9321124 5418 aSig |= UINT64_C(0x0010000000000000);
374dfc33 5419 zSign = aExp < 0;
bb98fe42 5420 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5421 for (i = 1LL << 51; i > 0; i >>= 1) {
5422 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5423 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
e9321124 5424 if ( aSig & UINT64_C(0x0020000000000000) ) {
374dfc33
AJ
5425 aSig >>= 1;
5426 zSig |= i;
5427 }
5428 }
5429
5430 if ( zSign )
5431 zSig = -zSig;
ff32e16e 5432 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5433}
5434
158142c2
FB
5435/*----------------------------------------------------------------------------
5436| Returns the result of converting the extended double-precision floating-
5437| point value `a' to the 32-bit two's complement integer format. The
5438| conversion is performed according to the IEC/IEEE Standard for Binary
5439| Floating-Point Arithmetic---which means in particular that the conversion
5440| is rounded according to the current rounding mode. If `a' is a NaN, the
5441| largest positive integer is returned. Otherwise, if the conversion
5442| overflows, the largest integer with the same sign as `a' is returned.
5443*----------------------------------------------------------------------------*/
5444
f4014512 5445int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2 5446{
c120391c 5447 bool aSign;
f4014512 5448 int32_t aExp, shiftCount;
bb98fe42 5449 uint64_t aSig;
158142c2 5450
d1eb8f2a
AD
5451 if (floatx80_invalid_encoding(a)) {
5452 float_raise(float_flag_invalid, status);
5453 return 1 << 31;
5454 }
158142c2
FB
5455 aSig = extractFloatx80Frac( a );
5456 aExp = extractFloatx80Exp( a );
5457 aSign = extractFloatx80Sign( a );
bb98fe42 5458 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5459 shiftCount = 0x4037 - aExp;
5460 if ( shiftCount <= 0 ) shiftCount = 1;
5461 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5462 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5463
5464}
5465
5466/*----------------------------------------------------------------------------
5467| Returns the result of converting the extended double-precision floating-
5468| point value `a' to the 32-bit two's complement integer format. The
5469| conversion is performed according to the IEC/IEEE Standard for Binary
5470| Floating-Point Arithmetic, except that the conversion is always rounded
5471| toward zero. If `a' is a NaN, the largest positive integer is returned.
5472| Otherwise, if the conversion overflows, the largest integer with the same
5473| sign as `a' is returned.
5474*----------------------------------------------------------------------------*/
5475
f4014512 5476int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2 5477{
c120391c 5478 bool aSign;
f4014512 5479 int32_t aExp, shiftCount;
bb98fe42 5480 uint64_t aSig, savedASig;
b3a6a2e0 5481 int32_t z;
158142c2 5482
d1eb8f2a
AD
5483 if (floatx80_invalid_encoding(a)) {
5484 float_raise(float_flag_invalid, status);
5485 return 1 << 31;
5486 }
158142c2
FB
5487 aSig = extractFloatx80Frac( a );
5488 aExp = extractFloatx80Exp( a );
5489 aSign = extractFloatx80Sign( a );
5490 if ( 0x401E < aExp ) {
bb98fe42 5491 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5492 goto invalid;
5493 }
5494 else if ( aExp < 0x3FFF ) {
a2f2d288 5495 if (aExp || aSig) {
d82f3b2d 5496 float_raise(float_flag_inexact, status);
a2f2d288 5497 }
158142c2
FB
5498 return 0;
5499 }
5500 shiftCount = 0x403E - aExp;
5501 savedASig = aSig;
5502 aSig >>= shiftCount;
5503 z = aSig;
5504 if ( aSign ) z = - z;
5505 if ( ( z < 0 ) ^ aSign ) {
5506 invalid:
ff32e16e 5507 float_raise(float_flag_invalid, status);
bb98fe42 5508 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5509 }
5510 if ( ( aSig<<shiftCount ) != savedASig ) {
d82f3b2d 5511 float_raise(float_flag_inexact, status);
158142c2
FB
5512 }
5513 return z;
5514
5515}
5516
5517/*----------------------------------------------------------------------------
5518| Returns the result of converting the extended double-precision floating-
5519| point value `a' to the 64-bit two's complement integer format. The
5520| conversion is performed according to the IEC/IEEE Standard for Binary
5521| Floating-Point Arithmetic---which means in particular that the conversion
5522| is rounded according to the current rounding mode. If `a' is a NaN,
5523| the largest positive integer is returned. Otherwise, if the conversion
5524| overflows, the largest integer with the same sign as `a' is returned.
5525*----------------------------------------------------------------------------*/
5526
f42c2224 5527int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2 5528{
c120391c 5529 bool aSign;
f4014512 5530 int32_t aExp, shiftCount;
bb98fe42 5531 uint64_t aSig, aSigExtra;
158142c2 5532
d1eb8f2a
AD
5533 if (floatx80_invalid_encoding(a)) {
5534 float_raise(float_flag_invalid, status);
5535 return 1ULL << 63;
5536 }
158142c2
FB
5537 aSig = extractFloatx80Frac( a );
5538 aExp = extractFloatx80Exp( a );
5539 aSign = extractFloatx80Sign( a );
5540 shiftCount = 0x403E - aExp;
5541 if ( shiftCount <= 0 ) {
5542 if ( shiftCount ) {
ff32e16e 5543 float_raise(float_flag_invalid, status);
0f605c88 5544 if (!aSign || floatx80_is_any_nan(a)) {
2c217da0 5545 return INT64_MAX;
158142c2 5546 }
2c217da0 5547 return INT64_MIN;
158142c2
FB
5548 }
5549 aSigExtra = 0;
5550 }
5551 else {
5552 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5553 }
ff32e16e 5554 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5555
5556}
5557
5558/*----------------------------------------------------------------------------
5559| Returns the result of converting the extended double-precision floating-
5560| point value `a' to the 64-bit two's complement integer format. The
5561| conversion is performed according to the IEC/IEEE Standard for Binary
5562| Floating-Point Arithmetic, except that the conversion is always rounded
5563| toward zero. If `a' is a NaN, the largest positive integer is returned.
5564| Otherwise, if the conversion overflows, the largest integer with the same
5565| sign as `a' is returned.
5566*----------------------------------------------------------------------------*/
5567
f42c2224 5568int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2 5569{
c120391c 5570 bool aSign;
f4014512 5571 int32_t aExp, shiftCount;
bb98fe42 5572 uint64_t aSig;
f42c2224 5573 int64_t z;
158142c2 5574
d1eb8f2a
AD
5575 if (floatx80_invalid_encoding(a)) {
5576 float_raise(float_flag_invalid, status);
5577 return 1ULL << 63;
5578 }
158142c2
FB
5579 aSig = extractFloatx80Frac( a );
5580 aExp = extractFloatx80Exp( a );
5581 aSign = extractFloatx80Sign( a );
5582 shiftCount = aExp - 0x403E;
5583 if ( 0 <= shiftCount ) {
e9321124 5584 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
158142c2 5585 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5586 float_raise(float_flag_invalid, status);
158142c2 5587 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
2c217da0 5588 return INT64_MAX;
158142c2
FB
5589 }
5590 }
2c217da0 5591 return INT64_MIN;
158142c2
FB
5592 }
5593 else if ( aExp < 0x3FFF ) {
a2f2d288 5594 if (aExp | aSig) {
d82f3b2d 5595 float_raise(float_flag_inexact, status);
a2f2d288 5596 }
158142c2
FB
5597 return 0;
5598 }
5599 z = aSig>>( - shiftCount );
bb98fe42 5600 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
d82f3b2d 5601 float_raise(float_flag_inexact, status);
158142c2
FB
5602 }
5603 if ( aSign ) z = - z;
5604 return z;
5605
5606}
5607
5608/*----------------------------------------------------------------------------
5609| Returns the result of converting the extended double-precision floating-
5610| point value `a' to the single-precision floating-point format. The
5611| conversion is performed according to the IEC/IEEE Standard for Binary
5612| Floating-Point Arithmetic.
5613*----------------------------------------------------------------------------*/
5614
e5a41ffa 5615float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2 5616{
c120391c 5617 bool aSign;
f4014512 5618 int32_t aExp;
bb98fe42 5619 uint64_t aSig;
158142c2 5620
d1eb8f2a
AD
5621 if (floatx80_invalid_encoding(a)) {
5622 float_raise(float_flag_invalid, status);
5623 return float32_default_nan(status);
5624 }
158142c2
FB
5625 aSig = extractFloatx80Frac( a );
5626 aExp = extractFloatx80Exp( a );
5627 aSign = extractFloatx80Sign( a );
5628 if ( aExp == 0x7FFF ) {
bb98fe42 5629 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5630 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5631 status);
5632 return float32_silence_nan(res, status);
158142c2
FB
5633 }
5634 return packFloat32( aSign, 0xFF, 0 );
5635 }
5636 shift64RightJamming( aSig, 33, &aSig );
5637 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5638 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5639
5640}
5641
5642/*----------------------------------------------------------------------------
5643| Returns the result of converting the extended double-precision floating-
5644| point value `a' to the double-precision floating-point format. The
5645| conversion is performed according to the IEC/IEEE Standard for Binary
5646| Floating-Point Arithmetic.
5647*----------------------------------------------------------------------------*/
5648
e5a41ffa 5649float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2 5650{
c120391c 5651 bool aSign;
f4014512 5652 int32_t aExp;
bb98fe42 5653 uint64_t aSig, zSig;
158142c2 5654
d1eb8f2a
AD
5655 if (floatx80_invalid_encoding(a)) {
5656 float_raise(float_flag_invalid, status);
5657 return float64_default_nan(status);
5658 }
158142c2
FB
5659 aSig = extractFloatx80Frac( a );
5660 aExp = extractFloatx80Exp( a );
5661 aSign = extractFloatx80Sign( a );
5662 if ( aExp == 0x7FFF ) {
bb98fe42 5663 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5664 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5665 status);
5666 return float64_silence_nan(res, status);
158142c2
FB
5667 }
5668 return packFloat64( aSign, 0x7FF, 0 );
5669 }
5670 shift64RightJamming( aSig, 1, &zSig );
5671 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5672 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5673
5674}
5675
158142c2
FB
5676/*----------------------------------------------------------------------------
5677| Returns the result of converting the extended double-precision floating-
5678| point value `a' to the quadruple-precision floating-point format. The
5679| conversion is performed according to the IEC/IEEE Standard for Binary
5680| Floating-Point Arithmetic.
5681*----------------------------------------------------------------------------*/
5682
e5a41ffa 5683float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2 5684{
c120391c 5685 bool aSign;
0c48262d 5686 int aExp;
bb98fe42 5687 uint64_t aSig, zSig0, zSig1;
158142c2 5688
d1eb8f2a
AD
5689 if (floatx80_invalid_encoding(a)) {
5690 float_raise(float_flag_invalid, status);
5691 return float128_default_nan(status);
5692 }
158142c2
FB
5693 aSig = extractFloatx80Frac( a );
5694 aExp = extractFloatx80Exp( a );
5695 aSign = extractFloatx80Sign( a );
bb98fe42 5696 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5697 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5698 status);
5699 return float128_silence_nan(res, status);
158142c2
FB
5700 }
5701 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5702 return packFloat128( aSign, aExp, zSig0, zSig1 );
5703
5704}
5705
0f721292
LV
5706/*----------------------------------------------------------------------------
5707| Rounds the extended double-precision floating-point value `a'
5708| to the precision provided by floatx80_rounding_precision and returns the
5709| result as an extended double-precision floating-point value.
5710| The operation is performed according to the IEC/IEEE Standard for Binary
5711| Floating-Point Arithmetic.
5712*----------------------------------------------------------------------------*/
5713
5714floatx80 floatx80_round(floatx80 a, float_status *status)
5715{
5716 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5717 extractFloatx80Sign(a),
5718 extractFloatx80Exp(a),
5719 extractFloatx80Frac(a), 0, status);
5720}
5721
158142c2
FB
5722/*----------------------------------------------------------------------------
5723| Rounds the extended double-precision floating-point value `a' to an integer,
5724| and returns the result as an extended quadruple-precision floating-point
5725| value. The operation is performed according to the IEC/IEEE Standard for
5726| Binary Floating-Point Arithmetic.
5727*----------------------------------------------------------------------------*/
5728
e5a41ffa 5729floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2 5730{
c120391c 5731 bool aSign;
f4014512 5732 int32_t aExp;
bb98fe42 5733 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5734 floatx80 z;
5735
d1eb8f2a
AD
5736 if (floatx80_invalid_encoding(a)) {
5737 float_raise(float_flag_invalid, status);
5738 return floatx80_default_nan(status);
5739 }
158142c2
FB
5740 aExp = extractFloatx80Exp( a );
5741 if ( 0x403E <= aExp ) {
bb98fe42 5742 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5743 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5744 }
5745 return a;
5746 }
5747 if ( aExp < 0x3FFF ) {
5748 if ( ( aExp == 0 )
9ecaf5cc 5749 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
158142c2
FB
5750 return a;
5751 }
d82f3b2d 5752 float_raise(float_flag_inexact, status);
158142c2 5753 aSign = extractFloatx80Sign( a );
a2f2d288 5754 switch (status->float_rounding_mode) {
158142c2 5755 case float_round_nearest_even:
bb98fe42 5756 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5757 ) {
5758 return
e9321124 5759 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5760 }
5761 break;
f9288a76
PM
5762 case float_round_ties_away:
5763 if (aExp == 0x3FFE) {
e9321124 5764 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
f9288a76
PM
5765 }
5766 break;
158142c2
FB
5767 case float_round_down:
5768 return
5769 aSign ?
e9321124 5770 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
158142c2
FB
5771 : packFloatx80( 0, 0, 0 );
5772 case float_round_up:
5773 return
5774 aSign ? packFloatx80( 1, 0, 0 )
e9321124 5775 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
3dede407
RH
5776
5777 case float_round_to_zero:
5778 break;
5779 default:
5780 g_assert_not_reached();
158142c2
FB
5781 }
5782 return packFloatx80( aSign, 0, 0 );
5783 }
5784 lastBitMask = 1;
5785 lastBitMask <<= 0x403E - aExp;
5786 roundBitsMask = lastBitMask - 1;
5787 z = a;
a2f2d288 5788 switch (status->float_rounding_mode) {
dc355b76 5789 case float_round_nearest_even:
158142c2 5790 z.low += lastBitMask>>1;
dc355b76
PM
5791 if ((z.low & roundBitsMask) == 0) {
5792 z.low &= ~lastBitMask;
5793 }
5794 break;
f9288a76
PM
5795 case float_round_ties_away:
5796 z.low += lastBitMask >> 1;
5797 break;
dc355b76
PM
5798 case float_round_to_zero:
5799 break;
5800 case float_round_up:
5801 if (!extractFloatx80Sign(z)) {
5802 z.low += roundBitsMask;
5803 }
5804 break;
5805 case float_round_down:
5806 if (extractFloatx80Sign(z)) {
158142c2
FB
5807 z.low += roundBitsMask;
5808 }
dc355b76
PM
5809 break;
5810 default:
5811 abort();
158142c2
FB
5812 }
5813 z.low &= ~ roundBitsMask;
5814 if ( z.low == 0 ) {
5815 ++z.high;
e9321124 5816 z.low = UINT64_C(0x8000000000000000);
158142c2 5817 }
a2f2d288 5818 if (z.low != a.low) {
d82f3b2d 5819 float_raise(float_flag_inexact, status);
a2f2d288 5820 }
158142c2
FB
5821 return z;
5822
5823}
5824
5825/*----------------------------------------------------------------------------
5826| Returns the result of adding the absolute values of the extended double-
5827| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5828| negated before being returned. `zSign' is ignored if the result is a NaN.
5829| The addition is performed according to the IEC/IEEE Standard for Binary
5830| Floating-Point Arithmetic.
5831*----------------------------------------------------------------------------*/
5832
c120391c 5833static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5834 float_status *status)
158142c2 5835{
f4014512 5836 int32_t aExp, bExp, zExp;
bb98fe42 5837 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5838 int32_t expDiff;
158142c2
FB
5839
5840 aSig = extractFloatx80Frac( a );
5841 aExp = extractFloatx80Exp( a );
5842 bSig = extractFloatx80Frac( b );
5843 bExp = extractFloatx80Exp( b );
5844 expDiff = aExp - bExp;
5845 if ( 0 < expDiff ) {
5846 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5847 if ((uint64_t)(aSig << 1)) {
5848 return propagateFloatx80NaN(a, b, status);
5849 }
158142c2
FB
5850 return a;
5851 }
5852 if ( bExp == 0 ) --expDiff;
5853 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5854 zExp = aExp;
5855 }
5856 else if ( expDiff < 0 ) {
5857 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5858 if ((uint64_t)(bSig << 1)) {
5859 return propagateFloatx80NaN(a, b, status);
5860 }
0f605c88
LV
5861 return packFloatx80(zSign,
5862 floatx80_infinity_high,
5863 floatx80_infinity_low);
158142c2
FB
5864 }
5865 if ( aExp == 0 ) ++expDiff;
5866 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5867 zExp = bExp;
5868 }
5869 else {
5870 if ( aExp == 0x7FFF ) {
bb98fe42 5871 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5872 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5873 }
5874 return a;
5875 }
5876 zSig1 = 0;
5877 zSig0 = aSig + bSig;
5878 if ( aExp == 0 ) {
41602807
JM
5879 if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
5880 /* At least one of the values is a pseudo-denormal,
5881 * and there is a carry out of the result. */
5882 zExp = 1;
5883 goto shiftRight1;
5884 }
2f311075
RH
5885 if (zSig0 == 0) {
5886 return packFloatx80(zSign, 0, 0);
5887 }
158142c2
FB
5888 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5889 goto roundAndPack;
5890 }
5891 zExp = aExp;
5892 goto shiftRight1;
5893 }
5894 zSig0 = aSig + bSig;
bb98fe42 5895 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5896 shiftRight1:
5897 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
e9321124 5898 zSig0 |= UINT64_C(0x8000000000000000);
158142c2
FB
5899 ++zExp;
5900 roundAndPack:
a2f2d288 5901 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5902 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5903}
5904
5905/*----------------------------------------------------------------------------
5906| Returns the result of subtracting the absolute values of the extended
5907| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5908| difference is negated before being returned. `zSign' is ignored if the
5909| result is a NaN. The subtraction is performed according to the IEC/IEEE
5910| Standard for Binary Floating-Point Arithmetic.
5911*----------------------------------------------------------------------------*/
5912
c120391c 5913static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5914 float_status *status)
158142c2 5915{
f4014512 5916 int32_t aExp, bExp, zExp;
bb98fe42 5917 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5918 int32_t expDiff;
158142c2
FB
5919
5920 aSig = extractFloatx80Frac( a );
5921 aExp = extractFloatx80Exp( a );
5922 bSig = extractFloatx80Frac( b );
5923 bExp = extractFloatx80Exp( b );
5924 expDiff = aExp - bExp;
5925 if ( 0 < expDiff ) goto aExpBigger;
5926 if ( expDiff < 0 ) goto bExpBigger;
5927 if ( aExp == 0x7FFF ) {
bb98fe42 5928 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5929 return propagateFloatx80NaN(a, b, status);
158142c2 5930 }
ff32e16e 5931 float_raise(float_flag_invalid, status);
af39bc8c 5932 return floatx80_default_nan(status);
158142c2
FB
5933 }
5934 if ( aExp == 0 ) {
5935 aExp = 1;
5936 bExp = 1;
5937 }
5938 zSig1 = 0;
5939 if ( bSig < aSig ) goto aBigger;
5940 if ( aSig < bSig ) goto bBigger;
a2f2d288 5941 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5942 bExpBigger:
5943 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5944 if ((uint64_t)(bSig << 1)) {
5945 return propagateFloatx80NaN(a, b, status);
5946 }
0f605c88
LV
5947 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5948 floatx80_infinity_low);
158142c2
FB
5949 }
5950 if ( aExp == 0 ) ++expDiff;
5951 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5952 bBigger:
5953 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5954 zExp = bExp;
5955 zSign ^= 1;
5956 goto normalizeRoundAndPack;
5957 aExpBigger:
5958 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5959 if ((uint64_t)(aSig << 1)) {
5960 return propagateFloatx80NaN(a, b, status);
5961 }
158142c2
FB
5962 return a;
5963 }
5964 if ( bExp == 0 ) --expDiff;
5965 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5966 aBigger:
5967 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5968 zExp = aExp;
5969 normalizeRoundAndPack:
a2f2d288 5970 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5971 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5972}
5973
5974/*----------------------------------------------------------------------------
5975| Returns the result of adding the extended double-precision floating-point
5976| values `a' and `b'. The operation is performed according to the IEC/IEEE
5977| Standard for Binary Floating-Point Arithmetic.
5978*----------------------------------------------------------------------------*/
5979
e5a41ffa 5980floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2 5981{
c120391c 5982 bool aSign, bSign;
158142c2 5983
d1eb8f2a
AD
5984 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5985 float_raise(float_flag_invalid, status);
5986 return floatx80_default_nan(status);
5987 }
158142c2
FB
5988 aSign = extractFloatx80Sign( a );
5989 bSign = extractFloatx80Sign( b );
5990 if ( aSign == bSign ) {
ff32e16e 5991 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5992 }
5993 else {
ff32e16e 5994 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5995 }
5996
5997}
5998
5999/*----------------------------------------------------------------------------
6000| Returns the result of subtracting the extended double-precision floating-
6001| point values `a' and `b'. The operation is performed according to the
6002| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6003*----------------------------------------------------------------------------*/
6004
e5a41ffa 6005floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2 6006{
c120391c 6007 bool aSign, bSign;
158142c2 6008
d1eb8f2a
AD
6009 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6010 float_raise(float_flag_invalid, status);
6011 return floatx80_default_nan(status);
6012 }
158142c2
FB
6013 aSign = extractFloatx80Sign( a );
6014 bSign = extractFloatx80Sign( b );
6015 if ( aSign == bSign ) {
ff32e16e 6016 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6017 }
6018 else {
ff32e16e 6019 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6020 }
6021
6022}
6023
6024/*----------------------------------------------------------------------------
6025| Returns the result of multiplying the extended double-precision floating-
6026| point values `a' and `b'. The operation is performed according to the
6027| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6028*----------------------------------------------------------------------------*/
6029
e5a41ffa 6030floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2 6031{
c120391c 6032 bool aSign, bSign, zSign;
f4014512 6033 int32_t aExp, bExp, zExp;
bb98fe42 6034 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 6035
d1eb8f2a
AD
6036 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6037 float_raise(float_flag_invalid, status);
6038 return floatx80_default_nan(status);
6039 }
158142c2
FB
6040 aSig = extractFloatx80Frac( a );
6041 aExp = extractFloatx80Exp( a );
6042 aSign = extractFloatx80Sign( a );
6043 bSig = extractFloatx80Frac( b );
6044 bExp = extractFloatx80Exp( b );
6045 bSign = extractFloatx80Sign( b );
6046 zSign = aSign ^ bSign;
6047 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6048 if ( (uint64_t) ( aSig<<1 )
6049 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6050 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6051 }
6052 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
6053 return packFloatx80(zSign, floatx80_infinity_high,
6054 floatx80_infinity_low);
158142c2
FB
6055 }
6056 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6057 if ((uint64_t)(bSig << 1)) {
6058 return propagateFloatx80NaN(a, b, status);
6059 }
158142c2
FB
6060 if ( ( aExp | aSig ) == 0 ) {
6061 invalid:
ff32e16e 6062 float_raise(float_flag_invalid, status);
af39bc8c 6063 return floatx80_default_nan(status);
158142c2 6064 }
0f605c88
LV
6065 return packFloatx80(zSign, floatx80_infinity_high,
6066 floatx80_infinity_low);
158142c2
FB
6067 }
6068 if ( aExp == 0 ) {
6069 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6070 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6071 }
6072 if ( bExp == 0 ) {
6073 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6074 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6075 }
6076 zExp = aExp + bExp - 0x3FFE;
6077 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 6078 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
6079 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6080 --zExp;
6081 }
a2f2d288 6082 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6083 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6084}
6085
6086/*----------------------------------------------------------------------------
6087| Returns the result of dividing the extended double-precision floating-point
6088| value `a' by the corresponding value `b'. The operation is performed
6089| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6090*----------------------------------------------------------------------------*/
6091
e5a41ffa 6092floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2 6093{
c120391c 6094 bool aSign, bSign, zSign;
f4014512 6095 int32_t aExp, bExp, zExp;
bb98fe42
AF
6096 uint64_t aSig, bSig, zSig0, zSig1;
6097 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6098
d1eb8f2a
AD
6099 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6100 float_raise(float_flag_invalid, status);
6101 return floatx80_default_nan(status);
6102 }
158142c2
FB
6103 aSig = extractFloatx80Frac( a );
6104 aExp = extractFloatx80Exp( a );
6105 aSign = extractFloatx80Sign( a );
6106 bSig = extractFloatx80Frac( b );
6107 bExp = extractFloatx80Exp( b );
6108 bSign = extractFloatx80Sign( b );
6109 zSign = aSign ^ bSign;
6110 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6111 if ((uint64_t)(aSig << 1)) {
6112 return propagateFloatx80NaN(a, b, status);
6113 }
158142c2 6114 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6115 if ((uint64_t)(bSig << 1)) {
6116 return propagateFloatx80NaN(a, b, status);
6117 }
158142c2
FB
6118 goto invalid;
6119 }
0f605c88
LV
6120 return packFloatx80(zSign, floatx80_infinity_high,
6121 floatx80_infinity_low);
158142c2
FB
6122 }
6123 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6124 if ((uint64_t)(bSig << 1)) {
6125 return propagateFloatx80NaN(a, b, status);
6126 }
158142c2
FB
6127 return packFloatx80( zSign, 0, 0 );
6128 }
6129 if ( bExp == 0 ) {
6130 if ( bSig == 0 ) {
6131 if ( ( aExp | aSig ) == 0 ) {
6132 invalid:
ff32e16e 6133 float_raise(float_flag_invalid, status);
af39bc8c 6134 return floatx80_default_nan(status);
158142c2 6135 }
ff32e16e 6136 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6137 return packFloatx80(zSign, floatx80_infinity_high,
6138 floatx80_infinity_low);
158142c2
FB
6139 }
6140 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6141 }
6142 if ( aExp == 0 ) {
6143 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6144 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6145 }
6146 zExp = aExp - bExp + 0x3FFE;
6147 rem1 = 0;
6148 if ( bSig <= aSig ) {
6149 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6150 ++zExp;
6151 }
6152 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6153 mul64To128( bSig, zSig0, &term0, &term1 );
6154 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 6155 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6156 --zSig0;
6157 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6158 }
6159 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 6160 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6161 mul64To128( bSig, zSig1, &term1, &term2 );
6162 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6163 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6164 --zSig1;
6165 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6166 }
6167 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6168 }
a2f2d288 6169 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6170 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6171}
6172
6173/*----------------------------------------------------------------------------
6174| Returns the remainder of the extended double-precision floating-point value
6175| `a' with respect to the corresponding value `b'. The operation is performed
6b8b0136
JM
6176| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6177| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
445810ec
JM
6178| the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6179| the absolute value of the integer quotient.
158142c2
FB
6180*----------------------------------------------------------------------------*/
6181
445810ec 6182floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6b8b0136 6183 float_status *status)
158142c2 6184{
c120391c 6185 bool aSign, zSign;
b662495d 6186 int32_t aExp, bExp, expDiff, aExpOrig;
bb98fe42
AF
6187 uint64_t aSig0, aSig1, bSig;
6188 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6189
445810ec 6190 *quotient = 0;
d1eb8f2a
AD
6191 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6192 float_raise(float_flag_invalid, status);
6193 return floatx80_default_nan(status);
6194 }
158142c2 6195 aSig0 = extractFloatx80Frac( a );
b662495d 6196 aExpOrig = aExp = extractFloatx80Exp( a );
158142c2
FB
6197 aSign = extractFloatx80Sign( a );
6198 bSig = extractFloatx80Frac( b );
6199 bExp = extractFloatx80Exp( b );
158142c2 6200 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6201 if ( (uint64_t) ( aSig0<<1 )
6202 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6203 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6204 }
6205 goto invalid;
6206 }
6207 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6208 if ((uint64_t)(bSig << 1)) {
6209 return propagateFloatx80NaN(a, b, status);
6210 }
b662495d
JM
6211 if (aExp == 0 && aSig0 >> 63) {
6212 /*
6213 * Pseudo-denormal argument must be returned in normalized
6214 * form.
6215 */
6216 return packFloatx80(aSign, 1, aSig0);
6217 }
158142c2
FB
6218 return a;
6219 }
6220 if ( bExp == 0 ) {
6221 if ( bSig == 0 ) {
6222 invalid:
ff32e16e 6223 float_raise(float_flag_invalid, status);
af39bc8c 6224 return floatx80_default_nan(status);
158142c2
FB
6225 }
6226 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6227 }
6228 if ( aExp == 0 ) {
499a2f7b 6229 if ( aSig0 == 0 ) return a;
158142c2
FB
6230 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6231 }
158142c2
FB
6232 zSign = aSign;
6233 expDiff = aExp - bExp;
6234 aSig1 = 0;
6235 if ( expDiff < 0 ) {
b662495d
JM
6236 if ( mod || expDiff < -1 ) {
6237 if (aExp == 1 && aExpOrig == 0) {
6238 /*
6239 * Pseudo-denormal argument must be returned in
6240 * normalized form.
6241 */
6242 return packFloatx80(aSign, aExp, aSig0);
6243 }
6244 return a;
6245 }
158142c2
FB
6246 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6247 expDiff = 0;
6248 }
445810ec 6249 *quotient = q = ( bSig <= aSig0 );
158142c2
FB
6250 if ( q ) aSig0 -= bSig;
6251 expDiff -= 64;
6252 while ( 0 < expDiff ) {
6253 q = estimateDiv128To64( aSig0, aSig1, bSig );
6254 q = ( 2 < q ) ? q - 2 : 0;
6255 mul64To128( bSig, q, &term0, &term1 );
6256 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6257 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6258 expDiff -= 62;
445810ec
JM
6259 *quotient <<= 62;
6260 *quotient += q;
158142c2
FB
6261 }
6262 expDiff += 64;
6263 if ( 0 < expDiff ) {
6264 q = estimateDiv128To64( aSig0, aSig1, bSig );
6265 q = ( 2 < q ) ? q - 2 : 0;
6266 q >>= 64 - expDiff;
6267 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6268 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6269 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6270 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6271 ++q;
6272 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6273 }
445810ec
JM
6274 if (expDiff < 64) {
6275 *quotient <<= expDiff;
6276 } else {
6277 *quotient = 0;
6278 }
6279 *quotient += q;
158142c2
FB
6280 }
6281 else {
6282 term1 = 0;
6283 term0 = bSig;
6284 }
6b8b0136
JM
6285 if (!mod) {
6286 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6287 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6288 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6289 && ( q & 1 ) )
6290 ) {
6291 aSig0 = alternateASig0;
6292 aSig1 = alternateASig1;
6293 zSign = ! zSign;
445810ec 6294 ++*quotient;
6b8b0136 6295 }
158142c2
FB
6296 }
6297 return
6298 normalizeRoundAndPackFloatx80(
ff32e16e 6299 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6300
6301}
6302
6b8b0136
JM
6303/*----------------------------------------------------------------------------
6304| Returns the remainder of the extended double-precision floating-point value
6305| `a' with respect to the corresponding value `b'. The operation is performed
6306| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6307*----------------------------------------------------------------------------*/
6308
6309floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6310{
445810ec
JM
6311 uint64_t quotient;
6312 return floatx80_modrem(a, b, false, &quotient, status);
6b8b0136
JM
6313}
6314
6315/*----------------------------------------------------------------------------
6316| Returns the remainder of the extended double-precision floating-point value
6317| `a' with respect to the corresponding value `b', with the quotient truncated
6318| toward zero.
6319*----------------------------------------------------------------------------*/
6320
6321floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6322{
445810ec
JM
6323 uint64_t quotient;
6324 return floatx80_modrem(a, b, true, &quotient, status);
6b8b0136
JM
6325}
6326
158142c2
FB
6327/*----------------------------------------------------------------------------
6328| Returns the square root of the extended double-precision floating-point
6329| value `a'. The operation is performed according to the IEC/IEEE Standard
6330| for Binary Floating-Point Arithmetic.
6331*----------------------------------------------------------------------------*/
6332
e5a41ffa 6333floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2 6334{
c120391c 6335 bool aSign;
f4014512 6336 int32_t aExp, zExp;
bb98fe42
AF
6337 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6338 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6339
d1eb8f2a
AD
6340 if (floatx80_invalid_encoding(a)) {
6341 float_raise(float_flag_invalid, status);
6342 return floatx80_default_nan(status);
6343 }
158142c2
FB
6344 aSig0 = extractFloatx80Frac( a );
6345 aExp = extractFloatx80Exp( a );
6346 aSign = extractFloatx80Sign( a );
6347 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6348 if ((uint64_t)(aSig0 << 1)) {
6349 return propagateFloatx80NaN(a, a, status);
6350 }
158142c2
FB
6351 if ( ! aSign ) return a;
6352 goto invalid;
6353 }
6354 if ( aSign ) {
6355 if ( ( aExp | aSig0 ) == 0 ) return a;
6356 invalid:
ff32e16e 6357 float_raise(float_flag_invalid, status);
af39bc8c 6358 return floatx80_default_nan(status);
158142c2
FB
6359 }
6360 if ( aExp == 0 ) {
6361 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6362 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6363 }
6364 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6365 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6366 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6367 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6368 doubleZSig0 = zSig0<<1;
6369 mul64To128( zSig0, zSig0, &term0, &term1 );
6370 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6371 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6372 --zSig0;
6373 doubleZSig0 -= 2;
6374 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6375 }
6376 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
e9321124 6377 if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
158142c2
FB
6378 if ( zSig1 == 0 ) zSig1 = 1;
6379 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6380 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6381 mul64To128( zSig1, zSig1, &term2, &term3 );
6382 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6383 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6384 --zSig1;
6385 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6386 term3 |= 1;
6387 term2 |= doubleZSig0;
6388 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6389 }
6390 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6391 }
6392 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6393 zSig0 |= doubleZSig0;
a2f2d288
PM
6394 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6395 0, zExp, zSig0, zSig1, status);
158142c2
FB
6396}
6397
158142c2
FB
6398/*----------------------------------------------------------------------------
6399| Returns the result of converting the quadruple-precision floating-point
6400| value `a' to the extended double-precision floating-point format. The
6401| conversion is performed according to the IEC/IEEE Standard for Binary
6402| Floating-Point Arithmetic.
6403*----------------------------------------------------------------------------*/
6404
e5a41ffa 6405floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2 6406{
c120391c 6407 bool aSign;
f4014512 6408 int32_t aExp;
bb98fe42 6409 uint64_t aSig0, aSig1;
158142c2
FB
6410
6411 aSig1 = extractFloat128Frac1( a );
6412 aSig0 = extractFloat128Frac0( a );
6413 aExp = extractFloat128Exp( a );
6414 aSign = extractFloat128Sign( a );
6415 if ( aExp == 0x7FFF ) {
6416 if ( aSig0 | aSig1 ) {
7537c2b4
JM
6417 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6418 status);
6419 return floatx80_silence_nan(res, status);
158142c2 6420 }
0f605c88
LV
6421 return packFloatx80(aSign, floatx80_infinity_high,
6422 floatx80_infinity_low);
158142c2
FB
6423 }
6424 if ( aExp == 0 ) {
6425 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6426 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6427 }
6428 else {
e9321124 6429 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6430 }
6431 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6432 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6433
6434}
6435
158142c2
FB
6436/*----------------------------------------------------------------------------
6437| Returns the remainder of the quadruple-precision floating-point value `a'
6438| with respect to the corresponding value `b'. The operation is performed
6439| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6440*----------------------------------------------------------------------------*/
6441
e5a41ffa 6442float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 6443{
c120391c 6444 bool aSign, zSign;
f4014512 6445 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6446 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6447 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6448 int64_t sigMean0;
158142c2
FB
6449
6450 aSig1 = extractFloat128Frac1( a );
6451 aSig0 = extractFloat128Frac0( a );
6452 aExp = extractFloat128Exp( a );
6453 aSign = extractFloat128Sign( a );
6454 bSig1 = extractFloat128Frac1( b );
6455 bSig0 = extractFloat128Frac0( b );
6456 bExp = extractFloat128Exp( b );
158142c2
FB
6457 if ( aExp == 0x7FFF ) {
6458 if ( ( aSig0 | aSig1 )
6459 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6460 return propagateFloat128NaN(a, b, status);
158142c2
FB
6461 }
6462 goto invalid;
6463 }
6464 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6465 if (bSig0 | bSig1) {
6466 return propagateFloat128NaN(a, b, status);
6467 }
158142c2
FB
6468 return a;
6469 }
6470 if ( bExp == 0 ) {
6471 if ( ( bSig0 | bSig1 ) == 0 ) {
6472 invalid:
ff32e16e 6473 float_raise(float_flag_invalid, status);
af39bc8c 6474 return float128_default_nan(status);
158142c2
FB
6475 }
6476 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6477 }
6478 if ( aExp == 0 ) {
6479 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6480 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6481 }
6482 expDiff = aExp - bExp;
6483 if ( expDiff < -1 ) return a;
6484 shortShift128Left(
e9321124 6485 aSig0 | UINT64_C(0x0001000000000000),
158142c2
FB
6486 aSig1,
6487 15 - ( expDiff < 0 ),
6488 &aSig0,
6489 &aSig1
6490 );
6491 shortShift128Left(
e9321124 6492 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
6493 q = le128( bSig0, bSig1, aSig0, aSig1 );
6494 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6495 expDiff -= 64;
6496 while ( 0 < expDiff ) {
6497 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6498 q = ( 4 < q ) ? q - 4 : 0;
6499 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6500 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6501 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6502 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6503 expDiff -= 61;
6504 }
6505 if ( -64 < expDiff ) {
6506 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6507 q = ( 4 < q ) ? q - 4 : 0;
6508 q >>= - expDiff;
6509 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6510 expDiff += 52;
6511 if ( expDiff < 0 ) {
6512 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6513 }
6514 else {
6515 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6516 }
6517 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6518 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6519 }
6520 else {
6521 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6522 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6523 }
6524 do {
6525 alternateASig0 = aSig0;
6526 alternateASig1 = aSig1;
6527 ++q;
6528 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 6529 } while ( 0 <= (int64_t) aSig0 );
158142c2 6530 add128(
bb98fe42 6531 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
6532 if ( ( sigMean0 < 0 )
6533 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6534 aSig0 = alternateASig0;
6535 aSig1 = alternateASig1;
6536 }
bb98fe42 6537 zSign = ( (int64_t) aSig0 < 0 );
158142c2 6538 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
6539 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6540 status);
158142c2
FB
6541}
6542
6543/*----------------------------------------------------------------------------
6544| Returns the square root of the quadruple-precision floating-point value `a'.
6545| The operation is performed according to the IEC/IEEE Standard for Binary
6546| Floating-Point Arithmetic.
6547*----------------------------------------------------------------------------*/
6548
e5a41ffa 6549float128 float128_sqrt(float128 a, float_status *status)
158142c2 6550{
c120391c 6551 bool aSign;
f4014512 6552 int32_t aExp, zExp;
bb98fe42
AF
6553 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6554 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6555
6556 aSig1 = extractFloat128Frac1( a );
6557 aSig0 = extractFloat128Frac0( a );
6558 aExp = extractFloat128Exp( a );
6559 aSign = extractFloat128Sign( a );
6560 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6561 if (aSig0 | aSig1) {
6562 return propagateFloat128NaN(a, a, status);
6563 }
158142c2
FB
6564 if ( ! aSign ) return a;
6565 goto invalid;
6566 }
6567 if ( aSign ) {
6568 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6569 invalid:
ff32e16e 6570 float_raise(float_flag_invalid, status);
af39bc8c 6571 return float128_default_nan(status);
158142c2
FB
6572 }
6573 if ( aExp == 0 ) {
6574 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6575 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6576 }
6577 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
e9321124 6578 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6579 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6580 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6581 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6582 doubleZSig0 = zSig0<<1;
6583 mul64To128( zSig0, zSig0, &term0, &term1 );
6584 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6585 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6586 --zSig0;
6587 doubleZSig0 -= 2;
6588 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6589 }
6590 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6591 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6592 if ( zSig1 == 0 ) zSig1 = 1;
6593 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6594 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6595 mul64To128( zSig1, zSig1, &term2, &term3 );
6596 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6597 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6598 --zSig1;
6599 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6600 term3 |= 1;
6601 term2 |= doubleZSig0;
6602 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6603 }
6604 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6605 }
6606 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 6607 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6608
6609}
6610
71bfd65c
RH
6611static inline FloatRelation
6612floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
6613 float_status *status)
f6714d36 6614{
c120391c 6615 bool aSign, bSign;
f6714d36 6616
d1eb8f2a
AD
6617 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6618 float_raise(float_flag_invalid, status);
6619 return float_relation_unordered;
6620 }
f6714d36
AJ
6621 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6622 ( extractFloatx80Frac( a )<<1 ) ) ||
6623 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6624 ( extractFloatx80Frac( b )<<1 ) )) {
6625 if (!is_quiet ||
af39bc8c
AM
6626 floatx80_is_signaling_nan(a, status) ||
6627 floatx80_is_signaling_nan(b, status)) {
ff32e16e 6628 float_raise(float_flag_invalid, status);
f6714d36
AJ
6629 }
6630 return float_relation_unordered;
6631 }
6632 aSign = extractFloatx80Sign( a );
6633 bSign = extractFloatx80Sign( b );
6634 if ( aSign != bSign ) {
6635
6636 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6637 ( ( a.low | b.low ) == 0 ) ) {
6638 /* zero case */
6639 return float_relation_equal;
6640 } else {
6641 return 1 - (2 * aSign);
6642 }
6643 } else {
be53fa78
JM
6644 /* Normalize pseudo-denormals before comparison. */
6645 if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
6646 ++a.high;
6647 }
6648 if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
6649 ++b.high;
6650 }
f6714d36
AJ
6651 if (a.low == b.low && a.high == b.high) {
6652 return float_relation_equal;
6653 } else {
6654 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6655 }
6656 }
6657}
6658
71bfd65c 6659FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 6660{
ff32e16e 6661 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
6662}
6663
71bfd65c
RH
6664FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6665 float_status *status)
f6714d36 6666{
ff32e16e 6667 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
6668}
6669
71bfd65c
RH
6670static inline FloatRelation
6671float128_compare_internal(float128 a, float128 b, bool is_quiet,
6672 float_status *status)
1f587329 6673{
c120391c 6674 bool aSign, bSign;
1f587329
BS
6675
6676 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6677 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
6678 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
6679 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
6680 if (!is_quiet ||
af39bc8c
AM
6681 float128_is_signaling_nan(a, status) ||
6682 float128_is_signaling_nan(b, status)) {
ff32e16e 6683 float_raise(float_flag_invalid, status);
1f587329
BS
6684 }
6685 return float_relation_unordered;
6686 }
6687 aSign = extractFloat128Sign( a );
6688 bSign = extractFloat128Sign( b );
6689 if ( aSign != bSign ) {
6690 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
6691 /* zero case */
6692 return float_relation_equal;
6693 } else {
6694 return 1 - (2 * aSign);
6695 }
6696 } else {
6697 if (a.low == b.low && a.high == b.high) {
6698 return float_relation_equal;
6699 } else {
6700 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6701 }
6702 }
6703}
6704
71bfd65c 6705FloatRelation float128_compare(float128 a, float128 b, float_status *status)
1f587329 6706{
ff32e16e 6707 return float128_compare_internal(a, b, 0, status);
1f587329
BS
6708}
6709
71bfd65c
RH
6710FloatRelation float128_compare_quiet(float128 a, float128 b,
6711 float_status *status)
1f587329 6712{
ff32e16e 6713 return float128_compare_internal(a, b, 1, status);
1f587329
BS
6714}
6715
e5a41ffa 6716floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb 6717{
c120391c 6718 bool aSign;
326b9e98 6719 int32_t aExp;
bb98fe42 6720 uint64_t aSig;
9ee6e8bb 6721
d1eb8f2a
AD
6722 if (floatx80_invalid_encoding(a)) {
6723 float_raise(float_flag_invalid, status);
6724 return floatx80_default_nan(status);
6725 }
9ee6e8bb
PB
6726 aSig = extractFloatx80Frac( a );
6727 aExp = extractFloatx80Exp( a );
6728 aSign = extractFloatx80Sign( a );
6729
326b9e98
AJ
6730 if ( aExp == 0x7FFF ) {
6731 if ( aSig<<1 ) {
ff32e16e 6732 return propagateFloatx80NaN(a, a, status);
326b9e98 6733 }
9ee6e8bb
PB
6734 return a;
6735 }
326b9e98 6736
3c85c37f
PM
6737 if (aExp == 0) {
6738 if (aSig == 0) {
6739 return a;
6740 }
6741 aExp++;
6742 }
69397542 6743
326b9e98
AJ
6744 if (n > 0x10000) {
6745 n = 0x10000;
6746 } else if (n < -0x10000) {
6747 n = -0x10000;
6748 }
6749
9ee6e8bb 6750 aExp += n;
a2f2d288
PM
6751 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6752 aSign, aExp, aSig, 0, status);
9ee6e8bb 6753}
9ee6e8bb 6754
e5a41ffa 6755float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb 6756{
c120391c 6757 bool aSign;
326b9e98 6758 int32_t aExp;
bb98fe42 6759 uint64_t aSig0, aSig1;
9ee6e8bb
PB
6760
6761 aSig1 = extractFloat128Frac1( a );
6762 aSig0 = extractFloat128Frac0( a );
6763 aExp = extractFloat128Exp( a );
6764 aSign = extractFloat128Sign( a );
6765 if ( aExp == 0x7FFF ) {
326b9e98 6766 if ( aSig0 | aSig1 ) {
ff32e16e 6767 return propagateFloat128NaN(a, a, status);
326b9e98 6768 }
9ee6e8bb
PB
6769 return a;
6770 }
3c85c37f 6771 if (aExp != 0) {
e9321124 6772 aSig0 |= UINT64_C(0x0001000000000000);
3c85c37f 6773 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 6774 return a;
3c85c37f
PM
6775 } else {
6776 aExp++;
6777 }
69397542 6778
326b9e98
AJ
6779 if (n > 0x10000) {
6780 n = 0x10000;
6781 } else if (n < -0x10000) {
6782 n = -0x10000;
6783 }
6784
69397542
PB
6785 aExp += n - 1;
6786 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 6787 , status);
9ee6e8bb
PB
6788
6789}
f6b3b108
EC
6790
6791static void __attribute__((constructor)) softfloat_init(void)
6792{
6793 union_float64 ua, ub, uc, ur;
6794
6795 if (QEMU_NO_HARDFLOAT) {
6796 return;
6797 }
6798 /*
6799 * Test that the host's FMA is not obviously broken. For example,
6800 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
6801 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
6802 */
6803 ua.s = 0x0020000000000001ULL;
6804 ub.s = 0x3ca0000000000000ULL;
6805 uc.s = 0x0020000000000000ULL;
6806 ur.h = fma(ua.h, ub.h, uc.h);
6807 if (ur.s != 0x0020000000000001ULL) {
6808 force_soft_fma = true;
6809 }
6810}