]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20210603' into...
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
d82f3b2d 135 float_raise(float_flag_input_denormal, s); \
a94b7839
EC
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
185/*
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
189 */
190#if defined(__x86_64__)
191# define QEMU_HARDFLOAT_1F32_USE_FP 0
192# define QEMU_HARDFLOAT_1F64_USE_FP 1
193# define QEMU_HARDFLOAT_2F32_USE_FP 0
194# define QEMU_HARDFLOAT_2F64_USE_FP 1
195# define QEMU_HARDFLOAT_3F32_USE_FP 0
196# define QEMU_HARDFLOAT_3F64_USE_FP 1
197#else
198# define QEMU_HARDFLOAT_1F32_USE_FP 0
199# define QEMU_HARDFLOAT_1F64_USE_FP 0
200# define QEMU_HARDFLOAT_2F32_USE_FP 0
201# define QEMU_HARDFLOAT_2F64_USE_FP 0
202# define QEMU_HARDFLOAT_3F32_USE_FP 0
203# define QEMU_HARDFLOAT_3F64_USE_FP 0
204#endif
205
206/*
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211 */
212#if defined(__x86_64__) || defined(__aarch64__)
213# define QEMU_HARDFLOAT_USE_ISINF 1
214#else
215# define QEMU_HARDFLOAT_USE_ISINF 0
216#endif
217
218/*
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
221 * already set.
222 */
223#if defined(TARGET_PPC) || defined(__FAST_MATH__)
224# if defined(__FAST_MATH__)
225# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226 IEEE implementation
227# endif
228# define QEMU_NO_HARDFLOAT 1
229# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230#else
231# define QEMU_NO_HARDFLOAT 0
232# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
a94b7839
EC
342static inline float32
343float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
b240c9c4 345 f32_check_fn pre, f32_check_fn post)
a94b7839
EC
346{
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
a94b7839
EC
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 363 float_raise(float_flag_overflow, s);
b240c9c4
RH
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
a94b7839
EC
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371}
372
373static inline float64
374float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
b240c9c4 376 f64_check_fn pre, f64_check_fn post)
a94b7839
EC
377{
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
a94b7839
EC
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 394 float_raise(float_flag_overflow, s);
b240c9c4
RH
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
a94b7839
EC
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402}
403
d97544c9
AB
404/*----------------------------------------------------------------------------
405| Returns the fraction bits of the single-precision floating-point value `a'.
406*----------------------------------------------------------------------------*/
407
408static inline uint32_t extractFloat32Frac(float32 a)
409{
410 return float32_val(a) & 0x007FFFFF;
411}
412
413/*----------------------------------------------------------------------------
414| Returns the exponent bits of the single-precision floating-point value `a'.
415*----------------------------------------------------------------------------*/
416
417static inline int extractFloat32Exp(float32 a)
418{
419 return (float32_val(a) >> 23) & 0xFF;
420}
421
422/*----------------------------------------------------------------------------
423| Returns the sign bit of the single-precision floating-point value `a'.
424*----------------------------------------------------------------------------*/
425
c120391c 426static inline bool extractFloat32Sign(float32 a)
d97544c9
AB
427{
428 return float32_val(a) >> 31;
429}
430
431/*----------------------------------------------------------------------------
432| Returns the fraction bits of the double-precision floating-point value `a'.
433*----------------------------------------------------------------------------*/
434
435static inline uint64_t extractFloat64Frac(float64 a)
436{
e9321124 437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
d97544c9
AB
438}
439
440/*----------------------------------------------------------------------------
441| Returns the exponent bits of the double-precision floating-point value `a'.
442*----------------------------------------------------------------------------*/
443
444static inline int extractFloat64Exp(float64 a)
445{
446 return (float64_val(a) >> 52) & 0x7FF;
447}
448
449/*----------------------------------------------------------------------------
450| Returns the sign bit of the double-precision floating-point value `a'.
451*----------------------------------------------------------------------------*/
452
c120391c 453static inline bool extractFloat64Sign(float64 a)
d97544c9
AB
454{
455 return float64_val(a) >> 63;
456}
457
a90119b5
AB
458/*
459 * Classify a floating point number. Everything above float_class_qnan
460 * is a NaN so cls >= float_class_qnan is any NaN.
461 */
462
463typedef enum __attribute__ ((__packed__)) {
464 float_class_unclassified,
465 float_class_zero,
466 float_class_normal,
467 float_class_inf,
468 float_class_qnan, /* all NaNs from here */
469 float_class_snan,
a90119b5
AB
470} FloatClass;
471
134eda00
RH
472#define float_cmask(bit) (1u << (bit))
473
474enum {
475 float_cmask_zero = float_cmask(float_class_zero),
476 float_cmask_normal = float_cmask(float_class_normal),
477 float_cmask_inf = float_cmask(float_class_inf),
478 float_cmask_qnan = float_cmask(float_class_qnan),
479 float_cmask_snan = float_cmask(float_class_snan),
480
481 float_cmask_infzero = float_cmask_zero | float_cmask_inf,
482 float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
483};
484
485
247d1f21
RH
486/* Simple helpers for checking if, or what kind of, NaN we have */
487static inline __attribute__((unused)) bool is_nan(FloatClass c)
488{
489 return unlikely(c >= float_class_qnan);
490}
491
492static inline __attribute__((unused)) bool is_snan(FloatClass c)
493{
494 return c == float_class_snan;
495}
496
497static inline __attribute__((unused)) bool is_qnan(FloatClass c)
498{
499 return c == float_class_qnan;
500}
501
a90119b5 502/*
0018b1f4
RH
503 * Structure holding all of the decomposed parts of a float.
504 * The exponent is unbiased and the fraction is normalized.
a90119b5 505 *
0018b1f4
RH
506 * The fraction words are stored in big-endian word ordering,
507 * so that truncation from a larger format to a smaller format
508 * can be done simply by ignoring subsequent elements.
a90119b5
AB
509 */
510
511typedef struct {
a90119b5
AB
512 FloatClass cls;
513 bool sign;
4109b9ea
RH
514 int32_t exp;
515 union {
516 /* Routines that know the structure may reference the singular name. */
517 uint64_t frac;
518 /*
519 * Routines expanded with multiple structures reference "hi" and "lo"
520 * depending on the operation. In FloatParts64, "hi" and "lo" are
521 * both the same word and aliased here.
522 */
523 uint64_t frac_hi;
524 uint64_t frac_lo;
525 };
f8155c1d 526} FloatParts64;
a90119b5 527
0018b1f4
RH
528typedef struct {
529 FloatClass cls;
530 bool sign;
531 int32_t exp;
532 uint64_t frac_hi;
533 uint64_t frac_lo;
534} FloatParts128;
535
aca84527
RH
536typedef struct {
537 FloatClass cls;
538 bool sign;
539 int32_t exp;
540 uint64_t frac_hi;
541 uint64_t frac_hm; /* high-middle */
542 uint64_t frac_lm; /* low-middle */
543 uint64_t frac_lo;
544} FloatParts256;
545
0018b1f4 546/* These apply to the most significant word of each FloatPartsN. */
e99c4373 547#define DECOMPOSED_BINARY_POINT 63
a90119b5 548#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
a90119b5
AB
549
550/* Structure holding all of the relevant parameters for a format.
551 * exp_size: the size of the exponent field
552 * exp_bias: the offset applied to the exponent field
553 * exp_max: the maximum normalised exponent
554 * frac_size: the size of the fraction field
555 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
556 * The following are computed based on the size of the fraction
557 * frac_lsb: least significant bit of fraction
ca3a3d5a 558 * frac_lsbm1: the bit below the least significant bit (for rounding)
a90119b5 559 * round_mask/roundeven_mask: masks used for rounding
ca3a3d5a
AB
560 * The following optional modifiers are available:
561 * arm_althp: handle ARM Alternative Half Precision
a90119b5
AB
562 */
563typedef struct {
564 int exp_size;
565 int exp_bias;
566 int exp_max;
567 int frac_size;
568 int frac_shift;
569 uint64_t frac_lsb;
570 uint64_t frac_lsbm1;
571 uint64_t round_mask;
572 uint64_t roundeven_mask;
ca3a3d5a 573 bool arm_althp;
a90119b5
AB
574} FloatFmt;
575
576/* Expand fields based on the size of exponent and fraction */
/*
 * FLOAT_PARAMS(E, F) expands to designated initializers for a FloatFmt
 * with an E-bit exponent and an F-bit fraction.
 *
 * Both arguments are parenthesized at every use so that an expression
 * argument (e.g. "20 + 3") expands with the intended precedence; without
 * that, "-F - 1" would negate only the first term of F.
 */
#define FLOAT_PARAMS(E, F)                                   \
    .exp_size       = (E),                                   \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                 \
    .exp_max        = (1 << (E)) - 1,                        \
    .frac_size      = (F),                                   \
    .frac_shift     = (-(F) - 1) & 63,                       \
    .frac_lsb       = 1ull << ((-(F) - 1) & 63),             \
    .frac_lsbm1     = 1ull << ((-(F) - 2) & 63),             \
    .round_mask     = (1ull << ((-(F) - 1) & 63)) - 1,       \
    .roundeven_mask = (2ull << ((-(F) - 1) & 63)) - 1
a90119b5
AB
587
588static const FloatFmt float16_params = {
589 FLOAT_PARAMS(5, 10)
590};
591
6fed16b2
AB
592static const FloatFmt float16_params_ahp = {
593 FLOAT_PARAMS(5, 10),
594 .arm_althp = true
595};
596
8282310d
LZ
597static const FloatFmt bfloat16_params = {
598 FLOAT_PARAMS(8, 7)
599};
600
a90119b5
AB
601static const FloatFmt float32_params = {
602 FLOAT_PARAMS(8, 23)
603};
604
605static const FloatFmt float64_params = {
606 FLOAT_PARAMS(11, 52)
607};
608
0018b1f4
RH
609static const FloatFmt float128_params = {
610 FLOAT_PARAMS(15, 112)
611};
612
6fff2167 613/* Unpack a float to parts, but do not canonicalize. */
d8fdd172 614static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
6fff2167 615{
d8fdd172
RH
616 const int f_size = fmt->frac_size;
617 const int e_size = fmt->exp_size;
6fff2167 618
d8fdd172 619 *r = (FloatParts64) {
6fff2167 620 .cls = float_class_unclassified,
d8fdd172
RH
621 .sign = extract64(raw, f_size + e_size, 1),
622 .exp = extract64(raw, f_size, e_size),
623 .frac = extract64(raw, 0, f_size)
6fff2167
AB
624 };
625}
626
3dddb203 627static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
6fff2167 628{
3dddb203 629 unpack_raw64(p, &float16_params, f);
6fff2167
AB
630}
631
3dddb203 632static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
8282310d 633{
3dddb203 634 unpack_raw64(p, &bfloat16_params, f);
8282310d
LZ
635}
636
3dddb203 637static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
6fff2167 638{
3dddb203 639 unpack_raw64(p, &float32_params, f);
6fff2167
AB
640}
641
3dddb203 642static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
6fff2167 643{
3dddb203 644 unpack_raw64(p, &float64_params, f);
6fff2167
AB
645}
646
0018b1f4
RH
647static void float128_unpack_raw(FloatParts128 *p, float128 f)
648{
649 const int f_size = float128_params.frac_size - 64;
650 const int e_size = float128_params.exp_size;
651
652 *p = (FloatParts128) {
653 .cls = float_class_unclassified,
654 .sign = extract64(f.high, f_size + e_size, 1),
655 .exp = extract64(f.high, f_size, e_size),
656 .frac_hi = extract64(f.high, 0, f_size),
657 .frac_lo = f.low,
658 };
659}
660
6fff2167 661/* Pack a float from parts, but do not canonicalize. */
9e4af58c 662static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
6fff2167 663{
9e4af58c
RH
664 const int f_size = fmt->frac_size;
665 const int e_size = fmt->exp_size;
666 uint64_t ret;
667
668 ret = (uint64_t)p->sign << (f_size + e_size);
669 ret = deposit64(ret, f_size, e_size, p->exp);
670 ret = deposit64(ret, 0, f_size, p->frac);
671 return ret;
6fff2167
AB
672}
673
71fd178e 674static inline float16 float16_pack_raw(const FloatParts64 *p)
6fff2167 675{
71fd178e 676 return make_float16(pack_raw64(p, &float16_params));
6fff2167
AB
677}
678
71fd178e 679static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
8282310d 680{
71fd178e 681 return pack_raw64(p, &bfloat16_params);
8282310d
LZ
682}
683
71fd178e 684static inline float32 float32_pack_raw(const FloatParts64 *p)
6fff2167 685{
71fd178e 686 return make_float32(pack_raw64(p, &float32_params));
6fff2167
AB
687}
688
71fd178e 689static inline float64 float64_pack_raw(const FloatParts64 *p)
6fff2167 690{
71fd178e 691 return make_float64(pack_raw64(p, &float64_params));
6fff2167
AB
692}
693
0018b1f4
RH
694static float128 float128_pack_raw(const FloatParts128 *p)
695{
696 const int f_size = float128_params.frac_size - 64;
697 const int e_size = float128_params.exp_size;
698 uint64_t hi;
699
700 hi = (uint64_t)p->sign << (f_size + e_size);
701 hi = deposit64(hi, f_size, e_size, p->exp);
702 hi = deposit64(hi, 0, f_size, p->frac_hi);
703 return make_float128(hi, p->frac_lo);
704}
705
0664335a
RH
706/*----------------------------------------------------------------------------
707| Functions and definitions to determine: (1) whether tininess for underflow
708| is detected before or after rounding by default, (2) what (if anything)
709| happens when exceptions are raised, (3) how signaling NaNs are distinguished
710| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
711| are propagated from function inputs to output. These details are target-
712| specific.
713*----------------------------------------------------------------------------*/
139c1837 714#include "softfloat-specialize.c.inc"
0664335a 715
0018b1f4
RH
716#define PARTS_GENERIC_64_128(NAME, P) \
717 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
718
dedd123c
RH
719#define PARTS_GENERIC_64_128_256(NAME, P) \
720 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
721 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
722
e9034ea8 723#define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
0018b1f4
RH
724#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
725
7c45bad8
RH
726static void parts64_return_nan(FloatParts64 *a, float_status *s);
727static void parts128_return_nan(FloatParts128 *a, float_status *s);
728
729#define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
0018b1f4 730
22c355f4
RH
731static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
732 float_status *s);
733static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
734 float_status *s);
735
736#define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
737
979582d0
RH
738static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
739 FloatParts64 *c, float_status *s,
740 int ab_mask, int abc_mask);
741static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
742 FloatParts128 *b,
743 FloatParts128 *c,
744 float_status *s,
745 int ab_mask, int abc_mask);
746
747#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
748 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
749
d46975bc
RH
750static void parts64_canonicalize(FloatParts64 *p, float_status *status,
751 const FloatFmt *fmt);
752static void parts128_canonicalize(FloatParts128 *p, float_status *status,
753 const FloatFmt *fmt);
754
755#define parts_canonicalize(A, S, F) \
756 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
757
ee6959f2
RH
758static void parts64_uncanon(FloatParts64 *p, float_status *status,
759 const FloatFmt *fmt);
760static void parts128_uncanon(FloatParts128 *p, float_status *status,
761 const FloatFmt *fmt);
762
763#define parts_uncanon(A, S, F) \
764 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
765
da10a907
RH
766static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
767static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 768static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
769
770#define parts_add_normal(A, B) \
dedd123c 771 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
da10a907
RH
772
773static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
774static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 775static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
776
777#define parts_sub_normal(A, B) \
dedd123c 778 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
da10a907
RH
779
780static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
781 float_status *s, bool subtract);
782static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
783 float_status *s, bool subtract);
784
785#define parts_addsub(A, B, S, Z) \
786 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
787
aca84527
RH
788static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
789 float_status *s);
790static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
791 float_status *s);
792
793#define parts_mul(A, B, S) \
794 PARTS_GENERIC_64_128(mul, A)(A, B, S)
795
dedd123c
RH
796static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
797 FloatParts64 *c, int flags,
798 float_status *s);
799static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
800 FloatParts128 *c, int flags,
801 float_status *s);
802
803#define parts_muladd(A, B, C, Z, S) \
804 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
805
ec961b81
RH
806static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
807 float_status *s);
808static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
809 float_status *s);
810
811#define parts_div(A, B, S) \
812 PARTS_GENERIC_64_128(div, A)(A, B, S)
813
afc34931
RH
814static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
815 int scale, int frac_size);
816static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
817 int scale, int frac_size);
818
819#define parts_round_to_int_normal(A, R, C, F) \
820 PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
821
822static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
823 int scale, float_status *s,
824 const FloatFmt *fmt);
825static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
826 int scale, float_status *s,
827 const FloatFmt *fmt);
828
829#define parts_round_to_int(A, R, C, S, F) \
830 PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
831
463b3f0d
RH
832static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
833 int scale, int64_t min, int64_t max,
834 float_status *s);
835static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
836 int scale, int64_t min, int64_t max,
837 float_status *s);
838
839#define parts_float_to_sint(P, R, Z, MN, MX, S) \
840 PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
841
0018b1f4
RH
842/*
843 * Helper functions for softfloat-parts.c.inc, per-size operations.
844 */
845
22c355f4
RH
846#define FRAC_GENERIC_64_128(NAME, P) \
847 QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
848
dedd123c
RH
849#define FRAC_GENERIC_64_128_256(NAME, P) \
850 QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
851 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
852
da10a907
RH
853static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
854{
855 return uadd64_overflow(a->frac, b->frac, &r->frac);
856}
857
858static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
859{
860 bool c = 0;
861 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
862 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
863 return c;
864}
865
dedd123c
RH
866static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
867{
868 bool c = 0;
869 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
870 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
871 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
872 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
873 return c;
874}
875
876#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
da10a907 877
ee6959f2
RH
878static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
879{
880 return uadd64_overflow(a->frac, c, &r->frac);
881}
882
883static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
884{
885 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
886 return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
887}
888
889#define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
890
891static void frac64_allones(FloatParts64 *a)
892{
893 a->frac = -1;
894}
895
896static void frac128_allones(FloatParts128 *a)
897{
898 a->frac_hi = a->frac_lo = -1;
899}
900
901#define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
902
22c355f4
RH
903static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
904{
905 return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
906}
907
908static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
909{
910 uint64_t ta = a->frac_hi, tb = b->frac_hi;
911 if (ta == tb) {
912 ta = a->frac_lo, tb = b->frac_lo;
913 if (ta == tb) {
914 return 0;
915 }
916 }
917 return ta < tb ? -1 : 1;
918}
919
920#define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
921
d46975bc 922static void frac64_clear(FloatParts64 *a)
0018b1f4 923{
d46975bc
RH
924 a->frac = 0;
925}
926
927static void frac128_clear(FloatParts128 *a)
928{
929 a->frac_hi = a->frac_lo = 0;
0018b1f4
RH
930}
931
d46975bc 932#define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
0018b1f4 933
ec961b81
RH
934static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
935{
936 uint64_t n1, n0, r, q;
937 bool ret;
938
939 /*
940 * We want a 2*N / N-bit division to produce exactly an N-bit
941 * result, so that we do not lose any precision and so that we
942 * do not have to renormalize afterward. If A.frac < B.frac,
943 * then division would produce an (N-1)-bit result; shift A left
944 * by one to produce the an N-bit result, and return true to
945 * decrement the exponent to match.
946 *
947 * The udiv_qrnnd algorithm that we're using requires normalization,
948 * i.e. the msb of the denominator must be set, which is already true.
949 */
950 ret = a->frac < b->frac;
951 if (ret) {
952 n0 = a->frac;
953 n1 = 0;
954 } else {
955 n0 = a->frac >> 1;
956 n1 = a->frac << 63;
957 }
958 q = udiv_qrnnd(&r, n0, n1, b->frac);
959
960 /* Set lsb if there is a remainder, to set inexact. */
961 a->frac = q | (r != 0);
962
963 return ret;
964}
965
966static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
967{
968 uint64_t q0, q1, a0, a1, b0, b1;
969 uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
970 bool ret = false;
971
972 a0 = a->frac_hi, a1 = a->frac_lo;
973 b0 = b->frac_hi, b1 = b->frac_lo;
974
975 ret = lt128(a0, a1, b0, b1);
976 if (!ret) {
977 a1 = shr_double(a0, a1, 1);
978 a0 = a0 >> 1;
979 }
980
981 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
982 q0 = estimateDiv128To64(a0, a1, b0);
983
984 /*
985 * Estimate is high because B1 was not included (unless B1 == 0).
986 * Reduce quotient and increase remainder until remainder is non-negative.
987 * This loop will execute 0 to 2 times.
988 */
989 mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
990 sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
991 while (r0 != 0) {
992 q0--;
993 add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
994 }
995
996 /* Repeat using the remainder, producing a second word of quotient. */
997 q1 = estimateDiv128To64(r1, r2, b0);
998 mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
999 sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1000 while (r1 != 0) {
1001 q1--;
1002 add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1003 }
1004
1005 /* Any remainder indicates inexact; set sticky bit. */
1006 q1 |= (r2 | r3) != 0;
1007
1008 a->frac_hi = q0;
1009 a->frac_lo = q1;
1010 return ret;
1011}
1012
1013#define frac_div(A, B) FRAC_GENERIC_64_128(div, A)(A, B)
1014
d46975bc 1015static bool frac64_eqz(FloatParts64 *a)
0018b1f4 1016{
d46975bc
RH
1017 return a->frac == 0;
1018}
1019
1020static bool frac128_eqz(FloatParts128 *a)
1021{
1022 return (a->frac_hi | a->frac_lo) == 0;
0018b1f4
RH
1023}
1024
d46975bc 1025#define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
0fc07cad 1026
aca84527
RH
1027static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1028{
1029 mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1030}
1031
1032static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1033{
1034 mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1035 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1036}
1037
1038#define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1039
da10a907
RH
1040static void frac64_neg(FloatParts64 *a)
1041{
1042 a->frac = -a->frac;
1043}
1044
1045static void frac128_neg(FloatParts128 *a)
1046{
1047 bool c = 0;
1048 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1049 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1050}
1051
dedd123c
RH
1052static void frac256_neg(FloatParts256 *a)
1053{
1054 bool c = 0;
1055 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1056 a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1057 a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1058 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1059}
1060
1061#define frac_neg(A) FRAC_GENERIC_64_128_256(neg, A)(A)
da10a907 1062
d46975bc 1063static int frac64_normalize(FloatParts64 *a)
6fff2167 1064{
d46975bc
RH
1065 if (a->frac) {
1066 int shift = clz64(a->frac);
1067 a->frac <<= shift;
1068 return shift;
1069 }
1070 return 64;
1071}
1072
1073static int frac128_normalize(FloatParts128 *a)
1074{
1075 if (a->frac_hi) {
1076 int shl = clz64(a->frac_hi);
463e45dc
RH
1077 a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1078 a->frac_lo <<= shl;
d46975bc
RH
1079 return shl;
1080 } else if (a->frac_lo) {
1081 int shl = clz64(a->frac_lo);
463e45dc 1082 a->frac_hi = a->frac_lo << shl;
d46975bc
RH
1083 a->frac_lo = 0;
1084 return shl + 64;
6fff2167 1085 }
d46975bc 1086 return 128;
6fff2167
AB
1087}
1088
dedd123c
RH
1089static int frac256_normalize(FloatParts256 *a)
1090{
1091 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1092 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
463e45dc 1093 int ret, shl;
dedd123c
RH
1094
1095 if (likely(a0)) {
1096 shl = clz64(a0);
1097 if (shl == 0) {
1098 return 0;
1099 }
1100 ret = shl;
1101 } else {
1102 if (a1) {
1103 ret = 64;
1104 a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1105 } else if (a2) {
1106 ret = 128;
1107 a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1108 } else if (a3) {
1109 ret = 192;
1110 a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1111 } else {
1112 ret = 256;
1113 a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1114 goto done;
1115 }
1116 shl = clz64(a0);
1117 if (shl == 0) {
1118 goto done;
1119 }
1120 ret += shl;
1121 }
1122
463e45dc
RH
1123 a0 = shl_double(a0, a1, shl);
1124 a1 = shl_double(a1, a2, shl);
1125 a2 = shl_double(a2, a3, shl);
1126 a3 <<= shl;
dedd123c
RH
1127
1128 done:
1129 a->frac_hi = a0;
1130 a->frac_hm = a1;
1131 a->frac_lm = a2;
1132 a->frac_lo = a3;
1133 return ret;
1134}
1135
1136#define frac_normalize(A) FRAC_GENERIC_64_128_256(normalize, A)(A)
d46975bc
RH
1137
1138static void frac64_shl(FloatParts64 *a, int c)
1139{
1140 a->frac <<= c;
1141}
1142
1143static void frac128_shl(FloatParts128 *a, int c)
1144{
463e45dc
RH
1145 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1146
1147 if (c & 64) {
1148 a0 = a1, a1 = 0;
1149 }
1150
1151 c &= 63;
1152 if (c) {
1153 a0 = shl_double(a0, a1, c);
1154 a1 = a1 << c;
1155 }
1156
1157 a->frac_hi = a0;
1158 a->frac_lo = a1;
d46975bc
RH
1159}
1160
1161#define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
1162
1163static void frac64_shr(FloatParts64 *a, int c)
1164{
1165 a->frac >>= c;
1166}
1167
1168static void frac128_shr(FloatParts128 *a, int c)
1169{
463e45dc
RH
1170 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1171
1172 if (c & 64) {
1173 a1 = a0, a0 = 0;
1174 }
1175
1176 c &= 63;
1177 if (c) {
1178 a1 = shr_double(a0, a1, c);
1179 a0 = a0 >> c;
1180 }
1181
1182 a->frac_hi = a0;
1183 a->frac_lo = a1;
d46975bc
RH
1184}
1185
1186#define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
1187
ee6959f2 1188static void frac64_shrjam(FloatParts64 *a, int c)
6fff2167 1189{
463e45dc
RH
1190 uint64_t a0 = a->frac;
1191
1192 if (likely(c != 0)) {
1193 if (likely(c < 64)) {
1194 a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1195 } else {
1196 a0 = a0 != 0;
1197 }
1198 a->frac = a0;
1199 }
ee6959f2 1200}
6fff2167 1201
ee6959f2
RH
1202static void frac128_shrjam(FloatParts128 *a, int c)
1203{
463e45dc
RH
1204 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1205 uint64_t sticky = 0;
1206
1207 if (unlikely(c == 0)) {
1208 return;
1209 } else if (likely(c < 64)) {
1210 /* nothing */
1211 } else if (likely(c < 128)) {
1212 sticky = a1;
1213 a1 = a0;
1214 a0 = 0;
1215 c &= 63;
1216 if (c == 0) {
1217 goto done;
1218 }
1219 } else {
1220 sticky = a0 | a1;
1221 a0 = a1 = 0;
1222 goto done;
1223 }
1224
1225 sticky |= shr_double(a1, 0, c);
1226 a1 = shr_double(a0, a1, c);
1227 a0 = a0 >> c;
1228
1229 done:
1230 a->frac_lo = a1 | (sticky != 0);
1231 a->frac_hi = a0;
6fff2167
AB
1232}
1233
dedd123c
RH
1234static void frac256_shrjam(FloatParts256 *a, int c)
1235{
1236 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1237 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1238 uint64_t sticky = 0;
dedd123c
RH
1239
1240 if (unlikely(c == 0)) {
1241 return;
1242 } else if (likely(c < 64)) {
1243 /* nothing */
1244 } else if (likely(c < 256)) {
1245 if (unlikely(c & 128)) {
1246 sticky |= a2 | a3;
1247 a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1248 }
1249 if (unlikely(c & 64)) {
1250 sticky |= a3;
1251 a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1252 }
1253 c &= 63;
1254 if (c == 0) {
1255 goto done;
1256 }
1257 } else {
1258 sticky = a0 | a1 | a2 | a3;
1259 a0 = a1 = a2 = a3 = 0;
1260 goto done;
1261 }
1262
463e45dc
RH
1263 sticky |= shr_double(a3, 0, c);
1264 a3 = shr_double(a2, a3, c);
1265 a2 = shr_double(a1, a2, c);
1266 a1 = shr_double(a0, a1, c);
1267 a0 = a0 >> c;
dedd123c
RH
1268
1269 done:
1270 a->frac_lo = a3 | (sticky != 0);
1271 a->frac_lm = a2;
1272 a->frac_hm = a1;
1273 a->frac_hi = a0;
1274}
1275
1276#define frac_shrjam(A, C) FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
d446830a 1277
da10a907
RH
1278static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1279{
1280 return usub64_overflow(a->frac, b->frac, &r->frac);
1281}
7c45bad8 1282
da10a907
RH
1283static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1284{
1285 bool c = 0;
1286 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1287 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1288 return c;
1289}
1290
dedd123c
RH
1291static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1292{
1293 bool c = 0;
1294 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1295 r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1296 r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1297 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1298 return c;
1299}
1300
1301#define frac_sub(R, A, B) FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
da10a907 1302
aca84527
RH
1303static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1304{
1305 r->frac = a->frac_hi | (a->frac_lo != 0);
1306}
1307
1308static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1309{
1310 r->frac_hi = a->frac_hi;
1311 r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1312}
1313
1314#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1315
dedd123c
RH
1316static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1317{
1318 r->frac_hi = a->frac;
1319 r->frac_lo = 0;
1320}
1321
1322static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1323{
1324 r->frac_hi = a->frac_hi;
1325 r->frac_hm = a->frac_lo;
1326 r->frac_lm = 0;
1327 r->frac_lo = 0;
1328}
1329
1330#define frac_widen(A, B) FRAC_GENERIC_64_128(widen, B)(A, B)
1331
da10a907
RH
1332#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1333#define FloatPartsN glue(FloatParts,N)
aca84527 1334#define FloatPartsW glue(FloatParts,W)
da10a907
RH
1335
1336#define N 64
aca84527 1337#define W 128
da10a907
RH
1338
1339#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1340#include "softfloat-parts.c.inc"
1341
da10a907 1342#undef N
aca84527 1343#undef W
da10a907 1344#define N 128
aca84527 1345#define W 256
7c45bad8 1346
da10a907 1347#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1348#include "softfloat-parts.c.inc"
1349
dedd123c
RH
1350#undef N
1351#undef W
1352#define N 256
1353
1354#include "softfloat-parts-addsub.c.inc"
1355
da10a907 1356#undef N
aca84527 1357#undef W
7c45bad8
RH
1358#undef partsN
1359#undef FloatPartsN
aca84527 1360#undef FloatPartsW
7c45bad8 1361
aaffb7bf
RH
1362/*
1363 * Pack/unpack routines with a specific FloatFmt.
1364 */
1365
98e256fc
RH
1366static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1367 float_status *s, const FloatFmt *params)
aaffb7bf 1368{
98e256fc 1369 float16_unpack_raw(p, f);
d46975bc 1370 parts_canonicalize(p, s, params);
aaffb7bf
RH
1371}
1372
98e256fc
RH
1373static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1374 float_status *s)
aaffb7bf 1375{
98e256fc 1376 float16a_unpack_canonical(p, f, s, &float16_params);
aaffb7bf
RH
1377}
1378
98e256fc
RH
1379static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1380 float_status *s)
aaffb7bf 1381{
98e256fc 1382 bfloat16_unpack_raw(p, f);
d46975bc 1383 parts_canonicalize(p, s, &bfloat16_params);
aaffb7bf
RH
1384}
1385
e293e927
RH
1386static float16 float16a_round_pack_canonical(FloatParts64 *p,
1387 float_status *s,
aaffb7bf
RH
1388 const FloatFmt *params)
1389{
ee6959f2 1390 parts_uncanon(p, s, params);
e293e927 1391 return float16_pack_raw(p);
aaffb7bf
RH
1392}
1393
e293e927
RH
1394static float16 float16_round_pack_canonical(FloatParts64 *p,
1395 float_status *s)
aaffb7bf
RH
1396{
1397 return float16a_round_pack_canonical(p, s, &float16_params);
1398}
1399
e293e927
RH
1400static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1401 float_status *s)
aaffb7bf 1402{
ee6959f2 1403 parts_uncanon(p, s, &bfloat16_params);
e293e927 1404 return bfloat16_pack_raw(p);
aaffb7bf
RH
1405}
1406
98e256fc
RH
1407static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1408 float_status *s)
aaffb7bf 1409{
98e256fc 1410 float32_unpack_raw(p, f);
d46975bc 1411 parts_canonicalize(p, s, &float32_params);
aaffb7bf
RH
1412}
1413
e293e927
RH
1414static float32 float32_round_pack_canonical(FloatParts64 *p,
1415 float_status *s)
aaffb7bf 1416{
ee6959f2 1417 parts_uncanon(p, s, &float32_params);
e293e927 1418 return float32_pack_raw(p);
aaffb7bf
RH
1419}
1420
98e256fc
RH
1421static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1422 float_status *s)
aaffb7bf 1423{
98e256fc 1424 float64_unpack_raw(p, f);
d46975bc 1425 parts_canonicalize(p, s, &float64_params);
aaffb7bf
RH
1426}
1427
e293e927
RH
1428static float64 float64_round_pack_canonical(FloatParts64 *p,
1429 float_status *s)
aaffb7bf 1430{
ee6959f2 1431 parts_uncanon(p, s, &float64_params);
e293e927 1432 return float64_pack_raw(p);
aaffb7bf
RH
1433}
1434
3ff49e56
RH
1435static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1436 float_status *s)
1437{
1438 float128_unpack_raw(p, f);
1439 parts_canonicalize(p, s, &float128_params);
1440}
1441
1442static float128 float128_round_pack_canonical(FloatParts128 *p,
1443 float_status *s)
1444{
1445 parts_uncanon(p, s, &float128_params);
1446 return float128_pack_raw(p);
1447}
1448
6fff2167 1449/*
da10a907 1450 * Addition and subtraction
6fff2167
AB
1451 */
1452
da10a907
RH
1453static float16 QEMU_FLATTEN
1454float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
6fff2167 1455{
da10a907 1456 FloatParts64 pa, pb, *pr;
98e256fc
RH
1457
1458 float16_unpack_canonical(&pa, a, status);
1459 float16_unpack_canonical(&pb, b, status);
da10a907 1460 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1461
da10a907 1462 return float16_round_pack_canonical(pr, status);
6fff2167
AB
1463}
1464
da10a907 1465float16 float16_add(float16 a, float16 b, float_status *status)
1b615d48 1466{
da10a907
RH
1467 return float16_addsub(a, b, status, false);
1468}
1b615d48 1469
da10a907
RH
1470float16 float16_sub(float16 a, float16 b, float_status *status)
1471{
1472 return float16_addsub(a, b, status, true);
1b615d48
EC
1473}
1474
1475static float32 QEMU_SOFTFLOAT_ATTR
da10a907 1476soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
6fff2167 1477{
da10a907 1478 FloatParts64 pa, pb, *pr;
98e256fc
RH
1479
1480 float32_unpack_canonical(&pa, a, status);
1481 float32_unpack_canonical(&pb, b, status);
da10a907 1482 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1483
da10a907 1484 return float32_round_pack_canonical(pr, status);
6fff2167
AB
1485}
1486
da10a907 1487static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1b615d48 1488{
da10a907 1489 return soft_f32_addsub(a, b, status, false);
1b615d48
EC
1490}
1491
da10a907 1492static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1b615d48 1493{
da10a907 1494 return soft_f32_addsub(a, b, status, true);
1b615d48
EC
1495}
1496
1497static float64 QEMU_SOFTFLOAT_ATTR
da10a907 1498soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
6fff2167 1499{
da10a907 1500 FloatParts64 pa, pb, *pr;
98e256fc
RH
1501
1502 float64_unpack_canonical(&pa, a, status);
1503 float64_unpack_canonical(&pb, b, status);
da10a907 1504 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1505
da10a907 1506 return float64_round_pack_canonical(pr, status);
6fff2167
AB
1507}
1508
da10a907 1509static float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1510{
da10a907 1511 return soft_f64_addsub(a, b, status, false);
1b615d48 1512}
6fff2167 1513
da10a907 1514static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1b615d48 1515{
da10a907 1516 return soft_f64_addsub(a, b, status, true);
6fff2167
AB
1517}
1518
/* Host-FPU single-precision addition. */
static float hard_f32_add(float a, float b)
{
    const float sum = a + b;

    return sum;
}

/* Host-FPU single-precision subtraction. */
static float hard_f32_sub(float a, float b)
{
    const float diff = a - b;

    return diff;
}

/* Host-FPU double-precision addition. */
static double hard_f64_add(double a, double b)
{
    const double sum = a + b;

    return sum;
}

/* Host-FPU double-precision subtraction. */
static double hard_f64_sub(double a, double b)
{
    const double diff = a - b;

    return diff;
}
1538
b240c9c4 1539static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1b615d48
EC
1540{
1541 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1542 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1543 }
1544 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1545}
1546
b240c9c4 1547static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1b615d48
EC
1548{
1549 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1550 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1551 } else {
1552 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1553 }
1554}
1555
1556static float32 float32_addsub(float32 a, float32 b, float_status *s,
1557 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1558{
1559 return float32_gen2(a, b, s, hard, soft,
b240c9c4 1560 f32_is_zon2, f32_addsubmul_post);
1b615d48
EC
1561}
1562
1563static float64 float64_addsub(float64 a, float64 b, float_status *s,
1564 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1565{
1566 return float64_gen2(a, b, s, hard, soft,
b240c9c4 1567 f64_is_zon2, f64_addsubmul_post);
1b615d48
EC
1568}
1569
1570float32 QEMU_FLATTEN
1571float32_add(float32 a, float32 b, float_status *s)
1572{
1573 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1574}
1575
1576float32 QEMU_FLATTEN
1577float32_sub(float32 a, float32 b, float_status *s)
1578{
1579 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1580}
1581
1582float64 QEMU_FLATTEN
1583float64_add(float64 a, float64 b, float_status *s)
1584{
1585 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1586}
1587
1588float64 QEMU_FLATTEN
1589float64_sub(float64 a, float64 b, float_status *s)
1590{
1591 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1592}
1593
da10a907
RH
1594static bfloat16 QEMU_FLATTEN
1595bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
8282310d 1596{
da10a907 1597 FloatParts64 pa, pb, *pr;
98e256fc
RH
1598
1599 bfloat16_unpack_canonical(&pa, a, status);
1600 bfloat16_unpack_canonical(&pb, b, status);
da10a907 1601 pr = parts_addsub(&pa, &pb, status, subtract);
8282310d 1602
da10a907 1603 return bfloat16_round_pack_canonical(pr, status);
8282310d
LZ
1604}
1605
da10a907 1606bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1607{
da10a907
RH
1608 return bfloat16_addsub(a, b, status, false);
1609}
8282310d 1610
da10a907
RH
1611bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1612{
1613 return bfloat16_addsub(a, b, status, true);
8282310d
LZ
1614}
1615
3ff49e56
RH
1616static float128 QEMU_FLATTEN
1617float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1618{
1619 FloatParts128 pa, pb, *pr;
1620
1621 float128_unpack_canonical(&pa, a, status);
1622 float128_unpack_canonical(&pb, b, status);
1623 pr = parts_addsub(&pa, &pb, status, subtract);
1624
1625 return float128_round_pack_canonical(pr, status);
1626}
1627
1628float128 float128_add(float128 a, float128 b, float_status *status)
1629{
1630 return float128_addsub(a, b, status, false);
1631}
1632
1633float128 float128_sub(float128 a, float128 b, float_status *status)
1634{
1635 return float128_addsub(a, b, status, true);
1636}
1637
74d707e2 1638/*
aca84527 1639 * Multiplication
74d707e2
AB
1640 */
1641
97ff87c0 1642float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2 1643{
aca84527 1644 FloatParts64 pa, pb, *pr;
98e256fc
RH
1645
1646 float16_unpack_canonical(&pa, a, status);
1647 float16_unpack_canonical(&pb, b, status);
aca84527 1648 pr = parts_mul(&pa, &pb, status);
74d707e2 1649
aca84527 1650 return float16_round_pack_canonical(pr, status);
74d707e2
AB
1651}
1652
2dfabc86
EC
1653static float32 QEMU_SOFTFLOAT_ATTR
1654soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2 1655{
aca84527 1656 FloatParts64 pa, pb, *pr;
98e256fc
RH
1657
1658 float32_unpack_canonical(&pa, a, status);
1659 float32_unpack_canonical(&pb, b, status);
aca84527 1660 pr = parts_mul(&pa, &pb, status);
74d707e2 1661
aca84527 1662 return float32_round_pack_canonical(pr, status);
74d707e2
AB
1663}
1664
2dfabc86
EC
1665static float64 QEMU_SOFTFLOAT_ATTR
1666soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2 1667{
aca84527 1668 FloatParts64 pa, pb, *pr;
98e256fc
RH
1669
1670 float64_unpack_canonical(&pa, a, status);
1671 float64_unpack_canonical(&pb, b, status);
aca84527 1672 pr = parts_mul(&pa, &pb, status);
74d707e2 1673
aca84527 1674 return float64_round_pack_canonical(pr, status);
74d707e2
AB
1675}
1676
2dfabc86
EC
/* Host-FPU single-precision multiplication. */
static float hard_f32_mul(float a, float b)
{
    const float product = a * b;

    return product;
}

/* Host-FPU double-precision multiplication. */
static double hard_f64_mul(double a, double b)
{
    const double product = a * b;

    return product;
}
1686
2dfabc86
EC
1687float32 QEMU_FLATTEN
1688float32_mul(float32 a, float32 b, float_status *s)
1689{
1690 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
b240c9c4 1691 f32_is_zon2, f32_addsubmul_post);
2dfabc86
EC
1692}
1693
1694float64 QEMU_FLATTEN
1695float64_mul(float64 a, float64 b, float_status *s)
1696{
1697 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
b240c9c4 1698 f64_is_zon2, f64_addsubmul_post);
2dfabc86
EC
1699}
1700
aca84527
RH
1701bfloat16 QEMU_FLATTEN
1702bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1703{
aca84527 1704 FloatParts64 pa, pb, *pr;
98e256fc
RH
1705
1706 bfloat16_unpack_canonical(&pa, a, status);
1707 bfloat16_unpack_canonical(&pb, b, status);
aca84527 1708 pr = parts_mul(&pa, &pb, status);
8282310d 1709
aca84527
RH
1710 return bfloat16_round_pack_canonical(pr, status);
1711}
1712
1713float128 QEMU_FLATTEN
1714float128_mul(float128 a, float128 b, float_status *status)
1715{
1716 FloatParts128 pa, pb, *pr;
1717
1718 float128_unpack_canonical(&pa, a, status);
1719 float128_unpack_canonical(&pb, b, status);
1720 pr = parts_mul(&pa, &pb, status);
1721
1722 return float128_round_pack_canonical(pr, status);
8282310d
LZ
1723}
1724
d446830a 1725/*
dedd123c 1726 * Fused multiply-add
d446830a
AB
1727 */
1728
97ff87c0 1729float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
dedd123c 1730 int flags, float_status *status)
d446830a 1731{
dedd123c 1732 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1733
1734 float16_unpack_canonical(&pa, a, status);
1735 float16_unpack_canonical(&pb, b, status);
1736 float16_unpack_canonical(&pc, c, status);
dedd123c 1737 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1738
dedd123c 1739 return float16_round_pack_canonical(pr, status);
d446830a
AB
1740}
1741
ccf770ba
EC
1742static float32 QEMU_SOFTFLOAT_ATTR
1743soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1744 float_status *status)
d446830a 1745{
dedd123c 1746 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1747
1748 float32_unpack_canonical(&pa, a, status);
1749 float32_unpack_canonical(&pb, b, status);
1750 float32_unpack_canonical(&pc, c, status);
dedd123c 1751 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1752
dedd123c 1753 return float32_round_pack_canonical(pr, status);
d446830a
AB
1754}
1755
ccf770ba
EC
1756static float64 QEMU_SOFTFLOAT_ATTR
1757soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1758 float_status *status)
d446830a 1759{
dedd123c 1760 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1761
1762 float64_unpack_canonical(&pa, a, status);
1763 float64_unpack_canonical(&pb, b, status);
1764 float64_unpack_canonical(&pc, c, status);
dedd123c 1765 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1766
dedd123c 1767 return float64_round_pack_canonical(pr, status);
d446830a
AB
1768}
1769
f6b3b108
EC
1770static bool force_soft_fma;
1771
ccf770ba
EC
1772float32 QEMU_FLATTEN
1773float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1774{
1775 union_float32 ua, ub, uc, ur;
1776
1777 ua.s = xa;
1778 ub.s = xb;
1779 uc.s = xc;
1780
1781 if (unlikely(!can_use_fpu(s))) {
1782 goto soft;
1783 }
1784 if (unlikely(flags & float_muladd_halve_result)) {
1785 goto soft;
1786 }
1787
1788 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1789 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1790 goto soft;
1791 }
f6b3b108
EC
1792
1793 if (unlikely(force_soft_fma)) {
1794 goto soft;
1795 }
1796
ccf770ba
EC
1797 /*
1798 * When (a || b) == 0, there's no need to check for under/over flow,
1799 * since we know the addend is (normal || 0) and the product is 0.
1800 */
1801 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1802 union_float32 up;
1803 bool prod_sign;
1804
1805 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1806 prod_sign ^= !!(flags & float_muladd_negate_product);
1807 up.s = float32_set_sign(float32_zero, prod_sign);
1808
1809 if (flags & float_muladd_negate_c) {
1810 uc.h = -uc.h;
1811 }
1812 ur.h = up.h + uc.h;
1813 } else {
896f51fb
KC
1814 union_float32 ua_orig = ua;
1815 union_float32 uc_orig = uc;
1816
ccf770ba
EC
1817 if (flags & float_muladd_negate_product) {
1818 ua.h = -ua.h;
1819 }
1820 if (flags & float_muladd_negate_c) {
1821 uc.h = -uc.h;
1822 }
1823
1824 ur.h = fmaf(ua.h, ub.h, uc.h);
1825
1826 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 1827 float_raise(float_flag_overflow, s);
ccf770ba 1828 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
1829 ua = ua_orig;
1830 uc = uc_orig;
ccf770ba
EC
1831 goto soft;
1832 }
1833 }
1834 if (flags & float_muladd_negate_result) {
1835 return float32_chs(ur.s);
1836 }
1837 return ur.s;
1838
1839 soft:
1840 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1841}
1842
1843float64 QEMU_FLATTEN
1844float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1845{
1846 union_float64 ua, ub, uc, ur;
1847
1848 ua.s = xa;
1849 ub.s = xb;
1850 uc.s = xc;
1851
1852 if (unlikely(!can_use_fpu(s))) {
1853 goto soft;
1854 }
1855 if (unlikely(flags & float_muladd_halve_result)) {
1856 goto soft;
1857 }
1858
1859 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1860 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1861 goto soft;
1862 }
f6b3b108
EC
1863
1864 if (unlikely(force_soft_fma)) {
1865 goto soft;
1866 }
1867
ccf770ba
EC
1868 /*
1869 * When (a || b) == 0, there's no need to check for under/over flow,
1870 * since we know the addend is (normal || 0) and the product is 0.
1871 */
1872 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1873 union_float64 up;
1874 bool prod_sign;
1875
1876 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1877 prod_sign ^= !!(flags & float_muladd_negate_product);
1878 up.s = float64_set_sign(float64_zero, prod_sign);
1879
1880 if (flags & float_muladd_negate_c) {
1881 uc.h = -uc.h;
1882 }
1883 ur.h = up.h + uc.h;
1884 } else {
896f51fb
KC
1885 union_float64 ua_orig = ua;
1886 union_float64 uc_orig = uc;
1887
ccf770ba
EC
1888 if (flags & float_muladd_negate_product) {
1889 ua.h = -ua.h;
1890 }
1891 if (flags & float_muladd_negate_c) {
1892 uc.h = -uc.h;
1893 }
1894
1895 ur.h = fma(ua.h, ub.h, uc.h);
1896
1897 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 1898 float_raise(float_flag_overflow, s);
ccf770ba 1899 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
1900 ua = ua_orig;
1901 uc = uc_orig;
ccf770ba
EC
1902 goto soft;
1903 }
1904 }
1905 if (flags & float_muladd_negate_result) {
1906 return float64_chs(ur.s);
1907 }
1908 return ur.s;
1909
1910 soft:
1911 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1912}
1913
8282310d
LZ
1914bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1915 int flags, float_status *status)
1916{
dedd123c 1917 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1918
1919 bfloat16_unpack_canonical(&pa, a, status);
1920 bfloat16_unpack_canonical(&pb, b, status);
1921 bfloat16_unpack_canonical(&pc, c, status);
dedd123c
RH
1922 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1923
1924 return bfloat16_round_pack_canonical(pr, status);
1925}
8282310d 1926
dedd123c
RH
1927float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1928 int flags, float_status *status)
1929{
1930 FloatParts128 pa, pb, pc, *pr;
1931
1932 float128_unpack_canonical(&pa, a, status);
1933 float128_unpack_canonical(&pb, b, status);
1934 float128_unpack_canonical(&pc, c, status);
1935 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1936
1937 return float128_round_pack_canonical(pr, status);
8282310d
LZ
1938}
1939
cf07323d 1940/*
ec961b81 1941 * Division
cf07323d
AB
1942 */
1943
cf07323d
AB
1944float16 float16_div(float16 a, float16 b, float_status *status)
1945{
ec961b81 1946 FloatParts64 pa, pb, *pr;
98e256fc
RH
1947
1948 float16_unpack_canonical(&pa, a, status);
1949 float16_unpack_canonical(&pb, b, status);
ec961b81 1950 pr = parts_div(&pa, &pb, status);
cf07323d 1951
ec961b81 1952 return float16_round_pack_canonical(pr, status);
cf07323d
AB
1953}
1954
4a629561
EC
1955static float32 QEMU_SOFTFLOAT_ATTR
1956soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d 1957{
ec961b81 1958 FloatParts64 pa, pb, *pr;
98e256fc
RH
1959
1960 float32_unpack_canonical(&pa, a, status);
1961 float32_unpack_canonical(&pb, b, status);
ec961b81 1962 pr = parts_div(&pa, &pb, status);
cf07323d 1963
ec961b81 1964 return float32_round_pack_canonical(pr, status);
cf07323d
AB
1965}
1966
4a629561
EC
1967static float64 QEMU_SOFTFLOAT_ATTR
1968soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d 1969{
ec961b81 1970 FloatParts64 pa, pb, *pr;
98e256fc
RH
1971
1972 float64_unpack_canonical(&pa, a, status);
1973 float64_unpack_canonical(&pb, b, status);
ec961b81 1974 pr = parts_div(&pa, &pb, status);
cf07323d 1975
ec961b81 1976 return float64_round_pack_canonical(pr, status);
cf07323d
AB
1977}
1978
4a629561
EC
/* Host single-precision division, used as the native op by float32_div(). */
static float hard_f32_div(float a, float b)
{
    const float quotient = a / b;

    return quotient;
}
1983
/* Host double-precision division, used as the native op by float64_div(). */
static double hard_f64_div(double a, double b)
{
    const double quotient = a / b;

    return quotient;
}
1988
1989static bool f32_div_pre(union_float32 a, union_float32 b)
1990{
1991 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1992 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1993 fpclassify(b.h) == FP_NORMAL;
1994 }
1995 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1996}
1997
1998static bool f64_div_pre(union_float64 a, union_float64 b)
1999{
2000 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2001 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2002 fpclassify(b.h) == FP_NORMAL;
2003 }
2004 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2005}
2006
2007static bool f32_div_post(union_float32 a, union_float32 b)
2008{
2009 if (QEMU_HARDFLOAT_2F32_USE_FP) {
2010 return fpclassify(a.h) != FP_ZERO;
2011 }
2012 return !float32_is_zero(a.s);
2013}
2014
2015static bool f64_div_post(union_float64 a, union_float64 b)
2016{
2017 if (QEMU_HARDFLOAT_2F64_USE_FP) {
2018 return fpclassify(a.h) != FP_ZERO;
2019 }
2020 return !float64_is_zero(a.s);
2021}
2022
2023float32 QEMU_FLATTEN
2024float32_div(float32 a, float32 b, float_status *s)
2025{
2026 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
b240c9c4 2027 f32_div_pre, f32_div_post);
4a629561
EC
2028}
2029
2030float64 QEMU_FLATTEN
2031float64_div(float64 a, float64 b, float_status *s)
2032{
2033 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
b240c9c4 2034 f64_div_pre, f64_div_post);
4a629561
EC
2035}
2036
ec961b81
RH
2037bfloat16 QEMU_FLATTEN
2038bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
8282310d 2039{
ec961b81 2040 FloatParts64 pa, pb, *pr;
98e256fc
RH
2041
2042 bfloat16_unpack_canonical(&pa, a, status);
2043 bfloat16_unpack_canonical(&pb, b, status);
ec961b81 2044 pr = parts_div(&pa, &pb, status);
8282310d 2045
ec961b81
RH
2046 return bfloat16_round_pack_canonical(pr, status);
2047}
2048
2049float128 QEMU_FLATTEN
2050float128_div(float128 a, float128 b, float_status *status)
2051{
2052 FloatParts128 pa, pb, *pr;
2053
2054 float128_unpack_canonical(&pa, a, status);
2055 float128_unpack_canonical(&pb, b, status);
2056 pr = parts_div(&pa, &pb, status);
2057
2058 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2059}
2060
6fed16b2
AB
2061/*
2062 * Float to Float conversions
2063 *
2064 * Returns the result of converting one float format to another. The
2065 * conversion is performed according to the IEC/IEEE Standard for
2066 * Binary Floating-Point Arithmetic.
2067 *
c3f1875e
RH
2068 * Usually this only needs to take care of raising invalid exceptions
2069 * and handling the conversion on NaNs.
6fed16b2
AB
2070 */
2071
c3f1875e
RH
2072static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2073{
2074 switch (a->cls) {
2075 case float_class_qnan:
2076 case float_class_snan:
2077 /*
2078 * There is no NaN in the destination format. Raise Invalid
2079 * and return a zero with the sign of the input NaN.
2080 */
2081 float_raise(float_flag_invalid, s);
2082 a->cls = float_class_zero;
2083 break;
2084
2085 case float_class_inf:
2086 /*
2087 * There is no Inf in the destination format. Raise Invalid
2088 * and return the maximum normal with the correct sign.
2089 */
2090 float_raise(float_flag_invalid, s);
2091 a->cls = float_class_normal;
2092 a->exp = float16_params_ahp.exp_max;
2093 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2094 float16_params_ahp.frac_size + 1);
2095 break;
2096
2097 case float_class_normal:
2098 case float_class_zero:
2099 break;
2100
2101 default:
2102 g_assert_not_reached();
2103 }
2104}
2105
2106static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2107{
2108 if (is_nan(a->cls)) {
2109 parts_return_nan(a, s);
6fed16b2 2110 }
6fed16b2
AB
2111}
2112
c3f1875e
RH
2113static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2114{
2115 if (is_nan(a->cls)) {
2116 parts_return_nan(a, s);
2117 }
2118}
2119
/*
 * Dispatch to parts64_float_to_float() or parts128_float_to_float()
 * according to the type of PARTS, via PARTS_GENERIC_64_128.
 */
#define parts_float_to_float(PARTS, STATUS) \
    PARTS_GENERIC_64_128(float_to_float, PARTS)(PARTS, STATUS)
2122
9882ccaf
RH
2123static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2124 float_status *s)
2125{
2126 a->cls = b->cls;
2127 a->sign = b->sign;
2128 a->exp = b->exp;
2129
2130 if (a->cls == float_class_normal) {
2131 frac_truncjam(a, b);
2132 } else if (is_nan(a->cls)) {
2133 /* Discard the low bits of the NaN. */
2134 a->frac = b->frac_hi;
2135 parts_return_nan(a, s);
2136 }
2137}
2138
2139static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2140 float_status *s)
2141{
2142 a->cls = b->cls;
2143 a->sign = b->sign;
2144 a->exp = b->exp;
2145 frac_widen(a, b);
2146
2147 if (is_nan(a->cls)) {
2148 parts_return_nan(a, s);
2149 }
2150}
2151
6fed16b2
AB
2152float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2153{
2154 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2155 FloatParts64 p;
98e256fc 2156
c3f1875e
RH
2157 float16a_unpack_canonical(&p, a, s, fmt16);
2158 parts_float_to_float(&p, s);
2159 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2160}
2161
2162float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2163{
2164 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2165 FloatParts64 p;
98e256fc 2166
c3f1875e
RH
2167 float16a_unpack_canonical(&p, a, s, fmt16);
2168 parts_float_to_float(&p, s);
2169 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2170}
2171
2172float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2173{
c3f1875e
RH
2174 FloatParts64 p;
2175 const FloatFmt *fmt;
98e256fc 2176
c3f1875e
RH
2177 float32_unpack_canonical(&p, a, s);
2178 if (ieee) {
2179 parts_float_to_float(&p, s);
2180 fmt = &float16_params;
2181 } else {
2182 parts_float_to_ahp(&p, s);
2183 fmt = &float16_params_ahp;
2184 }
2185 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2186}
2187
21381dcf
MK
2188static float64 QEMU_SOFTFLOAT_ATTR
2189soft_float32_to_float64(float32 a, float_status *s)
6fed16b2 2190{
c3f1875e 2191 FloatParts64 p;
98e256fc 2192
c3f1875e
RH
2193 float32_unpack_canonical(&p, a, s);
2194 parts_float_to_float(&p, s);
2195 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2196}
2197
21381dcf
MK
2198float64 float32_to_float64(float32 a, float_status *s)
2199{
2200 if (likely(float32_is_normal(a))) {
2201 /* Widening conversion can never produce inexact results. */
2202 union_float32 uf;
2203 union_float64 ud;
2204 uf.s = a;
2205 ud.h = uf.h;
2206 return ud.s;
2207 } else if (float32_is_zero(a)) {
2208 return float64_set_sign(float64_zero, float32_is_neg(a));
2209 } else {
2210 return soft_float32_to_float64(a, s);
2211 }
2212}
2213
6fed16b2
AB
2214float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2215{
c3f1875e
RH
2216 FloatParts64 p;
2217 const FloatFmt *fmt;
98e256fc 2218
c3f1875e
RH
2219 float64_unpack_canonical(&p, a, s);
2220 if (ieee) {
2221 parts_float_to_float(&p, s);
2222 fmt = &float16_params;
2223 } else {
2224 parts_float_to_ahp(&p, s);
2225 fmt = &float16_params_ahp;
2226 }
2227 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2228}
2229
2230float32 float64_to_float32(float64 a, float_status *s)
2231{
c3f1875e 2232 FloatParts64 p;
98e256fc 2233
c3f1875e
RH
2234 float64_unpack_canonical(&p, a, s);
2235 parts_float_to_float(&p, s);
2236 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2237}
2238
34f0c0a9
LZ
2239float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2240{
c3f1875e 2241 FloatParts64 p;
98e256fc 2242
c3f1875e
RH
2243 bfloat16_unpack_canonical(&p, a, s);
2244 parts_float_to_float(&p, s);
2245 return float32_round_pack_canonical(&p, s);
34f0c0a9
LZ
2246}
2247
2248float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2249{
c3f1875e 2250 FloatParts64 p;
98e256fc 2251
c3f1875e
RH
2252 bfloat16_unpack_canonical(&p, a, s);
2253 parts_float_to_float(&p, s);
2254 return float64_round_pack_canonical(&p, s);
34f0c0a9
LZ
2255}
2256
2257bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2258{
c3f1875e 2259 FloatParts64 p;
98e256fc 2260
c3f1875e
RH
2261 float32_unpack_canonical(&p, a, s);
2262 parts_float_to_float(&p, s);
2263 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2264}
2265
2266bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2267{
c3f1875e 2268 FloatParts64 p;
98e256fc 2269
c3f1875e
RH
2270 float64_unpack_canonical(&p, a, s);
2271 parts_float_to_float(&p, s);
2272 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2273}
2274
9882ccaf
RH
2275float32 float128_to_float32(float128 a, float_status *s)
2276{
2277 FloatParts64 p64;
2278 FloatParts128 p128;
2279
2280 float128_unpack_canonical(&p128, a, s);
2281 parts_float_to_float_narrow(&p64, &p128, s);
2282 return float32_round_pack_canonical(&p64, s);
2283}
2284
2285float64 float128_to_float64(float128 a, float_status *s)
2286{
2287 FloatParts64 p64;
2288 FloatParts128 p128;
2289
2290 float128_unpack_canonical(&p128, a, s);
2291 parts_float_to_float_narrow(&p64, &p128, s);
2292 return float64_round_pack_canonical(&p64, s);
2293}
2294
2295float128 float32_to_float128(float32 a, float_status *s)
2296{
2297 FloatParts64 p64;
2298 FloatParts128 p128;
2299
2300 float32_unpack_canonical(&p64, a, s);
2301 parts_float_to_float_widen(&p128, &p64, s);
2302 return float128_round_pack_canonical(&p128, s);
2303}
2304
2305float128 float64_to_float128(float64 a, float_status *s)
2306{
2307 FloatParts64 p64;
2308 FloatParts128 p128;
2309
2310 float64_unpack_canonical(&p64, a, s);
2311 parts_float_to_float_widen(&p128, &p64, s);
2312 return float128_round_pack_canonical(&p128, s);
2313}
2314
dbe4d53a 2315/*
afc34931 2316 * Round to integral value
dbe4d53a
AB
2317 */
2318
dbe4d53a
AB
2319float16 float16_round_to_int(float16 a, float_status *s)
2320{
afc34931 2321 FloatParts64 p;
98e256fc 2322
afc34931
RH
2323 float16_unpack_canonical(&p, a, s);
2324 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2325 return float16_round_pack_canonical(&p, s);
dbe4d53a
AB
2326}
2327
2328float32 float32_round_to_int(float32 a, float_status *s)
2329{
afc34931 2330 FloatParts64 p;
98e256fc 2331
afc34931
RH
2332 float32_unpack_canonical(&p, a, s);
2333 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2334 return float32_round_pack_canonical(&p, s);
dbe4d53a
AB
2335}
2336
2337float64 float64_round_to_int(float64 a, float_status *s)
2338{
afc34931 2339 FloatParts64 p;
98e256fc 2340
afc34931
RH
2341 float64_unpack_canonical(&p, a, s);
2342 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2343 return float64_round_pack_canonical(&p, s);
dbe4d53a
AB
2344}
2345
34f0c0a9
LZ
2346bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2347{
afc34931 2348 FloatParts64 p;
98e256fc 2349
afc34931
RH
2350 bfloat16_unpack_canonical(&p, a, s);
2351 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2352 return bfloat16_round_pack_canonical(&p, s);
2353}
2354
2355float128 float128_round_to_int(float128 a, float_status *s)
2356{
2357 FloatParts128 p;
2358
2359 float128_unpack_canonical(&p, a, s);
2360 parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2361 return float128_round_pack_canonical(&p, s);
34f0c0a9
LZ
2362}
2363
ab52f973 2364/*
463b3f0d
RH
2365 * Floating-point to signed integer conversions
2366 */
ab52f973 2367
0d93d8ec
FC
2368int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2369 float_status *s)
2370{
98e256fc
RH
2371 FloatParts64 p;
2372
2373 float16_unpack_canonical(&p, a, s);
463b3f0d 2374 return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
0d93d8ec
FC
2375}
2376
3dede407 2377int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2378 float_status *s)
2379{
98e256fc
RH
2380 FloatParts64 p;
2381
2382 float16_unpack_canonical(&p, a, s);
463b3f0d 2383 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2384}
2385
3dede407 2386int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2387 float_status *s)
2388{
98e256fc
RH
2389 FloatParts64 p;
2390
2391 float16_unpack_canonical(&p, a, s);
463b3f0d 2392 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2393}
2394
3dede407 2395int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2396 float_status *s)
2397{
98e256fc
RH
2398 FloatParts64 p;
2399
2400 float16_unpack_canonical(&p, a, s);
463b3f0d 2401 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2402}
2403
3dede407 2404int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2405 float_status *s)
2406{
98e256fc
RH
2407 FloatParts64 p;
2408
2409 float32_unpack_canonical(&p, a, s);
463b3f0d 2410 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2411}
2412
3dede407 2413int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2414 float_status *s)
2415{
98e256fc
RH
2416 FloatParts64 p;
2417
2418 float32_unpack_canonical(&p, a, s);
463b3f0d 2419 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2420}
2421
3dede407 2422int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2423 float_status *s)
2424{
98e256fc
RH
2425 FloatParts64 p;
2426
2427 float32_unpack_canonical(&p, a, s);
463b3f0d 2428 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2429}
2430
3dede407 2431int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2432 float_status *s)
2433{
98e256fc
RH
2434 FloatParts64 p;
2435
2436 float64_unpack_canonical(&p, a, s);
463b3f0d 2437 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2438}
2439
3dede407 2440int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2441 float_status *s)
2442{
98e256fc
RH
2443 FloatParts64 p;
2444
2445 float64_unpack_canonical(&p, a, s);
463b3f0d 2446 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2447}
2448
3dede407 2449int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2450 float_status *s)
2451{
98e256fc
RH
2452 FloatParts64 p;
2453
2454 float64_unpack_canonical(&p, a, s);
463b3f0d
RH
2455 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2456}
2457
2458int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2459 float_status *s)
2460{
2461 FloatParts64 p;
2462
2463 bfloat16_unpack_canonical(&p, a, s);
2464 return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2465}
2466
2467int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2468 float_status *s)
2469{
2470 FloatParts64 p;
2471
2472 bfloat16_unpack_canonical(&p, a, s);
2473 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2474}
2475
2476int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2477 float_status *s)
2478{
2479 FloatParts64 p;
2480
2481 bfloat16_unpack_canonical(&p, a, s);
2482 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2483}
2484
2485static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
2486 int scale, float_status *s)
2487{
2488 FloatParts128 p;
2489
2490 float128_unpack_canonical(&p, a, s);
2491 return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2492}
2493
2494static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
2495 int scale, float_status *s)
2496{
2497 FloatParts128 p;
2498
2499 float128_unpack_canonical(&p, a, s);
2500 return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2501}
2502
0d93d8ec
FC
2503int8_t float16_to_int8(float16 a, float_status *s)
2504{
2505 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2506}
2507
2f6c74be
RH
2508int16_t float16_to_int16(float16 a, float_status *s)
2509{
2510 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2511}
2512
2513int32_t float16_to_int32(float16 a, float_status *s)
2514{
2515 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2516}
2517
2518int64_t float16_to_int64(float16 a, float_status *s)
2519{
2520 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2521}
2522
2523int16_t float32_to_int16(float32 a, float_status *s)
2524{
2525 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2526}
2527
2528int32_t float32_to_int32(float32 a, float_status *s)
2529{
2530 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2531}
2532
2533int64_t float32_to_int64(float32 a, float_status *s)
2534{
2535 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2536}
2537
2538int16_t float64_to_int16(float64 a, float_status *s)
2539{
2540 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2541}
2542
2543int32_t float64_to_int32(float64 a, float_status *s)
2544{
2545 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2546}
2547
2548int64_t float64_to_int64(float64 a, float_status *s)
2549{
2550 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2551}
2552
463b3f0d
RH
2553int32_t float128_to_int32(float128 a, float_status *s)
2554{
2555 return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2556}
2557
2558int64_t float128_to_int64(float128 a, float_status *s)
2559{
2560 return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2561}
2562
2f6c74be
RH
2563int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2564{
2565 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2566}
2567
2568int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2569{
2570 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2571}
2572
2573int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2574{
2575 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2576}
2577
2f6c74be
RH
2578int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2579{
2580 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2581}
ab52f973 2582
2f6c74be
RH
2583int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2584{
2585 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2586}
2587
2588int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2589{
2590 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2591}
2592
2593int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2594{
2595 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2596}
ab52f973 2597
2f6c74be
RH
2598int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2599{
2600 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2601}
ab52f973 2602
2f6c74be
RH
2603int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2604{
2605 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2606}
ab52f973 2607
463b3f0d 2608int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
34f0c0a9 2609{
463b3f0d 2610 return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2611}
2612
463b3f0d 2613int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
34f0c0a9 2614{
463b3f0d 2615 return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
34f0c0a9
LZ
2616}
2617
2618int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2619{
2620 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2621}
2622
2623int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2624{
2625 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2626}
2627
2628int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2629{
2630 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2631}
2632
2633int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2634{
2635 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2636}
2637
2638int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2639{
2640 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2641}
2642
2643int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2644{
2645 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2646}
2647
ab52f973
AB
2648/*
2649 * Returns the result of converting the floating-point value `a' to
2650 * the unsigned integer format. The conversion is performed according
2651 * to the IEC/IEEE Standard for Binary Floating-Point
2652 * Arithmetic---which means in particular that the conversion is
2653 * rounded according to the current rounding mode. If `a' is a NaN,
2654 * the largest unsigned integer is returned. Otherwise, if the
2655 * conversion overflows, the largest unsigned integer is returned. If
2656 * `a' is negative, the result is rounded and zero is returned;
2657 * values that do not round to zero will raise the inexact exception
2658 * flag.
2659 */
2660
afc34931 2661static uint64_t round_to_uint_and_pack(FloatParts64 p, FloatRoundMode rmode,
3dede407
RH
2662 int scale, uint64_t max,
2663 float_status *s)
ab52f973 2664{
afc34931 2665 int flags = 0;
2f6c74be 2666 uint64_t r;
ab52f973
AB
2667
2668 switch (p.cls) {
2669 case float_class_snan:
2670 case float_class_qnan:
afc34931
RH
2671 flags = float_flag_invalid;
2672 r = max;
2673 break;
2674
ab52f973 2675 case float_class_inf:
afc34931
RH
2676 flags = float_flag_invalid;
2677 r = p.sign ? 0 : max;
2678 break;
2679
ab52f973
AB
2680 case float_class_zero:
2681 return 0;
afc34931 2682
ab52f973 2683 case float_class_normal:
afc34931
RH
2684 /* TODO: 62 = N - 2, frac_size for rounding */
2685 if (parts_round_to_int_normal(&p, rmode, scale, 62)) {
2686 flags = float_flag_inexact;
2687 if (p.cls == float_class_zero) {
2688 r = 0;
2689 break;
2690 }
ab52f973
AB
2691 }
2692
afc34931
RH
2693 if (p.sign) {
2694 flags = float_flag_invalid;
2695 r = 0;
2696 } else if (p.exp > DECOMPOSED_BINARY_POINT) {
2697 flags = float_flag_invalid;
2698 r = max;
ab52f973 2699 } else {
afc34931
RH
2700 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2701 if (r > max) {
2702 flags = float_flag_invalid;
2703 r = max;
2704 }
ab52f973 2705 }
afc34931 2706 break;
ab52f973 2707
ab52f973
AB
2708 default:
2709 g_assert_not_reached();
2710 }
afc34931
RH
2711
2712 float_raise(flags, s);
2713 return r;
ab52f973
AB
2714}
2715
0d93d8ec
FC
2716uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2717 float_status *s)
2718{
98e256fc
RH
2719 FloatParts64 p;
2720
2721 float16_unpack_canonical(&p, a, s);
2722 return round_to_uint_and_pack(p, rmode, scale, UINT8_MAX, s);
0d93d8ec
FC
2723}
2724
3dede407 2725uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2726 float_status *s)
2727{
98e256fc
RH
2728 FloatParts64 p;
2729
2730 float16_unpack_canonical(&p, a, s);
2731 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2732}
2733
3dede407 2734uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2735 float_status *s)
2736{
98e256fc
RH
2737 FloatParts64 p;
2738
2739 float16_unpack_canonical(&p, a, s);
2740 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2741}
2742
3dede407 2743uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2744 float_status *s)
2745{
98e256fc
RH
2746 FloatParts64 p;
2747
2748 float16_unpack_canonical(&p, a, s);
2749 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2750}
2751
3dede407 2752uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2753 float_status *s)
2754{
98e256fc
RH
2755 FloatParts64 p;
2756
2757 float32_unpack_canonical(&p, a, s);
2758 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2759}
2760
3dede407 2761uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2762 float_status *s)
2763{
98e256fc
RH
2764 FloatParts64 p;
2765
2766 float32_unpack_canonical(&p, a, s);
2767 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2768}
2769
3dede407 2770uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2771 float_status *s)
2772{
98e256fc
RH
2773 FloatParts64 p;
2774
2775 float32_unpack_canonical(&p, a, s);
2776 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2777}
2778
3dede407 2779uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2780 float_status *s)
2781{
98e256fc
RH
2782 FloatParts64 p;
2783
2784 float64_unpack_canonical(&p, a, s);
2785 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2786}
2787
3dede407 2788uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2789 float_status *s)
2790{
98e256fc
RH
2791 FloatParts64 p;
2792
2793 float64_unpack_canonical(&p, a, s);
2794 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2795}
2796
3dede407 2797uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2798 float_status *s)
2799{
98e256fc
RH
2800 FloatParts64 p;
2801
2802 float64_unpack_canonical(&p, a, s);
2803 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2804}
2805
0d93d8ec
FC
2806uint8_t float16_to_uint8(float16 a, float_status *s)
2807{
2808 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2809}
2810
2f6c74be
RH
2811uint16_t float16_to_uint16(float16 a, float_status *s)
2812{
2813 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2814}
2815
2816uint32_t float16_to_uint32(float16 a, float_status *s)
2817{
2818 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2819}
2820
2821uint64_t float16_to_uint64(float16 a, float_status *s)
2822{
2823 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2824}
2825
2826uint16_t float32_to_uint16(float32 a, float_status *s)
2827{
2828 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2829}
2830
2831uint32_t float32_to_uint32(float32 a, float_status *s)
2832{
2833 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2834}
2835
2836uint64_t float32_to_uint64(float32 a, float_status *s)
2837{
2838 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2839}
2840
2841uint16_t float64_to_uint16(float64 a, float_status *s)
2842{
2843 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2844}
2845
2846uint32_t float64_to_uint32(float64 a, float_status *s)
2847{
2848 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2849}
2850
2851uint64_t float64_to_uint64(float64 a, float_status *s)
2852{
2853 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2854}
2855
2856uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2857{
2858 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2859}
2860
2861uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2862{
2863 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2864}
2865
2866uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2867{
2868 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2869}
2870
2871uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2872{
2873 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2874}
2875
2876uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2877{
2878 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2879}
2880
2881uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2882{
2883 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2884}
2885
2886uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2887{
2888 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2889}
2890
2891uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2892{
2893 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2894}
2895
2896uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2897{
2898 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2899}
ab52f973 2900
34f0c0a9
LZ
2901/*
2902 * Returns the result of converting the bfloat16 value `a' to
2903 * the unsigned integer format.
2904 */
2905
2906uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2907 int scale, float_status *s)
2908{
98e256fc
RH
2909 FloatParts64 p;
2910
2911 bfloat16_unpack_canonical(&p, a, s);
2912 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
34f0c0a9
LZ
2913}
2914
2915uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2916 int scale, float_status *s)
2917{
98e256fc
RH
2918 FloatParts64 p;
2919
2920 bfloat16_unpack_canonical(&p, a, s);
2921 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
34f0c0a9
LZ
2922}
2923
2924uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
2925 int scale, float_status *s)
2926{
98e256fc
RH
2927 FloatParts64 p;
2928
2929 bfloat16_unpack_canonical(&p, a, s);
2930 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
34f0c0a9
LZ
2931}
2932
2933uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
2934{
2935 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2936}
2937
2938uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
2939{
2940 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2941}
2942
2943uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
2944{
2945 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2946}
2947
2948uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
2949{
2950 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2951}
2952
2953uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
2954{
2955 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2956}
2957
2958uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
2959{
2960 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2961}
2962
c02e1fb8
AB
2963/*
2964 * Integer to float conversions
2965 *
2966 * Returns the result of converting the two's complement integer `a'
2967 * to the floating-point format. The conversion is performed according
2968 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2969 */
2970
f8155c1d 2971static FloatParts64 int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 2972{
f8155c1d 2973 FloatParts64 r = { .sign = false };
2abdfe24 2974
c02e1fb8
AB
2975 if (a == 0) {
2976 r.cls = float_class_zero;
c02e1fb8 2977 } else {
2abdfe24
RH
2978 uint64_t f = a;
2979 int shift;
2980
2981 r.cls = float_class_normal;
c02e1fb8 2982 if (a < 0) {
2abdfe24 2983 f = -f;
c02e1fb8 2984 r.sign = true;
c02e1fb8 2985 }
e99c4373 2986 shift = clz64(f);
2abdfe24
RH
2987 scale = MIN(MAX(scale, -0x10000), 0x10000);
2988
2989 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
e99c4373 2990 r.frac = f << shift;
c02e1fb8
AB
2991 }
2992
2993 return r;
2994}
2995
2abdfe24 2996float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2997{
f8155c1d 2998 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 2999 return float16_round_pack_canonical(&pa, status);
c02e1fb8
AB
3000}
3001
2abdfe24
RH
3002float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3003{
3004 return int64_to_float16_scalbn(a, scale, status);
3005}
3006
3007float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3008{
3009 return int64_to_float16_scalbn(a, scale, status);
3010}
3011
3012float16 int64_to_float16(int64_t a, float_status *status)
3013{
3014 return int64_to_float16_scalbn(a, 0, status);
3015}
3016
c02e1fb8
AB
3017float16 int32_to_float16(int32_t a, float_status *status)
3018{
2abdfe24 3019 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3020}
3021
3022float16 int16_to_float16(int16_t a, float_status *status)
3023{
2abdfe24 3024 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3025}
3026
0d93d8ec
FC
3027float16 int8_to_float16(int8_t a, float_status *status)
3028{
3029 return int64_to_float16_scalbn(a, 0, status);
3030}
3031
2abdfe24 3032float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3033{
f8155c1d 3034 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 3035 return float32_round_pack_canonical(&pa, status);
c02e1fb8
AB
3036}
3037
2abdfe24
RH
3038float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3039{
3040 return int64_to_float32_scalbn(a, scale, status);
3041}
3042
3043float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3044{
3045 return int64_to_float32_scalbn(a, scale, status);
3046}
3047
3048float32 int64_to_float32(int64_t a, float_status *status)
3049{
3050 return int64_to_float32_scalbn(a, 0, status);
3051}
3052
c02e1fb8
AB
3053float32 int32_to_float32(int32_t a, float_status *status)
3054{
2abdfe24 3055 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3056}
3057
3058float32 int16_to_float32(int16_t a, float_status *status)
3059{
2abdfe24 3060 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3061}
3062
2abdfe24 3063float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3064{
f8155c1d 3065 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 3066 return float64_round_pack_canonical(&pa, status);
c02e1fb8
AB
3067}
3068
2abdfe24
RH
3069float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3070{
3071 return int64_to_float64_scalbn(a, scale, status);
3072}
3073
3074float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3075{
3076 return int64_to_float64_scalbn(a, scale, status);
3077}
3078
3079float64 int64_to_float64(int64_t a, float_status *status)
3080{
3081 return int64_to_float64_scalbn(a, 0, status);
3082}
3083
c02e1fb8
AB
3084float64 int32_to_float64(int32_t a, float_status *status)
3085{
2abdfe24 3086 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3087}
3088
3089float64 int16_to_float64(int16_t a, float_status *status)
3090{
2abdfe24 3091 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3092}
3093
34f0c0a9
LZ
3094/*
3095 * Returns the result of converting the two's complement integer `a'
3096 * to the bfloat16 format.
3097 */
3098
3099bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3100{
f8155c1d 3101 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 3102 return bfloat16_round_pack_canonical(&pa, status);
34f0c0a9
LZ
3103}
3104
3105bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3106{
3107 return int64_to_bfloat16_scalbn(a, scale, status);
3108}
3109
3110bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3111{
3112 return int64_to_bfloat16_scalbn(a, scale, status);
3113}
3114
3115bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3116{
3117 return int64_to_bfloat16_scalbn(a, 0, status);
3118}
3119
3120bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3121{
3122 return int64_to_bfloat16_scalbn(a, 0, status);
3123}
3124
3125bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3126{
3127 return int64_to_bfloat16_scalbn(a, 0, status);
3128}
c02e1fb8
AB
3129
3130/*
3131 * Unsigned Integer to float conversions
3132 *
3133 * Returns the result of converting the unsigned integer `a' to the
3134 * floating-point format. The conversion is performed according to the
3135 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3136 */
3137
f8155c1d 3138static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 3139{
f8155c1d 3140 FloatParts64 r = { .sign = false };
e99c4373 3141 int shift;
c02e1fb8
AB
3142
3143 if (a == 0) {
3144 r.cls = float_class_zero;
3145 } else {
2abdfe24 3146 scale = MIN(MAX(scale, -0x10000), 0x10000);
e99c4373 3147 shift = clz64(a);
c02e1fb8 3148 r.cls = float_class_normal;
e99c4373
RH
3149 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
3150 r.frac = a << shift;
c02e1fb8
AB
3151 }
3152
3153 return r;
3154}
3155
2abdfe24 3156float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3157{
f8155c1d 3158 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3159 return float16_round_pack_canonical(&pa, status);
c02e1fb8
AB
3160}
3161
2abdfe24
RH
3162float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3163{
3164 return uint64_to_float16_scalbn(a, scale, status);
3165}
3166
3167float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3168{
3169 return uint64_to_float16_scalbn(a, scale, status);
3170}
3171
3172float16 uint64_to_float16(uint64_t a, float_status *status)
3173{
3174 return uint64_to_float16_scalbn(a, 0, status);
3175}
3176
c02e1fb8
AB
3177float16 uint32_to_float16(uint32_t a, float_status *status)
3178{
2abdfe24 3179 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3180}
3181
3182float16 uint16_to_float16(uint16_t a, float_status *status)
3183{
2abdfe24 3184 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3185}
3186
0d93d8ec
FC
3187float16 uint8_to_float16(uint8_t a, float_status *status)
3188{
3189 return uint64_to_float16_scalbn(a, 0, status);
3190}
3191
2abdfe24 3192float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3193{
f8155c1d 3194 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3195 return float32_round_pack_canonical(&pa, status);
c02e1fb8
AB
3196}
3197
2abdfe24
RH
3198float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3199{
3200 return uint64_to_float32_scalbn(a, scale, status);
3201}
3202
3203float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3204{
3205 return uint64_to_float32_scalbn(a, scale, status);
3206}
3207
3208float32 uint64_to_float32(uint64_t a, float_status *status)
3209{
3210 return uint64_to_float32_scalbn(a, 0, status);
3211}
3212
c02e1fb8
AB
3213float32 uint32_to_float32(uint32_t a, float_status *status)
3214{
2abdfe24 3215 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3216}
3217
3218float32 uint16_to_float32(uint16_t a, float_status *status)
3219{
2abdfe24 3220 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3221}
3222
2abdfe24 3223float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3224{
f8155c1d 3225 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3226 return float64_round_pack_canonical(&pa, status);
c02e1fb8
AB
3227}
3228
2abdfe24
RH
3229float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3230{
3231 return uint64_to_float64_scalbn(a, scale, status);
3232}
3233
3234float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3235{
3236 return uint64_to_float64_scalbn(a, scale, status);
3237}
3238
3239float64 uint64_to_float64(uint64_t a, float_status *status)
3240{
3241 return uint64_to_float64_scalbn(a, 0, status);
3242}
3243
c02e1fb8
AB
3244float64 uint32_to_float64(uint32_t a, float_status *status)
3245{
2abdfe24 3246 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3247}
3248
3249float64 uint16_to_float64(uint16_t a, float_status *status)
3250{
2abdfe24 3251 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3252}
3253
34f0c0a9
LZ
3254/*
3255 * Returns the result of converting the unsigned integer `a' to the
3256 * bfloat16 format.
3257 */
3258
3259bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3260{
f8155c1d 3261 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3262 return bfloat16_round_pack_canonical(&pa, status);
34f0c0a9
LZ
3263}
3264
3265bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3266{
3267 return uint64_to_bfloat16_scalbn(a, scale, status);
3268}
3269
3270bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3271{
3272 return uint64_to_bfloat16_scalbn(a, scale, status);
3273}
3274
3275bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3276{
3277 return uint64_to_bfloat16_scalbn(a, 0, status);
3278}
3279
3280bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3281{
3282 return uint64_to_bfloat16_scalbn(a, 0, status);
3283}
3284
3285bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3286{
3287 return uint64_to_bfloat16_scalbn(a, 0, status);
3288}
3289
89360067
AB
3290/* Float Min/Max */
3291/* min() and max() functions. These can't be implemented as
3292 * 'compare and pick one input' because that would mishandle
3293 * NaNs and +0 vs -0.
3294 *
3295 * minnum() and maxnum() functions. These are similar to the min()
3296 * and max() functions but if one of the arguments is a QNaN and
3297 * the other is numerical then the numerical argument is returned.
3298 * SNaNs will get quietened before being returned.
3299 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3300 * and maxNum() operations. min() and max() are the typical min/max
3301 * semantics provided by many CPUs which predate that specification.
3302 *
3303 * minnummag() and maxnummag() functions correspond to minNumMag()
3304 * and maxNumMag() from IEEE 754-2008.
3305 */
f8155c1d 3306static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
89360067
AB
3307 bool ieee, bool ismag, float_status *s)
3308{
3309 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
3310 if (ieee) {
3311 /* Takes two floating-point values `a' and `b', one of
3312 * which is a NaN, and returns the appropriate NaN
3313 * result. If either `a' or `b' is a signaling NaN,
3314 * the invalid exception is raised.
3315 */
3316 if (is_snan(a.cls) || is_snan(b.cls)) {
22c355f4 3317 return *parts_pick_nan(&a, &b, s);
89360067
AB
3318 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
3319 return b;
3320 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
3321 return a;
3322 }
3323 }
22c355f4 3324 return *parts_pick_nan(&a, &b, s);
89360067
AB
3325 } else {
3326 int a_exp, b_exp;
89360067
AB
3327
3328 switch (a.cls) {
3329 case float_class_normal:
3330 a_exp = a.exp;
3331 break;
3332 case float_class_inf:
3333 a_exp = INT_MAX;
3334 break;
3335 case float_class_zero:
3336 a_exp = INT_MIN;
3337 break;
3338 default:
3339 g_assert_not_reached();
3340 break;
3341 }
3342 switch (b.cls) {
3343 case float_class_normal:
3344 b_exp = b.exp;
3345 break;
3346 case float_class_inf:
3347 b_exp = INT_MAX;
3348 break;
3349 case float_class_zero:
3350 b_exp = INT_MIN;
3351 break;
3352 default:
3353 g_assert_not_reached();
3354 break;
3355 }
3356
6245327a
EC
3357 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
3358 bool a_less = a_exp < b_exp;
3359 if (a_exp == b_exp) {
3360 a_less = a.frac < b.frac;
3361 }
3362 return a_less ^ ismin ? b : a;
89360067
AB
3363 }
3364
6245327a 3365 if (a.sign == b.sign) {
89360067
AB
3366 bool a_less = a_exp < b_exp;
3367 if (a_exp == b_exp) {
3368 a_less = a.frac < b.frac;
3369 }
6245327a 3370 return a.sign ^ a_less ^ ismin ? b : a;
89360067 3371 } else {
6245327a 3372 return a.sign ^ ismin ? b : a;
89360067
AB
3373 }
3374 }
3375}
3376
3377#define MINMAX(sz, name, ismin, isiee, ismag) \
3378float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
3379 float_status *s) \
3380{ \
98e256fc
RH
3381 FloatParts64 pa, pb, pr; \
3382 float ## sz ## _unpack_canonical(&pa, a, s); \
3383 float ## sz ## _unpack_canonical(&pb, b, s); \
3384 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
e293e927 3385 return float ## sz ## _round_pack_canonical(&pr, s); \
89360067
AB
3386}
3387
3388MINMAX(16, min, true, false, false)
3389MINMAX(16, minnum, true, true, false)
3390MINMAX(16, minnummag, true, true, true)
3391MINMAX(16, max, false, false, false)
3392MINMAX(16, maxnum, false, true, false)
3393MINMAX(16, maxnummag, false, true, true)
3394
3395MINMAX(32, min, true, false, false)
3396MINMAX(32, minnum, true, true, false)
3397MINMAX(32, minnummag, true, true, true)
3398MINMAX(32, max, false, false, false)
3399MINMAX(32, maxnum, false, true, false)
3400MINMAX(32, maxnummag, false, true, true)
3401
3402MINMAX(64, min, true, false, false)
3403MINMAX(64, minnum, true, true, false)
3404MINMAX(64, minnummag, true, true, true)
3405MINMAX(64, max, false, false, false)
3406MINMAX(64, maxnum, false, true, false)
3407MINMAX(64, maxnummag, false, true, true)
3408
3409#undef MINMAX
3410
8282310d
LZ
3411#define BF16_MINMAX(name, ismin, isiee, ismag) \
3412bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
3413{ \
98e256fc
RH
3414 FloatParts64 pa, pb, pr; \
3415 bfloat16_unpack_canonical(&pa, a, s); \
3416 bfloat16_unpack_canonical(&pb, b, s); \
3417 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
e293e927 3418 return bfloat16_round_pack_canonical(&pr, s); \
8282310d
LZ
3419}
3420
3421BF16_MINMAX(min, true, false, false)
3422BF16_MINMAX(minnum, true, true, false)
3423BF16_MINMAX(minnummag, true, true, true)
3424BF16_MINMAX(max, false, false, false)
3425BF16_MINMAX(maxnum, false, true, false)
3426BF16_MINMAX(maxnummag, false, true, true)
3427
3428#undef BF16_MINMAX
3429
0c4c9092 3430/* Floating point compare */
f8155c1d 3431static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
71bfd65c 3432 float_status *s)
0c4c9092
AB
3433{
3434 if (is_nan(a.cls) || is_nan(b.cls)) {
3435 if (!is_quiet ||
3436 a.cls == float_class_snan ||
3437 b.cls == float_class_snan) {
d82f3b2d 3438 float_raise(float_flag_invalid, s);
0c4c9092
AB
3439 }
3440 return float_relation_unordered;
3441 }
3442
3443 if (a.cls == float_class_zero) {
3444 if (b.cls == float_class_zero) {
3445 return float_relation_equal;
3446 }
3447 return b.sign ? float_relation_greater : float_relation_less;
3448 } else if (b.cls == float_class_zero) {
3449 return a.sign ? float_relation_less : float_relation_greater;
3450 }
3451
3452 /* The only really important thing about infinity is its sign. If
3453 * both are infinities the sign marks the smallest of the two.
3454 */
3455 if (a.cls == float_class_inf) {
3456 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3457 return float_relation_equal;
3458 }
3459 return a.sign ? float_relation_less : float_relation_greater;
3460 } else if (b.cls == float_class_inf) {
3461 return b.sign ? float_relation_greater : float_relation_less;
3462 }
3463
3464 if (a.sign != b.sign) {
3465 return a.sign ? float_relation_less : float_relation_greater;
3466 }
3467
3468 if (a.exp == b.exp) {
3469 if (a.frac == b.frac) {
3470 return float_relation_equal;
3471 }
3472 if (a.sign) {
3473 return a.frac > b.frac ?
3474 float_relation_less : float_relation_greater;
3475 } else {
3476 return a.frac > b.frac ?
3477 float_relation_greater : float_relation_less;
3478 }
3479 } else {
3480 if (a.sign) {
3481 return a.exp > b.exp ? float_relation_less : float_relation_greater;
3482 } else {
3483 return a.exp > b.exp ? float_relation_greater : float_relation_less;
3484 }
3485 }
3486}
3487
d9fe9db9
EC
3488#define COMPARE(name, attr, sz) \
3489static int attr \
3490name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
0c4c9092 3491{ \
98e256fc
RH
3492 FloatParts64 pa, pb; \
3493 float ## sz ## _unpack_canonical(&pa, a, s); \
3494 float ## sz ## _unpack_canonical(&pb, b, s); \
d9fe9db9 3495 return compare_floats(pa, pb, is_quiet, s); \
0c4c9092
AB
3496}
3497
d9fe9db9
EC
3498COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
3499COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
3500COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
0c4c9092
AB
3501
3502#undef COMPARE
3503
71bfd65c 3504FloatRelation float16_compare(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3505{
3506 return soft_f16_compare(a, b, false, s);
3507}
3508
71bfd65c 3509FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3510{
3511 return soft_f16_compare(a, b, true, s);
3512}
3513
71bfd65c 3514static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3515f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
3516{
3517 union_float32 ua, ub;
3518
3519 ua.s = xa;
3520 ub.s = xb;
3521
3522 if (QEMU_NO_HARDFLOAT) {
3523 goto soft;
3524 }
3525
3526 float32_input_flush2(&ua.s, &ub.s, s);
3527 if (isgreaterequal(ua.h, ub.h)) {
3528 if (isgreater(ua.h, ub.h)) {
3529 return float_relation_greater;
3530 }
3531 return float_relation_equal;
3532 }
3533 if (likely(isless(ua.h, ub.h))) {
3534 return float_relation_less;
3535 }
3536 /* The only condition remaining is unordered.
3537 * Fall through to set flags.
3538 */
3539 soft:
3540 return soft_f32_compare(ua.s, ub.s, is_quiet, s);
3541}
3542
71bfd65c 3543FloatRelation float32_compare(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3544{
3545 return f32_compare(a, b, false, s);
3546}
3547
71bfd65c 3548FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3549{
3550 return f32_compare(a, b, true, s);
3551}
3552
71bfd65c 3553static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3554f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3555{
3556 union_float64 ua, ub;
3557
3558 ua.s = xa;
3559 ub.s = xb;
3560
3561 if (QEMU_NO_HARDFLOAT) {
3562 goto soft;
3563 }
3564
3565 float64_input_flush2(&ua.s, &ub.s, s);
3566 if (isgreaterequal(ua.h, ub.h)) {
3567 if (isgreater(ua.h, ub.h)) {
3568 return float_relation_greater;
3569 }
3570 return float_relation_equal;
3571 }
3572 if (likely(isless(ua.h, ub.h))) {
3573 return float_relation_less;
3574 }
3575 /* The only condition remaining is unordered.
3576 * Fall through to set flags.
3577 */
3578 soft:
3579 return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3580}
3581
71bfd65c 3582FloatRelation float64_compare(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3583{
3584 return f64_compare(a, b, false, s);
3585}
3586
71bfd65c 3587FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3588{
3589 return f64_compare(a, b, true, s);
3590}
3591
8282310d
LZ
3592static FloatRelation QEMU_FLATTEN
3593soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
3594{
98e256fc
RH
3595 FloatParts64 pa, pb;
3596
3597 bfloat16_unpack_canonical(&pa, a, s);
3598 bfloat16_unpack_canonical(&pb, b, s);
8282310d
LZ
3599 return compare_floats(pa, pb, is_quiet, s);
3600}
3601
3602FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3603{
3604 return soft_bf16_compare(a, b, false, s);
3605}
3606
3607FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3608{
3609 return soft_bf16_compare(a, b, true, s);
3610}
3611
0bfc9f19 3612/* Multiply A by 2 raised to the power N. */
f8155c1d 3613static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
0bfc9f19
AB
3614{
3615 if (unlikely(is_nan(a.cls))) {
7c45bad8 3616 parts_return_nan(&a, s);
0bfc9f19
AB
3617 }
3618 if (a.cls == float_class_normal) {
f8155c1d 3619 /* The largest float type (even though not supported by FloatParts64)
ce8d4082
RH
3620 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3621 * still allows rounding to infinity, without allowing overflow
f8155c1d 3622 * within the int32_t that backs FloatParts64.exp.
ce8d4082
RH
3623 */
3624 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
3625 a.exp += n;
3626 }
3627 return a;
3628}
3629
3630float16 float16_scalbn(float16 a, int n, float_status *status)
3631{
98e256fc
RH
3632 FloatParts64 pa, pr;
3633
3634 float16_unpack_canonical(&pa, a, status);
3635 pr = scalbn_decomposed(pa, n, status);
e293e927 3636 return float16_round_pack_canonical(&pr, status);
0bfc9f19
AB
3637}
3638
3639float32 float32_scalbn(float32 a, int n, float_status *status)
3640{
98e256fc
RH
3641 FloatParts64 pa, pr;
3642
3643 float32_unpack_canonical(&pa, a, status);
3644 pr = scalbn_decomposed(pa, n, status);
e293e927 3645 return float32_round_pack_canonical(&pr, status);
0bfc9f19
AB
3646}
3647
3648float64 float64_scalbn(float64 a, int n, float_status *status)
3649{
98e256fc
RH
3650 FloatParts64 pa, pr;
3651
3652 float64_unpack_canonical(&pa, a, status);
3653 pr = scalbn_decomposed(pa, n, status);
e293e927 3654 return float64_round_pack_canonical(&pr, status);
0bfc9f19
AB
3655}
3656
8282310d
LZ
3657bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3658{
98e256fc
RH
3659 FloatParts64 pa, pr;
3660
3661 bfloat16_unpack_canonical(&pa, a, status);
3662 pr = scalbn_decomposed(pa, n, status);
e293e927 3663 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
3664}
3665
c13bb2da
AB
3666/*
3667 * Square Root
3668 *
3669 * The old softfloat code did an approximation step before zeroing in
3670 * on the final result. However for simplicity we just compute the
3671 * square root by iterating down from the implicit bit to enough extra
3672 * bits to ensure we get a correctly rounded result.
3673 *
3674 * This does mean however the calculation is slower than before,
3675 * especially for 64 bit floats.
3676 */
3677
f8155c1d 3678static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
c13bb2da
AB
3679{
3680 uint64_t a_frac, r_frac, s_frac;
3681 int bit, last_bit;
3682
3683 if (is_nan(a.cls)) {
7c45bad8
RH
3684 parts_return_nan(&a, s);
3685 return a;
c13bb2da
AB
3686 }
3687 if (a.cls == float_class_zero) {
3688 return a; /* sqrt(+-0) = +-0 */
3689 }
3690 if (a.sign) {
d82f3b2d 3691 float_raise(float_flag_invalid, s);
0fc07cad
RH
3692 parts_default_nan(&a, s);
3693 return a;
c13bb2da
AB
3694 }
3695 if (a.cls == float_class_inf) {
3696 return a; /* sqrt(+inf) = +inf */
3697 }
3698
3699 assert(a.cls == float_class_normal);
3700
3701 /* We need two overflow bits at the top. Adding room for that is a
3702 * right shift. If the exponent is odd, we can discard the low bit
3703 * by multiplying the fraction by 2; that's a left shift. Combine
e99c4373 3704 * those and we shift right by 1 if the exponent is odd, otherwise 2.
c13bb2da 3705 */
e99c4373 3706 a_frac = a.frac >> (2 - (a.exp & 1));
c13bb2da
AB
3707 a.exp >>= 1;
3708
3709 /* Bit-by-bit computation of sqrt. */
3710 r_frac = 0;
3711 s_frac = 0;
3712
3713 /* Iterate from implicit bit down to the 3 extra bits to compute a
e99c4373
RH
3714 * properly rounded result. Remember we've inserted two more bits
3715 * at the top, so these positions are two less.
c13bb2da 3716 */
e99c4373 3717 bit = DECOMPOSED_BINARY_POINT - 2;
c13bb2da
AB
3718 last_bit = MAX(p->frac_shift - 4, 0);
3719 do {
3720 uint64_t q = 1ULL << bit;
3721 uint64_t t_frac = s_frac + q;
3722 if (t_frac <= a_frac) {
3723 s_frac = t_frac + q;
3724 a_frac -= t_frac;
3725 r_frac += q;
3726 }
3727 a_frac <<= 1;
3728 } while (--bit >= last_bit);
3729
3730 /* Undo the right shift done above. If there is any remaining
3731 * fraction, the result is inexact. Set the sticky bit.
3732 */
e99c4373 3733 a.frac = (r_frac << 2) + (a_frac != 0);
c13bb2da
AB
3734
3735 return a;
3736}
3737
97ff87c0 3738float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da 3739{
98e256fc
RH
3740 FloatParts64 pa, pr;
3741
3742 float16_unpack_canonical(&pa, a, status);
3743 pr = sqrt_float(pa, status, &float16_params);
e293e927 3744 return float16_round_pack_canonical(&pr, status);
c13bb2da
AB
3745}
3746
f131bae8
EC
3747static float32 QEMU_SOFTFLOAT_ATTR
3748soft_f32_sqrt(float32 a, float_status *status)
c13bb2da 3749{
98e256fc
RH
3750 FloatParts64 pa, pr;
3751
3752 float32_unpack_canonical(&pa, a, status);
3753 pr = sqrt_float(pa, status, &float32_params);
e293e927 3754 return float32_round_pack_canonical(&pr, status);
c13bb2da
AB
3755}
3756
f131bae8
EC
3757static float64 QEMU_SOFTFLOAT_ATTR
3758soft_f64_sqrt(float64 a, float_status *status)
c13bb2da 3759{
98e256fc
RH
3760 FloatParts64 pa, pr;
3761
3762 float64_unpack_canonical(&pa, a, status);
3763 pr = sqrt_float(pa, status, &float64_params);
e293e927 3764 return float64_round_pack_canonical(&pr, status);
c13bb2da
AB
3765}
3766
f131bae8
EC
3767float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3768{
3769 union_float32 ua, ur;
3770
3771 ua.s = xa;
3772 if (unlikely(!can_use_fpu(s))) {
3773 goto soft;
3774 }
3775
3776 float32_input_flush1(&ua.s, s);
3777 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3778 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3779 fpclassify(ua.h) == FP_ZERO) ||
3780 signbit(ua.h))) {
3781 goto soft;
3782 }
3783 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3784 float32_is_neg(ua.s))) {
3785 goto soft;
3786 }
3787 ur.h = sqrtf(ua.h);
3788 return ur.s;
3789
3790 soft:
3791 return soft_f32_sqrt(ua.s, s);
3792}
3793
3794float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3795{
3796 union_float64 ua, ur;
3797
3798 ua.s = xa;
3799 if (unlikely(!can_use_fpu(s))) {
3800 goto soft;
3801 }
3802
3803 float64_input_flush1(&ua.s, s);
3804 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3805 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3806 fpclassify(ua.h) == FP_ZERO) ||
3807 signbit(ua.h))) {
3808 goto soft;
3809 }
3810 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3811 float64_is_neg(ua.s))) {
3812 goto soft;
3813 }
3814 ur.h = sqrt(ua.h);
3815 return ur.s;
3816
3817 soft:
3818 return soft_f64_sqrt(ua.s, s);
3819}
3820
8282310d
LZ
3821bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3822{
98e256fc
RH
3823 FloatParts64 pa, pr;
3824
3825 bfloat16_unpack_canonical(&pa, a, status);
3826 pr = sqrt_float(pa, status, &bfloat16_params);
e293e927 3827 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
3828}
3829
0218a16e
RH
3830/*----------------------------------------------------------------------------
3831| The pattern for a default generated NaN.
3832*----------------------------------------------------------------------------*/
3833
3834float16 float16_default_nan(float_status *status)
3835{
0fc07cad
RH
3836 FloatParts64 p;
3837
3838 parts_default_nan(&p, status);
0218a16e 3839 p.frac >>= float16_params.frac_shift;
71fd178e 3840 return float16_pack_raw(&p);
0218a16e
RH
3841}
3842
3843float32 float32_default_nan(float_status *status)
3844{
0fc07cad
RH
3845 FloatParts64 p;
3846
3847 parts_default_nan(&p, status);
0218a16e 3848 p.frac >>= float32_params.frac_shift;
71fd178e 3849 return float32_pack_raw(&p);
0218a16e
RH
3850}
3851
3852float64 float64_default_nan(float_status *status)
3853{
0fc07cad
RH
3854 FloatParts64 p;
3855
3856 parts_default_nan(&p, status);
0218a16e 3857 p.frac >>= float64_params.frac_shift;
71fd178e 3858 return float64_pack_raw(&p);
0218a16e
RH
3859}
3860
3861float128 float128_default_nan(float_status *status)
3862{
e9034ea8 3863 FloatParts128 p;
0218a16e 3864
0fc07cad 3865 parts_default_nan(&p, status);
e9034ea8
RH
3866 frac_shr(&p, float128_params.frac_shift);
3867 return float128_pack_raw(&p);
0218a16e 3868}
c13bb2da 3869
8282310d
LZ
3870bfloat16 bfloat16_default_nan(float_status *status)
3871{
0fc07cad
RH
3872 FloatParts64 p;
3873
3874 parts_default_nan(&p, status);
8282310d 3875 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3876 return bfloat16_pack_raw(&p);
8282310d
LZ
3877}
3878
158142c2 3879/*----------------------------------------------------------------------------
377ed926
RH
3880| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3881*----------------------------------------------------------------------------*/
3882
3883float16 float16_silence_nan(float16 a, float_status *status)
3884{
3dddb203
RH
3885 FloatParts64 p;
3886
3887 float16_unpack_raw(&p, a);
377ed926 3888 p.frac <<= float16_params.frac_shift;
92ff426d 3889 parts_silence_nan(&p, status);
377ed926 3890 p.frac >>= float16_params.frac_shift;
71fd178e 3891 return float16_pack_raw(&p);
377ed926
RH
3892}
3893
3894float32 float32_silence_nan(float32 a, float_status *status)
3895{
3dddb203
RH
3896 FloatParts64 p;
3897
3898 float32_unpack_raw(&p, a);
377ed926 3899 p.frac <<= float32_params.frac_shift;
92ff426d 3900 parts_silence_nan(&p, status);
377ed926 3901 p.frac >>= float32_params.frac_shift;
71fd178e 3902 return float32_pack_raw(&p);
377ed926
RH
3903}
3904
3905float64 float64_silence_nan(float64 a, float_status *status)
3906{
3dddb203
RH
3907 FloatParts64 p;
3908
3909 float64_unpack_raw(&p, a);
377ed926 3910 p.frac <<= float64_params.frac_shift;
92ff426d 3911 parts_silence_nan(&p, status);
377ed926 3912 p.frac >>= float64_params.frac_shift;
71fd178e 3913 return float64_pack_raw(&p);
377ed926
RH
3914}
3915
8282310d
LZ
3916bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3917{
3dddb203
RH
3918 FloatParts64 p;
3919
3920 bfloat16_unpack_raw(&p, a);
8282310d 3921 p.frac <<= bfloat16_params.frac_shift;
92ff426d 3922 parts_silence_nan(&p, status);
8282310d 3923 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3924 return bfloat16_pack_raw(&p);
8282310d 3925}
e6b405fe 3926
0018b1f4
RH
3927float128 float128_silence_nan(float128 a, float_status *status)
3928{
3929 FloatParts128 p;
3930
3931 float128_unpack_raw(&p, a);
3932 frac_shl(&p, float128_params.frac_shift);
3933 parts_silence_nan(&p, status);
3934 frac_shr(&p, float128_params.frac_shift);
3935 return float128_pack_raw(&p);
3936}
3937
e6b405fe
AB
3938/*----------------------------------------------------------------------------
3939| If `a' is denormal and we are in flush-to-zero mode then set the
3940| input-denormal exception and return zero. Otherwise just return the value.
3941*----------------------------------------------------------------------------*/
3942
f8155c1d 3943static bool parts_squash_denormal(FloatParts64 p, float_status *status)
e6b405fe
AB
3944{
3945 if (p.exp == 0 && p.frac != 0) {
3946 float_raise(float_flag_input_denormal, status);
3947 return true;
3948 }
3949
3950 return false;
3951}
3952
3953float16 float16_squash_input_denormal(float16 a, float_status *status)
3954{
3955 if (status->flush_inputs_to_zero) {
3dddb203
RH
3956 FloatParts64 p;
3957
3958 float16_unpack_raw(&p, a);
e6b405fe
AB
3959 if (parts_squash_denormal(p, status)) {
3960 return float16_set_sign(float16_zero, p.sign);
3961 }
3962 }
3963 return a;
3964}
3965
3966float32 float32_squash_input_denormal(float32 a, float_status *status)
3967{
3968 if (status->flush_inputs_to_zero) {
3dddb203
RH
3969 FloatParts64 p;
3970
3971 float32_unpack_raw(&p, a);
e6b405fe
AB
3972 if (parts_squash_denormal(p, status)) {
3973 return float32_set_sign(float32_zero, p.sign);
3974 }
3975 }
3976 return a;
3977}
3978
3979float64 float64_squash_input_denormal(float64 a, float_status *status)
3980{
3981 if (status->flush_inputs_to_zero) {
3dddb203
RH
3982 FloatParts64 p;
3983
3984 float64_unpack_raw(&p, a);
e6b405fe
AB
3985 if (parts_squash_denormal(p, status)) {
3986 return float64_set_sign(float64_zero, p.sign);
3987 }
3988 }
3989 return a;
3990}
3991
8282310d
LZ
3992bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3993{
3994 if (status->flush_inputs_to_zero) {
3dddb203
RH
3995 FloatParts64 p;
3996
3997 bfloat16_unpack_raw(&p, a);
8282310d
LZ
3998 if (parts_squash_denormal(p, status)) {
3999 return bfloat16_set_sign(bfloat16_zero, p.sign);
4000 }
4001 }
4002 return a;
4003}
4004
377ed926 4005/*----------------------------------------------------------------------------
158142c2
FB
4006| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
4007| and 7, and returns the properly rounded 32-bit integer corresponding to the
4008| input. If `zSign' is 1, the input is negated before being converted to an
4009| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
4010| is simply rounded to an integer, with the inexact exception raised if the
4011| input cannot be represented exactly as an integer. However, if the fixed-
4012| point input is too large, the invalid exception is raised and the largest
4013| positive or negative integer is returned.
4014*----------------------------------------------------------------------------*/
4015
c120391c
RH
4016static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
4017 float_status *status)
158142c2 4018{
8f506c70 4019 int8_t roundingMode;
c120391c 4020 bool roundNearestEven;
8f506c70 4021 int8_t roundIncrement, roundBits;
760e1416 4022 int32_t z;
158142c2 4023
a2f2d288 4024 roundingMode = status->float_rounding_mode;
158142c2 4025 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4026 switch (roundingMode) {
4027 case float_round_nearest_even:
f9288a76 4028 case float_round_ties_away:
dc355b76
PM
4029 roundIncrement = 0x40;
4030 break;
4031 case float_round_to_zero:
4032 roundIncrement = 0;
4033 break;
4034 case float_round_up:
4035 roundIncrement = zSign ? 0 : 0x7f;
4036 break;
4037 case float_round_down:
4038 roundIncrement = zSign ? 0x7f : 0;
4039 break;
5d64abb3
RH
4040 case float_round_to_odd:
4041 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
4042 break;
dc355b76
PM
4043 default:
4044 abort();
158142c2
FB
4045 }
4046 roundBits = absZ & 0x7F;
4047 absZ = ( absZ + roundIncrement )>>7;
40662886
PMD
4048 if (!(roundBits ^ 0x40) && roundNearestEven) {
4049 absZ &= ~1;
4050 }
158142c2
FB
4051 z = absZ;
4052 if ( zSign ) z = - z;
4053 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 4054 float_raise(float_flag_invalid, status);
2c217da0 4055 return zSign ? INT32_MIN : INT32_MAX;
158142c2 4056 }
a2f2d288 4057 if (roundBits) {
d82f3b2d 4058 float_raise(float_flag_inexact, status);
a2f2d288 4059 }
158142c2
FB
4060 return z;
4061
4062}
4063
4064/*----------------------------------------------------------------------------
4065| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4066| `absZ1', with binary point between bits 63 and 64 (between the input words),
4067| and returns the properly rounded 64-bit integer corresponding to the input.
4068| If `zSign' is 1, the input is negated before being converted to an integer.
4069| Ordinarily, the fixed-point input is simply rounded to an integer, with
4070| the inexact exception raised if the input cannot be represented exactly as
4071| an integer. However, if the fixed-point input is too large, the invalid
4072| exception is raised and the largest positive or negative integer is
4073| returned.
4074*----------------------------------------------------------------------------*/
4075
c120391c 4076static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 4077 float_status *status)
158142c2 4078{
8f506c70 4079 int8_t roundingMode;
c120391c 4080 bool roundNearestEven, increment;
760e1416 4081 int64_t z;
158142c2 4082
a2f2d288 4083 roundingMode = status->float_rounding_mode;
158142c2 4084 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4085 switch (roundingMode) {
4086 case float_round_nearest_even:
f9288a76 4087 case float_round_ties_away:
dc355b76
PM
4088 increment = ((int64_t) absZ1 < 0);
4089 break;
4090 case float_round_to_zero:
4091 increment = 0;
4092 break;
4093 case float_round_up:
4094 increment = !zSign && absZ1;
4095 break;
4096 case float_round_down:
4097 increment = zSign && absZ1;
4098 break;
5d64abb3
RH
4099 case float_round_to_odd:
4100 increment = !(absZ0 & 1) && absZ1;
4101 break;
dc355b76
PM
4102 default:
4103 abort();
158142c2
FB
4104 }
4105 if ( increment ) {
4106 ++absZ0;
4107 if ( absZ0 == 0 ) goto overflow;
40662886
PMD
4108 if (!(absZ1 << 1) && roundNearestEven) {
4109 absZ0 &= ~1;
4110 }
158142c2
FB
4111 }
4112 z = absZ0;
4113 if ( zSign ) z = - z;
4114 if ( z && ( ( z < 0 ) ^ zSign ) ) {
4115 overflow:
ff32e16e 4116 float_raise(float_flag_invalid, status);
2c217da0 4117 return zSign ? INT64_MIN : INT64_MAX;
158142c2 4118 }
a2f2d288 4119 if (absZ1) {
d82f3b2d 4120 float_raise(float_flag_inexact, status);
a2f2d288 4121 }
158142c2
FB
4122 return z;
4123
4124}
4125
fb3ea83a
TM
4126/*----------------------------------------------------------------------------
4127| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4128| `absZ1', with binary point between bits 63 and 64 (between the input words),
4129| and returns the properly rounded 64-bit unsigned integer corresponding to the
4130| input. Ordinarily, the fixed-point input is simply rounded to an integer,
4131| with the inexact exception raised if the input cannot be represented exactly
4132| as an integer. However, if the fixed-point input is too large, the invalid
4133| exception is raised and the largest unsigned integer is returned.
4134*----------------------------------------------------------------------------*/
4135
c120391c 4136static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
e5a41ffa 4137 uint64_t absZ1, float_status *status)
fb3ea83a 4138{
8f506c70 4139 int8_t roundingMode;
c120391c 4140 bool roundNearestEven, increment;
fb3ea83a 4141
a2f2d288 4142 roundingMode = status->float_rounding_mode;
fb3ea83a 4143 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
4144 switch (roundingMode) {
4145 case float_round_nearest_even:
f9288a76 4146 case float_round_ties_away:
dc355b76
PM
4147 increment = ((int64_t)absZ1 < 0);
4148 break;
4149 case float_round_to_zero:
4150 increment = 0;
4151 break;
4152 case float_round_up:
4153 increment = !zSign && absZ1;
4154 break;
4155 case float_round_down:
4156 increment = zSign && absZ1;
4157 break;
5d64abb3
RH
4158 case float_round_to_odd:
4159 increment = !(absZ0 & 1) && absZ1;
4160 break;
dc355b76
PM
4161 default:
4162 abort();
fb3ea83a
TM
4163 }
4164 if (increment) {
4165 ++absZ0;
4166 if (absZ0 == 0) {
ff32e16e 4167 float_raise(float_flag_invalid, status);
2c217da0 4168 return UINT64_MAX;
fb3ea83a 4169 }
40662886
PMD
4170 if (!(absZ1 << 1) && roundNearestEven) {
4171 absZ0 &= ~1;
4172 }
fb3ea83a
TM
4173 }
4174
4175 if (zSign && absZ0) {
ff32e16e 4176 float_raise(float_flag_invalid, status);
fb3ea83a
TM
4177 return 0;
4178 }
4179
4180 if (absZ1) {
d82f3b2d 4181 float_raise(float_flag_inexact, status);
fb3ea83a
TM
4182 }
4183 return absZ0;
4184}
4185
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision significand `aSig': it is shifted
| left by (leading-zero count - 8) bits.  The shifted significand is stored
| through `zSigPtr' and the matching exponent (1 - shift) through `zExpPtr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    const int8_t lead = clz32(aSig) - 8;

    *zSigPtr = aSig << lead;
    *zExpPtr = 1 - lead;
}
4203
158142c2
FB
4204/*----------------------------------------------------------------------------
4205| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4206| and significand `zSig', and returns the proper single-precision floating-
4207| point value corresponding to the abstract input. Ordinarily, the abstract
4208| value is simply rounded and packed into the single-precision format, with
4209| the inexact exception raised if the abstract input cannot be represented
4210| exactly. However, if the abstract value is too large, the overflow and
4211| inexact exceptions are raised and an infinity or maximal finite value is
4212| returned. If the abstract value is too small, the input value is rounded to
4213| a subnormal number, and the underflow and inexact exceptions are raised if
4214| the abstract input cannot be represented exactly as a subnormal single-
4215| precision floating-point number.
4216| The input significand `zSig' has its binary point between bits 30
4217| and 29, which is 7 bits to the left of the usual location. This shifted
4218| significand must be normalized or smaller. If `zSig' is not normalized,
4219| `zExp' must be 0; in that case, the result returned is a subnormal number,
4220| and it must not require rounding. In the usual case that `zSig' is
4221| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4222| The handling of underflow and overflow follows the IEC/IEEE Standard for
4223| Binary Floating-Point Arithmetic.
4224*----------------------------------------------------------------------------*/
4225
c120391c 4226static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4227 float_status *status)
158142c2 4228{
8f506c70 4229 int8_t roundingMode;
c120391c 4230 bool roundNearestEven;
8f506c70 4231 int8_t roundIncrement, roundBits;
c120391c 4232 bool isTiny;
158142c2 4233
a2f2d288 4234 roundingMode = status->float_rounding_mode;
158142c2 4235 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4236 switch (roundingMode) {
4237 case float_round_nearest_even:
f9288a76 4238 case float_round_ties_away:
dc355b76
PM
4239 roundIncrement = 0x40;
4240 break;
4241 case float_round_to_zero:
4242 roundIncrement = 0;
4243 break;
4244 case float_round_up:
4245 roundIncrement = zSign ? 0 : 0x7f;
4246 break;
4247 case float_round_down:
4248 roundIncrement = zSign ? 0x7f : 0;
4249 break;
5d64abb3
RH
4250 case float_round_to_odd:
4251 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4252 break;
dc355b76
PM
4253 default:
4254 abort();
4255 break;
158142c2
FB
4256 }
4257 roundBits = zSig & 0x7F;
bb98fe42 4258 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
4259 if ( ( 0xFD < zExp )
4260 || ( ( zExp == 0xFD )
bb98fe42 4261 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4262 ) {
5d64abb3
RH
4263 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4264 roundIncrement != 0;
ff32e16e 4265 float_raise(float_flag_overflow | float_flag_inexact, status);
5d64abb3 4266 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
158142c2
FB
4267 }
4268 if ( zExp < 0 ) {
a2f2d288 4269 if (status->flush_to_zero) {
ff32e16e 4270 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4271 return packFloat32(zSign, 0, 0);
4272 }
a828b373
RH
4273 isTiny = status->tininess_before_rounding
4274 || (zExp < -1)
4275 || (zSig + roundIncrement < 0x80000000);
158142c2
FB
4276 shift32RightJamming( zSig, - zExp, &zSig );
4277 zExp = 0;
4278 roundBits = zSig & 0x7F;
ff32e16e
PM
4279 if (isTiny && roundBits) {
4280 float_raise(float_flag_underflow, status);
4281 }
5d64abb3
RH
4282 if (roundingMode == float_round_to_odd) {
4283 /*
4284 * For round-to-odd case, the roundIncrement depends on
4285 * zSig which just changed.
4286 */
4287 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4288 }
158142c2
FB
4289 }
4290 }
a2f2d288 4291 if (roundBits) {
d82f3b2d 4292 float_raise(float_flag_inexact, status);
a2f2d288 4293 }
158142c2 4294 zSig = ( zSig + roundIncrement )>>7;
40662886
PMD
4295 if (!(roundBits ^ 0x40) && roundNearestEven) {
4296 zSig &= ~1;
4297 }
158142c2
FB
4298 if ( zSig == 0 ) zExp = 0;
4299 return packFloat32( zSign, zExp, zSig );
4300
4301}
4302
4303/*----------------------------------------------------------------------------
4304| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4305| and significand `zSig', and returns the proper single-precision floating-
4306| point value corresponding to the abstract input. This routine is just like
4307| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4308| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4309| floating-point exponent.
4310*----------------------------------------------------------------------------*/
4311
4312static float32
c120391c 4313 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4314 float_status *status)
158142c2 4315{
8f506c70 4316 int8_t shiftCount;
158142c2 4317
0019d5c3 4318 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
4319 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4320 status);
158142c2
FB
4321
4322}
4323
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision significand `aSig': it is shifted
| left by (leading-zero count - 11) bits.  The shifted significand is stored
| through `zSigPtr' and the matching exponent (1 - shift) through `zExpPtr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    const int8_t lead = clz64(aSig) - 11;

    *zSigPtr = aSig << lead;
    *zExpPtr = 1 - lead;
}
4341
4342/*----------------------------------------------------------------------------
4343| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4344| double-precision floating-point value, returning the result. After being
4345| shifted into the proper positions, the three fields are simply added
4346| together to form the result. This means that any integer portion of `zSig'
4347| will be added into the exponent. Since a properly normalized significand
4348| will have an integer portion equal to 1, the `zExp' input should be 1 less
4349| than the desired result exponent whenever `zSig' is a complete, normalized
4350| significand.
4351*----------------------------------------------------------------------------*/
4352
c120391c 4353static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
158142c2
FB
4354{
4355
f090c9d4 4356 return make_float64(
bb98fe42 4357 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
4358
4359}
4360
4361/*----------------------------------------------------------------------------
4362| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4363| and significand `zSig', and returns the proper double-precision floating-
4364| point value corresponding to the abstract input. Ordinarily, the abstract
4365| value is simply rounded and packed into the double-precision format, with
4366| the inexact exception raised if the abstract input cannot be represented
4367| exactly. However, if the abstract value is too large, the overflow and
4368| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
4369| returned. If the abstract value is too small, the input value is rounded to
4370| a subnormal number, and the underflow and inexact exceptions are raised if
4371| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
4372| precision floating-point number.
4373| The input significand `zSig' has its binary point between bits 62
4374| and 61, which is 10 bits to the left of the usual location. This shifted
4375| significand must be normalized or smaller. If `zSig' is not normalized,
4376| `zExp' must be 0; in that case, the result returned is a subnormal number,
4377| and it must not require rounding. In the usual case that `zSig' is
4378| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4379| The handling of underflow and overflow follows the IEC/IEEE Standard for
4380| Binary Floating-Point Arithmetic.
4381*----------------------------------------------------------------------------*/
4382
c120391c 4383static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4384 float_status *status)
158142c2 4385{
8f506c70 4386 int8_t roundingMode;
c120391c 4387 bool roundNearestEven;
0c48262d 4388 int roundIncrement, roundBits;
c120391c 4389 bool isTiny;
158142c2 4390
a2f2d288 4391 roundingMode = status->float_rounding_mode;
158142c2 4392 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4393 switch (roundingMode) {
4394 case float_round_nearest_even:
f9288a76 4395 case float_round_ties_away:
dc355b76
PM
4396 roundIncrement = 0x200;
4397 break;
4398 case float_round_to_zero:
4399 roundIncrement = 0;
4400 break;
4401 case float_round_up:
4402 roundIncrement = zSign ? 0 : 0x3ff;
4403 break;
4404 case float_round_down:
4405 roundIncrement = zSign ? 0x3ff : 0;
4406 break;
9ee6f678
BR
4407 case float_round_to_odd:
4408 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4409 break;
dc355b76
PM
4410 default:
4411 abort();
158142c2
FB
4412 }
4413 roundBits = zSig & 0x3FF;
bb98fe42 4414 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
4415 if ( ( 0x7FD < zExp )
4416 || ( ( zExp == 0x7FD )
bb98fe42 4417 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4418 ) {
9ee6f678
BR
4419 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4420 roundIncrement != 0;
ff32e16e 4421 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 4422 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
4423 }
4424 if ( zExp < 0 ) {
a2f2d288 4425 if (status->flush_to_zero) {
ff32e16e 4426 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4427 return packFloat64(zSign, 0, 0);
4428 }
a828b373
RH
4429 isTiny = status->tininess_before_rounding
4430 || (zExp < -1)
4431 || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
158142c2
FB
4432 shift64RightJamming( zSig, - zExp, &zSig );
4433 zExp = 0;
4434 roundBits = zSig & 0x3FF;
ff32e16e
PM
4435 if (isTiny && roundBits) {
4436 float_raise(float_flag_underflow, status);
4437 }
9ee6f678
BR
4438 if (roundingMode == float_round_to_odd) {
4439 /*
4440 * For round-to-odd case, the roundIncrement depends on
4441 * zSig which just changed.
4442 */
4443 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4444 }
158142c2
FB
4445 }
4446 }
a2f2d288 4447 if (roundBits) {
d82f3b2d 4448 float_raise(float_flag_inexact, status);
a2f2d288 4449 }
158142c2 4450 zSig = ( zSig + roundIncrement )>>10;
40662886
PMD
4451 if (!(roundBits ^ 0x200) && roundNearestEven) {
4452 zSig &= ~1;
4453 }
158142c2
FB
4454 if ( zSig == 0 ) zExp = 0;
4455 return packFloat64( zSign, zExp, zSig );
4456
4457}
4458
4459/*----------------------------------------------------------------------------
4460| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4461| and significand `zSig', and returns the proper double-precision floating-
4462| point value corresponding to the abstract input. This routine is just like
4463| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4464| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4465| floating-point exponent.
4466*----------------------------------------------------------------------------*/
4467
4468static float64
c120391c 4469 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4470 float_status *status)
158142c2 4471{
8f506c70 4472 int8_t shiftCount;
158142c2 4473
0019d5c3 4474 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
4475 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4476 status);
158142c2
FB
4477
4478}
4479
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision significand `aSig': it
| is shifted left by its leading-zero count.  The shifted significand is
| stored through `zSigPtr' and the matching exponent (1 - shift) through
| `zExpPtr'.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    const int8_t lead = clz64(aSig);

    *zSigPtr = aSig << lead;
    *zExpPtr = 1 - lead;
}
4496
/*----------------------------------------------------------------------------
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
| and extended significand formed by the concatenation of `zSig0' and `zSig1',
| and returns the proper extended double-precision floating-point value
| corresponding to the abstract input.  Ordinarily, the abstract value is
| rounded and packed into the extended double-precision format, with the
| inexact exception raised if the abstract input cannot be represented
| exactly.  However, if the abstract value is too large, the overflow and
| inexact exceptions are raised and an infinity or maximal finite value is
| returned.  If the abstract value is too small, the input value is rounded to
| a subnormal number, and the underflow and inexact exceptions are raised if
| the abstract input cannot be represented exactly as a subnormal extended
| double-precision floating-point number.
|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
| number of bits as single or double precision, respectively.  Otherwise, the
| result is rounded to the full precision of the extended double-precision
| format.
|     The input significand must be normalized or smaller.  If the input
| significand is not normalized, `zExp' must be 0; in that case, the result
| returned is a subnormal number, and it must not require rounding.  The
| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
|     Only nearest-even, ties-away, to-zero, up and down rounding modes are
| handled here; any other value of `status->float_rounding_mode' (including
| float_round_to_odd, which the float32/float64 packers in this file accept)
| reaches abort().
*----------------------------------------------------------------------------*/

floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
                              float_status *status)
{
    int8_t roundingMode;
    bool roundNearestEven, increment, isTiny;
    int64_t roundIncrement, roundMask, roundBits;

    roundingMode = status->float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /*
     * Select the rounding granularity.  Precisions 64 and 32 round inside
     * zSig0 using a mask of discarded low bits; everything else (80 or any
     * unrecognized value) uses the full-precision path at `precision80'.
     */
    if ( roundingPrecision == 80 ) goto precision80;
    if ( roundingPrecision == 64 ) {
        roundIncrement = UINT64_C(0x0000000000000400);
        roundMask = UINT64_C(0x00000000000007FF);
    }
    else if ( roundingPrecision == 32 ) {
        roundIncrement = UINT64_C(0x0000008000000000);
        roundMask = UINT64_C(0x000000FFFFFFFFFF);
    }
    else {
        goto precision80;
    }
    /* Reduced precision: fold any non-zero zSig1 into bit 0 of zSig0. */
    zSig0 |= ( zSig1 != 0 );
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Keep the half-unit increment chosen above. */
        break;
    case float_round_to_zero:
        roundIncrement = 0;
        break;
    case float_round_up:
        roundIncrement = zSign ? 0 : roundMask;
        break;
    case float_round_down:
        roundIncrement = zSign ? roundMask : 0;
        break;
    default:
        abort();
    }
    roundBits = zSig0 & roundMask;
    /* The unsigned compare is true for zExp <= 0 as well as zExp >= 0x7FFE. */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
           ) {
            /* Shares the overflow handling located inside the 80-bit path. */
            goto overflow;
        }
        if ( zExp <= 0 ) {
            if (status->flush_to_zero) {
                float_raise(float_flag_output_denormal, status);
                return packFloatx80(zSign, 0, 0);
            }
            isTiny = status->tininess_before_rounding
                  || (zExp < 0 )
                  || (zSig0 <= zSig0 + roundIncrement);
            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
            zExp = 0;
            roundBits = zSig0 & roundMask;
            if (isTiny && roundBits) {
                float_raise(float_flag_underflow, status);
            }
            if (roundBits) {
                float_raise(float_flag_inexact, status);
            }
            zSig0 += roundIncrement;
            /* Rounding carried into bit 63: the exponent becomes 1. */
            if ( (int64_t) zSig0 < 0 ) zExp = 1;
            roundIncrement = roundMask + 1;
            /* Exact tie under nearest-even: also clear the lowest kept bit. */
            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
                roundMask |= roundIncrement;
            }
            zSig0 &= ~ roundMask;
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (roundBits) {
        float_raise(float_flag_inexact, status);
    }
    zSig0 += roundIncrement;
    /* The addition wrapped: bump the exponent and set only bit 63. */
    if ( zSig0 < roundIncrement ) {
        ++zExp;
        zSig0 = UINT64_C(0x8000000000000000);
    }
    roundIncrement = roundMask + 1;
    /* Exact tie under nearest-even: also clear the lowest kept bit. */
    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
        roundMask |= roundIncrement;
    }
    zSig0 &= ~ roundMask;
    if ( zSig0 == 0 ) zExp = 0;
    return packFloatx80( zSign, zExp, zSig0 );
 precision80:
    /* Full precision: zSig1 holds the bits below the 64-bit significand. */
    switch (roundingMode) {
    case float_round_nearest_even:
    case float_round_ties_away:
        /* Top bit of zSig1 set: the discarded part is at least one half. */
        increment = ((int64_t)zSig1 < 0);
        break;
    case float_round_to_zero:
        increment = 0;
        break;
    case float_round_up:
        increment = !zSign && zSig1;
        break;
    case float_round_down:
        increment = zSign && zSig1;
        break;
    default:
        abort();
    }
    /* The unsigned compare is true for zExp <= 0 as well as zExp >= 0x7FFE. */
    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
        if (    ( 0x7FFE < zExp )
             || (    ( zExp == 0x7FFE )
                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
                  && increment
                )
           ) {
            /* With roundMask 0 the max-finite significand below is all ones. */
            roundMask = 0;
 overflow:
            /*
             * Also entered by goto from the reduced-precision path, where
             * roundMask still holds that precision's discarded-bit mask.
             */
            float_raise(float_flag_overflow | float_flag_inexact, status);
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
            }
            return packFloatx80(zSign,
                                floatx80_infinity_high,
                                floatx80_infinity_low);
        }
        if ( zExp <= 0 ) {
            /*
             * NOTE(review): unlike the reduced-precision path above, this
             * branch does not test status->flush_to_zero - confirm whether
             * that is intentional.
             */
            isTiny = status->tininess_before_rounding
                  || (zExp < 0)
                  || !increment
                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
            zExp = 0;
            if (isTiny && zSig1) {
                float_raise(float_flag_underflow, status);
            }
            if (zSig1) {
                float_raise(float_flag_inexact, status);
            }
            /* zSig1 changed in the shift, so the increment is recomputed. */
            switch (roundingMode) {
            case float_round_nearest_even:
            case float_round_ties_away:
                increment = ((int64_t)zSig1 < 0);
                break;
            case float_round_to_zero:
                increment = 0;
                break;
            case float_round_up:
                increment = !zSign && zSig1;
                break;
            case float_round_down:
                increment = zSign && zSig1;
                break;
            default:
                abort();
            }
            if ( increment ) {
                ++zSig0;
                /* Exact tie under nearest-even: clear the low bit. */
                if (!(zSig1 << 1) && roundNearestEven) {
                    zSig0 &= ~1;
                }
                /* Rounding carried into bit 63: the exponent becomes 1. */
                if ( (int64_t) zSig0 < 0 ) zExp = 1;
            }
            return packFloatx80( zSign, zExp, zSig0 );
        }
    }
    if (zSig1) {
        float_raise(float_flag_inexact, status);
    }
    if ( increment ) {
        ++zSig0;
        if ( zSig0 == 0 ) {
            /* The increment wrapped: bump the exponent and set only bit 63. */
            ++zExp;
            zSig0 = UINT64_C(0x8000000000000000);
        }
        else {
            /* Exact tie under nearest-even: clear the low bit. */
            if (!(zSig1 << 1) && roundNearestEven) {
                zSig0 &= ~1;
            }
        }
    }
    else {
        if ( zSig0 == 0 ) zExp = 0;
    }
    return packFloatx80( zSign, zExp, zSig0 );

}
4708
4709/*----------------------------------------------------------------------------
4710| Takes an abstract floating-point value having sign `zSign', exponent
4711| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4712| and returns the proper extended double-precision floating-point value
4713| corresponding to the abstract input. This routine is just like
4714| `roundAndPackFloatx80' except that the input significand does not have to be
4715| normalized.
4716*----------------------------------------------------------------------------*/
4717
88857aca 4718floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
c120391c 4719 bool zSign, int32_t zExp,
88857aca
LV
4720 uint64_t zSig0, uint64_t zSig1,
4721 float_status *status)
158142c2 4722{
8f506c70 4723 int8_t shiftCount;
158142c2
FB
4724
4725 if ( zSig0 == 0 ) {
4726 zSig0 = zSig1;
4727 zSig1 = 0;
4728 zExp -= 64;
4729 }
0019d5c3 4730 shiftCount = clz64(zSig0);
158142c2
FB
4731 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4732 zExp -= shiftCount;
ff32e16e
PM
4733 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4734 zSig0, zSig1, status);
158142c2
FB
4735
4736}
4737
158142c2
FB
4738/*----------------------------------------------------------------------------
4739| Returns the least-significant 64 fraction bits of the quadruple-precision
4740| floating-point value `a'.
4741*----------------------------------------------------------------------------*/
4742
a49db98d 4743static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4744{
4745
4746 return a.low;
4747
4748}
4749
4750/*----------------------------------------------------------------------------
4751| Returns the most-significant 48 fraction bits of the quadruple-precision
4752| floating-point value `a'.
4753*----------------------------------------------------------------------------*/
4754
a49db98d 4755static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4756{
4757
e9321124 4758 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
158142c2
FB
4759
4760}
4761
4762/*----------------------------------------------------------------------------
4763| Returns the exponent bits of the quadruple-precision floating-point value
4764| `a'.
4765*----------------------------------------------------------------------------*/
4766
f4014512 4767static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4768{
4769
4770 return ( a.high>>48 ) & 0x7FFF;
4771
4772}
4773
4774/*----------------------------------------------------------------------------
4775| Returns the sign bit of the quadruple-precision floating-point value `a'.
4776*----------------------------------------------------------------------------*/
4777
c120391c 4778static inline bool extractFloat128Sign(float128 a)
158142c2 4779{
c120391c 4780 return a.high >> 63;
158142c2
FB
4781}
4782
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision significand formed by the
| concatenation of `aSig0' and `aSig1'.  The normalized exponent is stored
| through `zExpPtr'.  The most significant 49 bits of the normalized
| significand are stored through `zSig0Ptr', and the least significant 64 bits
| through `zSig1Ptr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t lead;

    if (aSig0 != 0) {
        /* High word is non-zero: one 128-bit left shift normalizes it. */
        lead = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, lead, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - lead;
        return;
    }

    /* High word is empty: the significand comes entirely from aSig1. */
    lead = clz64(aSig1) - 15;
    if (lead >= 0) {
        *zSig0Ptr = aSig1 << lead;
        *zSig1Ptr = 0;
    } else {
        /* aSig1 has fewer than 15 leading zeros: split it across both words. */
        *zSig0Ptr = aSig1 >> (-lead);
        *zSig1Ptr = aSig1 << (lead & 63);
    }
    *zExpPtr = -lead - 63;
}
4823
4824/*----------------------------------------------------------------------------
4825| Packs the sign `zSign', the exponent `zExp', and the significand formed
4826| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4827| floating-point value, returning the result. After being shifted into the
4828| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4829| added together to form the most significant 32 bits of the result. This
4830| means that any integer portion of `zSig0' will be added into the exponent.
4831| Since a properly normalized significand will have an integer portion equal
4832| to 1, the `zExp' input should be 1 less than the desired result exponent
4833| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4834| significand.
4835*----------------------------------------------------------------------------*/
4836
a49db98d 4837static inline float128
c120391c 4838packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
158142c2
FB
4839{
4840 float128 z;
4841
4842 z.low = zSig1;
c120391c 4843 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
158142c2 4844 return z;
158142c2
FB
4845}
4846
4847/*----------------------------------------------------------------------------
4848| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4849| and extended significand formed by the concatenation of `zSig0', `zSig1',
4850| and `zSig2', and returns the proper quadruple-precision floating-point value
4851| corresponding to the abstract input. Ordinarily, the abstract value is
4852| simply rounded and packed into the quadruple-precision format, with the
4853| inexact exception raised if the abstract input cannot be represented
4854| exactly. However, if the abstract value is too large, the overflow and
4855| inexact exceptions are raised and an infinity or maximal finite value is
4856| returned. If the abstract value is too small, the input value is rounded to
4857| a subnormal number, and the underflow and inexact exceptions are raised if
4858| the abstract input cannot be represented exactly as a subnormal quadruple-
4859| precision floating-point number.
4860| The input significand must be normalized or smaller. If the input
4861| significand is not normalized, `zExp' must be 0; in that case, the result
4862| returned is a subnormal number, and it must not require rounding. In the
4863| usual case that the input significand is normalized, `zExp' must be 1 less
4864| than the ``true'' floating-point exponent. The handling of underflow and
4865| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4866*----------------------------------------------------------------------------*/
4867
c120391c 4868static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4869 uint64_t zSig0, uint64_t zSig1,
4870 uint64_t zSig2, float_status *status)
158142c2 4871{
8f506c70 4872 int8_t roundingMode;
c120391c 4873 bool roundNearestEven, increment, isTiny;
158142c2 4874
a2f2d288 4875 roundingMode = status->float_rounding_mode;
158142c2 4876 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4877 switch (roundingMode) {
4878 case float_round_nearest_even:
f9288a76 4879 case float_round_ties_away:
dc355b76
PM
4880 increment = ((int64_t)zSig2 < 0);
4881 break;
4882 case float_round_to_zero:
4883 increment = 0;
4884 break;
4885 case float_round_up:
4886 increment = !zSign && zSig2;
4887 break;
4888 case float_round_down:
4889 increment = zSign && zSig2;
4890 break;
9ee6f678
BR
4891 case float_round_to_odd:
4892 increment = !(zSig1 & 0x1) && zSig2;
4893 break;
dc355b76
PM
4894 default:
4895 abort();
158142c2 4896 }
bb98fe42 4897 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
4898 if ( ( 0x7FFD < zExp )
4899 || ( ( zExp == 0x7FFD )
4900 && eq128(
e9321124
AB
4901 UINT64_C(0x0001FFFFFFFFFFFF),
4902 UINT64_C(0xFFFFFFFFFFFFFFFF),
158142c2
FB
4903 zSig0,
4904 zSig1
4905 )
4906 && increment
4907 )
4908 ) {
ff32e16e 4909 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4910 if ( ( roundingMode == float_round_to_zero )
4911 || ( zSign && ( roundingMode == float_round_up ) )
4912 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4913 || (roundingMode == float_round_to_odd)
158142c2
FB
4914 ) {
4915 return
4916 packFloat128(
4917 zSign,
4918 0x7FFE,
e9321124
AB
4919 UINT64_C(0x0000FFFFFFFFFFFF),
4920 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
4921 );
4922 }
4923 return packFloat128( zSign, 0x7FFF, 0, 0 );
4924 }
4925 if ( zExp < 0 ) {
a2f2d288 4926 if (status->flush_to_zero) {
ff32e16e 4927 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4928 return packFloat128(zSign, 0, 0, 0);
4929 }
a828b373
RH
4930 isTiny = status->tininess_before_rounding
4931 || (zExp < -1)
4932 || !increment
4933 || lt128(zSig0, zSig1,
4934 UINT64_C(0x0001FFFFFFFFFFFF),
4935 UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4936 shift128ExtraRightJamming(
4937 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4938 zExp = 0;
ff32e16e
PM
4939 if (isTiny && zSig2) {
4940 float_raise(float_flag_underflow, status);
4941 }
dc355b76
PM
4942 switch (roundingMode) {
4943 case float_round_nearest_even:
f9288a76 4944 case float_round_ties_away:
dc355b76
PM
4945 increment = ((int64_t)zSig2 < 0);
4946 break;
4947 case float_round_to_zero:
4948 increment = 0;
4949 break;
4950 case float_round_up:
4951 increment = !zSign && zSig2;
4952 break;
4953 case float_round_down:
4954 increment = zSign && zSig2;
4955 break;
9ee6f678
BR
4956 case float_round_to_odd:
4957 increment = !(zSig1 & 0x1) && zSig2;
4958 break;
dc355b76
PM
4959 default:
4960 abort();
158142c2
FB
4961 }
4962 }
4963 }
a2f2d288 4964 if (zSig2) {
d82f3b2d 4965 float_raise(float_flag_inexact, status);
a2f2d288 4966 }
158142c2
FB
4967 if ( increment ) {
4968 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
40662886
PMD
4969 if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4970 zSig1 &= ~1;
4971 }
158142c2
FB
4972 }
4973 else {
4974 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4975 }
4976 return packFloat128( zSign, zExp, zSig0, zSig1 );
4977
4978}
4979
4980/*----------------------------------------------------------------------------
4981| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4982| and significand formed by the concatenation of `zSig0' and `zSig1', and
4983| returns the proper quadruple-precision floating-point value corresponding
4984| to the abstract input. This routine is just like `roundAndPackFloat128'
4985| except that the input significand has fewer bits and does not have to be
4986| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4987| point exponent.
4988*----------------------------------------------------------------------------*/
4989
c120391c 4990static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4991 uint64_t zSig0, uint64_t zSig1,
4992 float_status *status)
158142c2 4993{
8f506c70 4994 int8_t shiftCount;
bb98fe42 4995 uint64_t zSig2;
158142c2
FB
4996
4997 if ( zSig0 == 0 ) {
4998 zSig0 = zSig1;
4999 zSig1 = 0;
5000 zExp -= 64;
5001 }
0019d5c3 5002 shiftCount = clz64(zSig0) - 15;
158142c2
FB
5003 if ( 0 <= shiftCount ) {
5004 zSig2 = 0;
5005 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5006 }
5007 else {
5008 shift128ExtraRightJamming(
5009 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
5010 }
5011 zExp -= shiftCount;
ff32e16e 5012 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
5013
5014}
5015
158142c2 5016
158142c2
FB
5017/*----------------------------------------------------------------------------
5018| Returns the result of converting the 32-bit two's complement integer `a'
5019| to the extended double-precision floating-point format. The conversion
5020| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5021| Arithmetic.
5022*----------------------------------------------------------------------------*/
5023
e5a41ffa 5024floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2 5025{
c120391c 5026 bool zSign;
3a87d009 5027 uint32_t absA;
8f506c70 5028 int8_t shiftCount;
bb98fe42 5029 uint64_t zSig;
158142c2
FB
5030
5031 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5032 zSign = ( a < 0 );
5033 absA = zSign ? - a : a;
0019d5c3 5034 shiftCount = clz32(absA) + 32;
158142c2
FB
5035 zSig = absA;
5036 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
5037
5038}
5039
158142c2
FB
5040/*----------------------------------------------------------------------------
5041| Returns the result of converting the 32-bit two's complement integer `a' to
5042| the quadruple-precision floating-point format. The conversion is performed
5043| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5044*----------------------------------------------------------------------------*/
5045
e5a41ffa 5046float128 int32_to_float128(int32_t a, float_status *status)
158142c2 5047{
c120391c 5048 bool zSign;
3a87d009 5049 uint32_t absA;
8f506c70 5050 int8_t shiftCount;
bb98fe42 5051 uint64_t zSig0;
158142c2
FB
5052
5053 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5054 zSign = ( a < 0 );
5055 absA = zSign ? - a : a;
0019d5c3 5056 shiftCount = clz32(absA) + 17;
158142c2
FB
5057 zSig0 = absA;
5058 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
5059
5060}
5061
158142c2
FB
5062/*----------------------------------------------------------------------------
5063| Returns the result of converting the 64-bit two's complement integer `a'
5064| to the extended double-precision floating-point format. The conversion
5065| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5066| Arithmetic.
5067*----------------------------------------------------------------------------*/
5068
e5a41ffa 5069floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2 5070{
c120391c 5071 bool zSign;
182f42fd 5072 uint64_t absA;
8f506c70 5073 int8_t shiftCount;
158142c2
FB
5074
5075 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5076 zSign = ( a < 0 );
5077 absA = zSign ? - a : a;
0019d5c3 5078 shiftCount = clz64(absA);
158142c2
FB
5079 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
5080
5081}
5082
158142c2
FB
5083/*----------------------------------------------------------------------------
5084| Returns the result of converting the 64-bit two's complement integer `a' to
5085| the quadruple-precision floating-point format. The conversion is performed
5086| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5087*----------------------------------------------------------------------------*/
5088
e5a41ffa 5089float128 int64_to_float128(int64_t a, float_status *status)
158142c2 5090{
c120391c 5091 bool zSign;
182f42fd 5092 uint64_t absA;
8f506c70 5093 int8_t shiftCount;
f4014512 5094 int32_t zExp;
bb98fe42 5095 uint64_t zSig0, zSig1;
158142c2
FB
5096
5097 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5098 zSign = ( a < 0 );
5099 absA = zSign ? - a : a;
0019d5c3 5100 shiftCount = clz64(absA) + 49;
158142c2
FB
5101 zExp = 0x406E - shiftCount;
5102 if ( 64 <= shiftCount ) {
5103 zSig1 = 0;
5104 zSig0 = absA;
5105 shiftCount -= 64;
5106 }
5107 else {
5108 zSig1 = absA;
5109 zSig0 = 0;
5110 }
5111 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5112 return packFloat128( zSign, zExp, zSig0, zSig1 );
5113
5114}
5115
6bb8e0f1
PM
5116/*----------------------------------------------------------------------------
5117| Returns the result of converting the 64-bit unsigned integer `a'
5118| to the quadruple-precision floating-point format. The conversion is performed
5119| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5120*----------------------------------------------------------------------------*/
5121
e5a41ffa 5122float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
5123{
5124 if (a == 0) {
5125 return float128_zero;
5126 }
6603d506 5127 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
5128}
5129
158142c2
FB
5130/*----------------------------------------------------------------------------
5131| Returns the result of converting the single-precision floating-point value
5132| `a' to the extended double-precision floating-point format. The conversion
5133| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5134| Arithmetic.
5135*----------------------------------------------------------------------------*/
5136
e5a41ffa 5137floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2 5138{
c120391c 5139 bool aSign;
0c48262d 5140 int aExp;
bb98fe42 5141 uint32_t aSig;
158142c2 5142
ff32e16e 5143 a = float32_squash_input_denormal(a, status);
158142c2
FB
5144 aSig = extractFloat32Frac( a );
5145 aExp = extractFloat32Exp( a );
5146 aSign = extractFloat32Sign( a );
5147 if ( aExp == 0xFF ) {
ff32e16e 5148 if (aSig) {
7537c2b4
JM
5149 floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
5150 status);
5151 return floatx80_silence_nan(res, status);
ff32e16e 5152 }
0f605c88
LV
5153 return packFloatx80(aSign,
5154 floatx80_infinity_high,
5155 floatx80_infinity_low);
158142c2
FB
5156 }
5157 if ( aExp == 0 ) {
5158 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5159 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5160 }
5161 aSig |= 0x00800000;
bb98fe42 5162 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
5163
5164}
5165
158142c2
FB
5166/*----------------------------------------------------------------------------
5167| Returns the remainder of the single-precision floating-point value `a'
5168| with respect to the corresponding value `b'. The operation is performed
5169| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5170*----------------------------------------------------------------------------*/
5171
e5a41ffa 5172float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 5173{
c120391c 5174 bool aSign, zSign;
0c48262d 5175 int aExp, bExp, expDiff;
bb98fe42
AF
5176 uint32_t aSig, bSig;
5177 uint32_t q;
5178 uint64_t aSig64, bSig64, q64;
5179 uint32_t alternateASig;
5180 int32_t sigMean;
ff32e16e
PM
5181 a = float32_squash_input_denormal(a, status);
5182 b = float32_squash_input_denormal(b, status);
158142c2
FB
5183
5184 aSig = extractFloat32Frac( a );
5185 aExp = extractFloat32Exp( a );
5186 aSign = extractFloat32Sign( a );
5187 bSig = extractFloat32Frac( b );
5188 bExp = extractFloat32Exp( b );
158142c2
FB
5189 if ( aExp == 0xFF ) {
5190 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 5191 return propagateFloat32NaN(a, b, status);
158142c2 5192 }
ff32e16e 5193 float_raise(float_flag_invalid, status);
af39bc8c 5194 return float32_default_nan(status);
158142c2
FB
5195 }
5196 if ( bExp == 0xFF ) {
ff32e16e
PM
5197 if (bSig) {
5198 return propagateFloat32NaN(a, b, status);
5199 }
158142c2
FB
5200 return a;
5201 }
5202 if ( bExp == 0 ) {
5203 if ( bSig == 0 ) {
ff32e16e 5204 float_raise(float_flag_invalid, status);
af39bc8c 5205 return float32_default_nan(status);
158142c2
FB
5206 }
5207 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
5208 }
5209 if ( aExp == 0 ) {
5210 if ( aSig == 0 ) return a;
5211 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5212 }
5213 expDiff = aExp - bExp;
5214 aSig |= 0x00800000;
5215 bSig |= 0x00800000;
5216 if ( expDiff < 32 ) {
5217 aSig <<= 8;
5218 bSig <<= 8;
5219 if ( expDiff < 0 ) {
5220 if ( expDiff < -1 ) return a;
5221 aSig >>= 1;
5222 }
5223 q = ( bSig <= aSig );
5224 if ( q ) aSig -= bSig;
5225 if ( 0 < expDiff ) {
bb98fe42 5226 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
5227 q >>= 32 - expDiff;
5228 bSig >>= 2;
5229 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5230 }
5231 else {
5232 aSig >>= 2;
5233 bSig >>= 2;
5234 }
5235 }
5236 else {
5237 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
5238 aSig64 = ( (uint64_t) aSig )<<40;
5239 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
5240 expDiff -= 64;
5241 while ( 0 < expDiff ) {
5242 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5243 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5244 aSig64 = - ( ( bSig * q64 )<<38 );
5245 expDiff -= 62;
5246 }
5247 expDiff += 64;
5248 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5249 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5250 q = q64>>( 64 - expDiff );
5251 bSig <<= 6;
5252 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5253 }
5254 do {
5255 alternateASig = aSig;
5256 ++q;
5257 aSig -= bSig;
bb98fe42 5258 } while ( 0 <= (int32_t) aSig );
158142c2
FB
5259 sigMean = aSig + alternateASig;
5260 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5261 aSig = alternateASig;
5262 }
bb98fe42 5263 zSign = ( (int32_t) aSig < 0 );
158142c2 5264 if ( zSign ) aSig = - aSig;
ff32e16e 5265 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5266}
5267
369be8f6 5268
158142c2 5269
8229c991
AJ
5270/*----------------------------------------------------------------------------
5271| Returns the binary exponential of the single-precision floating-point value
5272| `a'. The operation is performed according to the IEC/IEEE Standard for
5273| Binary Floating-Point Arithmetic.
5274|
5275| Uses the following identities:
5276|
5277| 1. -------------------------------------------------------------------------
5278| x x*ln(2)
5279| 2 = e
5280|
5281| 2. -------------------------------------------------------------------------
5282| 2 3 4 5 n
5283| x x x x x x x
5284| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5285| 1! 2! 3! 4! 5! n!
5286*----------------------------------------------------------------------------*/
5287
5288static const float64 float32_exp2_coefficients[15] =
5289{
d5138cf4
PM
5290 const_float64( 0x3ff0000000000000ll ), /* 1 */
5291 const_float64( 0x3fe0000000000000ll ), /* 2 */
5292 const_float64( 0x3fc5555555555555ll ), /* 3 */
5293 const_float64( 0x3fa5555555555555ll ), /* 4 */
5294 const_float64( 0x3f81111111111111ll ), /* 5 */
5295 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5296 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5297 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5298 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5299 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5300 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5301 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5302 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5303 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5304 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
5305};
5306
e5a41ffa 5307float32 float32_exp2(float32 a, float_status *status)
8229c991 5308{
c120391c 5309 bool aSign;
0c48262d 5310 int aExp;
bb98fe42 5311 uint32_t aSig;
8229c991
AJ
5312 float64 r, x, xn;
5313 int i;
ff32e16e 5314 a = float32_squash_input_denormal(a, status);
8229c991
AJ
5315
5316 aSig = extractFloat32Frac( a );
5317 aExp = extractFloat32Exp( a );
5318 aSign = extractFloat32Sign( a );
5319
5320 if ( aExp == 0xFF) {
ff32e16e
PM
5321 if (aSig) {
5322 return propagateFloat32NaN(a, float32_zero, status);
5323 }
8229c991
AJ
5324 return (aSign) ? float32_zero : a;
5325 }
5326 if (aExp == 0) {
5327 if (aSig == 0) return float32_one;
5328 }
5329
ff32e16e 5330 float_raise(float_flag_inexact, status);
8229c991
AJ
5331
5332 /* ******************************* */
5333 /* using float64 for approximation */
5334 /* ******************************* */
ff32e16e
PM
5335 x = float32_to_float64(a, status);
5336 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
5337
5338 xn = x;
5339 r = float64_one;
5340 for (i = 0 ; i < 15 ; i++) {
5341 float64 f;
5342
ff32e16e
PM
5343 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5344 r = float64_add(r, f, status);
8229c991 5345
ff32e16e 5346 xn = float64_mul(xn, x, status);
8229c991
AJ
5347 }
5348
5349 return float64_to_float32(r, status);
5350}
5351
374dfc33
AJ
5352/*----------------------------------------------------------------------------
5353| Returns the binary log of the single-precision floating-point value `a'.
5354| The operation is performed according to the IEC/IEEE Standard for Binary
5355| Floating-Point Arithmetic.
5356*----------------------------------------------------------------------------*/
e5a41ffa 5357float32 float32_log2(float32 a, float_status *status)
374dfc33 5358{
c120391c 5359 bool aSign, zSign;
0c48262d 5360 int aExp;
bb98fe42 5361 uint32_t aSig, zSig, i;
374dfc33 5362
ff32e16e 5363 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
5364 aSig = extractFloat32Frac( a );
5365 aExp = extractFloat32Exp( a );
5366 aSign = extractFloat32Sign( a );
5367
5368 if ( aExp == 0 ) {
5369 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5370 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5371 }
5372 if ( aSign ) {
ff32e16e 5373 float_raise(float_flag_invalid, status);
af39bc8c 5374 return float32_default_nan(status);
374dfc33
AJ
5375 }
5376 if ( aExp == 0xFF ) {
ff32e16e
PM
5377 if (aSig) {
5378 return propagateFloat32NaN(a, float32_zero, status);
5379 }
374dfc33
AJ
5380 return a;
5381 }
5382
5383 aExp -= 0x7F;
5384 aSig |= 0x00800000;
5385 zSign = aExp < 0;
5386 zSig = aExp << 23;
5387
5388 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 5389 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
5390 if ( aSig & 0x01000000 ) {
5391 aSig >>= 1;
5392 zSig |= i;
5393 }
5394 }
5395
5396 if ( zSign )
5397 zSig = -zSig;
5398
ff32e16e 5399 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
5400}
5401
158142c2 5402/*----------------------------------------------------------------------------
158142c2
FB
5403| Returns the result of converting the double-precision floating-point value
5404| `a' to the extended double-precision floating-point format. The conversion
5405| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5406| Arithmetic.
5407*----------------------------------------------------------------------------*/
5408
e5a41ffa 5409floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2 5410{
c120391c 5411 bool aSign;
0c48262d 5412 int aExp;
bb98fe42 5413 uint64_t aSig;
158142c2 5414
ff32e16e 5415 a = float64_squash_input_denormal(a, status);
158142c2
FB
5416 aSig = extractFloat64Frac( a );
5417 aExp = extractFloat64Exp( a );
5418 aSign = extractFloat64Sign( a );
5419 if ( aExp == 0x7FF ) {
ff32e16e 5420 if (aSig) {
7537c2b4
JM
5421 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5422 status);
5423 return floatx80_silence_nan(res, status);
ff32e16e 5424 }
0f605c88
LV
5425 return packFloatx80(aSign,
5426 floatx80_infinity_high,
5427 floatx80_infinity_low);
158142c2
FB
5428 }
5429 if ( aExp == 0 ) {
5430 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5431 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5432 }
5433 return
5434 packFloatx80(
e9321124 5435 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
158142c2
FB
5436
5437}
5438
158142c2
FB
5439/*----------------------------------------------------------------------------
5440| Returns the remainder of the double-precision floating-point value `a'
5441| with respect to the corresponding value `b'. The operation is performed
5442| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5443*----------------------------------------------------------------------------*/
5444
e5a41ffa 5445float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5446{
c120391c 5447 bool aSign, zSign;
0c48262d 5448 int aExp, bExp, expDiff;
bb98fe42
AF
5449 uint64_t aSig, bSig;
5450 uint64_t q, alternateASig;
5451 int64_t sigMean;
158142c2 5452
ff32e16e
PM
5453 a = float64_squash_input_denormal(a, status);
5454 b = float64_squash_input_denormal(b, status);
158142c2
FB
5455 aSig = extractFloat64Frac( a );
5456 aExp = extractFloat64Exp( a );
5457 aSign = extractFloat64Sign( a );
5458 bSig = extractFloat64Frac( b );
5459 bExp = extractFloat64Exp( b );
158142c2
FB
5460 if ( aExp == 0x7FF ) {
5461 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5462 return propagateFloat64NaN(a, b, status);
158142c2 5463 }
ff32e16e 5464 float_raise(float_flag_invalid, status);
af39bc8c 5465 return float64_default_nan(status);
158142c2
FB
5466 }
5467 if ( bExp == 0x7FF ) {
ff32e16e
PM
5468 if (bSig) {
5469 return propagateFloat64NaN(a, b, status);
5470 }
158142c2
FB
5471 return a;
5472 }
5473 if ( bExp == 0 ) {
5474 if ( bSig == 0 ) {
ff32e16e 5475 float_raise(float_flag_invalid, status);
af39bc8c 5476 return float64_default_nan(status);
158142c2
FB
5477 }
5478 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5479 }
5480 if ( aExp == 0 ) {
5481 if ( aSig == 0 ) return a;
5482 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5483 }
5484 expDiff = aExp - bExp;
e9321124
AB
5485 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5486 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
158142c2
FB
5487 if ( expDiff < 0 ) {
5488 if ( expDiff < -1 ) return a;
5489 aSig >>= 1;
5490 }
5491 q = ( bSig <= aSig );
5492 if ( q ) aSig -= bSig;
5493 expDiff -= 64;
5494 while ( 0 < expDiff ) {
5495 q = estimateDiv128To64( aSig, 0, bSig );
5496 q = ( 2 < q ) ? q - 2 : 0;
5497 aSig = - ( ( bSig>>2 ) * q );
5498 expDiff -= 62;
5499 }
5500 expDiff += 64;
5501 if ( 0 < expDiff ) {
5502 q = estimateDiv128To64( aSig, 0, bSig );
5503 q = ( 2 < q ) ? q - 2 : 0;
5504 q >>= 64 - expDiff;
5505 bSig >>= 2;
5506 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5507 }
5508 else {
5509 aSig >>= 2;
5510 bSig >>= 2;
5511 }
5512 do {
5513 alternateASig = aSig;
5514 ++q;
5515 aSig -= bSig;
bb98fe42 5516 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5517 sigMean = aSig + alternateASig;
5518 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5519 aSig = alternateASig;
5520 }
bb98fe42 5521 zSign = ( (int64_t) aSig < 0 );
158142c2 5522 if ( zSign ) aSig = - aSig;
ff32e16e 5523 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5524
5525}
5526
374dfc33
AJ
5527/*----------------------------------------------------------------------------
5528| Returns the binary log of the double-precision floating-point value `a'.
5529| The operation is performed according to the IEC/IEEE Standard for Binary
5530| Floating-Point Arithmetic.
5531*----------------------------------------------------------------------------*/
e5a41ffa 5532float64 float64_log2(float64 a, float_status *status)
374dfc33 5533{
c120391c 5534 bool aSign, zSign;
0c48262d 5535 int aExp;
bb98fe42 5536 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5537 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5538
5539 aSig = extractFloat64Frac( a );
5540 aExp = extractFloat64Exp( a );
5541 aSign = extractFloat64Sign( a );
5542
5543 if ( aExp == 0 ) {
5544 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5545 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5546 }
5547 if ( aSign ) {
ff32e16e 5548 float_raise(float_flag_invalid, status);
af39bc8c 5549 return float64_default_nan(status);
374dfc33
AJ
5550 }
5551 if ( aExp == 0x7FF ) {
ff32e16e
PM
5552 if (aSig) {
5553 return propagateFloat64NaN(a, float64_zero, status);
5554 }
374dfc33
AJ
5555 return a;
5556 }
5557
5558 aExp -= 0x3FF;
e9321124 5559 aSig |= UINT64_C(0x0010000000000000);
374dfc33 5560 zSign = aExp < 0;
bb98fe42 5561 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5562 for (i = 1LL << 51; i > 0; i >>= 1) {
5563 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5564 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
e9321124 5565 if ( aSig & UINT64_C(0x0020000000000000) ) {
374dfc33
AJ
5566 aSig >>= 1;
5567 zSig |= i;
5568 }
5569 }
5570
5571 if ( zSign )
5572 zSig = -zSig;
ff32e16e 5573 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5574}
5575
158142c2
FB
5576/*----------------------------------------------------------------------------
5577| Returns the result of converting the extended double-precision floating-
5578| point value `a' to the 32-bit two's complement integer format. The
5579| conversion is performed according to the IEC/IEEE Standard for Binary
5580| Floating-Point Arithmetic---which means in particular that the conversion
5581| is rounded according to the current rounding mode. If `a' is a NaN, the
5582| largest positive integer is returned. Otherwise, if the conversion
5583| overflows, the largest integer with the same sign as `a' is returned.
5584*----------------------------------------------------------------------------*/
5585
f4014512 5586int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2 5587{
c120391c 5588 bool aSign;
f4014512 5589 int32_t aExp, shiftCount;
bb98fe42 5590 uint64_t aSig;
158142c2 5591
d1eb8f2a
AD
5592 if (floatx80_invalid_encoding(a)) {
5593 float_raise(float_flag_invalid, status);
5594 return 1 << 31;
5595 }
158142c2
FB
5596 aSig = extractFloatx80Frac( a );
5597 aExp = extractFloatx80Exp( a );
5598 aSign = extractFloatx80Sign( a );
bb98fe42 5599 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5600 shiftCount = 0x4037 - aExp;
5601 if ( shiftCount <= 0 ) shiftCount = 1;
5602 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5603 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5604
5605}
5606
5607/*----------------------------------------------------------------------------
5608| Returns the result of converting the extended double-precision floating-
5609| point value `a' to the 32-bit two's complement integer format. The
5610| conversion is performed according to the IEC/IEEE Standard for Binary
5611| Floating-Point Arithmetic, except that the conversion is always rounded
5612| toward zero. If `a' is a NaN, the largest positive integer is returned.
5613| Otherwise, if the conversion overflows, the largest integer with the same
5614| sign as `a' is returned.
5615*----------------------------------------------------------------------------*/
5616
f4014512 5617int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2 5618{
c120391c 5619 bool aSign;
f4014512 5620 int32_t aExp, shiftCount;
bb98fe42 5621 uint64_t aSig, savedASig;
b3a6a2e0 5622 int32_t z;
158142c2 5623
d1eb8f2a
AD
5624 if (floatx80_invalid_encoding(a)) {
5625 float_raise(float_flag_invalid, status);
5626 return 1 << 31;
5627 }
158142c2
FB
5628 aSig = extractFloatx80Frac( a );
5629 aExp = extractFloatx80Exp( a );
5630 aSign = extractFloatx80Sign( a );
5631 if ( 0x401E < aExp ) {
bb98fe42 5632 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5633 goto invalid;
5634 }
5635 else if ( aExp < 0x3FFF ) {
a2f2d288 5636 if (aExp || aSig) {
d82f3b2d 5637 float_raise(float_flag_inexact, status);
a2f2d288 5638 }
158142c2
FB
5639 return 0;
5640 }
5641 shiftCount = 0x403E - aExp;
5642 savedASig = aSig;
5643 aSig >>= shiftCount;
5644 z = aSig;
5645 if ( aSign ) z = - z;
5646 if ( ( z < 0 ) ^ aSign ) {
5647 invalid:
ff32e16e 5648 float_raise(float_flag_invalid, status);
bb98fe42 5649 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5650 }
5651 if ( ( aSig<<shiftCount ) != savedASig ) {
d82f3b2d 5652 float_raise(float_flag_inexact, status);
158142c2
FB
5653 }
5654 return z;
5655
5656}
5657
5658/*----------------------------------------------------------------------------
5659| Returns the result of converting the extended double-precision floating-
5660| point value `a' to the 64-bit two's complement integer format. The
5661| conversion is performed according to the IEC/IEEE Standard for Binary
5662| Floating-Point Arithmetic---which means in particular that the conversion
5663| is rounded according to the current rounding mode. If `a' is a NaN,
5664| the largest positive integer is returned. Otherwise, if the conversion
5665| overflows, the largest integer with the same sign as `a' is returned.
5666*----------------------------------------------------------------------------*/
5667
f42c2224 5668int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2 5669{
c120391c 5670 bool aSign;
f4014512 5671 int32_t aExp, shiftCount;
bb98fe42 5672 uint64_t aSig, aSigExtra;
158142c2 5673
d1eb8f2a
AD
5674 if (floatx80_invalid_encoding(a)) {
5675 float_raise(float_flag_invalid, status);
5676 return 1ULL << 63;
5677 }
158142c2
FB
5678 aSig = extractFloatx80Frac( a );
5679 aExp = extractFloatx80Exp( a );
5680 aSign = extractFloatx80Sign( a );
5681 shiftCount = 0x403E - aExp;
5682 if ( shiftCount <= 0 ) {
5683 if ( shiftCount ) {
ff32e16e 5684 float_raise(float_flag_invalid, status);
0f605c88 5685 if (!aSign || floatx80_is_any_nan(a)) {
2c217da0 5686 return INT64_MAX;
158142c2 5687 }
2c217da0 5688 return INT64_MIN;
158142c2
FB
5689 }
5690 aSigExtra = 0;
5691 }
5692 else {
5693 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5694 }
ff32e16e 5695 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5696
5697}
5698
5699/*----------------------------------------------------------------------------
5700| Returns the result of converting the extended double-precision floating-
5701| point value `a' to the 64-bit two's complement integer format. The
5702| conversion is performed according to the IEC/IEEE Standard for Binary
5703| Floating-Point Arithmetic, except that the conversion is always rounded
5704| toward zero. If `a' is a NaN, the largest positive integer is returned.
5705| Otherwise, if the conversion overflows, the largest integer with the same
5706| sign as `a' is returned.
5707*----------------------------------------------------------------------------*/
5708
f42c2224 5709int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2 5710{
c120391c 5711 bool aSign;
f4014512 5712 int32_t aExp, shiftCount;
bb98fe42 5713 uint64_t aSig;
f42c2224 5714 int64_t z;
158142c2 5715
d1eb8f2a
AD
5716 if (floatx80_invalid_encoding(a)) {
5717 float_raise(float_flag_invalid, status);
5718 return 1ULL << 63;
5719 }
158142c2
FB
5720 aSig = extractFloatx80Frac( a );
5721 aExp = extractFloatx80Exp( a );
5722 aSign = extractFloatx80Sign( a );
5723 shiftCount = aExp - 0x403E;
5724 if ( 0 <= shiftCount ) {
e9321124 5725 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
158142c2 5726 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5727 float_raise(float_flag_invalid, status);
158142c2 5728 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
2c217da0 5729 return INT64_MAX;
158142c2
FB
5730 }
5731 }
2c217da0 5732 return INT64_MIN;
158142c2
FB
5733 }
5734 else if ( aExp < 0x3FFF ) {
a2f2d288 5735 if (aExp | aSig) {
d82f3b2d 5736 float_raise(float_flag_inexact, status);
a2f2d288 5737 }
158142c2
FB
5738 return 0;
5739 }
5740 z = aSig>>( - shiftCount );
bb98fe42 5741 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
d82f3b2d 5742 float_raise(float_flag_inexact, status);
158142c2
FB
5743 }
5744 if ( aSign ) z = - z;
5745 return z;
5746
5747}
5748
5749/*----------------------------------------------------------------------------
5750| Returns the result of converting the extended double-precision floating-
5751| point value `a' to the single-precision floating-point format. The
5752| conversion is performed according to the IEC/IEEE Standard for Binary
5753| Floating-Point Arithmetic.
5754*----------------------------------------------------------------------------*/
5755
e5a41ffa 5756float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2 5757{
c120391c 5758 bool aSign;
f4014512 5759 int32_t aExp;
bb98fe42 5760 uint64_t aSig;
158142c2 5761
d1eb8f2a
AD
5762 if (floatx80_invalid_encoding(a)) {
5763 float_raise(float_flag_invalid, status);
5764 return float32_default_nan(status);
5765 }
158142c2
FB
5766 aSig = extractFloatx80Frac( a );
5767 aExp = extractFloatx80Exp( a );
5768 aSign = extractFloatx80Sign( a );
5769 if ( aExp == 0x7FFF ) {
bb98fe42 5770 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5771 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5772 status);
5773 return float32_silence_nan(res, status);
158142c2
FB
5774 }
5775 return packFloat32( aSign, 0xFF, 0 );
5776 }
5777 shift64RightJamming( aSig, 33, &aSig );
5778 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5779 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5780
5781}
5782
5783/*----------------------------------------------------------------------------
5784| Returns the result of converting the extended double-precision floating-
5785| point value `a' to the double-precision floating-point format. The
5786| conversion is performed according to the IEC/IEEE Standard for Binary
5787| Floating-Point Arithmetic.
5788*----------------------------------------------------------------------------*/
5789
e5a41ffa 5790float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2 5791{
c120391c 5792 bool aSign;
f4014512 5793 int32_t aExp;
bb98fe42 5794 uint64_t aSig, zSig;
158142c2 5795
d1eb8f2a
AD
5796 if (floatx80_invalid_encoding(a)) {
5797 float_raise(float_flag_invalid, status);
5798 return float64_default_nan(status);
5799 }
158142c2
FB
5800 aSig = extractFloatx80Frac( a );
5801 aExp = extractFloatx80Exp( a );
5802 aSign = extractFloatx80Sign( a );
5803 if ( aExp == 0x7FFF ) {
bb98fe42 5804 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5805 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5806 status);
5807 return float64_silence_nan(res, status);
158142c2
FB
5808 }
5809 return packFloat64( aSign, 0x7FF, 0 );
5810 }
5811 shift64RightJamming( aSig, 1, &zSig );
5812 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5813 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5814
5815}
5816
158142c2
FB
5817/*----------------------------------------------------------------------------
5818| Returns the result of converting the extended double-precision floating-
5819| point value `a' to the quadruple-precision floating-point format. The
5820| conversion is performed according to the IEC/IEEE Standard for Binary
5821| Floating-Point Arithmetic.
5822*----------------------------------------------------------------------------*/
5823
e5a41ffa 5824float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2 5825{
c120391c 5826 bool aSign;
0c48262d 5827 int aExp;
bb98fe42 5828 uint64_t aSig, zSig0, zSig1;
158142c2 5829
d1eb8f2a
AD
5830 if (floatx80_invalid_encoding(a)) {
5831 float_raise(float_flag_invalid, status);
5832 return float128_default_nan(status);
5833 }
158142c2
FB
5834 aSig = extractFloatx80Frac( a );
5835 aExp = extractFloatx80Exp( a );
5836 aSign = extractFloatx80Sign( a );
bb98fe42 5837 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5838 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5839 status);
5840 return float128_silence_nan(res, status);
158142c2
FB
5841 }
5842 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5843 return packFloat128( aSign, aExp, zSig0, zSig1 );
5844
5845}
5846
0f721292
LV
5847/*----------------------------------------------------------------------------
5848| Rounds the extended double-precision floating-point value `a'
5849| to the precision provided by floatx80_rounding_precision and returns the
5850| result as an extended double-precision floating-point value.
5851| The operation is performed according to the IEC/IEEE Standard for Binary
5852| Floating-Point Arithmetic.
5853*----------------------------------------------------------------------------*/
5854
5855floatx80 floatx80_round(floatx80 a, float_status *status)
5856{
5857 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5858 extractFloatx80Sign(a),
5859 extractFloatx80Exp(a),
5860 extractFloatx80Frac(a), 0, status);
5861}
5862
158142c2
FB
5863/*----------------------------------------------------------------------------
5864| Rounds the extended double-precision floating-point value `a' to an integer,
5865| and returns the result as an extended double-precision floating-point
5866| value. The operation is performed according to the IEC/IEEE Standard for
5867| Binary Floating-Point Arithmetic.
5868*----------------------------------------------------------------------------*/
5869
e5a41ffa 5870floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2 5871{
c120391c 5872 bool aSign;
f4014512 5873 int32_t aExp;
bb98fe42 5874 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5875 floatx80 z;
5876
d1eb8f2a
AD
5877 if (floatx80_invalid_encoding(a)) {
5878 float_raise(float_flag_invalid, status);
5879 return floatx80_default_nan(status);
5880 }
158142c2
FB
5881 aExp = extractFloatx80Exp( a );
5882 if ( 0x403E <= aExp ) {
bb98fe42 5883 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5884 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5885 }
5886 return a;
5887 }
5888 if ( aExp < 0x3FFF ) {
5889 if ( ( aExp == 0 )
9ecaf5cc 5890 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
158142c2
FB
5891 return a;
5892 }
d82f3b2d 5893 float_raise(float_flag_inexact, status);
158142c2 5894 aSign = extractFloatx80Sign( a );
a2f2d288 5895 switch (status->float_rounding_mode) {
158142c2 5896 case float_round_nearest_even:
bb98fe42 5897 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5898 ) {
5899 return
e9321124 5900 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5901 }
5902 break;
f9288a76
PM
5903 case float_round_ties_away:
5904 if (aExp == 0x3FFE) {
e9321124 5905 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
f9288a76
PM
5906 }
5907 break;
158142c2
FB
5908 case float_round_down:
5909 return
5910 aSign ?
e9321124 5911 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
158142c2
FB
5912 : packFloatx80( 0, 0, 0 );
5913 case float_round_up:
5914 return
5915 aSign ? packFloatx80( 1, 0, 0 )
e9321124 5916 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
3dede407
RH
5917
5918 case float_round_to_zero:
5919 break;
5920 default:
5921 g_assert_not_reached();
158142c2
FB
5922 }
5923 return packFloatx80( aSign, 0, 0 );
5924 }
5925 lastBitMask = 1;
5926 lastBitMask <<= 0x403E - aExp;
5927 roundBitsMask = lastBitMask - 1;
5928 z = a;
a2f2d288 5929 switch (status->float_rounding_mode) {
dc355b76 5930 case float_round_nearest_even:
158142c2 5931 z.low += lastBitMask>>1;
dc355b76
PM
5932 if ((z.low & roundBitsMask) == 0) {
5933 z.low &= ~lastBitMask;
5934 }
5935 break;
f9288a76
PM
5936 case float_round_ties_away:
5937 z.low += lastBitMask >> 1;
5938 break;
dc355b76
PM
5939 case float_round_to_zero:
5940 break;
5941 case float_round_up:
5942 if (!extractFloatx80Sign(z)) {
5943 z.low += roundBitsMask;
5944 }
5945 break;
5946 case float_round_down:
5947 if (extractFloatx80Sign(z)) {
158142c2
FB
5948 z.low += roundBitsMask;
5949 }
dc355b76
PM
5950 break;
5951 default:
5952 abort();
158142c2
FB
5953 }
5954 z.low &= ~ roundBitsMask;
5955 if ( z.low == 0 ) {
5956 ++z.high;
e9321124 5957 z.low = UINT64_C(0x8000000000000000);
158142c2 5958 }
a2f2d288 5959 if (z.low != a.low) {
d82f3b2d 5960 float_raise(float_flag_inexact, status);
a2f2d288 5961 }
158142c2
FB
5962 return z;
5963
5964}
5965
5966/*----------------------------------------------------------------------------
5967| Returns the result of adding the absolute values of the extended double-
5968| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5969| negated before being returned. `zSign' is ignored if the result is a NaN.
5970| The addition is performed according to the IEC/IEEE Standard for Binary
5971| Floating-Point Arithmetic.
5972*----------------------------------------------------------------------------*/
5973
c120391c 5974static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5975 float_status *status)
158142c2 5976{
f4014512 5977 int32_t aExp, bExp, zExp;
bb98fe42 5978 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5979 int32_t expDiff;
158142c2
FB
5980
5981 aSig = extractFloatx80Frac( a );
5982 aExp = extractFloatx80Exp( a );
5983 bSig = extractFloatx80Frac( b );
5984 bExp = extractFloatx80Exp( b );
5985 expDiff = aExp - bExp;
5986 if ( 0 < expDiff ) {
5987 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5988 if ((uint64_t)(aSig << 1)) {
5989 return propagateFloatx80NaN(a, b, status);
5990 }
158142c2
FB
5991 return a;
5992 }
5993 if ( bExp == 0 ) --expDiff;
5994 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5995 zExp = aExp;
5996 }
5997 else if ( expDiff < 0 ) {
5998 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5999 if ((uint64_t)(bSig << 1)) {
6000 return propagateFloatx80NaN(a, b, status);
6001 }
0f605c88
LV
6002 return packFloatx80(zSign,
6003 floatx80_infinity_high,
6004 floatx80_infinity_low);
158142c2
FB
6005 }
6006 if ( aExp == 0 ) ++expDiff;
6007 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
6008 zExp = bExp;
6009 }
6010 else {
6011 if ( aExp == 0x7FFF ) {
bb98fe42 6012 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 6013 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6014 }
6015 return a;
6016 }
6017 zSig1 = 0;
6018 zSig0 = aSig + bSig;
6019 if ( aExp == 0 ) {
41602807
JM
6020 if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
6021 /* At least one of the values is a pseudo-denormal,
6022 * and there is a carry out of the result. */
6023 zExp = 1;
6024 goto shiftRight1;
6025 }
2f311075
RH
6026 if (zSig0 == 0) {
6027 return packFloatx80(zSign, 0, 0);
6028 }
158142c2
FB
6029 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
6030 goto roundAndPack;
6031 }
6032 zExp = aExp;
6033 goto shiftRight1;
6034 }
6035 zSig0 = aSig + bSig;
bb98fe42 6036 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
6037 shiftRight1:
6038 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
e9321124 6039 zSig0 |= UINT64_C(0x8000000000000000);
158142c2
FB
6040 ++zExp;
6041 roundAndPack:
a2f2d288 6042 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6043 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6044}
6045
6046/*----------------------------------------------------------------------------
6047| Returns the result of subtracting the absolute values of the extended
6048| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
6049| difference is negated before being returned. `zSign' is ignored if the
6050| result is a NaN. The subtraction is performed according to the IEC/IEEE
6051| Standard for Binary Floating-Point Arithmetic.
6052*----------------------------------------------------------------------------*/
6053
c120391c 6054static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 6055 float_status *status)
158142c2 6056{
f4014512 6057 int32_t aExp, bExp, zExp;
bb98fe42 6058 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 6059 int32_t expDiff;
158142c2
FB
6060
6061 aSig = extractFloatx80Frac( a );
6062 aExp = extractFloatx80Exp( a );
6063 bSig = extractFloatx80Frac( b );
6064 bExp = extractFloatx80Exp( b );
6065 expDiff = aExp - bExp;
6066 if ( 0 < expDiff ) goto aExpBigger;
6067 if ( expDiff < 0 ) goto bExpBigger;
6068 if ( aExp == 0x7FFF ) {
bb98fe42 6069 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 6070 return propagateFloatx80NaN(a, b, status);
158142c2 6071 }
ff32e16e 6072 float_raise(float_flag_invalid, status);
af39bc8c 6073 return floatx80_default_nan(status);
158142c2
FB
6074 }
6075 if ( aExp == 0 ) {
6076 aExp = 1;
6077 bExp = 1;
6078 }
6079 zSig1 = 0;
6080 if ( bSig < aSig ) goto aBigger;
6081 if ( aSig < bSig ) goto bBigger;
a2f2d288 6082 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
6083 bExpBigger:
6084 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6085 if ((uint64_t)(bSig << 1)) {
6086 return propagateFloatx80NaN(a, b, status);
6087 }
0f605c88
LV
6088 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
6089 floatx80_infinity_low);
158142c2
FB
6090 }
6091 if ( aExp == 0 ) ++expDiff;
6092 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
6093 bBigger:
6094 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
6095 zExp = bExp;
6096 zSign ^= 1;
6097 goto normalizeRoundAndPack;
6098 aExpBigger:
6099 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6100 if ((uint64_t)(aSig << 1)) {
6101 return propagateFloatx80NaN(a, b, status);
6102 }
158142c2
FB
6103 return a;
6104 }
6105 if ( bExp == 0 ) --expDiff;
6106 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
6107 aBigger:
6108 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
6109 zExp = aExp;
6110 normalizeRoundAndPack:
a2f2d288 6111 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6112 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6113}
6114
6115/*----------------------------------------------------------------------------
6116| Returns the result of adding the extended double-precision floating-point
6117| values `a' and `b'. The operation is performed according to the IEC/IEEE
6118| Standard for Binary Floating-Point Arithmetic.
6119*----------------------------------------------------------------------------*/
6120
e5a41ffa 6121floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2 6122{
c120391c 6123 bool aSign, bSign;
158142c2 6124
d1eb8f2a
AD
6125 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6126 float_raise(float_flag_invalid, status);
6127 return floatx80_default_nan(status);
6128 }
158142c2
FB
6129 aSign = extractFloatx80Sign( a );
6130 bSign = extractFloatx80Sign( b );
6131 if ( aSign == bSign ) {
ff32e16e 6132 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6133 }
6134 else {
ff32e16e 6135 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6136 }
6137
6138}
6139
6140/*----------------------------------------------------------------------------
6141| Returns the result of subtracting the extended double-precision floating-
6142| point values `a' and `b'. The operation is performed according to the
6143| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6144*----------------------------------------------------------------------------*/
6145
e5a41ffa 6146floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2 6147{
c120391c 6148 bool aSign, bSign;
158142c2 6149
d1eb8f2a
AD
6150 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6151 float_raise(float_flag_invalid, status);
6152 return floatx80_default_nan(status);
6153 }
158142c2
FB
6154 aSign = extractFloatx80Sign( a );
6155 bSign = extractFloatx80Sign( b );
6156 if ( aSign == bSign ) {
ff32e16e 6157 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6158 }
6159 else {
ff32e16e 6160 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6161 }
6162
6163}
6164
6165/*----------------------------------------------------------------------------
6166| Returns the result of multiplying the extended double-precision floating-
6167| point values `a' and `b'. The operation is performed according to the
6168| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6169*----------------------------------------------------------------------------*/
6170
e5a41ffa 6171floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2 6172{
c120391c 6173 bool aSign, bSign, zSign;
f4014512 6174 int32_t aExp, bExp, zExp;
bb98fe42 6175 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 6176
d1eb8f2a
AD
6177 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6178 float_raise(float_flag_invalid, status);
6179 return floatx80_default_nan(status);
6180 }
158142c2
FB
6181 aSig = extractFloatx80Frac( a );
6182 aExp = extractFloatx80Exp( a );
6183 aSign = extractFloatx80Sign( a );
6184 bSig = extractFloatx80Frac( b );
6185 bExp = extractFloatx80Exp( b );
6186 bSign = extractFloatx80Sign( b );
6187 zSign = aSign ^ bSign;
6188 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6189 if ( (uint64_t) ( aSig<<1 )
6190 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6191 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6192 }
6193 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
6194 return packFloatx80(zSign, floatx80_infinity_high,
6195 floatx80_infinity_low);
158142c2
FB
6196 }
6197 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6198 if ((uint64_t)(bSig << 1)) {
6199 return propagateFloatx80NaN(a, b, status);
6200 }
158142c2
FB
6201 if ( ( aExp | aSig ) == 0 ) {
6202 invalid:
ff32e16e 6203 float_raise(float_flag_invalid, status);
af39bc8c 6204 return floatx80_default_nan(status);
158142c2 6205 }
0f605c88
LV
6206 return packFloatx80(zSign, floatx80_infinity_high,
6207 floatx80_infinity_low);
158142c2
FB
6208 }
6209 if ( aExp == 0 ) {
6210 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6211 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6212 }
6213 if ( bExp == 0 ) {
6214 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6215 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6216 }
6217 zExp = aExp + bExp - 0x3FFE;
6218 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 6219 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
6220 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6221 --zExp;
6222 }
a2f2d288 6223 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6224 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6225}
6226
6227/*----------------------------------------------------------------------------
6228| Returns the result of dividing the extended double-precision floating-point
6229| value `a' by the corresponding value `b'. The operation is performed
6230| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6231*----------------------------------------------------------------------------*/
6232
e5a41ffa 6233floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2 6234{
c120391c 6235 bool aSign, bSign, zSign;
f4014512 6236 int32_t aExp, bExp, zExp;
bb98fe42
AF
6237 uint64_t aSig, bSig, zSig0, zSig1;
6238 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6239
d1eb8f2a
AD
6240 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6241 float_raise(float_flag_invalid, status);
6242 return floatx80_default_nan(status);
6243 }
158142c2
FB
6244 aSig = extractFloatx80Frac( a );
6245 aExp = extractFloatx80Exp( a );
6246 aSign = extractFloatx80Sign( a );
6247 bSig = extractFloatx80Frac( b );
6248 bExp = extractFloatx80Exp( b );
6249 bSign = extractFloatx80Sign( b );
6250 zSign = aSign ^ bSign;
6251 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6252 if ((uint64_t)(aSig << 1)) {
6253 return propagateFloatx80NaN(a, b, status);
6254 }
158142c2 6255 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6256 if ((uint64_t)(bSig << 1)) {
6257 return propagateFloatx80NaN(a, b, status);
6258 }
158142c2
FB
6259 goto invalid;
6260 }
0f605c88
LV
6261 return packFloatx80(zSign, floatx80_infinity_high,
6262 floatx80_infinity_low);
158142c2
FB
6263 }
6264 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6265 if ((uint64_t)(bSig << 1)) {
6266 return propagateFloatx80NaN(a, b, status);
6267 }
158142c2
FB
6268 return packFloatx80( zSign, 0, 0 );
6269 }
6270 if ( bExp == 0 ) {
6271 if ( bSig == 0 ) {
6272 if ( ( aExp | aSig ) == 0 ) {
6273 invalid:
ff32e16e 6274 float_raise(float_flag_invalid, status);
af39bc8c 6275 return floatx80_default_nan(status);
158142c2 6276 }
ff32e16e 6277 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6278 return packFloatx80(zSign, floatx80_infinity_high,
6279 floatx80_infinity_low);
158142c2
FB
6280 }
6281 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6282 }
6283 if ( aExp == 0 ) {
6284 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6285 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6286 }
6287 zExp = aExp - bExp + 0x3FFE;
6288 rem1 = 0;
6289 if ( bSig <= aSig ) {
6290 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6291 ++zExp;
6292 }
6293 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6294 mul64To128( bSig, zSig0, &term0, &term1 );
6295 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 6296 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6297 --zSig0;
6298 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6299 }
6300 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 6301 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6302 mul64To128( bSig, zSig1, &term1, &term2 );
6303 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6304 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6305 --zSig1;
6306 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6307 }
6308 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6309 }
a2f2d288 6310 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6311 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6312}
6313
6314/*----------------------------------------------------------------------------
6315| Returns the remainder of the extended double-precision floating-point value
6316| `a' with respect to the corresponding value `b'. The operation is performed
6b8b0136
JM
6317| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6318| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
445810ec
JM
6319| the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6320| the absolute value of the integer quotient.
158142c2
FB
6321*----------------------------------------------------------------------------*/
6322
445810ec 6323floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6b8b0136 6324 float_status *status)
158142c2 6325{
c120391c 6326 bool aSign, zSign;
b662495d 6327 int32_t aExp, bExp, expDiff, aExpOrig;
bb98fe42
AF
6328 uint64_t aSig0, aSig1, bSig;
6329 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6330
445810ec 6331 *quotient = 0;
d1eb8f2a
AD
6332 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6333 float_raise(float_flag_invalid, status);
6334 return floatx80_default_nan(status);
6335 }
158142c2 6336 aSig0 = extractFloatx80Frac( a );
b662495d 6337 aExpOrig = aExp = extractFloatx80Exp( a );
158142c2
FB
6338 aSign = extractFloatx80Sign( a );
6339 bSig = extractFloatx80Frac( b );
6340 bExp = extractFloatx80Exp( b );
158142c2 6341 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6342 if ( (uint64_t) ( aSig0<<1 )
6343 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6344 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6345 }
6346 goto invalid;
6347 }
6348 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6349 if ((uint64_t)(bSig << 1)) {
6350 return propagateFloatx80NaN(a, b, status);
6351 }
b662495d
JM
6352 if (aExp == 0 && aSig0 >> 63) {
6353 /*
6354 * Pseudo-denormal argument must be returned in normalized
6355 * form.
6356 */
6357 return packFloatx80(aSign, 1, aSig0);
6358 }
158142c2
FB
6359 return a;
6360 }
6361 if ( bExp == 0 ) {
6362 if ( bSig == 0 ) {
6363 invalid:
ff32e16e 6364 float_raise(float_flag_invalid, status);
af39bc8c 6365 return floatx80_default_nan(status);
158142c2
FB
6366 }
6367 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6368 }
6369 if ( aExp == 0 ) {
499a2f7b 6370 if ( aSig0 == 0 ) return a;
158142c2
FB
6371 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6372 }
158142c2
FB
6373 zSign = aSign;
6374 expDiff = aExp - bExp;
6375 aSig1 = 0;
6376 if ( expDiff < 0 ) {
b662495d
JM
6377 if ( mod || expDiff < -1 ) {
6378 if (aExp == 1 && aExpOrig == 0) {
6379 /*
6380 * Pseudo-denormal argument must be returned in
6381 * normalized form.
6382 */
6383 return packFloatx80(aSign, aExp, aSig0);
6384 }
6385 return a;
6386 }
158142c2
FB
6387 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6388 expDiff = 0;
6389 }
445810ec 6390 *quotient = q = ( bSig <= aSig0 );
158142c2
FB
6391 if ( q ) aSig0 -= bSig;
6392 expDiff -= 64;
6393 while ( 0 < expDiff ) {
6394 q = estimateDiv128To64( aSig0, aSig1, bSig );
6395 q = ( 2 < q ) ? q - 2 : 0;
6396 mul64To128( bSig, q, &term0, &term1 );
6397 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6398 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6399 expDiff -= 62;
445810ec
JM
6400 *quotient <<= 62;
6401 *quotient += q;
158142c2
FB
6402 }
6403 expDiff += 64;
6404 if ( 0 < expDiff ) {
6405 q = estimateDiv128To64( aSig0, aSig1, bSig );
6406 q = ( 2 < q ) ? q - 2 : 0;
6407 q >>= 64 - expDiff;
6408 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6409 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6410 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6411 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6412 ++q;
6413 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6414 }
445810ec
JM
6415 if (expDiff < 64) {
6416 *quotient <<= expDiff;
6417 } else {
6418 *quotient = 0;
6419 }
6420 *quotient += q;
158142c2
FB
6421 }
6422 else {
6423 term1 = 0;
6424 term0 = bSig;
6425 }
6b8b0136
JM
6426 if (!mod) {
6427 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6428 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6429 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6430 && ( q & 1 ) )
6431 ) {
6432 aSig0 = alternateASig0;
6433 aSig1 = alternateASig1;
6434 zSign = ! zSign;
445810ec 6435 ++*quotient;
6b8b0136 6436 }
158142c2
FB
6437 }
6438 return
6439 normalizeRoundAndPackFloatx80(
ff32e16e 6440 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6441
6442}
6443
6b8b0136
JM
6444/*----------------------------------------------------------------------------
6445| Returns the remainder of the extended double-precision floating-point value
6446| `a' with respect to the corresponding value `b'. The operation is performed
6447| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6448*----------------------------------------------------------------------------*/
6449
6450floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6451{
445810ec
JM
6452 uint64_t quotient;
6453 return floatx80_modrem(a, b, false, &quotient, status);
6b8b0136
JM
6454}
6455
6456/*----------------------------------------------------------------------------
6457| Returns the remainder of the extended double-precision floating-point value
6458| `a' with respect to the corresponding value `b', with the quotient truncated
6459| toward zero.
6460*----------------------------------------------------------------------------*/
6461
6462floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6463{
445810ec
JM
6464 uint64_t quotient;
6465 return floatx80_modrem(a, b, true, &quotient, status);
6b8b0136
JM
6466}
6467
158142c2
FB
6468/*----------------------------------------------------------------------------
6469| Returns the square root of the extended double-precision floating-point
6470| value `a'. The operation is performed according to the IEC/IEEE Standard
6471| for Binary Floating-Point Arithmetic.
6472*----------------------------------------------------------------------------*/
6473
e5a41ffa 6474floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2 6475{
c120391c 6476 bool aSign;
f4014512 6477 int32_t aExp, zExp;
bb98fe42
AF
6478 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6479 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6480
d1eb8f2a
AD
6481 if (floatx80_invalid_encoding(a)) {
6482 float_raise(float_flag_invalid, status);
6483 return floatx80_default_nan(status);
6484 }
158142c2
FB
6485 aSig0 = extractFloatx80Frac( a );
6486 aExp = extractFloatx80Exp( a );
6487 aSign = extractFloatx80Sign( a );
6488 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6489 if ((uint64_t)(aSig0 << 1)) {
6490 return propagateFloatx80NaN(a, a, status);
6491 }
158142c2
FB
6492 if ( ! aSign ) return a;
6493 goto invalid;
6494 }
6495 if ( aSign ) {
6496 if ( ( aExp | aSig0 ) == 0 ) return a;
6497 invalid:
ff32e16e 6498 float_raise(float_flag_invalid, status);
af39bc8c 6499 return floatx80_default_nan(status);
158142c2
FB
6500 }
6501 if ( aExp == 0 ) {
6502 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6503 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6504 }
6505 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6506 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6507 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6508 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6509 doubleZSig0 = zSig0<<1;
6510 mul64To128( zSig0, zSig0, &term0, &term1 );
6511 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6512 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6513 --zSig0;
6514 doubleZSig0 -= 2;
6515 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6516 }
6517 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
e9321124 6518 if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
158142c2
FB
6519 if ( zSig1 == 0 ) zSig1 = 1;
6520 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6521 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6522 mul64To128( zSig1, zSig1, &term2, &term3 );
6523 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6524 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6525 --zSig1;
6526 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6527 term3 |= 1;
6528 term2 |= doubleZSig0;
6529 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6530 }
6531 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6532 }
6533 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6534 zSig0 |= doubleZSig0;
a2f2d288
PM
6535 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6536 0, zExp, zSig0, zSig1, status);
158142c2
FB
6537}
6538
2e6d8568
BR
6539/*----------------------------------------------------------------------------
6540| Returns the result of converting the quadruple-precision floating-point value
6541| `a' to the 64-bit unsigned integer format. The conversion is
6542| performed according to the IEC/IEEE Standard for Binary Floating-Point
6543| Arithmetic---which means in particular that the conversion is rounded
6544| according to the current rounding mode. If `a' is a NaN, the largest
6545| positive integer is returned. If the conversion overflows, the
6546| largest unsigned integer is returned. If 'a' is negative, the value is
6547| rounded and zero is returned; negative values that do not round to zero
6548| will raise the inexact exception.
6549*----------------------------------------------------------------------------*/
6550
6551uint64_t float128_to_uint64(float128 a, float_status *status)
6552{
c120391c 6553 bool aSign;
2e6d8568
BR
6554 int aExp;
6555 int shiftCount;
6556 uint64_t aSig0, aSig1;
6557
6558 aSig0 = extractFloat128Frac0(a);
6559 aSig1 = extractFloat128Frac1(a);
6560 aExp = extractFloat128Exp(a);
6561 aSign = extractFloat128Sign(a);
6562 if (aSign && (aExp > 0x3FFE)) {
6563 float_raise(float_flag_invalid, status);
6564 if (float128_is_any_nan(a)) {
2c217da0 6565 return UINT64_MAX;
2e6d8568
BR
6566 } else {
6567 return 0;
6568 }
6569 }
6570 if (aExp) {
2c217da0 6571 aSig0 |= UINT64_C(0x0001000000000000);
2e6d8568
BR
6572 }
6573 shiftCount = 0x402F - aExp;
6574 if (shiftCount <= 0) {
6575 if (0x403E < aExp) {
6576 float_raise(float_flag_invalid, status);
2c217da0 6577 return UINT64_MAX;
2e6d8568
BR
6578 }
6579 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6580 } else {
6581 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6582 }
6583 return roundAndPackUint64(aSign, aSig0, aSig1, status);
6584}
6585
6586uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6587{
6588 uint64_t v;
6589 signed char current_rounding_mode = status->float_rounding_mode;
6590
6591 set_float_rounding_mode(float_round_to_zero, status);
6592 v = float128_to_uint64(a, status);
6593 set_float_rounding_mode(current_rounding_mode, status);
6594
6595 return v;
6596}
6597
158142c2
FB
6598/*----------------------------------------------------------------------------
6599| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
6600| value `a' to the 32-bit unsigned integer format. The conversion
6601| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6602| Arithmetic except that the conversion is always rounded toward zero.
6603| If `a' is a NaN, the largest positive integer is returned. Otherwise,
6604| if the conversion overflows, the largest unsigned integer is returned.
6605| If 'a' is negative, the value is rounded and zero is returned; negative
6606| values that do not round to zero will raise the inexact exception.
6607*----------------------------------------------------------------------------*/
6608
6609uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6610{
6611 uint64_t v;
6612 uint32_t res;
6613 int old_exc_flags = get_float_exception_flags(status);
6614
6615 v = float128_to_uint64_round_to_zero(a, status);
6616 if (v > 0xffffffff) {
6617 res = 0xffffffff;
6618 } else {
6619 return v;
6620 }
6621 set_float_exception_flags(old_exc_flags, status);
e45de992
DH
6622 float_raise(float_flag_invalid, status);
6623 return res;
6624}
6625
6626/*----------------------------------------------------------------------------
6627| Returns the result of converting the quadruple-precision floating-point value
6628| `a' to the 32-bit unsigned integer format. The conversion is
6629| performed according to the IEC/IEEE Standard for Binary Floating-Point
6630| Arithmetic---which means in particular that the conversion is rounded
6631| according to the current rounding mode. If `a' is a NaN, the largest
6632| positive integer is returned. If the conversion overflows, the
6633| largest unsigned integer is returned. If 'a' is negative, the value is
6634| rounded and zero is returned; negative values that do not round to zero
6635| will raise the inexact exception.
6636*----------------------------------------------------------------------------*/
6637
6638uint32_t float128_to_uint32(float128 a, float_status *status)
6639{
6640 uint64_t v;
6641 uint32_t res;
6642 int old_exc_flags = get_float_exception_flags(status);
6643
6644 v = float128_to_uint64(a, status);
6645 if (v > 0xffffffff) {
6646 res = 0xffffffff;
6647 } else {
6648 return v;
6649 }
6650 set_float_exception_flags(old_exc_flags, status);
fd425037
BR
6651 float_raise(float_flag_invalid, status);
6652 return res;
6653}
6654
158142c2
FB
6655/*----------------------------------------------------------------------------
6656| Returns the result of converting the quadruple-precision floating-point
6657| value `a' to the extended double-precision floating-point format. The
6658| conversion is performed according to the IEC/IEEE Standard for Binary
6659| Floating-Point Arithmetic.
6660*----------------------------------------------------------------------------*/
6661
e5a41ffa 6662floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2 6663{
c120391c 6664 bool aSign;
f4014512 6665 int32_t aExp;
bb98fe42 6666 uint64_t aSig0, aSig1;
158142c2
FB
6667
6668 aSig1 = extractFloat128Frac1( a );
6669 aSig0 = extractFloat128Frac0( a );
6670 aExp = extractFloat128Exp( a );
6671 aSign = extractFloat128Sign( a );
6672 if ( aExp == 0x7FFF ) {
6673 if ( aSig0 | aSig1 ) {
7537c2b4
JM
6674 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6675 status);
6676 return floatx80_silence_nan(res, status);
158142c2 6677 }
0f605c88
LV
6678 return packFloatx80(aSign, floatx80_infinity_high,
6679 floatx80_infinity_low);
158142c2
FB
6680 }
6681 if ( aExp == 0 ) {
6682 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6683 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6684 }
6685 else {
e9321124 6686 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6687 }
6688 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6689 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6690
6691}
6692
158142c2
FB
6693/*----------------------------------------------------------------------------
6694| Returns the remainder of the quadruple-precision floating-point value `a'
6695| with respect to the corresponding value `b'. The operation is performed
6696| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6697*----------------------------------------------------------------------------*/
6698
e5a41ffa 6699float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 6700{
c120391c 6701 bool aSign, zSign;
f4014512 6702 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6703 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6704 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6705 int64_t sigMean0;
158142c2
FB
6706
6707 aSig1 = extractFloat128Frac1( a );
6708 aSig0 = extractFloat128Frac0( a );
6709 aExp = extractFloat128Exp( a );
6710 aSign = extractFloat128Sign( a );
6711 bSig1 = extractFloat128Frac1( b );
6712 bSig0 = extractFloat128Frac0( b );
6713 bExp = extractFloat128Exp( b );
158142c2
FB
6714 if ( aExp == 0x7FFF ) {
6715 if ( ( aSig0 | aSig1 )
6716 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6717 return propagateFloat128NaN(a, b, status);
158142c2
FB
6718 }
6719 goto invalid;
6720 }
6721 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6722 if (bSig0 | bSig1) {
6723 return propagateFloat128NaN(a, b, status);
6724 }
158142c2
FB
6725 return a;
6726 }
6727 if ( bExp == 0 ) {
6728 if ( ( bSig0 | bSig1 ) == 0 ) {
6729 invalid:
ff32e16e 6730 float_raise(float_flag_invalid, status);
af39bc8c 6731 return float128_default_nan(status);
158142c2
FB
6732 }
6733 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6734 }
6735 if ( aExp == 0 ) {
6736 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6737 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6738 }
6739 expDiff = aExp - bExp;
6740 if ( expDiff < -1 ) return a;
6741 shortShift128Left(
e9321124 6742 aSig0 | UINT64_C(0x0001000000000000),
158142c2
FB
6743 aSig1,
6744 15 - ( expDiff < 0 ),
6745 &aSig0,
6746 &aSig1
6747 );
6748 shortShift128Left(
e9321124 6749 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
6750 q = le128( bSig0, bSig1, aSig0, aSig1 );
6751 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6752 expDiff -= 64;
6753 while ( 0 < expDiff ) {
6754 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6755 q = ( 4 < q ) ? q - 4 : 0;
6756 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6757 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6758 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6759 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6760 expDiff -= 61;
6761 }
6762 if ( -64 < expDiff ) {
6763 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6764 q = ( 4 < q ) ? q - 4 : 0;
6765 q >>= - expDiff;
6766 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6767 expDiff += 52;
6768 if ( expDiff < 0 ) {
6769 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6770 }
6771 else {
6772 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6773 }
6774 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6775 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6776 }
6777 else {
6778 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6779 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6780 }
6781 do {
6782 alternateASig0 = aSig0;
6783 alternateASig1 = aSig1;
6784 ++q;
6785 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 6786 } while ( 0 <= (int64_t) aSig0 );
158142c2 6787 add128(
bb98fe42 6788 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
6789 if ( ( sigMean0 < 0 )
6790 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6791 aSig0 = alternateASig0;
6792 aSig1 = alternateASig1;
6793 }
bb98fe42 6794 zSign = ( (int64_t) aSig0 < 0 );
158142c2 6795 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
6796 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6797 status);
158142c2
FB
6798}
6799
6800/*----------------------------------------------------------------------------
6801| Returns the square root of the quadruple-precision floating-point value `a'.
6802| The operation is performed according to the IEC/IEEE Standard for Binary
6803| Floating-Point Arithmetic.
6804*----------------------------------------------------------------------------*/
6805
e5a41ffa 6806float128 float128_sqrt(float128 a, float_status *status)
158142c2 6807{
c120391c 6808 bool aSign;
f4014512 6809 int32_t aExp, zExp;
bb98fe42
AF
6810 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6811 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6812
6813 aSig1 = extractFloat128Frac1( a );
6814 aSig0 = extractFloat128Frac0( a );
6815 aExp = extractFloat128Exp( a );
6816 aSign = extractFloat128Sign( a );
6817 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6818 if (aSig0 | aSig1) {
6819 return propagateFloat128NaN(a, a, status);
6820 }
158142c2
FB
6821 if ( ! aSign ) return a;
6822 goto invalid;
6823 }
6824 if ( aSign ) {
6825 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6826 invalid:
ff32e16e 6827 float_raise(float_flag_invalid, status);
af39bc8c 6828 return float128_default_nan(status);
158142c2
FB
6829 }
6830 if ( aExp == 0 ) {
6831 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6832 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6833 }
6834 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
e9321124 6835 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6836 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6837 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6838 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6839 doubleZSig0 = zSig0<<1;
6840 mul64To128( zSig0, zSig0, &term0, &term1 );
6841 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6842 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6843 --zSig0;
6844 doubleZSig0 -= 2;
6845 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6846 }
6847 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6848 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6849 if ( zSig1 == 0 ) zSig1 = 1;
6850 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6851 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6852 mul64To128( zSig1, zSig1, &term2, &term3 );
6853 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6854 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6855 --zSig1;
6856 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6857 term3 |= 1;
6858 term2 |= doubleZSig0;
6859 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6860 }
6861 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6862 }
6863 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 6864 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6865
6866}
6867
71bfd65c
RH
6868static inline FloatRelation
6869floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
6870 float_status *status)
f6714d36 6871{
c120391c 6872 bool aSign, bSign;
f6714d36 6873
d1eb8f2a
AD
6874 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6875 float_raise(float_flag_invalid, status);
6876 return float_relation_unordered;
6877 }
f6714d36
AJ
6878 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6879 ( extractFloatx80Frac( a )<<1 ) ) ||
6880 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6881 ( extractFloatx80Frac( b )<<1 ) )) {
6882 if (!is_quiet ||
af39bc8c
AM
6883 floatx80_is_signaling_nan(a, status) ||
6884 floatx80_is_signaling_nan(b, status)) {
ff32e16e 6885 float_raise(float_flag_invalid, status);
f6714d36
AJ
6886 }
6887 return float_relation_unordered;
6888 }
6889 aSign = extractFloatx80Sign( a );
6890 bSign = extractFloatx80Sign( b );
6891 if ( aSign != bSign ) {
6892
6893 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6894 ( ( a.low | b.low ) == 0 ) ) {
6895 /* zero case */
6896 return float_relation_equal;
6897 } else {
6898 return 1 - (2 * aSign);
6899 }
6900 } else {
be53fa78
JM
6901 /* Normalize pseudo-denormals before comparison. */
6902 if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
6903 ++a.high;
6904 }
6905 if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
6906 ++b.high;
6907 }
f6714d36
AJ
6908 if (a.low == b.low && a.high == b.high) {
6909 return float_relation_equal;
6910 } else {
6911 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6912 }
6913 }
6914}
6915
71bfd65c 6916FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 6917{
ff32e16e 6918 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
6919}
6920
71bfd65c
RH
6921FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
6922 float_status *status)
f6714d36 6923{
ff32e16e 6924 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
6925}
6926
71bfd65c
RH
6927static inline FloatRelation
6928float128_compare_internal(float128 a, float128 b, bool is_quiet,
6929 float_status *status)
1f587329 6930{
c120391c 6931 bool aSign, bSign;
1f587329
BS
6932
6933 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6934 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
6935 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
6936 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
6937 if (!is_quiet ||
af39bc8c
AM
6938 float128_is_signaling_nan(a, status) ||
6939 float128_is_signaling_nan(b, status)) {
ff32e16e 6940 float_raise(float_flag_invalid, status);
1f587329
BS
6941 }
6942 return float_relation_unordered;
6943 }
6944 aSign = extractFloat128Sign( a );
6945 bSign = extractFloat128Sign( b );
6946 if ( aSign != bSign ) {
6947 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
6948 /* zero case */
6949 return float_relation_equal;
6950 } else {
6951 return 1 - (2 * aSign);
6952 }
6953 } else {
6954 if (a.low == b.low && a.high == b.high) {
6955 return float_relation_equal;
6956 } else {
6957 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6958 }
6959 }
6960}
6961
71bfd65c 6962FloatRelation float128_compare(float128 a, float128 b, float_status *status)
1f587329 6963{
ff32e16e 6964 return float128_compare_internal(a, b, 0, status);
1f587329
BS
6965}
6966
71bfd65c
RH
6967FloatRelation float128_compare_quiet(float128 a, float128 b,
6968 float_status *status)
1f587329 6969{
ff32e16e 6970 return float128_compare_internal(a, b, 1, status);
1f587329
BS
6971}
6972
e5a41ffa 6973floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb 6974{
c120391c 6975 bool aSign;
326b9e98 6976 int32_t aExp;
bb98fe42 6977 uint64_t aSig;
9ee6e8bb 6978
d1eb8f2a
AD
6979 if (floatx80_invalid_encoding(a)) {
6980 float_raise(float_flag_invalid, status);
6981 return floatx80_default_nan(status);
6982 }
9ee6e8bb
PB
6983 aSig = extractFloatx80Frac( a );
6984 aExp = extractFloatx80Exp( a );
6985 aSign = extractFloatx80Sign( a );
6986
326b9e98
AJ
6987 if ( aExp == 0x7FFF ) {
6988 if ( aSig<<1 ) {
ff32e16e 6989 return propagateFloatx80NaN(a, a, status);
326b9e98 6990 }
9ee6e8bb
PB
6991 return a;
6992 }
326b9e98 6993
3c85c37f
PM
6994 if (aExp == 0) {
6995 if (aSig == 0) {
6996 return a;
6997 }
6998 aExp++;
6999 }
69397542 7000
326b9e98
AJ
7001 if (n > 0x10000) {
7002 n = 0x10000;
7003 } else if (n < -0x10000) {
7004 n = -0x10000;
7005 }
7006
9ee6e8bb 7007 aExp += n;
a2f2d288
PM
7008 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7009 aSign, aExp, aSig, 0, status);
9ee6e8bb 7010}
9ee6e8bb 7011
e5a41ffa 7012float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb 7013{
c120391c 7014 bool aSign;
326b9e98 7015 int32_t aExp;
bb98fe42 7016 uint64_t aSig0, aSig1;
9ee6e8bb
PB
7017
7018 aSig1 = extractFloat128Frac1( a );
7019 aSig0 = extractFloat128Frac0( a );
7020 aExp = extractFloat128Exp( a );
7021 aSign = extractFloat128Sign( a );
7022 if ( aExp == 0x7FFF ) {
326b9e98 7023 if ( aSig0 | aSig1 ) {
ff32e16e 7024 return propagateFloat128NaN(a, a, status);
326b9e98 7025 }
9ee6e8bb
PB
7026 return a;
7027 }
3c85c37f 7028 if (aExp != 0) {
e9321124 7029 aSig0 |= UINT64_C(0x0001000000000000);
3c85c37f 7030 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 7031 return a;
3c85c37f
PM
7032 } else {
7033 aExp++;
7034 }
69397542 7035
326b9e98
AJ
7036 if (n > 0x10000) {
7037 n = 0x10000;
7038 } else if (n < -0x10000) {
7039 n = -0x10000;
7040 }
7041
69397542
PB
7042 aExp += n - 1;
7043 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 7044 , status);
9ee6e8bb
PB
7045
7046}
f6b3b108
EC
7047
7048static void __attribute__((constructor)) softfloat_init(void)
7049{
7050 union_float64 ua, ub, uc, ur;
7051
7052 if (QEMU_NO_HARDFLOAT) {
7053 return;
7054 }
7055 /*
7056 * Test that the host's FMA is not obviously broken. For example,
7057 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
7058 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
7059 */
7060 ua.s = 0x0020000000000001ULL;
7061 ub.s = 0x3ca0000000000000ULL;
7062 uc.s = 0x0020000000000000ULL;
7063 ur.h = fma(ua.h, ub.h, uc.h);
7064 if (ur.s != 0x0020000000000001ULL) {
7065 force_soft_fma = true;
7066 }
7067}