]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
softfloat: Convert float-to-float conversions with float128
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
d82f3b2d 135 float_raise(float_flag_input_denormal, s); \
a94b7839
EC
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 *
 * Naming: QEMU_HARDFLOAT_<inputs>F<bits>_USE_FP; 1 selects fpclassify(),
 * 0 selects the softfloat float32/64_* predicates.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205
/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif
217
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 *
 * QEMU_NO_HARDFLOAT is 1 when hardfloat must not be used (TARGET_PPC, or a
 * -ffast-math build), else 0.  QEMU_SOFTFLOAT_ATTR additionally carries
 * noinline only when hardfloat is enabled.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
a94b7839
EC
342static inline float32
343float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
b240c9c4 345 f32_check_fn pre, f32_check_fn post)
a94b7839
EC
346{
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
a94b7839
EC
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 363 float_raise(float_flag_overflow, s);
b240c9c4
RH
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
a94b7839
EC
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371}
372
373static inline float64
374float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
b240c9c4 376 f64_check_fn pre, f64_check_fn post)
a94b7839
EC
377{
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
a94b7839
EC
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 394 float_raise(float_flag_overflow, s);
b240c9c4
RH
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
a94b7839
EC
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402}
403
d97544c9
AB
404/*----------------------------------------------------------------------------
405| Returns the fraction bits of the single-precision floating-point value `a'.
406*----------------------------------------------------------------------------*/
407
408static inline uint32_t extractFloat32Frac(float32 a)
409{
410 return float32_val(a) & 0x007FFFFF;
411}
412
413/*----------------------------------------------------------------------------
414| Returns the exponent bits of the single-precision floating-point value `a'.
415*----------------------------------------------------------------------------*/
416
417static inline int extractFloat32Exp(float32 a)
418{
419 return (float32_val(a) >> 23) & 0xFF;
420}
421
422/*----------------------------------------------------------------------------
423| Returns the sign bit of the single-precision floating-point value `a'.
424*----------------------------------------------------------------------------*/
425
c120391c 426static inline bool extractFloat32Sign(float32 a)
d97544c9
AB
427{
428 return float32_val(a) >> 31;
429}
430
431/*----------------------------------------------------------------------------
432| Returns the fraction bits of the double-precision floating-point value `a'.
433*----------------------------------------------------------------------------*/
434
435static inline uint64_t extractFloat64Frac(float64 a)
436{
e9321124 437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
d97544c9
AB
438}
439
440/*----------------------------------------------------------------------------
441| Returns the exponent bits of the double-precision floating-point value `a'.
442*----------------------------------------------------------------------------*/
443
444static inline int extractFloat64Exp(float64 a)
445{
446 return (float64_val(a) >> 52) & 0x7FF;
447}
448
449/*----------------------------------------------------------------------------
450| Returns the sign bit of the double-precision floating-point value `a'.
451*----------------------------------------------------------------------------*/
452
c120391c 453static inline bool extractFloat64Sign(float64 a)
d97544c9
AB
454{
455 return float64_val(a) >> 63;
456}
457
a90119b5
AB
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 *
 * The enumerator order is significant: it is relied on both for the NaN
 * range test above and for the bit positions produced by float_cmask().
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
471
134eda00
RH
472#define float_cmask(bit) (1u << (bit))
473
474enum {
475 float_cmask_zero = float_cmask(float_class_zero),
476 float_cmask_normal = float_cmask(float_class_normal),
477 float_cmask_inf = float_cmask(float_class_inf),
478 float_cmask_qnan = float_cmask(float_class_qnan),
479 float_cmask_snan = float_cmask(float_class_snan),
480
481 float_cmask_infzero = float_cmask_zero | float_cmask_inf,
482 float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
483};
484
485
247d1f21
RH
486/* Simple helpers for checking if, or what kind of, NaN we have */
487static inline __attribute__((unused)) bool is_nan(FloatClass c)
488{
489 return unlikely(c >= float_class_qnan);
490}
491
492static inline __attribute__((unused)) bool is_snan(FloatClass c)
493{
494 return c == float_class_snan;
495}
496
497static inline __attribute__((unused)) bool is_qnan(FloatClass c)
498{
499 return c == float_class_qnan;
500}
501
a90119b5 502/*
0018b1f4
RH
503 * Structure holding all of the decomposed parts of a float.
504 * The exponent is unbiased and the fraction is normalized.
a90119b5 505 *
0018b1f4
RH
506 * The fraction words are stored in big-endian word ordering,
507 * so that truncation from a larger format to a smaller format
508 * can be done simply by ignoring subsequent elements.
a90119b5
AB
509 */
510
511typedef struct {
a90119b5
AB
512 FloatClass cls;
513 bool sign;
4109b9ea
RH
514 int32_t exp;
515 union {
516 /* Routines that know the structure may reference the singular name. */
517 uint64_t frac;
518 /*
519 * Routines expanded with multiple structures reference "hi" and "lo"
520 * depending on the operation. In FloatParts64, "hi" and "lo" are
521 * both the same word and aliased here.
522 */
523 uint64_t frac_hi;
524 uint64_t frac_lo;
525 };
f8155c1d 526} FloatParts64;
a90119b5 527
0018b1f4
RH
528typedef struct {
529 FloatClass cls;
530 bool sign;
531 int32_t exp;
532 uint64_t frac_hi;
533 uint64_t frac_lo;
534} FloatParts128;
535
aca84527
RH
536typedef struct {
537 FloatClass cls;
538 bool sign;
539 int32_t exp;
540 uint64_t frac_hi;
541 uint64_t frac_hm; /* high-middle */
542 uint64_t frac_lm; /* low-middle */
543 uint64_t frac_lo;
544} FloatParts256;
545
/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
a90119b5
AB
549
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;
575
/*
 * Expand fields based on the size of exponent and fraction.
 *
 *   E: width of the exponent field in bits
 *   F: width of the fraction field in bits
 *
 * Expands to a list of designated initializers for FloatFmt (no trailing
 * comma).  The shift amounts are reduced modulo 64 so that fractions wider
 * than one word (e.g. F = 112) are positioned within their own 64-bit word.
 * Both arguments are parenthesized so that expression arguments expand with
 * the intended precedence.
 */
#define FLOAT_PARAMS(E, F)                                          \
    .exp_size       = (E),                                          \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                        \
    .exp_max        = (1 << (E)) - 1,                               \
    .frac_size      = (F),                                          \
    .frac_shift     = (-(F) - 1) & 63,                              \
    .frac_lsb       = 1ull << ((-(F) - 1) & 63),                    \
    .frac_lsbm1     = 1ull << ((-(F) - 2) & 63),                    \
    .round_mask     = (1ull << ((-(F) - 1) & 63)) - 1,              \
    .roundeven_mask = (2ull << ((-(F) - 1) & 63)) - 1
a90119b5
AB
587
588static const FloatFmt float16_params = {
589 FLOAT_PARAMS(5, 10)
590};
591
6fed16b2
AB
592static const FloatFmt float16_params_ahp = {
593 FLOAT_PARAMS(5, 10),
594 .arm_althp = true
595};
596
8282310d
LZ
597static const FloatFmt bfloat16_params = {
598 FLOAT_PARAMS(8, 7)
599};
600
a90119b5
AB
601static const FloatFmt float32_params = {
602 FLOAT_PARAMS(8, 23)
603};
604
605static const FloatFmt float64_params = {
606 FLOAT_PARAMS(11, 52)
607};
608
0018b1f4
RH
609static const FloatFmt float128_params = {
610 FLOAT_PARAMS(15, 112)
611};
612
6fff2167 613/* Unpack a float to parts, but do not canonicalize. */
d8fdd172 614static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
6fff2167 615{
d8fdd172
RH
616 const int f_size = fmt->frac_size;
617 const int e_size = fmt->exp_size;
6fff2167 618
d8fdd172 619 *r = (FloatParts64) {
6fff2167 620 .cls = float_class_unclassified,
d8fdd172
RH
621 .sign = extract64(raw, f_size + e_size, 1),
622 .exp = extract64(raw, f_size, e_size),
623 .frac = extract64(raw, 0, f_size)
6fff2167
AB
624 };
625}
626
3dddb203 627static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
6fff2167 628{
3dddb203 629 unpack_raw64(p, &float16_params, f);
6fff2167
AB
630}
631
3dddb203 632static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
8282310d 633{
3dddb203 634 unpack_raw64(p, &bfloat16_params, f);
8282310d
LZ
635}
636
3dddb203 637static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
6fff2167 638{
3dddb203 639 unpack_raw64(p, &float32_params, f);
6fff2167
AB
640}
641
3dddb203 642static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
6fff2167 643{
3dddb203 644 unpack_raw64(p, &float64_params, f);
6fff2167
AB
645}
646
0018b1f4
RH
647static void float128_unpack_raw(FloatParts128 *p, float128 f)
648{
649 const int f_size = float128_params.frac_size - 64;
650 const int e_size = float128_params.exp_size;
651
652 *p = (FloatParts128) {
653 .cls = float_class_unclassified,
654 .sign = extract64(f.high, f_size + e_size, 1),
655 .exp = extract64(f.high, f_size, e_size),
656 .frac_hi = extract64(f.high, 0, f_size),
657 .frac_lo = f.low,
658 };
659}
660
6fff2167 661/* Pack a float from parts, but do not canonicalize. */
9e4af58c 662static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
6fff2167 663{
9e4af58c
RH
664 const int f_size = fmt->frac_size;
665 const int e_size = fmt->exp_size;
666 uint64_t ret;
667
668 ret = (uint64_t)p->sign << (f_size + e_size);
669 ret = deposit64(ret, f_size, e_size, p->exp);
670 ret = deposit64(ret, 0, f_size, p->frac);
671 return ret;
6fff2167
AB
672}
673
71fd178e 674static inline float16 float16_pack_raw(const FloatParts64 *p)
6fff2167 675{
71fd178e 676 return make_float16(pack_raw64(p, &float16_params));
6fff2167
AB
677}
678
71fd178e 679static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
8282310d 680{
71fd178e 681 return pack_raw64(p, &bfloat16_params);
8282310d
LZ
682}
683
71fd178e 684static inline float32 float32_pack_raw(const FloatParts64 *p)
6fff2167 685{
71fd178e 686 return make_float32(pack_raw64(p, &float32_params));
6fff2167
AB
687}
688
71fd178e 689static inline float64 float64_pack_raw(const FloatParts64 *p)
6fff2167 690{
71fd178e 691 return make_float64(pack_raw64(p, &float64_params));
6fff2167
AB
692}
693
0018b1f4
RH
694static float128 float128_pack_raw(const FloatParts128 *p)
695{
696 const int f_size = float128_params.frac_size - 64;
697 const int e_size = float128_params.exp_size;
698 uint64_t hi;
699
700 hi = (uint64_t)p->sign << (f_size + e_size);
701 hi = deposit64(hi, f_size, e_size, p->exp);
702 hi = deposit64(hi, 0, f_size, p->frac_hi);
703 return make_float128(hi, p->frac_lo);
704}
705
0664335a
RH
706/*----------------------------------------------------------------------------
707| Functions and definitions to determine: (1) whether tininess for underflow
708| is detected before or after rounding by default, (2) what (if anything)
709| happens when exceptions are raised, (3) how signaling NaNs are distinguished
710| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
711| are propagated from function inputs to output. These details are target-
712| specific.
713*----------------------------------------------------------------------------*/
139c1837 714#include "softfloat-specialize.c.inc"
0664335a 715
0018b1f4
RH
716#define PARTS_GENERIC_64_128(NAME, P) \
717 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
718
dedd123c
RH
719#define PARTS_GENERIC_64_128_256(NAME, P) \
720 QEMU_GENERIC(P, (FloatParts256 *, parts256_##NAME), \
721 (FloatParts128 *, parts128_##NAME), parts64_##NAME)
722
e9034ea8 723#define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
0018b1f4
RH
724#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
725
7c45bad8
RH
726static void parts64_return_nan(FloatParts64 *a, float_status *s);
727static void parts128_return_nan(FloatParts128 *a, float_status *s);
728
729#define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
0018b1f4 730
22c355f4
RH
731static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
732 float_status *s);
733static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
734 float_status *s);
735
736#define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
737
979582d0
RH
738static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
739 FloatParts64 *c, float_status *s,
740 int ab_mask, int abc_mask);
741static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
742 FloatParts128 *b,
743 FloatParts128 *c,
744 float_status *s,
745 int ab_mask, int abc_mask);
746
747#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
748 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
749
d46975bc
RH
750static void parts64_canonicalize(FloatParts64 *p, float_status *status,
751 const FloatFmt *fmt);
752static void parts128_canonicalize(FloatParts128 *p, float_status *status,
753 const FloatFmt *fmt);
754
755#define parts_canonicalize(A, S, F) \
756 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
757
ee6959f2
RH
758static void parts64_uncanon(FloatParts64 *p, float_status *status,
759 const FloatFmt *fmt);
760static void parts128_uncanon(FloatParts128 *p, float_status *status,
761 const FloatFmt *fmt);
762
763#define parts_uncanon(A, S, F) \
764 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
765
da10a907
RH
766static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
767static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 768static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
769
770#define parts_add_normal(A, B) \
dedd123c 771 PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
da10a907
RH
772
773static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
774static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
dedd123c 775static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
da10a907
RH
776
777#define parts_sub_normal(A, B) \
dedd123c 778 PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
da10a907
RH
779
780static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
781 float_status *s, bool subtract);
782static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
783 float_status *s, bool subtract);
784
785#define parts_addsub(A, B, S, Z) \
786 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
787
aca84527
RH
788static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
789 float_status *s);
790static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
791 float_status *s);
792
793#define parts_mul(A, B, S) \
794 PARTS_GENERIC_64_128(mul, A)(A, B, S)
795
dedd123c
RH
796static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
797 FloatParts64 *c, int flags,
798 float_status *s);
799static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
800 FloatParts128 *c, int flags,
801 float_status *s);
802
803#define parts_muladd(A, B, C, Z, S) \
804 PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
805
ec961b81
RH
806static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
807 float_status *s);
808static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
809 float_status *s);
810
811#define parts_div(A, B, S) \
812 PARTS_GENERIC_64_128(div, A)(A, B, S)
813
0018b1f4
RH
/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 *
 * The FRAC_GENERIC_* macros pick the frac64_xxx / frac128_xxx / frac256_xxx
 * helper for the FloatPartsN pointer P via QEMU_GENERIC, with frac64_xxx as
 * the fallback entry.
 */

#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    QEMU_GENERIC(P, (FloatParts256 *, frac256_##NAME), \
                 (FloatParts128 *, frac128_##NAME), frac64_##NAME)
824
da10a907
RH
825static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
826{
827 return uadd64_overflow(a->frac, b->frac, &r->frac);
828}
829
830static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
831{
832 bool c = 0;
833 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
834 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
835 return c;
836}
837
dedd123c
RH
838static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
839{
840 bool c = 0;
841 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
842 r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
843 r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
844 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
845 return c;
846}
847
848#define frac_add(R, A, B) FRAC_GENERIC_64_128_256(add, R)(R, A, B)
da10a907 849
ee6959f2
RH
850static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
851{
852 return uadd64_overflow(a->frac, c, &r->frac);
853}
854
855static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
856{
857 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
858 return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
859}
860
861#define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
862
863static void frac64_allones(FloatParts64 *a)
864{
865 a->frac = -1;
866}
867
868static void frac128_allones(FloatParts128 *a)
869{
870 a->frac_hi = a->frac_lo = -1;
871}
872
/* Type-generic all-ones: picks frac64/128_allones from the type of A. */
#define frac_allones(A) \
    FRAC_GENERIC_64_128(allones, A)(A)
874
22c355f4
RH
875static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
876{
877 return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
878}
879
880static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
881{
882 uint64_t ta = a->frac_hi, tb = b->frac_hi;
883 if (ta == tb) {
884 ta = a->frac_lo, tb = b->frac_lo;
885 if (ta == tb) {
886 return 0;
887 }
888 }
889 return ta < tb ? -1 : 1;
890}
891
/* Type-generic compare: picks frac64/128_cmp from the type of A. */
#define frac_cmp(A, B) \
    FRAC_GENERIC_64_128(cmp, A)(A, B)
893
d46975bc 894static void frac64_clear(FloatParts64 *a)
0018b1f4 895{
d46975bc
RH
896 a->frac = 0;
897}
898
899static void frac128_clear(FloatParts128 *a)
900{
901 a->frac_hi = a->frac_lo = 0;
0018b1f4
RH
902}
903
/* Type-generic clear: picks frac64/128_clear from the type of A. */
#define frac_clear(A) \
    FRAC_GENERIC_64_128(clear, A)(A)
0018b1f4 905
ec961b81
RH
906static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
907{
908 uint64_t n1, n0, r, q;
909 bool ret;
910
911 /*
912 * We want a 2*N / N-bit division to produce exactly an N-bit
913 * result, so that we do not lose any precision and so that we
914 * do not have to renormalize afterward. If A.frac < B.frac,
915 * then division would produce an (N-1)-bit result; shift A left
916 * by one to produce the an N-bit result, and return true to
917 * decrement the exponent to match.
918 *
919 * The udiv_qrnnd algorithm that we're using requires normalization,
920 * i.e. the msb of the denominator must be set, which is already true.
921 */
922 ret = a->frac < b->frac;
923 if (ret) {
924 n0 = a->frac;
925 n1 = 0;
926 } else {
927 n0 = a->frac >> 1;
928 n1 = a->frac << 63;
929 }
930 q = udiv_qrnnd(&r, n0, n1, b->frac);
931
932 /* Set lsb if there is a remainder, to set inexact. */
933 a->frac = q | (r != 0);
934
935 return ret;
936}
937
938static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
939{
940 uint64_t q0, q1, a0, a1, b0, b1;
941 uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
942 bool ret = false;
943
944 a0 = a->frac_hi, a1 = a->frac_lo;
945 b0 = b->frac_hi, b1 = b->frac_lo;
946
947 ret = lt128(a0, a1, b0, b1);
948 if (!ret) {
949 a1 = shr_double(a0, a1, 1);
950 a0 = a0 >> 1;
951 }
952
953 /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
954 q0 = estimateDiv128To64(a0, a1, b0);
955
956 /*
957 * Estimate is high because B1 was not included (unless B1 == 0).
958 * Reduce quotient and increase remainder until remainder is non-negative.
959 * This loop will execute 0 to 2 times.
960 */
961 mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
962 sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
963 while (r0 != 0) {
964 q0--;
965 add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
966 }
967
968 /* Repeat using the remainder, producing a second word of quotient. */
969 q1 = estimateDiv128To64(r1, r2, b0);
970 mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
971 sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
972 while (r1 != 0) {
973 q1--;
974 add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
975 }
976
977 /* Any remainder indicates inexact; set sticky bit. */
978 q1 |= (r2 | r3) != 0;
979
980 a->frac_hi = q0;
981 a->frac_lo = q1;
982 return ret;
983}
984
/* Type-generic divide: picks frac64/128_div from the type of A. */
#define frac_div(A, B) \
    FRAC_GENERIC_64_128(div, A)(A, B)
986
d46975bc 987static bool frac64_eqz(FloatParts64 *a)
0018b1f4 988{
d46975bc
RH
989 return a->frac == 0;
990}
991
992static bool frac128_eqz(FloatParts128 *a)
993{
994 return (a->frac_hi | a->frac_lo) == 0;
0018b1f4
RH
995}
996
/* Type-generic zero test: picks frac64/128_eqz from the type of A. */
#define frac_eqz(A) \
    FRAC_GENERIC_64_128(eqz, A)(A)
0fc07cad 998
aca84527
RH
999static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1000{
1001 mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1002}
1003
1004static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1005{
1006 mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1007 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1008}
1009
/*
 * Type-generic widening multiply.  Selection is on the type of the
 * operand A, not of the (wider) result R.
 */
#define frac_mulw(R, A, B) \
    FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1011
da10a907
RH
1012static void frac64_neg(FloatParts64 *a)
1013{
1014 a->frac = -a->frac;
1015}
1016
1017static void frac128_neg(FloatParts128 *a)
1018{
1019 bool c = 0;
1020 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1021 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1022}
1023
dedd123c
RH
1024static void frac256_neg(FloatParts256 *a)
1025{
1026 bool c = 0;
1027 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1028 a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1029 a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1030 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1031}
1032
/* Type-generic negate: picks frac64/128/256_neg from the type of A. */
#define frac_neg(A) \
    FRAC_GENERIC_64_128_256(neg, A)(A)
da10a907 1034
d46975bc 1035static int frac64_normalize(FloatParts64 *a)
6fff2167 1036{
d46975bc
RH
1037 if (a->frac) {
1038 int shift = clz64(a->frac);
1039 a->frac <<= shift;
1040 return shift;
1041 }
1042 return 64;
1043}
1044
1045static int frac128_normalize(FloatParts128 *a)
1046{
1047 if (a->frac_hi) {
1048 int shl = clz64(a->frac_hi);
463e45dc
RH
1049 a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1050 a->frac_lo <<= shl;
d46975bc
RH
1051 return shl;
1052 } else if (a->frac_lo) {
1053 int shl = clz64(a->frac_lo);
463e45dc 1054 a->frac_hi = a->frac_lo << shl;
d46975bc
RH
1055 a->frac_lo = 0;
1056 return shl + 64;
6fff2167 1057 }
d46975bc 1058 return 128;
6fff2167
AB
1059}
1060
dedd123c
RH
1061static int frac256_normalize(FloatParts256 *a)
1062{
1063 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1064 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
463e45dc 1065 int ret, shl;
dedd123c
RH
1066
1067 if (likely(a0)) {
1068 shl = clz64(a0);
1069 if (shl == 0) {
1070 return 0;
1071 }
1072 ret = shl;
1073 } else {
1074 if (a1) {
1075 ret = 64;
1076 a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1077 } else if (a2) {
1078 ret = 128;
1079 a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1080 } else if (a3) {
1081 ret = 192;
1082 a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1083 } else {
1084 ret = 256;
1085 a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1086 goto done;
1087 }
1088 shl = clz64(a0);
1089 if (shl == 0) {
1090 goto done;
1091 }
1092 ret += shl;
1093 }
1094
463e45dc
RH
1095 a0 = shl_double(a0, a1, shl);
1096 a1 = shl_double(a1, a2, shl);
1097 a2 = shl_double(a2, a3, shl);
1098 a3 <<= shl;
dedd123c
RH
1099
1100 done:
1101 a->frac_hi = a0;
1102 a->frac_hm = a1;
1103 a->frac_lm = a2;
1104 a->frac_lo = a3;
1105 return ret;
1106}
1107
/* Type-generic normalize: picks frac64/128/256_normalize from A's type. */
#define frac_normalize(A) \
    FRAC_GENERIC_64_128_256(normalize, A)(A)
d46975bc
RH
1109
1110static void frac64_shl(FloatParts64 *a, int c)
1111{
1112 a->frac <<= c;
1113}
1114
1115static void frac128_shl(FloatParts128 *a, int c)
1116{
463e45dc
RH
1117 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1118
1119 if (c & 64) {
1120 a0 = a1, a1 = 0;
1121 }
1122
1123 c &= 63;
1124 if (c) {
1125 a0 = shl_double(a0, a1, c);
1126 a1 = a1 << c;
1127 }
1128
1129 a->frac_hi = a0;
1130 a->frac_lo = a1;
d46975bc
RH
1131}
1132
/* Type-generic left shift: picks frac64/128_shl from the type of A. */
#define frac_shl(A, C) \
    FRAC_GENERIC_64_128(shl, A)(A, C)
1134
1135static void frac64_shr(FloatParts64 *a, int c)
1136{
1137 a->frac >>= c;
1138}
1139
1140static void frac128_shr(FloatParts128 *a, int c)
1141{
463e45dc
RH
1142 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1143
1144 if (c & 64) {
1145 a1 = a0, a0 = 0;
1146 }
1147
1148 c &= 63;
1149 if (c) {
1150 a1 = shr_double(a0, a1, c);
1151 a0 = a0 >> c;
1152 }
1153
1154 a->frac_hi = a0;
1155 a->frac_lo = a1;
d46975bc
RH
1156}
1157
/* Type-generic right shift: picks frac64/128_shr from the type of A. */
#define frac_shr(A, C) \
    FRAC_GENERIC_64_128(shr, A)(A, C)
1159
ee6959f2 1160static void frac64_shrjam(FloatParts64 *a, int c)
6fff2167 1161{
463e45dc
RH
1162 uint64_t a0 = a->frac;
1163
1164 if (likely(c != 0)) {
1165 if (likely(c < 64)) {
1166 a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1167 } else {
1168 a0 = a0 != 0;
1169 }
1170 a->frac = a0;
1171 }
ee6959f2 1172}
6fff2167 1173
ee6959f2
RH
1174static void frac128_shrjam(FloatParts128 *a, int c)
1175{
463e45dc
RH
1176 uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1177 uint64_t sticky = 0;
1178
1179 if (unlikely(c == 0)) {
1180 return;
1181 } else if (likely(c < 64)) {
1182 /* nothing */
1183 } else if (likely(c < 128)) {
1184 sticky = a1;
1185 a1 = a0;
1186 a0 = 0;
1187 c &= 63;
1188 if (c == 0) {
1189 goto done;
1190 }
1191 } else {
1192 sticky = a0 | a1;
1193 a0 = a1 = 0;
1194 goto done;
1195 }
1196
1197 sticky |= shr_double(a1, 0, c);
1198 a1 = shr_double(a0, a1, c);
1199 a0 = a0 >> c;
1200
1201 done:
1202 a->frac_lo = a1 | (sticky != 0);
1203 a->frac_hi = a0;
6fff2167
AB
1204}
1205
dedd123c
RH
1206static void frac256_shrjam(FloatParts256 *a, int c)
1207{
1208 uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1209 uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1210 uint64_t sticky = 0;
dedd123c
RH
1211
1212 if (unlikely(c == 0)) {
1213 return;
1214 } else if (likely(c < 64)) {
1215 /* nothing */
1216 } else if (likely(c < 256)) {
1217 if (unlikely(c & 128)) {
1218 sticky |= a2 | a3;
1219 a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1220 }
1221 if (unlikely(c & 64)) {
1222 sticky |= a3;
1223 a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1224 }
1225 c &= 63;
1226 if (c == 0) {
1227 goto done;
1228 }
1229 } else {
1230 sticky = a0 | a1 | a2 | a3;
1231 a0 = a1 = a2 = a3 = 0;
1232 goto done;
1233 }
1234
463e45dc
RH
1235 sticky |= shr_double(a3, 0, c);
1236 a3 = shr_double(a2, a3, c);
1237 a2 = shr_double(a1, a2, c);
1238 a1 = shr_double(a0, a1, c);
1239 a0 = a0 >> c;
dedd123c
RH
1240
1241 done:
1242 a->frac_lo = a3 | (sticky != 0);
1243 a->frac_lm = a2;
1244 a->frac_hm = a1;
1245 a->frac_hi = a0;
1246}
1247
/* Type-generic shift-right-and-jam: picks frac64/128/256_shrjam. */
#define frac_shrjam(A, C) \
    FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
d446830a 1249
da10a907
RH
1250static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1251{
1252 return usub64_overflow(a->frac, b->frac, &r->frac);
1253}
7c45bad8 1254
da10a907
RH
1255static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1256{
1257 bool c = 0;
1258 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1259 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1260 return c;
1261}
1262
dedd123c
RH
1263static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1264{
1265 bool c = 0;
1266 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1267 r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1268 r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1269 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1270 return c;
1271}
1272
/* Type-generic subtract: picks frac64/128/256_sub from the type of R. */
#define frac_sub(R, A, B) \
    FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
da10a907 1274
aca84527
RH
1275static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1276{
1277 r->frac = a->frac_hi | (a->frac_lo != 0);
1278}
1279
1280static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1281{
1282 r->frac_hi = a->frac_hi;
1283 r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1284}
1285
/* Type-generic truncate-and-jam: selected by the (narrow) result R. */
#define frac_truncjam(R, A) \
    FRAC_GENERIC_64_128(truncjam, R)(R, A)
1287
dedd123c
RH
1288static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1289{
1290 r->frac_hi = a->frac;
1291 r->frac_lo = 0;
1292}
1293
1294static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1295{
1296 r->frac_hi = a->frac_hi;
1297 r->frac_hm = a->frac_lo;
1298 r->frac_lm = 0;
1299 r->frac_lo = 0;
1300}
1301
/*
 * Type-generic widen.  A is the (wide) destination and B the (narrow)
 * source; selection is on the type of B.
 */
#define frac_widen(A, B) \
    FRAC_GENERIC_64_128(widen, B)(A, B)
1303
da10a907
RH
1304#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1305#define FloatPartsN glue(FloatParts,N)
aca84527 1306#define FloatPartsW glue(FloatParts,W)
da10a907
RH
1307
1308#define N 64
aca84527 1309#define W 128
da10a907
RH
1310
1311#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1312#include "softfloat-parts.c.inc"
1313
da10a907 1314#undef N
aca84527 1315#undef W
da10a907 1316#define N 128
aca84527 1317#define W 256
7c45bad8 1318
da10a907 1319#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1320#include "softfloat-parts.c.inc"
1321
dedd123c
RH
1322#undef N
1323#undef W
1324#define N 256
1325
1326#include "softfloat-parts-addsub.c.inc"
1327
da10a907 1328#undef N
aca84527 1329#undef W
7c45bad8
RH
1330#undef partsN
1331#undef FloatPartsN
aca84527 1332#undef FloatPartsW
7c45bad8 1333
aaffb7bf
RH
1334/*
1335 * Pack/unpack routines with a specific FloatFmt.
1336 */
1337
98e256fc
RH
1338static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1339 float_status *s, const FloatFmt *params)
aaffb7bf 1340{
98e256fc 1341 float16_unpack_raw(p, f);
d46975bc 1342 parts_canonicalize(p, s, params);
aaffb7bf
RH
1343}
1344
98e256fc
RH
1345static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1346 float_status *s)
aaffb7bf 1347{
98e256fc 1348 float16a_unpack_canonical(p, f, s, &float16_params);
aaffb7bf
RH
1349}
1350
98e256fc
RH
1351static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1352 float_status *s)
aaffb7bf 1353{
98e256fc 1354 bfloat16_unpack_raw(p, f);
d46975bc 1355 parts_canonicalize(p, s, &bfloat16_params);
aaffb7bf
RH
1356}
1357
e293e927
RH
1358static float16 float16a_round_pack_canonical(FloatParts64 *p,
1359 float_status *s,
aaffb7bf
RH
1360 const FloatFmt *params)
1361{
ee6959f2 1362 parts_uncanon(p, s, params);
e293e927 1363 return float16_pack_raw(p);
aaffb7bf
RH
1364}
1365
e293e927
RH
1366static float16 float16_round_pack_canonical(FloatParts64 *p,
1367 float_status *s)
aaffb7bf
RH
1368{
1369 return float16a_round_pack_canonical(p, s, &float16_params);
1370}
1371
e293e927
RH
1372static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1373 float_status *s)
aaffb7bf 1374{
ee6959f2 1375 parts_uncanon(p, s, &bfloat16_params);
e293e927 1376 return bfloat16_pack_raw(p);
aaffb7bf
RH
1377}
1378
98e256fc
RH
1379static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1380 float_status *s)
aaffb7bf 1381{
98e256fc 1382 float32_unpack_raw(p, f);
d46975bc 1383 parts_canonicalize(p, s, &float32_params);
aaffb7bf
RH
1384}
1385
e293e927
RH
1386static float32 float32_round_pack_canonical(FloatParts64 *p,
1387 float_status *s)
aaffb7bf 1388{
ee6959f2 1389 parts_uncanon(p, s, &float32_params);
e293e927 1390 return float32_pack_raw(p);
aaffb7bf
RH
1391}
1392
98e256fc
RH
1393static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1394 float_status *s)
aaffb7bf 1395{
98e256fc 1396 float64_unpack_raw(p, f);
d46975bc 1397 parts_canonicalize(p, s, &float64_params);
aaffb7bf
RH
1398}
1399
e293e927
RH
1400static float64 float64_round_pack_canonical(FloatParts64 *p,
1401 float_status *s)
aaffb7bf 1402{
ee6959f2 1403 parts_uncanon(p, s, &float64_params);
e293e927 1404 return float64_pack_raw(p);
aaffb7bf
RH
1405}
1406
3ff49e56
RH
1407static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1408 float_status *s)
1409{
1410 float128_unpack_raw(p, f);
1411 parts_canonicalize(p, s, &float128_params);
1412}
1413
1414static float128 float128_round_pack_canonical(FloatParts128 *p,
1415 float_status *s)
1416{
1417 parts_uncanon(p, s, &float128_params);
1418 return float128_pack_raw(p);
1419}
1420
6fff2167 1421/*
da10a907 1422 * Addition and subtraction
6fff2167
AB
1423 */
1424
da10a907
RH
1425static float16 QEMU_FLATTEN
1426float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
6fff2167 1427{
da10a907 1428 FloatParts64 pa, pb, *pr;
98e256fc
RH
1429
1430 float16_unpack_canonical(&pa, a, status);
1431 float16_unpack_canonical(&pb, b, status);
da10a907 1432 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1433
da10a907 1434 return float16_round_pack_canonical(pr, status);
6fff2167
AB
1435}
1436
da10a907 1437float16 float16_add(float16 a, float16 b, float_status *status)
1b615d48 1438{
da10a907
RH
1439 return float16_addsub(a, b, status, false);
1440}
1b615d48 1441
da10a907
RH
1442float16 float16_sub(float16 a, float16 b, float_status *status)
1443{
1444 return float16_addsub(a, b, status, true);
1b615d48
EC
1445}
1446
1447static float32 QEMU_SOFTFLOAT_ATTR
da10a907 1448soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
6fff2167 1449{
da10a907 1450 FloatParts64 pa, pb, *pr;
98e256fc
RH
1451
1452 float32_unpack_canonical(&pa, a, status);
1453 float32_unpack_canonical(&pb, b, status);
da10a907 1454 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1455
da10a907 1456 return float32_round_pack_canonical(pr, status);
6fff2167
AB
1457}
1458
da10a907 1459static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1b615d48 1460{
da10a907 1461 return soft_f32_addsub(a, b, status, false);
1b615d48
EC
1462}
1463
da10a907 1464static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1b615d48 1465{
da10a907 1466 return soft_f32_addsub(a, b, status, true);
1b615d48
EC
1467}
1468
1469static float64 QEMU_SOFTFLOAT_ATTR
da10a907 1470soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
6fff2167 1471{
da10a907 1472 FloatParts64 pa, pb, *pr;
98e256fc
RH
1473
1474 float64_unpack_canonical(&pa, a, status);
1475 float64_unpack_canonical(&pb, b, status);
da10a907 1476 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1477
da10a907 1478 return float64_round_pack_canonical(pr, status);
6fff2167
AB
1479}
1480
da10a907 1481static float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1482{
da10a907 1483 return soft_f64_addsub(a, b, status, false);
1b615d48 1484}
6fff2167 1485
da10a907 1486static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1b615d48 1487{
da10a907 1488 return soft_f64_addsub(a, b, status, true);
6fff2167
AB
1489}
1490
/* Host single-precision addition. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;

    return sum;
}
6fff2167 1495
1b615d48
EC
/* Host single-precision subtraction. */
static float hard_f32_sub(float a, float b)
{
    float diff = a - b;

    return diff;
}
1500
/* Host double-precision addition. */
static double hard_f64_add(double a, double b)
{
    double sum = a + b;

    return sum;
}
6fff2167 1505
1b615d48
EC
/* Host double-precision subtraction. */
static double hard_f64_sub(double a, double b)
{
    double diff = a - b;

    return diff;
}
1510
b240c9c4 1511static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1b615d48
EC
1512{
1513 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1514 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1515 }
1516 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1517}
1518
b240c9c4 1519static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1b615d48
EC
1520{
1521 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1522 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1523 } else {
1524 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1525 }
1526}
1527
1528static float32 float32_addsub(float32 a, float32 b, float_status *s,
1529 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1530{
1531 return float32_gen2(a, b, s, hard, soft,
b240c9c4 1532 f32_is_zon2, f32_addsubmul_post);
1b615d48
EC
1533}
1534
1535static float64 float64_addsub(float64 a, float64 b, float_status *s,
1536 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1537{
1538 return float64_gen2(a, b, s, hard, soft,
b240c9c4 1539 f64_is_zon2, f64_addsubmul_post);
1b615d48
EC
1540}
1541
1542float32 QEMU_FLATTEN
1543float32_add(float32 a, float32 b, float_status *s)
1544{
1545 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1546}
1547
1548float32 QEMU_FLATTEN
1549float32_sub(float32 a, float32 b, float_status *s)
1550{
1551 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1552}
1553
1554float64 QEMU_FLATTEN
1555float64_add(float64 a, float64 b, float_status *s)
1556{
1557 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1558}
1559
1560float64 QEMU_FLATTEN
1561float64_sub(float64 a, float64 b, float_status *s)
1562{
1563 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1564}
1565
da10a907
RH
1566static bfloat16 QEMU_FLATTEN
1567bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
8282310d 1568{
da10a907 1569 FloatParts64 pa, pb, *pr;
98e256fc
RH
1570
1571 bfloat16_unpack_canonical(&pa, a, status);
1572 bfloat16_unpack_canonical(&pb, b, status);
da10a907 1573 pr = parts_addsub(&pa, &pb, status, subtract);
8282310d 1574
da10a907 1575 return bfloat16_round_pack_canonical(pr, status);
8282310d
LZ
1576}
1577
da10a907 1578bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1579{
da10a907
RH
1580 return bfloat16_addsub(a, b, status, false);
1581}
8282310d 1582
da10a907
RH
1583bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1584{
1585 return bfloat16_addsub(a, b, status, true);
8282310d
LZ
1586}
1587
3ff49e56
RH
1588static float128 QEMU_FLATTEN
1589float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1590{
1591 FloatParts128 pa, pb, *pr;
1592
1593 float128_unpack_canonical(&pa, a, status);
1594 float128_unpack_canonical(&pb, b, status);
1595 pr = parts_addsub(&pa, &pb, status, subtract);
1596
1597 return float128_round_pack_canonical(pr, status);
1598}
1599
1600float128 float128_add(float128 a, float128 b, float_status *status)
1601{
1602 return float128_addsub(a, b, status, false);
1603}
1604
1605float128 float128_sub(float128 a, float128 b, float_status *status)
1606{
1607 return float128_addsub(a, b, status, true);
1608}
1609
74d707e2 1610/*
aca84527 1611 * Multiplication
74d707e2
AB
1612 */
1613
97ff87c0 1614float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2 1615{
aca84527 1616 FloatParts64 pa, pb, *pr;
98e256fc
RH
1617
1618 float16_unpack_canonical(&pa, a, status);
1619 float16_unpack_canonical(&pb, b, status);
aca84527 1620 pr = parts_mul(&pa, &pb, status);
74d707e2 1621
aca84527 1622 return float16_round_pack_canonical(pr, status);
74d707e2
AB
1623}
1624
2dfabc86
EC
1625static float32 QEMU_SOFTFLOAT_ATTR
1626soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2 1627{
aca84527 1628 FloatParts64 pa, pb, *pr;
98e256fc
RH
1629
1630 float32_unpack_canonical(&pa, a, status);
1631 float32_unpack_canonical(&pb, b, status);
aca84527 1632 pr = parts_mul(&pa, &pb, status);
74d707e2 1633
aca84527 1634 return float32_round_pack_canonical(pr, status);
74d707e2
AB
1635}
1636
2dfabc86
EC
1637static float64 QEMU_SOFTFLOAT_ATTR
1638soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2 1639{
aca84527 1640 FloatParts64 pa, pb, *pr;
98e256fc
RH
1641
1642 float64_unpack_canonical(&pa, a, status);
1643 float64_unpack_canonical(&pb, b, status);
aca84527 1644 pr = parts_mul(&pa, &pb, status);
74d707e2 1645
aca84527 1646 return float64_round_pack_canonical(pr, status);
74d707e2
AB
1647}
1648
2dfabc86
EC
/* Host single-precision multiplication. */
static float hard_f32_mul(float a, float b)
{
    float product = a * b;

    return product;
}
1653
/* Host double-precision multiplication. */
static double hard_f64_mul(double a, double b)
{
    double product = a * b;

    return product;
}
1658
2dfabc86
EC
1659float32 QEMU_FLATTEN
1660float32_mul(float32 a, float32 b, float_status *s)
1661{
1662 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
b240c9c4 1663 f32_is_zon2, f32_addsubmul_post);
2dfabc86
EC
1664}
1665
1666float64 QEMU_FLATTEN
1667float64_mul(float64 a, float64 b, float_status *s)
1668{
1669 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
b240c9c4 1670 f64_is_zon2, f64_addsubmul_post);
2dfabc86
EC
1671}
1672
aca84527
RH
1673bfloat16 QEMU_FLATTEN
1674bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1675{
aca84527 1676 FloatParts64 pa, pb, *pr;
98e256fc
RH
1677
1678 bfloat16_unpack_canonical(&pa, a, status);
1679 bfloat16_unpack_canonical(&pb, b, status);
aca84527 1680 pr = parts_mul(&pa, &pb, status);
8282310d 1681
aca84527
RH
1682 return bfloat16_round_pack_canonical(pr, status);
1683}
1684
1685float128 QEMU_FLATTEN
1686float128_mul(float128 a, float128 b, float_status *status)
1687{
1688 FloatParts128 pa, pb, *pr;
1689
1690 float128_unpack_canonical(&pa, a, status);
1691 float128_unpack_canonical(&pb, b, status);
1692 pr = parts_mul(&pa, &pb, status);
1693
1694 return float128_round_pack_canonical(pr, status);
8282310d
LZ
1695}
1696
d446830a 1697/*
dedd123c 1698 * Fused multiply-add
d446830a
AB
1699 */
1700
97ff87c0 1701float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
dedd123c 1702 int flags, float_status *status)
d446830a 1703{
dedd123c 1704 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1705
1706 float16_unpack_canonical(&pa, a, status);
1707 float16_unpack_canonical(&pb, b, status);
1708 float16_unpack_canonical(&pc, c, status);
dedd123c 1709 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1710
dedd123c 1711 return float16_round_pack_canonical(pr, status);
d446830a
AB
1712}
1713
ccf770ba
EC
1714static float32 QEMU_SOFTFLOAT_ATTR
1715soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1716 float_status *status)
d446830a 1717{
dedd123c 1718 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1719
1720 float32_unpack_canonical(&pa, a, status);
1721 float32_unpack_canonical(&pb, b, status);
1722 float32_unpack_canonical(&pc, c, status);
dedd123c 1723 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1724
dedd123c 1725 return float32_round_pack_canonical(pr, status);
d446830a
AB
1726}
1727
ccf770ba
EC
1728static float64 QEMU_SOFTFLOAT_ATTR
1729soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1730 float_status *status)
d446830a 1731{
dedd123c 1732 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1733
1734 float64_unpack_canonical(&pa, a, status);
1735 float64_unpack_canonical(&pb, b, status);
1736 float64_unpack_canonical(&pc, c, status);
dedd123c 1737 pr = parts_muladd(&pa, &pb, &pc, flags, status);
d446830a 1738
dedd123c 1739 return float64_round_pack_canonical(pr, status);
d446830a
AB
1740}
1741
f6b3b108
EC
/*
 * When true, float32_muladd() and float64_muladd() skip the host
 * fma path and always use the software implementation.
 */
static bool force_soft_fma;
1743
ccf770ba
EC
1744float32 QEMU_FLATTEN
1745float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1746{
1747 union_float32 ua, ub, uc, ur;
1748
1749 ua.s = xa;
1750 ub.s = xb;
1751 uc.s = xc;
1752
1753 if (unlikely(!can_use_fpu(s))) {
1754 goto soft;
1755 }
1756 if (unlikely(flags & float_muladd_halve_result)) {
1757 goto soft;
1758 }
1759
1760 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1761 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1762 goto soft;
1763 }
f6b3b108
EC
1764
1765 if (unlikely(force_soft_fma)) {
1766 goto soft;
1767 }
1768
ccf770ba
EC
1769 /*
1770 * When (a || b) == 0, there's no need to check for under/over flow,
1771 * since we know the addend is (normal || 0) and the product is 0.
1772 */
1773 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1774 union_float32 up;
1775 bool prod_sign;
1776
1777 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1778 prod_sign ^= !!(flags & float_muladd_negate_product);
1779 up.s = float32_set_sign(float32_zero, prod_sign);
1780
1781 if (flags & float_muladd_negate_c) {
1782 uc.h = -uc.h;
1783 }
1784 ur.h = up.h + uc.h;
1785 } else {
896f51fb
KC
1786 union_float32 ua_orig = ua;
1787 union_float32 uc_orig = uc;
1788
ccf770ba
EC
1789 if (flags & float_muladd_negate_product) {
1790 ua.h = -ua.h;
1791 }
1792 if (flags & float_muladd_negate_c) {
1793 uc.h = -uc.h;
1794 }
1795
1796 ur.h = fmaf(ua.h, ub.h, uc.h);
1797
1798 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 1799 float_raise(float_flag_overflow, s);
ccf770ba 1800 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
1801 ua = ua_orig;
1802 uc = uc_orig;
ccf770ba
EC
1803 goto soft;
1804 }
1805 }
1806 if (flags & float_muladd_negate_result) {
1807 return float32_chs(ur.s);
1808 }
1809 return ur.s;
1810
1811 soft:
1812 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1813}
1814
1815float64 QEMU_FLATTEN
1816float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1817{
1818 union_float64 ua, ub, uc, ur;
1819
1820 ua.s = xa;
1821 ub.s = xb;
1822 uc.s = xc;
1823
1824 if (unlikely(!can_use_fpu(s))) {
1825 goto soft;
1826 }
1827 if (unlikely(flags & float_muladd_halve_result)) {
1828 goto soft;
1829 }
1830
1831 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1832 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1833 goto soft;
1834 }
f6b3b108
EC
1835
1836 if (unlikely(force_soft_fma)) {
1837 goto soft;
1838 }
1839
ccf770ba
EC
1840 /*
1841 * When (a || b) == 0, there's no need to check for under/over flow,
1842 * since we know the addend is (normal || 0) and the product is 0.
1843 */
1844 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1845 union_float64 up;
1846 bool prod_sign;
1847
1848 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1849 prod_sign ^= !!(flags & float_muladd_negate_product);
1850 up.s = float64_set_sign(float64_zero, prod_sign);
1851
1852 if (flags & float_muladd_negate_c) {
1853 uc.h = -uc.h;
1854 }
1855 ur.h = up.h + uc.h;
1856 } else {
896f51fb
KC
1857 union_float64 ua_orig = ua;
1858 union_float64 uc_orig = uc;
1859
ccf770ba
EC
1860 if (flags & float_muladd_negate_product) {
1861 ua.h = -ua.h;
1862 }
1863 if (flags & float_muladd_negate_c) {
1864 uc.h = -uc.h;
1865 }
1866
1867 ur.h = fma(ua.h, ub.h, uc.h);
1868
1869 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 1870 float_raise(float_flag_overflow, s);
ccf770ba 1871 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
1872 ua = ua_orig;
1873 uc = uc_orig;
ccf770ba
EC
1874 goto soft;
1875 }
1876 }
1877 if (flags & float_muladd_negate_result) {
1878 return float64_chs(ur.s);
1879 }
1880 return ur.s;
1881
1882 soft:
1883 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1884}
1885
8282310d
LZ
1886bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1887 int flags, float_status *status)
1888{
dedd123c 1889 FloatParts64 pa, pb, pc, *pr;
98e256fc
RH
1890
1891 bfloat16_unpack_canonical(&pa, a, status);
1892 bfloat16_unpack_canonical(&pb, b, status);
1893 bfloat16_unpack_canonical(&pc, c, status);
dedd123c
RH
1894 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1895
1896 return bfloat16_round_pack_canonical(pr, status);
1897}
8282310d 1898
dedd123c
RH
1899float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
1900 int flags, float_status *status)
1901{
1902 FloatParts128 pa, pb, pc, *pr;
1903
1904 float128_unpack_canonical(&pa, a, status);
1905 float128_unpack_canonical(&pb, b, status);
1906 float128_unpack_canonical(&pc, c, status);
1907 pr = parts_muladd(&pa, &pb, &pc, flags, status);
1908
1909 return float128_round_pack_canonical(pr, status);
8282310d
LZ
1910}
1911
cf07323d 1912/*
ec961b81 1913 * Division
cf07323d
AB
1914 */
1915
cf07323d
AB
1916float16 float16_div(float16 a, float16 b, float_status *status)
1917{
ec961b81 1918 FloatParts64 pa, pb, *pr;
98e256fc
RH
1919
1920 float16_unpack_canonical(&pa, a, status);
1921 float16_unpack_canonical(&pb, b, status);
ec961b81 1922 pr = parts_div(&pa, &pb, status);
cf07323d 1923
ec961b81 1924 return float16_round_pack_canonical(pr, status);
cf07323d
AB
1925}
1926
4a629561
EC
1927static float32 QEMU_SOFTFLOAT_ATTR
1928soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d 1929{
ec961b81 1930 FloatParts64 pa, pb, *pr;
98e256fc
RH
1931
1932 float32_unpack_canonical(&pa, a, status);
1933 float32_unpack_canonical(&pb, b, status);
ec961b81 1934 pr = parts_div(&pa, &pb, status);
cf07323d 1935
ec961b81 1936 return float32_round_pack_canonical(pr, status);
cf07323d
AB
1937}
1938
4a629561
EC
1939static float64 QEMU_SOFTFLOAT_ATTR
1940soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d 1941{
ec961b81 1942 FloatParts64 pa, pb, *pr;
98e256fc
RH
1943
1944 float64_unpack_canonical(&pa, a, status);
1945 float64_unpack_canonical(&pb, b, status);
ec961b81 1946 pr = parts_div(&pa, &pb, status);
cf07323d 1947
ec961b81 1948 return float64_round_pack_canonical(pr, status);
cf07323d
AB
1949}
1950
4a629561
EC
/* Host single-precision division. */
static float hard_f32_div(float a, float b)
{
    float quotient = a / b;

    return quotient;
}
1955
/* Host double-precision division. */
static double hard_f64_div(double a, double b)
{
    double quotient = a / b;

    return quotient;
}
1960
1961static bool f32_div_pre(union_float32 a, union_float32 b)
1962{
1963 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1964 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1965 fpclassify(b.h) == FP_NORMAL;
1966 }
1967 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1968}
1969
1970static bool f64_div_pre(union_float64 a, union_float64 b)
1971{
1972 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1973 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1974 fpclassify(b.h) == FP_NORMAL;
1975 }
1976 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1977}
1978
1979static bool f32_div_post(union_float32 a, union_float32 b)
1980{
1981 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1982 return fpclassify(a.h) != FP_ZERO;
1983 }
1984 return !float32_is_zero(a.s);
1985}
1986
1987static bool f64_div_post(union_float64 a, union_float64 b)
1988{
1989 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1990 return fpclassify(a.h) != FP_ZERO;
1991 }
1992 return !float64_is_zero(a.s);
1993}
1994
1995float32 QEMU_FLATTEN
1996float32_div(float32 a, float32 b, float_status *s)
1997{
1998 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
b240c9c4 1999 f32_div_pre, f32_div_post);
4a629561
EC
2000}
2001
2002float64 QEMU_FLATTEN
2003float64_div(float64 a, float64 b, float_status *s)
2004{
2005 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
b240c9c4 2006 f64_div_pre, f64_div_post);
4a629561
EC
2007}
2008
ec961b81
RH
2009bfloat16 QEMU_FLATTEN
2010bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
8282310d 2011{
ec961b81 2012 FloatParts64 pa, pb, *pr;
98e256fc
RH
2013
2014 bfloat16_unpack_canonical(&pa, a, status);
2015 bfloat16_unpack_canonical(&pb, b, status);
ec961b81 2016 pr = parts_div(&pa, &pb, status);
8282310d 2017
ec961b81
RH
2018 return bfloat16_round_pack_canonical(pr, status);
2019}
2020
2021float128 QEMU_FLATTEN
2022float128_div(float128 a, float128 b, float_status *status)
2023{
2024 FloatParts128 pa, pb, *pr;
2025
2026 float128_unpack_canonical(&pa, a, status);
2027 float128_unpack_canonical(&pb, b, status);
2028 pr = parts_div(&pa, &pb, status);
2029
2030 return float128_round_pack_canonical(pr, status);
8282310d
LZ
2031}
2032
6fed16b2
AB
2033/*
2034 * Float to Float conversions
2035 *
2036 * Returns the result of converting one float format to another. The
2037 * conversion is performed according to the IEC/IEEE Standard for
2038 * Binary Floating-Point Arithmetic.
2039 *
c3f1875e
RH
2040 * Usually this only needs to take care of raising invalid exceptions
2041 * and handling the conversion on NaNs.
6fed16b2
AB
2042 */
2043
c3f1875e
RH
2044static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2045{
2046 switch (a->cls) {
2047 case float_class_qnan:
2048 case float_class_snan:
2049 /*
2050 * There is no NaN in the destination format. Raise Invalid
2051 * and return a zero with the sign of the input NaN.
2052 */
2053 float_raise(float_flag_invalid, s);
2054 a->cls = float_class_zero;
2055 break;
2056
2057 case float_class_inf:
2058 /*
2059 * There is no Inf in the destination format. Raise Invalid
2060 * and return the maximum normal with the correct sign.
2061 */
2062 float_raise(float_flag_invalid, s);
2063 a->cls = float_class_normal;
2064 a->exp = float16_params_ahp.exp_max;
2065 a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2066 float16_params_ahp.frac_size + 1);
2067 break;
2068
2069 case float_class_normal:
2070 case float_class_zero:
2071 break;
2072
2073 default:
2074 g_assert_not_reached();
2075 }
2076}
2077
2078static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2079{
2080 if (is_nan(a->cls)) {
2081 parts_return_nan(a, s);
6fed16b2 2082 }
6fed16b2
AB
2083}
2084
c3f1875e
RH
2085static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2086{
2087 if (is_nan(a->cls)) {
2088 parts_return_nan(a, s);
2089 }
2090}
2091
/*
 * Width-generic wrapper for the float_to_float fix-up.
 * NOTE(review): PARTS_GENERIC_64_128 is defined outside this section;
 * presumably it picks the parts64_/parts128_ variant from the type of
 * PARTS -- confirm against its definition.
 */
#define parts_float_to_float(PARTS, STATUS) \
    PARTS_GENERIC_64_128(float_to_float, PARTS)(PARTS, STATUS)
2094
9882ccaf
RH
2095static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2096 float_status *s)
2097{
2098 a->cls = b->cls;
2099 a->sign = b->sign;
2100 a->exp = b->exp;
2101
2102 if (a->cls == float_class_normal) {
2103 frac_truncjam(a, b);
2104 } else if (is_nan(a->cls)) {
2105 /* Discard the low bits of the NaN. */
2106 a->frac = b->frac_hi;
2107 parts_return_nan(a, s);
2108 }
2109}
2110
2111static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2112 float_status *s)
2113{
2114 a->cls = b->cls;
2115 a->sign = b->sign;
2116 a->exp = b->exp;
2117 frac_widen(a, b);
2118
2119 if (is_nan(a->cls)) {
2120 parts_return_nan(a, s);
2121 }
2122}
2123
6fed16b2
AB
2124float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2125{
2126 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2127 FloatParts64 p;
98e256fc 2128
c3f1875e
RH
2129 float16a_unpack_canonical(&p, a, s, fmt16);
2130 parts_float_to_float(&p, s);
2131 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2132}
2133
2134float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2135{
2136 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
c3f1875e 2137 FloatParts64 p;
98e256fc 2138
c3f1875e
RH
2139 float16a_unpack_canonical(&p, a, s, fmt16);
2140 parts_float_to_float(&p, s);
2141 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2142}
2143
2144float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2145{
c3f1875e
RH
2146 FloatParts64 p;
2147 const FloatFmt *fmt;
98e256fc 2148
c3f1875e
RH
2149 float32_unpack_canonical(&p, a, s);
2150 if (ieee) {
2151 parts_float_to_float(&p, s);
2152 fmt = &float16_params;
2153 } else {
2154 parts_float_to_ahp(&p, s);
2155 fmt = &float16_params_ahp;
2156 }
2157 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2158}
2159
21381dcf
MK
2160static float64 QEMU_SOFTFLOAT_ATTR
2161soft_float32_to_float64(float32 a, float_status *s)
6fed16b2 2162{
c3f1875e 2163 FloatParts64 p;
98e256fc 2164
c3f1875e
RH
2165 float32_unpack_canonical(&p, a, s);
2166 parts_float_to_float(&p, s);
2167 return float64_round_pack_canonical(&p, s);
6fed16b2
AB
2168}
2169
21381dcf
MK
2170float64 float32_to_float64(float32 a, float_status *s)
2171{
2172 if (likely(float32_is_normal(a))) {
2173 /* Widening conversion can never produce inexact results. */
2174 union_float32 uf;
2175 union_float64 ud;
2176 uf.s = a;
2177 ud.h = uf.h;
2178 return ud.s;
2179 } else if (float32_is_zero(a)) {
2180 return float64_set_sign(float64_zero, float32_is_neg(a));
2181 } else {
2182 return soft_float32_to_float64(a, s);
2183 }
2184}
2185
6fed16b2
AB
2186float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2187{
c3f1875e
RH
2188 FloatParts64 p;
2189 const FloatFmt *fmt;
98e256fc 2190
c3f1875e
RH
2191 float64_unpack_canonical(&p, a, s);
2192 if (ieee) {
2193 parts_float_to_float(&p, s);
2194 fmt = &float16_params;
2195 } else {
2196 parts_float_to_ahp(&p, s);
2197 fmt = &float16_params_ahp;
2198 }
2199 return float16a_round_pack_canonical(&p, s, fmt);
6fed16b2
AB
2200}
2201
2202float32 float64_to_float32(float64 a, float_status *s)
2203{
c3f1875e 2204 FloatParts64 p;
98e256fc 2205
c3f1875e
RH
2206 float64_unpack_canonical(&p, a, s);
2207 parts_float_to_float(&p, s);
2208 return float32_round_pack_canonical(&p, s);
6fed16b2
AB
2209}
2210
34f0c0a9
LZ
2211float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2212{
c3f1875e 2213 FloatParts64 p;
98e256fc 2214
c3f1875e
RH
2215 bfloat16_unpack_canonical(&p, a, s);
2216 parts_float_to_float(&p, s);
2217 return float32_round_pack_canonical(&p, s);
34f0c0a9
LZ
2218}
2219
2220float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2221{
c3f1875e 2222 FloatParts64 p;
98e256fc 2223
c3f1875e
RH
2224 bfloat16_unpack_canonical(&p, a, s);
2225 parts_float_to_float(&p, s);
2226 return float64_round_pack_canonical(&p, s);
34f0c0a9
LZ
2227}
2228
2229bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2230{
c3f1875e 2231 FloatParts64 p;
98e256fc 2232
c3f1875e
RH
2233 float32_unpack_canonical(&p, a, s);
2234 parts_float_to_float(&p, s);
2235 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2236}
2237
2238bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2239{
c3f1875e 2240 FloatParts64 p;
98e256fc 2241
c3f1875e
RH
2242 float64_unpack_canonical(&p, a, s);
2243 parts_float_to_float(&p, s);
2244 return bfloat16_round_pack_canonical(&p, s);
34f0c0a9
LZ
2245}
2246
9882ccaf
RH
2247float32 float128_to_float32(float128 a, float_status *s)
2248{
2249 FloatParts64 p64;
2250 FloatParts128 p128;
2251
2252 float128_unpack_canonical(&p128, a, s);
2253 parts_float_to_float_narrow(&p64, &p128, s);
2254 return float32_round_pack_canonical(&p64, s);
2255}
2256
2257float64 float128_to_float64(float128 a, float_status *s)
2258{
2259 FloatParts64 p64;
2260 FloatParts128 p128;
2261
2262 float128_unpack_canonical(&p128, a, s);
2263 parts_float_to_float_narrow(&p64, &p128, s);
2264 return float64_round_pack_canonical(&p64, s);
2265}
2266
2267float128 float32_to_float128(float32 a, float_status *s)
2268{
2269 FloatParts64 p64;
2270 FloatParts128 p128;
2271
2272 float32_unpack_canonical(&p64, a, s);
2273 parts_float_to_float_widen(&p128, &p64, s);
2274 return float128_round_pack_canonical(&p128, s);
2275}
2276
2277float128 float64_to_float128(float64 a, float_status *s)
2278{
2279 FloatParts64 p64;
2280 FloatParts128 p128;
2281
2282 float64_unpack_canonical(&p64, a, s);
2283 parts_float_to_float_widen(&p128, &p64, s);
2284 return float128_round_pack_canonical(&p128, s);
2285}
2286
dbe4d53a
AB
2287/*
2288 * Rounds the floating-point value `a' to an integer, and returns the
2289 * result as a floating-point value. The operation is performed
2290 * according to the IEC/IEEE Standard for Binary Floating-Point
2291 * Arithmetic.
2292 */
2293
f8155c1d 2294static FloatParts64 round_to_int(FloatParts64 a, FloatRoundMode rmode,
2f6c74be 2295 int scale, float_status *s)
dbe4d53a 2296{
2f6c74be
RH
2297 switch (a.cls) {
2298 case float_class_qnan:
2299 case float_class_snan:
7c45bad8
RH
2300 parts_return_nan(&a, s);
2301 break;
dbe4d53a 2302
dbe4d53a
AB
2303 case float_class_zero:
2304 case float_class_inf:
dbe4d53a
AB
2305 /* already "integral" */
2306 break;
2f6c74be 2307
dbe4d53a 2308 case float_class_normal:
2f6c74be
RH
2309 scale = MIN(MAX(scale, -0x10000), 0x10000);
2310 a.exp += scale;
2311
dbe4d53a
AB
2312 if (a.exp >= DECOMPOSED_BINARY_POINT) {
2313 /* already integral */
2314 break;
2315 }
2316 if (a.exp < 0) {
2317 bool one;
2318 /* all fractional */
d82f3b2d 2319 float_raise(float_flag_inexact, s);
2f6c74be 2320 switch (rmode) {
dbe4d53a
AB
2321 case float_round_nearest_even:
2322 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
2323 break;
2324 case float_round_ties_away:
2325 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
2326 break;
2327 case float_round_to_zero:
2328 one = false;
2329 break;
2330 case float_round_up:
2331 one = !a.sign;
2332 break;
2333 case float_round_down:
2334 one = a.sign;
2335 break;
5d64abb3
RH
2336 case float_round_to_odd:
2337 one = true;
2338 break;
dbe4d53a
AB
2339 default:
2340 g_assert_not_reached();
2341 }
2342
2343 if (one) {
2344 a.frac = DECOMPOSED_IMPLICIT_BIT;
2345 a.exp = 0;
2346 } else {
2347 a.cls = float_class_zero;
2348 }
2349 } else {
2350 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
2351 uint64_t frac_lsbm1 = frac_lsb >> 1;
2352 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
2353 uint64_t rnd_mask = rnd_even_mask >> 1;
2354 uint64_t inc;
2355
2f6c74be 2356 switch (rmode) {
dbe4d53a
AB
2357 case float_round_nearest_even:
2358 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
2359 break;
2360 case float_round_ties_away:
2361 inc = frac_lsbm1;
2362 break;
2363 case float_round_to_zero:
2364 inc = 0;
2365 break;
2366 case float_round_up:
2367 inc = a.sign ? 0 : rnd_mask;
2368 break;
2369 case float_round_down:
2370 inc = a.sign ? rnd_mask : 0;
2371 break;
5d64abb3
RH
2372 case float_round_to_odd:
2373 inc = a.frac & frac_lsb ? 0 : rnd_mask;
2374 break;
dbe4d53a
AB
2375 default:
2376 g_assert_not_reached();
2377 }
2378
2379 if (a.frac & rnd_mask) {
d82f3b2d 2380 float_raise(float_flag_inexact, s);
e99c4373 2381 if (uadd64_overflow(a.frac, inc, &a.frac)) {
dbe4d53a 2382 a.frac >>= 1;
e99c4373 2383 a.frac |= DECOMPOSED_IMPLICIT_BIT;
dbe4d53a
AB
2384 a.exp++;
2385 }
e99c4373 2386 a.frac &= ~rnd_mask;
dbe4d53a
AB
2387 }
2388 }
2389 break;
2390 default:
2391 g_assert_not_reached();
2392 }
2393 return a;
2394}
2395
2396float16 float16_round_to_int(float16 a, float_status *s)
2397{
98e256fc
RH
2398 FloatParts64 pa, pr;
2399
2400 float16_unpack_canonical(&pa, a, s);
2401 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
e293e927 2402 return float16_round_pack_canonical(&pr, s);
dbe4d53a
AB
2403}
2404
2405float32 float32_round_to_int(float32 a, float_status *s)
2406{
98e256fc
RH
2407 FloatParts64 pa, pr;
2408
2409 float32_unpack_canonical(&pa, a, s);
2410 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
e293e927 2411 return float32_round_pack_canonical(&pr, s);
dbe4d53a
AB
2412}
2413
2414float64 float64_round_to_int(float64 a, float_status *s)
2415{
98e256fc
RH
2416 FloatParts64 pa, pr;
2417
2418 float64_unpack_canonical(&pa, a, s);
2419 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
e293e927 2420 return float64_round_pack_canonical(&pr, s);
dbe4d53a
AB
2421}
2422
34f0c0a9
LZ
2423/*
2424 * Rounds the bfloat16 value `a' to an integer, and returns the
2425 * result as a bfloat16 value.
2426 */
2427
2428bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2429{
98e256fc
RH
2430 FloatParts64 pa, pr;
2431
2432 bfloat16_unpack_canonical(&pa, a, s);
2433 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
e293e927 2434 return bfloat16_round_pack_canonical(&pr, s);
34f0c0a9
LZ
2435}
2436
ab52f973
AB
2437/*
2438 * Returns the result of converting the floating-point value `a' to
2439 * the two's complement integer format. The conversion is performed
2440 * according to the IEC/IEEE Standard for Binary Floating-Point
2441 * Arithmetic---which means in particular that the conversion is
2442 * rounded according to the current rounding mode. If `a' is a NaN,
2443 * the largest positive integer is returned. Otherwise, if the
2444 * conversion overflows, the largest integer with the same sign as `a'
2445 * is returned.
2446*/
2447
f8155c1d 2448static int64_t round_to_int_and_pack(FloatParts64 in, FloatRoundMode rmode,
3dede407 2449 int scale, int64_t min, int64_t max,
ab52f973
AB
2450 float_status *s)
2451{
2452 uint64_t r;
2453 int orig_flags = get_float_exception_flags(s);
f8155c1d 2454 FloatParts64 p = round_to_int(in, rmode, scale, s);
ab52f973
AB
2455
2456 switch (p.cls) {
2457 case float_class_snan:
2458 case float_class_qnan:
801bc563 2459 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2460 return max;
2461 case float_class_inf:
801bc563 2462 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2463 return p.sign ? min : max;
2464 case float_class_zero:
2465 return 0;
2466 case float_class_normal:
e99c4373 2467 if (p.exp <= DECOMPOSED_BINARY_POINT) {
ab52f973 2468 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
ab52f973
AB
2469 } else {
2470 r = UINT64_MAX;
2471 }
2472 if (p.sign) {
33358375 2473 if (r <= -(uint64_t) min) {
ab52f973
AB
2474 return -r;
2475 } else {
2476 s->float_exception_flags = orig_flags | float_flag_invalid;
2477 return min;
2478 }
2479 } else {
33358375 2480 if (r <= max) {
ab52f973
AB
2481 return r;
2482 } else {
2483 s->float_exception_flags = orig_flags | float_flag_invalid;
2484 return max;
2485 }
2486 }
2487 default:
2488 g_assert_not_reached();
2489 }
2490}
2491
0d93d8ec
FC
2492int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2493 float_status *s)
2494{
98e256fc
RH
2495 FloatParts64 p;
2496
2497 float16_unpack_canonical(&p, a, s);
2498 return round_to_int_and_pack(p, rmode, scale, INT8_MIN, INT8_MAX, s);
0d93d8ec
FC
2499}
2500
3dede407 2501int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2502 float_status *s)
2503{
98e256fc
RH
2504 FloatParts64 p;
2505
2506 float16_unpack_canonical(&p, a, s);
2507 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2508}
2509
3dede407 2510int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2511 float_status *s)
2512{
98e256fc
RH
2513 FloatParts64 p;
2514
2515 float16_unpack_canonical(&p, a, s);
2516 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2517}
2518
3dede407 2519int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2520 float_status *s)
2521{
98e256fc
RH
2522 FloatParts64 p;
2523
2524 float16_unpack_canonical(&p, a, s);
2525 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2526}
2527
3dede407 2528int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2529 float_status *s)
2530{
98e256fc
RH
2531 FloatParts64 p;
2532
2533 float32_unpack_canonical(&p, a, s);
2534 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2535}
2536
3dede407 2537int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2538 float_status *s)
2539{
98e256fc
RH
2540 FloatParts64 p;
2541
2542 float32_unpack_canonical(&p, a, s);
2543 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2544}
2545
3dede407 2546int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2547 float_status *s)
2548{
98e256fc
RH
2549 FloatParts64 p;
2550
2551 float32_unpack_canonical(&p, a, s);
2552 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2553}
2554
3dede407 2555int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2556 float_status *s)
2557{
98e256fc
RH
2558 FloatParts64 p;
2559
2560 float64_unpack_canonical(&p, a, s);
2561 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2562}
2563
3dede407 2564int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2565 float_status *s)
2566{
98e256fc
RH
2567 FloatParts64 p;
2568
2569 float64_unpack_canonical(&p, a, s);
2570 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2571}
2572
3dede407 2573int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2574 float_status *s)
2575{
98e256fc
RH
2576 FloatParts64 p;
2577
2578 float64_unpack_canonical(&p, a, s);
2579 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2580}
2581
0d93d8ec
FC
2582int8_t float16_to_int8(float16 a, float_status *s)
2583{
2584 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2585}
2586
2f6c74be
RH
2587int16_t float16_to_int16(float16 a, float_status *s)
2588{
2589 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2590}
2591
2592int32_t float16_to_int32(float16 a, float_status *s)
2593{
2594 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2595}
2596
2597int64_t float16_to_int64(float16 a, float_status *s)
2598{
2599 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2600}
2601
2602int16_t float32_to_int16(float32 a, float_status *s)
2603{
2604 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2605}
2606
2607int32_t float32_to_int32(float32 a, float_status *s)
2608{
2609 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2610}
2611
2612int64_t float32_to_int64(float32 a, float_status *s)
2613{
2614 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2615}
2616
2617int16_t float64_to_int16(float64 a, float_status *s)
2618{
2619 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2620}
2621
2622int32_t float64_to_int32(float64 a, float_status *s)
2623{
2624 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2625}
2626
2627int64_t float64_to_int64(float64 a, float_status *s)
2628{
2629 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2630}
2631
2632int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2633{
2634 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2635}
2636
2637int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2638{
2639 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2640}
2641
2642int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2643{
2644 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2645}
2646
2f6c74be
RH
2647int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2648{
2649 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2650}
ab52f973 2651
2f6c74be
RH
2652int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2653{
2654 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2655}
2656
2657int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2658{
2659 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2660}
2661
2662int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2663{
2664 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2665}
ab52f973 2666
2f6c74be
RH
2667int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2668{
2669 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2670}
ab52f973 2671
2f6c74be
RH
2672int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2673{
2674 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2675}
ab52f973 2676
34f0c0a9
LZ
2677/*
2678 * Returns the result of converting the floating-point value `a' to
2679 * the two's complement integer format.
2680 */
2681
2682int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2683 float_status *s)
2684{
98e256fc
RH
2685 FloatParts64 p;
2686
2687 bfloat16_unpack_canonical(&p, a, s);
2688 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
34f0c0a9
LZ
2689}
2690
2691int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2692 float_status *s)
2693{
98e256fc
RH
2694 FloatParts64 p;
2695
2696 bfloat16_unpack_canonical(&p, a, s);
2697 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
34f0c0a9
LZ
2698}
2699
2700int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2701 float_status *s)
2702{
98e256fc
RH
2703 FloatParts64 p;
2704
2705 bfloat16_unpack_canonical(&p, a, s);
2706 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
34f0c0a9
LZ
2707}
2708
2709int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2710{
2711 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2712}
2713
2714int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2715{
2716 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2717}
2718
2719int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2720{
2721 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2722}
2723
2724int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2725{
2726 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2727}
2728
2729int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2730{
2731 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2732}
2733
2734int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2735{
2736 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2737}
2738
ab52f973
AB
2739/*
2740 * Returns the result of converting the floating-point value `a' to
2741 * the unsigned integer format. The conversion is performed according
2742 * to the IEC/IEEE Standard for Binary Floating-Point
2743 * Arithmetic---which means in particular that the conversion is
2744 * rounded according to the current rounding mode. If `a' is a NaN,
2745 * the largest unsigned integer is returned. Otherwise, if the
2746 * conversion overflows, the largest unsigned integer is returned. If
2747 * `a' is negative, the result is rounded and zero is returned;
2748 * values that do not round to zero will raise the invalid exception
2749 * flag.
2750 */
2751
f8155c1d 2752static uint64_t round_to_uint_and_pack(FloatParts64 in, FloatRoundMode rmode,
3dede407
RH
2753 int scale, uint64_t max,
2754 float_status *s)
ab52f973
AB
2755{
2756 int orig_flags = get_float_exception_flags(s);
f8155c1d 2757 FloatParts64 p = round_to_int(in, rmode, scale, s);
2f6c74be 2758 uint64_t r;
ab52f973
AB
2759
2760 switch (p.cls) {
2761 case float_class_snan:
2762 case float_class_qnan:
2763 s->float_exception_flags = orig_flags | float_flag_invalid;
2764 return max;
2765 case float_class_inf:
801bc563 2766 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2767 return p.sign ? 0 : max;
2768 case float_class_zero:
2769 return 0;
2770 case float_class_normal:
ab52f973
AB
2771 if (p.sign) {
2772 s->float_exception_flags = orig_flags | float_flag_invalid;
2773 return 0;
2774 }
2775
e99c4373 2776 if (p.exp <= DECOMPOSED_BINARY_POINT) {
ab52f973 2777 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
ab52f973
AB
2778 } else {
2779 s->float_exception_flags = orig_flags | float_flag_invalid;
2780 return max;
2781 }
2782
2783 /* For uint64 this will never trip, but if p.exp is too large
2784 * to shift a decomposed fraction we shall have exited via the
2785 * 3rd leg above.
2786 */
2787 if (r > max) {
2788 s->float_exception_flags = orig_flags | float_flag_invalid;
2789 return max;
ab52f973 2790 }
2f6c74be 2791 return r;
ab52f973
AB
2792 default:
2793 g_assert_not_reached();
2794 }
2795}
2796
0d93d8ec
FC
2797uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2798 float_status *s)
2799{
98e256fc
RH
2800 FloatParts64 p;
2801
2802 float16_unpack_canonical(&p, a, s);
2803 return round_to_uint_and_pack(p, rmode, scale, UINT8_MAX, s);
0d93d8ec
FC
2804}
2805
3dede407 2806uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2807 float_status *s)
2808{
98e256fc
RH
2809 FloatParts64 p;
2810
2811 float16_unpack_canonical(&p, a, s);
2812 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2813}
2814
3dede407 2815uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2816 float_status *s)
2817{
98e256fc
RH
2818 FloatParts64 p;
2819
2820 float16_unpack_canonical(&p, a, s);
2821 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2822}
2823
3dede407 2824uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2825 float_status *s)
2826{
98e256fc
RH
2827 FloatParts64 p;
2828
2829 float16_unpack_canonical(&p, a, s);
2830 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2831}
2832
3dede407 2833uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2834 float_status *s)
2835{
98e256fc
RH
2836 FloatParts64 p;
2837
2838 float32_unpack_canonical(&p, a, s);
2839 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2840}
2841
3dede407 2842uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2843 float_status *s)
2844{
98e256fc
RH
2845 FloatParts64 p;
2846
2847 float32_unpack_canonical(&p, a, s);
2848 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2849}
2850
3dede407 2851uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2852 float_status *s)
2853{
98e256fc
RH
2854 FloatParts64 p;
2855
2856 float32_unpack_canonical(&p, a, s);
2857 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2858}
2859
3dede407 2860uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2861 float_status *s)
2862{
98e256fc
RH
2863 FloatParts64 p;
2864
2865 float64_unpack_canonical(&p, a, s);
2866 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2867}
2868
3dede407 2869uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2870 float_status *s)
2871{
98e256fc
RH
2872 FloatParts64 p;
2873
2874 float64_unpack_canonical(&p, a, s);
2875 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2876}
2877
3dede407 2878uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2879 float_status *s)
2880{
98e256fc
RH
2881 FloatParts64 p;
2882
2883 float64_unpack_canonical(&p, a, s);
2884 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2885}
2886
0d93d8ec
FC
2887uint8_t float16_to_uint8(float16 a, float_status *s)
2888{
2889 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2890}
2891
2f6c74be
RH
2892uint16_t float16_to_uint16(float16 a, float_status *s)
2893{
2894 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2895}
2896
2897uint32_t float16_to_uint32(float16 a, float_status *s)
2898{
2899 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2900}
2901
2902uint64_t float16_to_uint64(float16 a, float_status *s)
2903{
2904 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2905}
2906
2907uint16_t float32_to_uint16(float32 a, float_status *s)
2908{
2909 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2910}
2911
2912uint32_t float32_to_uint32(float32 a, float_status *s)
2913{
2914 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2915}
2916
2917uint64_t float32_to_uint64(float32 a, float_status *s)
2918{
2919 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2920}
2921
2922uint16_t float64_to_uint16(float64 a, float_status *s)
2923{
2924 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2925}
2926
2927uint32_t float64_to_uint32(float64 a, float_status *s)
2928{
2929 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2930}
2931
2932uint64_t float64_to_uint64(float64 a, float_status *s)
2933{
2934 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2935}
2936
2937uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2938{
2939 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2940}
2941
2942uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2943{
2944 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2945}
2946
2947uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2948{
2949 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2950}
2951
2952uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2953{
2954 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2955}
2956
2957uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2958{
2959 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2960}
2961
2962uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2963{
2964 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2965}
2966
2967uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2968{
2969 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2970}
2971
2972uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2973{
2974 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2975}
2976
2977uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2978{
2979 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2980}
ab52f973 2981
34f0c0a9
LZ
2982/*
2983 * Returns the result of converting the bfloat16 value `a' to
2984 * the unsigned integer format.
2985 */
2986
2987uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2988 int scale, float_status *s)
2989{
98e256fc
RH
2990 FloatParts64 p;
2991
2992 bfloat16_unpack_canonical(&p, a, s);
2993 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
34f0c0a9
LZ
2994}
2995
2996uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2997 int scale, float_status *s)
2998{
98e256fc
RH
2999 FloatParts64 p;
3000
3001 bfloat16_unpack_canonical(&p, a, s);
3002 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
34f0c0a9
LZ
3003}
3004
3005uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3006 int scale, float_status *s)
3007{
98e256fc
RH
3008 FloatParts64 p;
3009
3010 bfloat16_unpack_canonical(&p, a, s);
3011 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
34f0c0a9
LZ
3012}
3013
3014uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3015{
3016 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3017}
3018
3019uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3020{
3021 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3022}
3023
3024uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3025{
3026 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3027}
3028
3029uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3030{
3031 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3032}
3033
3034uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3035{
3036 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3037}
3038
3039uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3040{
3041 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3042}
3043
c02e1fb8
AB
3044/*
3045 * Integer to float conversions
3046 *
3047 * Returns the result of converting the two's complement integer `a'
3048 * to the floating-point format. The conversion is performed according
3049 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3050 */
3051
f8155c1d 3052static FloatParts64 int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 3053{
f8155c1d 3054 FloatParts64 r = { .sign = false };
2abdfe24 3055
c02e1fb8
AB
3056 if (a == 0) {
3057 r.cls = float_class_zero;
c02e1fb8 3058 } else {
2abdfe24
RH
3059 uint64_t f = a;
3060 int shift;
3061
3062 r.cls = float_class_normal;
c02e1fb8 3063 if (a < 0) {
2abdfe24 3064 f = -f;
c02e1fb8 3065 r.sign = true;
c02e1fb8 3066 }
e99c4373 3067 shift = clz64(f);
2abdfe24
RH
3068 scale = MIN(MAX(scale, -0x10000), 0x10000);
3069
3070 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
e99c4373 3071 r.frac = f << shift;
c02e1fb8
AB
3072 }
3073
3074 return r;
3075}
3076
2abdfe24 3077float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3078{
f8155c1d 3079 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 3080 return float16_round_pack_canonical(&pa, status);
c02e1fb8
AB
3081}
3082
2abdfe24
RH
3083float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3084{
3085 return int64_to_float16_scalbn(a, scale, status);
3086}
3087
3088float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3089{
3090 return int64_to_float16_scalbn(a, scale, status);
3091}
3092
3093float16 int64_to_float16(int64_t a, float_status *status)
3094{
3095 return int64_to_float16_scalbn(a, 0, status);
3096}
3097
c02e1fb8
AB
3098float16 int32_to_float16(int32_t a, float_status *status)
3099{
2abdfe24 3100 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3101}
3102
3103float16 int16_to_float16(int16_t a, float_status *status)
3104{
2abdfe24 3105 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3106}
3107
0d93d8ec
FC
3108float16 int8_to_float16(int8_t a, float_status *status)
3109{
3110 return int64_to_float16_scalbn(a, 0, status);
3111}
3112
2abdfe24 3113float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3114{
f8155c1d 3115 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 3116 return float32_round_pack_canonical(&pa, status);
c02e1fb8
AB
3117}
3118
2abdfe24
RH
3119float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3120{
3121 return int64_to_float32_scalbn(a, scale, status);
3122}
3123
3124float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3125{
3126 return int64_to_float32_scalbn(a, scale, status);
3127}
3128
3129float32 int64_to_float32(int64_t a, float_status *status)
3130{
3131 return int64_to_float32_scalbn(a, 0, status);
3132}
3133
c02e1fb8
AB
3134float32 int32_to_float32(int32_t a, float_status *status)
3135{
2abdfe24 3136 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3137}
3138
3139float32 int16_to_float32(int16_t a, float_status *status)
3140{
2abdfe24 3141 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3142}
3143
2abdfe24 3144float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 3145{
f8155c1d 3146 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 3147 return float64_round_pack_canonical(&pa, status);
c02e1fb8
AB
3148}
3149
2abdfe24
RH
3150float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3151{
3152 return int64_to_float64_scalbn(a, scale, status);
3153}
3154
3155float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3156{
3157 return int64_to_float64_scalbn(a, scale, status);
3158}
3159
3160float64 int64_to_float64(int64_t a, float_status *status)
3161{
3162 return int64_to_float64_scalbn(a, 0, status);
3163}
3164
c02e1fb8
AB
3165float64 int32_to_float64(int32_t a, float_status *status)
3166{
2abdfe24 3167 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3168}
3169
3170float64 int16_to_float64(int16_t a, float_status *status)
3171{
2abdfe24 3172 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3173}
3174
34f0c0a9
LZ
3175/*
3176 * Returns the result of converting the two's complement integer `a'
3177 * to the bfloat16 format.
3178 */
3179
3180bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3181{
f8155c1d 3182 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 3183 return bfloat16_round_pack_canonical(&pa, status);
34f0c0a9
LZ
3184}
3185
3186bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3187{
3188 return int64_to_bfloat16_scalbn(a, scale, status);
3189}
3190
3191bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3192{
3193 return int64_to_bfloat16_scalbn(a, scale, status);
3194}
3195
3196bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3197{
3198 return int64_to_bfloat16_scalbn(a, 0, status);
3199}
3200
3201bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3202{
3203 return int64_to_bfloat16_scalbn(a, 0, status);
3204}
3205
3206bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3207{
3208 return int64_to_bfloat16_scalbn(a, 0, status);
3209}
c02e1fb8
AB
3210
3211/*
3212 * Unsigned Integer to float conversions
3213 *
3214 * Returns the result of converting the unsigned integer `a' to the
3215 * floating-point format. The conversion is performed according to the
3216 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3217 */
3218
f8155c1d 3219static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 3220{
f8155c1d 3221 FloatParts64 r = { .sign = false };
e99c4373 3222 int shift;
c02e1fb8
AB
3223
3224 if (a == 0) {
3225 r.cls = float_class_zero;
3226 } else {
2abdfe24 3227 scale = MIN(MAX(scale, -0x10000), 0x10000);
e99c4373 3228 shift = clz64(a);
c02e1fb8 3229 r.cls = float_class_normal;
e99c4373
RH
3230 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
3231 r.frac = a << shift;
c02e1fb8
AB
3232 }
3233
3234 return r;
3235}
3236
2abdfe24 3237float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3238{
f8155c1d 3239 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3240 return float16_round_pack_canonical(&pa, status);
c02e1fb8
AB
3241}
3242
2abdfe24
RH
3243float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3244{
3245 return uint64_to_float16_scalbn(a, scale, status);
3246}
3247
3248float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3249{
3250 return uint64_to_float16_scalbn(a, scale, status);
3251}
3252
3253float16 uint64_to_float16(uint64_t a, float_status *status)
3254{
3255 return uint64_to_float16_scalbn(a, 0, status);
3256}
3257
c02e1fb8
AB
3258float16 uint32_to_float16(uint32_t a, float_status *status)
3259{
2abdfe24 3260 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3261}
3262
3263float16 uint16_to_float16(uint16_t a, float_status *status)
3264{
2abdfe24 3265 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3266}
3267
0d93d8ec
FC
3268float16 uint8_to_float16(uint8_t a, float_status *status)
3269{
3270 return uint64_to_float16_scalbn(a, 0, status);
3271}
3272
2abdfe24 3273float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3274{
f8155c1d 3275 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3276 return float32_round_pack_canonical(&pa, status);
c02e1fb8
AB
3277}
3278
2abdfe24
RH
3279float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3280{
3281 return uint64_to_float32_scalbn(a, scale, status);
3282}
3283
3284float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3285{
3286 return uint64_to_float32_scalbn(a, scale, status);
3287}
3288
3289float32 uint64_to_float32(uint64_t a, float_status *status)
3290{
3291 return uint64_to_float32_scalbn(a, 0, status);
3292}
3293
c02e1fb8
AB
3294float32 uint32_to_float32(uint32_t a, float_status *status)
3295{
2abdfe24 3296 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3297}
3298
3299float32 uint16_to_float32(uint16_t a, float_status *status)
3300{
2abdfe24 3301 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3302}
3303
2abdfe24 3304float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3305{
f8155c1d 3306 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3307 return float64_round_pack_canonical(&pa, status);
c02e1fb8
AB
3308}
3309
2abdfe24
RH
3310float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3311{
3312 return uint64_to_float64_scalbn(a, scale, status);
3313}
3314
3315float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3316{
3317 return uint64_to_float64_scalbn(a, scale, status);
3318}
3319
3320float64 uint64_to_float64(uint64_t a, float_status *status)
3321{
3322 return uint64_to_float64_scalbn(a, 0, status);
3323}
3324
c02e1fb8
AB
3325float64 uint32_to_float64(uint32_t a, float_status *status)
3326{
2abdfe24 3327 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3328}
3329
3330float64 uint16_to_float64(uint16_t a, float_status *status)
3331{
2abdfe24 3332 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3333}
3334
34f0c0a9
LZ
3335/*
3336 * Returns the result of converting the unsigned integer `a' to the
3337 * bfloat16 format.
3338 */
3339
3340bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3341{
f8155c1d 3342 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3343 return bfloat16_round_pack_canonical(&pa, status);
34f0c0a9
LZ
3344}
3345
3346bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3347{
3348 return uint64_to_bfloat16_scalbn(a, scale, status);
3349}
3350
3351bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3352{
3353 return uint64_to_bfloat16_scalbn(a, scale, status);
3354}
3355
3356bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3357{
3358 return uint64_to_bfloat16_scalbn(a, 0, status);
3359}
3360
3361bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3362{
3363 return uint64_to_bfloat16_scalbn(a, 0, status);
3364}
3365
3366bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3367{
3368 return uint64_to_bfloat16_scalbn(a, 0, status);
3369}
3370
89360067
AB
/* Float Min/Max */
/* min() and max() functions. These can't be implemented as
 * 'compare and pick one input' because that would mishandle
 * NaNs and +0 vs -0.
 *
 * minnum() and maxnum() functions. These are similar to the min()
 * and max() functions but if one of the arguments is a QNaN and
 * the other is numerical then the numerical argument is returned.
 * SNaNs will get quietened before being returned.
 * minnum() and maxnum() correspond to the IEEE 754-2008 minNum()
 * and maxNum() operations. min() and max() are the typical min/max
 * semantics provided by many CPUs which predate that specification.
 *
 * minnummag() and maxnummag() functions correspond to minNumMag()
 * and maxNumMag() from IEEE 754-2008.
 */
f8155c1d 3387static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
89360067
AB
3388 bool ieee, bool ismag, float_status *s)
3389{
3390 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
3391 if (ieee) {
3392 /* Takes two floating-point values `a' and `b', one of
3393 * which is a NaN, and returns the appropriate NaN
3394 * result. If either `a' or `b' is a signaling NaN,
3395 * the invalid exception is raised.
3396 */
3397 if (is_snan(a.cls) || is_snan(b.cls)) {
22c355f4 3398 return *parts_pick_nan(&a, &b, s);
89360067
AB
3399 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
3400 return b;
3401 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
3402 return a;
3403 }
3404 }
22c355f4 3405 return *parts_pick_nan(&a, &b, s);
89360067
AB
3406 } else {
3407 int a_exp, b_exp;
89360067
AB
3408
3409 switch (a.cls) {
3410 case float_class_normal:
3411 a_exp = a.exp;
3412 break;
3413 case float_class_inf:
3414 a_exp = INT_MAX;
3415 break;
3416 case float_class_zero:
3417 a_exp = INT_MIN;
3418 break;
3419 default:
3420 g_assert_not_reached();
3421 break;
3422 }
3423 switch (b.cls) {
3424 case float_class_normal:
3425 b_exp = b.exp;
3426 break;
3427 case float_class_inf:
3428 b_exp = INT_MAX;
3429 break;
3430 case float_class_zero:
3431 b_exp = INT_MIN;
3432 break;
3433 default:
3434 g_assert_not_reached();
3435 break;
3436 }
3437
6245327a
EC
3438 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
3439 bool a_less = a_exp < b_exp;
3440 if (a_exp == b_exp) {
3441 a_less = a.frac < b.frac;
3442 }
3443 return a_less ^ ismin ? b : a;
89360067
AB
3444 }
3445
6245327a 3446 if (a.sign == b.sign) {
89360067
AB
3447 bool a_less = a_exp < b_exp;
3448 if (a_exp == b_exp) {
3449 a_less = a.frac < b.frac;
3450 }
6245327a 3451 return a.sign ^ a_less ^ ismin ? b : a;
89360067 3452 } else {
6245327a 3453 return a.sign ^ ismin ? b : a;
89360067
AB
3454 }
3455 }
3456}
3457
3458#define MINMAX(sz, name, ismin, isiee, ismag) \
3459float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
3460 float_status *s) \
3461{ \
98e256fc
RH
3462 FloatParts64 pa, pb, pr; \
3463 float ## sz ## _unpack_canonical(&pa, a, s); \
3464 float ## sz ## _unpack_canonical(&pb, b, s); \
3465 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
e293e927 3466 return float ## sz ## _round_pack_canonical(&pr, s); \
89360067
AB
3467}
3468
3469MINMAX(16, min, true, false, false)
3470MINMAX(16, minnum, true, true, false)
3471MINMAX(16, minnummag, true, true, true)
3472MINMAX(16, max, false, false, false)
3473MINMAX(16, maxnum, false, true, false)
3474MINMAX(16, maxnummag, false, true, true)
3475
3476MINMAX(32, min, true, false, false)
3477MINMAX(32, minnum, true, true, false)
3478MINMAX(32, minnummag, true, true, true)
3479MINMAX(32, max, false, false, false)
3480MINMAX(32, maxnum, false, true, false)
3481MINMAX(32, maxnummag, false, true, true)
3482
3483MINMAX(64, min, true, false, false)
3484MINMAX(64, minnum, true, true, false)
3485MINMAX(64, minnummag, true, true, true)
3486MINMAX(64, max, false, false, false)
3487MINMAX(64, maxnum, false, true, false)
3488MINMAX(64, maxnummag, false, true, true)
3489
3490#undef MINMAX
3491
8282310d
LZ
3492#define BF16_MINMAX(name, ismin, isiee, ismag) \
3493bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
3494{ \
98e256fc
RH
3495 FloatParts64 pa, pb, pr; \
3496 bfloat16_unpack_canonical(&pa, a, s); \
3497 bfloat16_unpack_canonical(&pb, b, s); \
3498 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
e293e927 3499 return bfloat16_round_pack_canonical(&pr, s); \
8282310d
LZ
3500}
3501
3502BF16_MINMAX(min, true, false, false)
3503BF16_MINMAX(minnum, true, true, false)
3504BF16_MINMAX(minnummag, true, true, true)
3505BF16_MINMAX(max, false, false, false)
3506BF16_MINMAX(maxnum, false, true, false)
3507BF16_MINMAX(maxnummag, false, true, true)
3508
3509#undef BF16_MINMAX
3510
0c4c9092 3511/* Floating point compare */
f8155c1d 3512static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
71bfd65c 3513 float_status *s)
0c4c9092
AB
3514{
3515 if (is_nan(a.cls) || is_nan(b.cls)) {
3516 if (!is_quiet ||
3517 a.cls == float_class_snan ||
3518 b.cls == float_class_snan) {
d82f3b2d 3519 float_raise(float_flag_invalid, s);
0c4c9092
AB
3520 }
3521 return float_relation_unordered;
3522 }
3523
3524 if (a.cls == float_class_zero) {
3525 if (b.cls == float_class_zero) {
3526 return float_relation_equal;
3527 }
3528 return b.sign ? float_relation_greater : float_relation_less;
3529 } else if (b.cls == float_class_zero) {
3530 return a.sign ? float_relation_less : float_relation_greater;
3531 }
3532
3533 /* The only really important thing about infinity is its sign. If
3534 * both are infinities the sign marks the smallest of the two.
3535 */
3536 if (a.cls == float_class_inf) {
3537 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3538 return float_relation_equal;
3539 }
3540 return a.sign ? float_relation_less : float_relation_greater;
3541 } else if (b.cls == float_class_inf) {
3542 return b.sign ? float_relation_greater : float_relation_less;
3543 }
3544
3545 if (a.sign != b.sign) {
3546 return a.sign ? float_relation_less : float_relation_greater;
3547 }
3548
3549 if (a.exp == b.exp) {
3550 if (a.frac == b.frac) {
3551 return float_relation_equal;
3552 }
3553 if (a.sign) {
3554 return a.frac > b.frac ?
3555 float_relation_less : float_relation_greater;
3556 } else {
3557 return a.frac > b.frac ?
3558 float_relation_greater : float_relation_less;
3559 }
3560 } else {
3561 if (a.sign) {
3562 return a.exp > b.exp ? float_relation_less : float_relation_greater;
3563 } else {
3564 return a.exp > b.exp ? float_relation_greater : float_relation_less;
3565 }
3566 }
3567}
3568
d9fe9db9
EC
3569#define COMPARE(name, attr, sz) \
3570static int attr \
3571name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
0c4c9092 3572{ \
98e256fc
RH
3573 FloatParts64 pa, pb; \
3574 float ## sz ## _unpack_canonical(&pa, a, s); \
3575 float ## sz ## _unpack_canonical(&pb, b, s); \
d9fe9db9 3576 return compare_floats(pa, pb, is_quiet, s); \
0c4c9092
AB
3577}
3578
d9fe9db9
EC
3579COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
3580COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
3581COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
0c4c9092
AB
3582
3583#undef COMPARE
3584
71bfd65c 3585FloatRelation float16_compare(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3586{
3587 return soft_f16_compare(a, b, false, s);
3588}
3589
71bfd65c 3590FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3591{
3592 return soft_f16_compare(a, b, true, s);
3593}
3594
71bfd65c 3595static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3596f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
3597{
3598 union_float32 ua, ub;
3599
3600 ua.s = xa;
3601 ub.s = xb;
3602
3603 if (QEMU_NO_HARDFLOAT) {
3604 goto soft;
3605 }
3606
3607 float32_input_flush2(&ua.s, &ub.s, s);
3608 if (isgreaterequal(ua.h, ub.h)) {
3609 if (isgreater(ua.h, ub.h)) {
3610 return float_relation_greater;
3611 }
3612 return float_relation_equal;
3613 }
3614 if (likely(isless(ua.h, ub.h))) {
3615 return float_relation_less;
3616 }
3617 /* The only condition remaining is unordered.
3618 * Fall through to set flags.
3619 */
3620 soft:
3621 return soft_f32_compare(ua.s, ub.s, is_quiet, s);
3622}
3623
71bfd65c 3624FloatRelation float32_compare(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3625{
3626 return f32_compare(a, b, false, s);
3627}
3628
71bfd65c 3629FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3630{
3631 return f32_compare(a, b, true, s);
3632}
3633
71bfd65c 3634static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3635f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3636{
3637 union_float64 ua, ub;
3638
3639 ua.s = xa;
3640 ub.s = xb;
3641
3642 if (QEMU_NO_HARDFLOAT) {
3643 goto soft;
3644 }
3645
3646 float64_input_flush2(&ua.s, &ub.s, s);
3647 if (isgreaterequal(ua.h, ub.h)) {
3648 if (isgreater(ua.h, ub.h)) {
3649 return float_relation_greater;
3650 }
3651 return float_relation_equal;
3652 }
3653 if (likely(isless(ua.h, ub.h))) {
3654 return float_relation_less;
3655 }
3656 /* The only condition remaining is unordered.
3657 * Fall through to set flags.
3658 */
3659 soft:
3660 return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3661}
3662
71bfd65c 3663FloatRelation float64_compare(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3664{
3665 return f64_compare(a, b, false, s);
3666}
3667
71bfd65c 3668FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3669{
3670 return f64_compare(a, b, true, s);
3671}
3672
8282310d
LZ
3673static FloatRelation QEMU_FLATTEN
3674soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
3675{
98e256fc
RH
3676 FloatParts64 pa, pb;
3677
3678 bfloat16_unpack_canonical(&pa, a, s);
3679 bfloat16_unpack_canonical(&pb, b, s);
8282310d
LZ
3680 return compare_floats(pa, pb, is_quiet, s);
3681}
3682
3683FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3684{
3685 return soft_bf16_compare(a, b, false, s);
3686}
3687
3688FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3689{
3690 return soft_bf16_compare(a, b, true, s);
3691}
3692
0bfc9f19 3693/* Multiply A by 2 raised to the power N. */
f8155c1d 3694static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
0bfc9f19
AB
3695{
3696 if (unlikely(is_nan(a.cls))) {
7c45bad8 3697 parts_return_nan(&a, s);
0bfc9f19
AB
3698 }
3699 if (a.cls == float_class_normal) {
f8155c1d 3700 /* The largest float type (even though not supported by FloatParts64)
ce8d4082
RH
3701 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3702 * still allows rounding to infinity, without allowing overflow
f8155c1d 3703 * within the int32_t that backs FloatParts64.exp.
ce8d4082
RH
3704 */
3705 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
3706 a.exp += n;
3707 }
3708 return a;
3709}
3710
3711float16 float16_scalbn(float16 a, int n, float_status *status)
3712{
98e256fc
RH
3713 FloatParts64 pa, pr;
3714
3715 float16_unpack_canonical(&pa, a, status);
3716 pr = scalbn_decomposed(pa, n, status);
e293e927 3717 return float16_round_pack_canonical(&pr, status);
0bfc9f19
AB
3718}
3719
3720float32 float32_scalbn(float32 a, int n, float_status *status)
3721{
98e256fc
RH
3722 FloatParts64 pa, pr;
3723
3724 float32_unpack_canonical(&pa, a, status);
3725 pr = scalbn_decomposed(pa, n, status);
e293e927 3726 return float32_round_pack_canonical(&pr, status);
0bfc9f19
AB
3727}
3728
3729float64 float64_scalbn(float64 a, int n, float_status *status)
3730{
98e256fc
RH
3731 FloatParts64 pa, pr;
3732
3733 float64_unpack_canonical(&pa, a, status);
3734 pr = scalbn_decomposed(pa, n, status);
e293e927 3735 return float64_round_pack_canonical(&pr, status);
0bfc9f19
AB
3736}
3737
8282310d
LZ
3738bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3739{
98e256fc
RH
3740 FloatParts64 pa, pr;
3741
3742 bfloat16_unpack_canonical(&pa, a, status);
3743 pr = scalbn_decomposed(pa, n, status);
e293e927 3744 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
3745}
3746
c13bb2da
AB
/*
 * Square Root
 *
 * The old softfloat code did an approximation step before zeroing in
 * on the final result. However, for simplicity we just compute the
 * square root by iterating down from the implicit bit to enough extra
 * bits to ensure we get a correctly rounded result.
 *
 * This does mean, however, that the calculation is slower than before,
 * especially for 64 bit floats.
 */
3758
f8155c1d 3759static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
c13bb2da
AB
3760{
3761 uint64_t a_frac, r_frac, s_frac;
3762 int bit, last_bit;
3763
3764 if (is_nan(a.cls)) {
7c45bad8
RH
3765 parts_return_nan(&a, s);
3766 return a;
c13bb2da
AB
3767 }
3768 if (a.cls == float_class_zero) {
3769 return a; /* sqrt(+-0) = +-0 */
3770 }
3771 if (a.sign) {
d82f3b2d 3772 float_raise(float_flag_invalid, s);
0fc07cad
RH
3773 parts_default_nan(&a, s);
3774 return a;
c13bb2da
AB
3775 }
3776 if (a.cls == float_class_inf) {
3777 return a; /* sqrt(+inf) = +inf */
3778 }
3779
3780 assert(a.cls == float_class_normal);
3781
3782 /* We need two overflow bits at the top. Adding room for that is a
3783 * right shift. If the exponent is odd, we can discard the low bit
3784 * by multiplying the fraction by 2; that's a left shift. Combine
e99c4373 3785 * those and we shift right by 1 if the exponent is odd, otherwise 2.
c13bb2da 3786 */
e99c4373 3787 a_frac = a.frac >> (2 - (a.exp & 1));
c13bb2da
AB
3788 a.exp >>= 1;
3789
3790 /* Bit-by-bit computation of sqrt. */
3791 r_frac = 0;
3792 s_frac = 0;
3793
3794 /* Iterate from implicit bit down to the 3 extra bits to compute a
e99c4373
RH
3795 * properly rounded result. Remember we've inserted two more bits
3796 * at the top, so these positions are two less.
c13bb2da 3797 */
e99c4373 3798 bit = DECOMPOSED_BINARY_POINT - 2;
c13bb2da
AB
3799 last_bit = MAX(p->frac_shift - 4, 0);
3800 do {
3801 uint64_t q = 1ULL << bit;
3802 uint64_t t_frac = s_frac + q;
3803 if (t_frac <= a_frac) {
3804 s_frac = t_frac + q;
3805 a_frac -= t_frac;
3806 r_frac += q;
3807 }
3808 a_frac <<= 1;
3809 } while (--bit >= last_bit);
3810
3811 /* Undo the right shift done above. If there is any remaining
3812 * fraction, the result is inexact. Set the sticky bit.
3813 */
e99c4373 3814 a.frac = (r_frac << 2) + (a_frac != 0);
c13bb2da
AB
3815
3816 return a;
3817}
3818
97ff87c0 3819float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da 3820{
98e256fc
RH
3821 FloatParts64 pa, pr;
3822
3823 float16_unpack_canonical(&pa, a, status);
3824 pr = sqrt_float(pa, status, &float16_params);
e293e927 3825 return float16_round_pack_canonical(&pr, status);
c13bb2da
AB
3826}
3827
f131bae8
EC
3828static float32 QEMU_SOFTFLOAT_ATTR
3829soft_f32_sqrt(float32 a, float_status *status)
c13bb2da 3830{
98e256fc
RH
3831 FloatParts64 pa, pr;
3832
3833 float32_unpack_canonical(&pa, a, status);
3834 pr = sqrt_float(pa, status, &float32_params);
e293e927 3835 return float32_round_pack_canonical(&pr, status);
c13bb2da
AB
3836}
3837
f131bae8
EC
3838static float64 QEMU_SOFTFLOAT_ATTR
3839soft_f64_sqrt(float64 a, float_status *status)
c13bb2da 3840{
98e256fc
RH
3841 FloatParts64 pa, pr;
3842
3843 float64_unpack_canonical(&pa, a, status);
3844 pr = sqrt_float(pa, status, &float64_params);
e293e927 3845 return float64_round_pack_canonical(&pr, status);
c13bb2da
AB
3846}
3847
f131bae8
EC
3848float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3849{
3850 union_float32 ua, ur;
3851
3852 ua.s = xa;
3853 if (unlikely(!can_use_fpu(s))) {
3854 goto soft;
3855 }
3856
3857 float32_input_flush1(&ua.s, s);
3858 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3859 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3860 fpclassify(ua.h) == FP_ZERO) ||
3861 signbit(ua.h))) {
3862 goto soft;
3863 }
3864 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3865 float32_is_neg(ua.s))) {
3866 goto soft;
3867 }
3868 ur.h = sqrtf(ua.h);
3869 return ur.s;
3870
3871 soft:
3872 return soft_f32_sqrt(ua.s, s);
3873}
3874
3875float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3876{
3877 union_float64 ua, ur;
3878
3879 ua.s = xa;
3880 if (unlikely(!can_use_fpu(s))) {
3881 goto soft;
3882 }
3883
3884 float64_input_flush1(&ua.s, s);
3885 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3886 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3887 fpclassify(ua.h) == FP_ZERO) ||
3888 signbit(ua.h))) {
3889 goto soft;
3890 }
3891 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3892 float64_is_neg(ua.s))) {
3893 goto soft;
3894 }
3895 ur.h = sqrt(ua.h);
3896 return ur.s;
3897
3898 soft:
3899 return soft_f64_sqrt(ua.s, s);
3900}
3901
8282310d
LZ
3902bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3903{
98e256fc
RH
3904 FloatParts64 pa, pr;
3905
3906 bfloat16_unpack_canonical(&pa, a, status);
3907 pr = sqrt_float(pa, status, &bfloat16_params);
e293e927 3908 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
3909}
3910
0218a16e
RH
3911/*----------------------------------------------------------------------------
3912| The pattern for a default generated NaN.
3913*----------------------------------------------------------------------------*/
3914
3915float16 float16_default_nan(float_status *status)
3916{
0fc07cad
RH
3917 FloatParts64 p;
3918
3919 parts_default_nan(&p, status);
0218a16e 3920 p.frac >>= float16_params.frac_shift;
71fd178e 3921 return float16_pack_raw(&p);
0218a16e
RH
3922}
3923
3924float32 float32_default_nan(float_status *status)
3925{
0fc07cad
RH
3926 FloatParts64 p;
3927
3928 parts_default_nan(&p, status);
0218a16e 3929 p.frac >>= float32_params.frac_shift;
71fd178e 3930 return float32_pack_raw(&p);
0218a16e
RH
3931}
3932
3933float64 float64_default_nan(float_status *status)
3934{
0fc07cad
RH
3935 FloatParts64 p;
3936
3937 parts_default_nan(&p, status);
0218a16e 3938 p.frac >>= float64_params.frac_shift;
71fd178e 3939 return float64_pack_raw(&p);
0218a16e
RH
3940}
3941
3942float128 float128_default_nan(float_status *status)
3943{
e9034ea8 3944 FloatParts128 p;
0218a16e 3945
0fc07cad 3946 parts_default_nan(&p, status);
e9034ea8
RH
3947 frac_shr(&p, float128_params.frac_shift);
3948 return float128_pack_raw(&p);
0218a16e 3949}
c13bb2da 3950
8282310d
LZ
3951bfloat16 bfloat16_default_nan(float_status *status)
3952{
0fc07cad
RH
3953 FloatParts64 p;
3954
3955 parts_default_nan(&p, status);
8282310d 3956 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3957 return bfloat16_pack_raw(&p);
8282310d
LZ
3958}
3959
/*----------------------------------------------------------------------------
| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
*----------------------------------------------------------------------------*/
3963
3964float16 float16_silence_nan(float16 a, float_status *status)
3965{
3dddb203
RH
3966 FloatParts64 p;
3967
3968 float16_unpack_raw(&p, a);
377ed926 3969 p.frac <<= float16_params.frac_shift;
92ff426d 3970 parts_silence_nan(&p, status);
377ed926 3971 p.frac >>= float16_params.frac_shift;
71fd178e 3972 return float16_pack_raw(&p);
377ed926
RH
3973}
3974
3975float32 float32_silence_nan(float32 a, float_status *status)
3976{
3dddb203
RH
3977 FloatParts64 p;
3978
3979 float32_unpack_raw(&p, a);
377ed926 3980 p.frac <<= float32_params.frac_shift;
92ff426d 3981 parts_silence_nan(&p, status);
377ed926 3982 p.frac >>= float32_params.frac_shift;
71fd178e 3983 return float32_pack_raw(&p);
377ed926
RH
3984}
3985
3986float64 float64_silence_nan(float64 a, float_status *status)
3987{
3dddb203
RH
3988 FloatParts64 p;
3989
3990 float64_unpack_raw(&p, a);
377ed926 3991 p.frac <<= float64_params.frac_shift;
92ff426d 3992 parts_silence_nan(&p, status);
377ed926 3993 p.frac >>= float64_params.frac_shift;
71fd178e 3994 return float64_pack_raw(&p);
377ed926
RH
3995}
3996
8282310d
LZ
3997bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3998{
3dddb203
RH
3999 FloatParts64 p;
4000
4001 bfloat16_unpack_raw(&p, a);
8282310d 4002 p.frac <<= bfloat16_params.frac_shift;
92ff426d 4003 parts_silence_nan(&p, status);
8282310d 4004 p.frac >>= bfloat16_params.frac_shift;
71fd178e 4005 return bfloat16_pack_raw(&p);
8282310d 4006}
e6b405fe 4007
0018b1f4
RH
4008float128 float128_silence_nan(float128 a, float_status *status)
4009{
4010 FloatParts128 p;
4011
4012 float128_unpack_raw(&p, a);
4013 frac_shl(&p, float128_params.frac_shift);
4014 parts_silence_nan(&p, status);
4015 frac_shr(&p, float128_params.frac_shift);
4016 return float128_pack_raw(&p);
4017}
4018
e6b405fe
AB
4019/*----------------------------------------------------------------------------
4020| If `a' is denormal and we are in flush-to-zero mode then set the
4021| input-denormal exception and return zero. Otherwise just return the value.
4022*----------------------------------------------------------------------------*/
4023
f8155c1d 4024static bool parts_squash_denormal(FloatParts64 p, float_status *status)
e6b405fe
AB
4025{
4026 if (p.exp == 0 && p.frac != 0) {
4027 float_raise(float_flag_input_denormal, status);
4028 return true;
4029 }
4030
4031 return false;
4032}
4033
4034float16 float16_squash_input_denormal(float16 a, float_status *status)
4035{
4036 if (status->flush_inputs_to_zero) {
3dddb203
RH
4037 FloatParts64 p;
4038
4039 float16_unpack_raw(&p, a);
e6b405fe
AB
4040 if (parts_squash_denormal(p, status)) {
4041 return float16_set_sign(float16_zero, p.sign);
4042 }
4043 }
4044 return a;
4045}
4046
4047float32 float32_squash_input_denormal(float32 a, float_status *status)
4048{
4049 if (status->flush_inputs_to_zero) {
3dddb203
RH
4050 FloatParts64 p;
4051
4052 float32_unpack_raw(&p, a);
e6b405fe
AB
4053 if (parts_squash_denormal(p, status)) {
4054 return float32_set_sign(float32_zero, p.sign);
4055 }
4056 }
4057 return a;
4058}
4059
4060float64 float64_squash_input_denormal(float64 a, float_status *status)
4061{
4062 if (status->flush_inputs_to_zero) {
3dddb203
RH
4063 FloatParts64 p;
4064
4065 float64_unpack_raw(&p, a);
e6b405fe
AB
4066 if (parts_squash_denormal(p, status)) {
4067 return float64_set_sign(float64_zero, p.sign);
4068 }
4069 }
4070 return a;
4071}
4072
8282310d
LZ
4073bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4074{
4075 if (status->flush_inputs_to_zero) {
3dddb203
RH
4076 FloatParts64 p;
4077
4078 bfloat16_unpack_raw(&p, a);
8282310d
LZ
4079 if (parts_squash_denormal(p, status)) {
4080 return bfloat16_set_sign(bfloat16_zero, p.sign);
4081 }
4082 }
4083 return a;
4084}
4085
377ed926 4086/*----------------------------------------------------------------------------
158142c2
FB
4087| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
4088| and 7, and returns the properly rounded 32-bit integer corresponding to the
4089| input. If `zSign' is 1, the input is negated before being converted to an
4090| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
4091| is simply rounded to an integer, with the inexact exception raised if the
4092| input cannot be represented exactly as an integer. However, if the fixed-
4093| point input is too large, the invalid exception is raised and the largest
4094| positive or negative integer is returned.
4095*----------------------------------------------------------------------------*/
4096
c120391c
RH
4097static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
4098 float_status *status)
158142c2 4099{
8f506c70 4100 int8_t roundingMode;
c120391c 4101 bool roundNearestEven;
8f506c70 4102 int8_t roundIncrement, roundBits;
760e1416 4103 int32_t z;
158142c2 4104
a2f2d288 4105 roundingMode = status->float_rounding_mode;
158142c2 4106 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4107 switch (roundingMode) {
4108 case float_round_nearest_even:
f9288a76 4109 case float_round_ties_away:
dc355b76
PM
4110 roundIncrement = 0x40;
4111 break;
4112 case float_round_to_zero:
4113 roundIncrement = 0;
4114 break;
4115 case float_round_up:
4116 roundIncrement = zSign ? 0 : 0x7f;
4117 break;
4118 case float_round_down:
4119 roundIncrement = zSign ? 0x7f : 0;
4120 break;
5d64abb3
RH
4121 case float_round_to_odd:
4122 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
4123 break;
dc355b76
PM
4124 default:
4125 abort();
158142c2
FB
4126 }
4127 roundBits = absZ & 0x7F;
4128 absZ = ( absZ + roundIncrement )>>7;
40662886
PMD
4129 if (!(roundBits ^ 0x40) && roundNearestEven) {
4130 absZ &= ~1;
4131 }
158142c2
FB
4132 z = absZ;
4133 if ( zSign ) z = - z;
4134 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 4135 float_raise(float_flag_invalid, status);
2c217da0 4136 return zSign ? INT32_MIN : INT32_MAX;
158142c2 4137 }
a2f2d288 4138 if (roundBits) {
d82f3b2d 4139 float_raise(float_flag_inexact, status);
a2f2d288 4140 }
158142c2
FB
4141 return z;
4142
4143}
4144
4145/*----------------------------------------------------------------------------
4146| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4147| `absZ1', with binary point between bits 63 and 64 (between the input words),
4148| and returns the properly rounded 64-bit integer corresponding to the input.
4149| If `zSign' is 1, the input is negated before being converted to an integer.
4150| Ordinarily, the fixed-point input is simply rounded to an integer, with
4151| the inexact exception raised if the input cannot be represented exactly as
4152| an integer. However, if the fixed-point input is too large, the invalid
4153| exception is raised and the largest positive or negative integer is
4154| returned.
4155*----------------------------------------------------------------------------*/
4156
c120391c 4157static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 4158 float_status *status)
158142c2 4159{
8f506c70 4160 int8_t roundingMode;
c120391c 4161 bool roundNearestEven, increment;
760e1416 4162 int64_t z;
158142c2 4163
a2f2d288 4164 roundingMode = status->float_rounding_mode;
158142c2 4165 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4166 switch (roundingMode) {
4167 case float_round_nearest_even:
f9288a76 4168 case float_round_ties_away:
dc355b76
PM
4169 increment = ((int64_t) absZ1 < 0);
4170 break;
4171 case float_round_to_zero:
4172 increment = 0;
4173 break;
4174 case float_round_up:
4175 increment = !zSign && absZ1;
4176 break;
4177 case float_round_down:
4178 increment = zSign && absZ1;
4179 break;
5d64abb3
RH
4180 case float_round_to_odd:
4181 increment = !(absZ0 & 1) && absZ1;
4182 break;
dc355b76
PM
4183 default:
4184 abort();
158142c2
FB
4185 }
4186 if ( increment ) {
4187 ++absZ0;
4188 if ( absZ0 == 0 ) goto overflow;
40662886
PMD
4189 if (!(absZ1 << 1) && roundNearestEven) {
4190 absZ0 &= ~1;
4191 }
158142c2
FB
4192 }
4193 z = absZ0;
4194 if ( zSign ) z = - z;
4195 if ( z && ( ( z < 0 ) ^ zSign ) ) {
4196 overflow:
ff32e16e 4197 float_raise(float_flag_invalid, status);
2c217da0 4198 return zSign ? INT64_MIN : INT64_MAX;
158142c2 4199 }
a2f2d288 4200 if (absZ1) {
d82f3b2d 4201 float_raise(float_flag_inexact, status);
a2f2d288 4202 }
158142c2
FB
4203 return z;
4204
4205}
4206
fb3ea83a
TM
4207/*----------------------------------------------------------------------------
4208| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4209| `absZ1', with binary point between bits 63 and 64 (between the input words),
4210| and returns the properly rounded 64-bit unsigned integer corresponding to the
4211| input. Ordinarily, the fixed-point input is simply rounded to an integer,
4212| with the inexact exception raised if the input cannot be represented exactly
4213| as an integer. However, if the fixed-point input is too large, the invalid
4214| exception is raised and the largest unsigned integer is returned.
4215*----------------------------------------------------------------------------*/
4216
c120391c 4217static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
e5a41ffa 4218 uint64_t absZ1, float_status *status)
fb3ea83a 4219{
8f506c70 4220 int8_t roundingMode;
c120391c 4221 bool roundNearestEven, increment;
fb3ea83a 4222
a2f2d288 4223 roundingMode = status->float_rounding_mode;
fb3ea83a 4224 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
4225 switch (roundingMode) {
4226 case float_round_nearest_even:
f9288a76 4227 case float_round_ties_away:
dc355b76
PM
4228 increment = ((int64_t)absZ1 < 0);
4229 break;
4230 case float_round_to_zero:
4231 increment = 0;
4232 break;
4233 case float_round_up:
4234 increment = !zSign && absZ1;
4235 break;
4236 case float_round_down:
4237 increment = zSign && absZ1;
4238 break;
5d64abb3
RH
4239 case float_round_to_odd:
4240 increment = !(absZ0 & 1) && absZ1;
4241 break;
dc355b76
PM
4242 default:
4243 abort();
fb3ea83a
TM
4244 }
4245 if (increment) {
4246 ++absZ0;
4247 if (absZ0 == 0) {
ff32e16e 4248 float_raise(float_flag_invalid, status);
2c217da0 4249 return UINT64_MAX;
fb3ea83a 4250 }
40662886
PMD
4251 if (!(absZ1 << 1) && roundNearestEven) {
4252 absZ0 &= ~1;
4253 }
fb3ea83a
TM
4254 }
4255
4256 if (zSign && absZ0) {
ff32e16e 4257 float_raise(float_flag_invalid, status);
fb3ea83a
TM
4258 return 0;
4259 }
4260
4261 if (absZ1) {
d82f3b2d 4262 float_raise(float_flag_inexact, status);
fb3ea83a
TM
4263 }
4264 return absZ0;
4265}
4266
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision significand `aSig'.  The
| significand is shifted left by eight less than its leading-zero count; the
| shifted significand is written through `zSigPtr' and the matching exponent,
| one minus the shift count, is written through `zExpPtr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    const int8_t shift = clz32(aSig) - 8;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4284
158142c2
FB
4285/*----------------------------------------------------------------------------
4286| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4287| and significand `zSig', and returns the proper single-precision floating-
4288| point value corresponding to the abstract input. Ordinarily, the abstract
4289| value is simply rounded and packed into the single-precision format, with
4290| the inexact exception raised if the abstract input cannot be represented
4291| exactly. However, if the abstract value is too large, the overflow and
4292| inexact exceptions are raised and an infinity or maximal finite value is
4293| returned. If the abstract value is too small, the input value is rounded to
4294| a subnormal number, and the underflow and inexact exceptions are raised if
4295| the abstract input cannot be represented exactly as a subnormal single-
4296| precision floating-point number.
4297| The input significand `zSig' has its binary point between bits 30
4298| and 29, which is 7 bits to the left of the usual location. This shifted
4299| significand must be normalized or smaller. If `zSig' is not normalized,
4300| `zExp' must be 0; in that case, the result returned is a subnormal number,
4301| and it must not require rounding. In the usual case that `zSig' is
4302| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4303| The handling of underflow and overflow follows the IEC/IEEE Standard for
4304| Binary Floating-Point Arithmetic.
4305*----------------------------------------------------------------------------*/
4306
c120391c 4307static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4308 float_status *status)
158142c2 4309{
8f506c70 4310 int8_t roundingMode;
c120391c 4311 bool roundNearestEven;
8f506c70 4312 int8_t roundIncrement, roundBits;
c120391c 4313 bool isTiny;
158142c2 4314
a2f2d288 4315 roundingMode = status->float_rounding_mode;
158142c2 4316 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4317 switch (roundingMode) {
4318 case float_round_nearest_even:
f9288a76 4319 case float_round_ties_away:
dc355b76
PM
4320 roundIncrement = 0x40;
4321 break;
4322 case float_round_to_zero:
4323 roundIncrement = 0;
4324 break;
4325 case float_round_up:
4326 roundIncrement = zSign ? 0 : 0x7f;
4327 break;
4328 case float_round_down:
4329 roundIncrement = zSign ? 0x7f : 0;
4330 break;
5d64abb3
RH
4331 case float_round_to_odd:
4332 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4333 break;
dc355b76
PM
4334 default:
4335 abort();
4336 break;
158142c2
FB
4337 }
4338 roundBits = zSig & 0x7F;
bb98fe42 4339 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
4340 if ( ( 0xFD < zExp )
4341 || ( ( zExp == 0xFD )
bb98fe42 4342 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4343 ) {
5d64abb3
RH
4344 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4345 roundIncrement != 0;
ff32e16e 4346 float_raise(float_flag_overflow | float_flag_inexact, status);
5d64abb3 4347 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
158142c2
FB
4348 }
4349 if ( zExp < 0 ) {
a2f2d288 4350 if (status->flush_to_zero) {
ff32e16e 4351 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4352 return packFloat32(zSign, 0, 0);
4353 }
a828b373
RH
4354 isTiny = status->tininess_before_rounding
4355 || (zExp < -1)
4356 || (zSig + roundIncrement < 0x80000000);
158142c2
FB
4357 shift32RightJamming( zSig, - zExp, &zSig );
4358 zExp = 0;
4359 roundBits = zSig & 0x7F;
ff32e16e
PM
4360 if (isTiny && roundBits) {
4361 float_raise(float_flag_underflow, status);
4362 }
5d64abb3
RH
4363 if (roundingMode == float_round_to_odd) {
4364 /*
4365 * For round-to-odd case, the roundIncrement depends on
4366 * zSig which just changed.
4367 */
4368 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4369 }
158142c2
FB
4370 }
4371 }
a2f2d288 4372 if (roundBits) {
d82f3b2d 4373 float_raise(float_flag_inexact, status);
a2f2d288 4374 }
158142c2 4375 zSig = ( zSig + roundIncrement )>>7;
40662886
PMD
4376 if (!(roundBits ^ 0x40) && roundNearestEven) {
4377 zSig &= ~1;
4378 }
158142c2
FB
4379 if ( zSig == 0 ) zExp = 0;
4380 return packFloat32( zSign, zExp, zSig );
4381
4382}
4383
4384/*----------------------------------------------------------------------------
4385| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4386| and significand `zSig', and returns the proper single-precision floating-
4387| point value corresponding to the abstract input. This routine is just like
4388| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4389| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4390| floating-point exponent.
4391*----------------------------------------------------------------------------*/
4392
4393static float32
c120391c 4394 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4395 float_status *status)
158142c2 4396{
8f506c70 4397 int8_t shiftCount;
158142c2 4398
0019d5c3 4399 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
4400 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4401 status);
158142c2
FB
4402
4403}
4404
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision significand `aSig'.  The
| significand is shifted left by eleven less than its leading-zero count; the
| shifted significand is written through `zSigPtr' and the matching exponent,
| one minus the shift count, is written through `zExpPtr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    const int8_t shift = clz64(aSig) - 11;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4422
4423/*----------------------------------------------------------------------------
4424| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4425| double-precision floating-point value, returning the result. After being
4426| shifted into the proper positions, the three fields are simply added
4427| together to form the result. This means that any integer portion of `zSig'
4428| will be added into the exponent. Since a properly normalized significand
4429| will have an integer portion equal to 1, the `zExp' input should be 1 less
4430| than the desired result exponent whenever `zSig' is a complete, normalized
4431| significand.
4432*----------------------------------------------------------------------------*/
4433
c120391c 4434static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
158142c2
FB
4435{
4436
f090c9d4 4437 return make_float64(
bb98fe42 4438 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
4439
4440}
4441
4442/*----------------------------------------------------------------------------
4443| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4444| and significand `zSig', and returns the proper double-precision floating-
4445| point value corresponding to the abstract input. Ordinarily, the abstract
4446| value is simply rounded and packed into the double-precision format, with
4447| the inexact exception raised if the abstract input cannot be represented
4448| exactly. However, if the abstract value is too large, the overflow and
4449| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
4450| returned. If the abstract value is too small, the input value is rounded to
4451| a subnormal number, and the underflow and inexact exceptions are raised if
4452| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
4453| precision floating-point number.
4454| The input significand `zSig' has its binary point between bits 62
4455| and 61, which is 10 bits to the left of the usual location. This shifted
4456| significand must be normalized or smaller. If `zSig' is not normalized,
4457| `zExp' must be 0; in that case, the result returned is a subnormal number,
4458| and it must not require rounding. In the usual case that `zSig' is
4459| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4460| The handling of underflow and overflow follows the IEC/IEEE Standard for
4461| Binary Floating-Point Arithmetic.
4462*----------------------------------------------------------------------------*/
4463
c120391c 4464static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4465 float_status *status)
158142c2 4466{
8f506c70 4467 int8_t roundingMode;
c120391c 4468 bool roundNearestEven;
0c48262d 4469 int roundIncrement, roundBits;
c120391c 4470 bool isTiny;
158142c2 4471
a2f2d288 4472 roundingMode = status->float_rounding_mode;
158142c2 4473 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4474 switch (roundingMode) {
4475 case float_round_nearest_even:
f9288a76 4476 case float_round_ties_away:
dc355b76
PM
4477 roundIncrement = 0x200;
4478 break;
4479 case float_round_to_zero:
4480 roundIncrement = 0;
4481 break;
4482 case float_round_up:
4483 roundIncrement = zSign ? 0 : 0x3ff;
4484 break;
4485 case float_round_down:
4486 roundIncrement = zSign ? 0x3ff : 0;
4487 break;
9ee6f678
BR
4488 case float_round_to_odd:
4489 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4490 break;
dc355b76
PM
4491 default:
4492 abort();
158142c2
FB
4493 }
4494 roundBits = zSig & 0x3FF;
bb98fe42 4495 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
4496 if ( ( 0x7FD < zExp )
4497 || ( ( zExp == 0x7FD )
bb98fe42 4498 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4499 ) {
9ee6f678
BR
4500 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4501 roundIncrement != 0;
ff32e16e 4502 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 4503 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
4504 }
4505 if ( zExp < 0 ) {
a2f2d288 4506 if (status->flush_to_zero) {
ff32e16e 4507 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4508 return packFloat64(zSign, 0, 0);
4509 }
a828b373
RH
4510 isTiny = status->tininess_before_rounding
4511 || (zExp < -1)
4512 || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
158142c2
FB
4513 shift64RightJamming( zSig, - zExp, &zSig );
4514 zExp = 0;
4515 roundBits = zSig & 0x3FF;
ff32e16e
PM
4516 if (isTiny && roundBits) {
4517 float_raise(float_flag_underflow, status);
4518 }
9ee6f678
BR
4519 if (roundingMode == float_round_to_odd) {
4520 /*
4521 * For round-to-odd case, the roundIncrement depends on
4522 * zSig which just changed.
4523 */
4524 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4525 }
158142c2
FB
4526 }
4527 }
a2f2d288 4528 if (roundBits) {
d82f3b2d 4529 float_raise(float_flag_inexact, status);
a2f2d288 4530 }
158142c2 4531 zSig = ( zSig + roundIncrement )>>10;
40662886
PMD
4532 if (!(roundBits ^ 0x200) && roundNearestEven) {
4533 zSig &= ~1;
4534 }
158142c2
FB
4535 if ( zSig == 0 ) zExp = 0;
4536 return packFloat64( zSign, zExp, zSig );
4537
4538}
4539
4540/*----------------------------------------------------------------------------
4541| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4542| and significand `zSig', and returns the proper double-precision floating-
4543| point value corresponding to the abstract input. This routine is just like
4544| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4545| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4546| floating-point exponent.
4547*----------------------------------------------------------------------------*/
4548
4549static float64
c120391c 4550 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4551 float_status *status)
158142c2 4552{
8f506c70 4553 int8_t shiftCount;
158142c2 4554
0019d5c3 4555 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
4556 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4557 status);
158142c2
FB
4558
4559}
4560
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision significand `aSig'.
| The significand is shifted left by its leading-zero count; the shifted
| significand is written through `zSigPtr' and the matching exponent, one
| minus the shift count, is written through `zExpPtr'.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    const int8_t shift = clz64(aSig);

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
4577
4578/*----------------------------------------------------------------------------
4579| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4580| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4581| and returns the proper extended double-precision floating-point value
4582| corresponding to the abstract input. Ordinarily, the abstract value is
4583| rounded and packed into the extended double-precision format, with the
4584| inexact exception raised if the abstract input cannot be represented
4585| exactly. However, if the abstract value is too large, the overflow and
4586| inexact exceptions are raised and an infinity or maximal finite value is
4587| returned. If the abstract value is too small, the input value is rounded to
4588| a subnormal number, and the underflow and inexact exceptions are raised if
4589| the abstract input cannot be represented exactly as a subnormal extended
4590| double-precision floating-point number.
4591| If `roundingPrecision' is 32 or 64, the result is rounded to the same
4592| number of bits as single or double precision, respectively. Otherwise, the
4593| result is rounded to the full precision of the extended double-precision
4594| format.
4595| The input significand must be normalized or smaller. If the input
4596| significand is not normalized, `zExp' must be 0; in that case, the result
4597| returned is a subnormal number, and it must not require rounding. The
4598| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4599| Floating-Point Arithmetic.
4600*----------------------------------------------------------------------------*/
4601
c120391c 4602floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
88857aca
LV
4603 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4604 float_status *status)
158142c2 4605{
8f506c70 4606 int8_t roundingMode;
c120391c 4607 bool roundNearestEven, increment, isTiny;
f42c2224 4608 int64_t roundIncrement, roundMask, roundBits;
158142c2 4609
a2f2d288 4610 roundingMode = status->float_rounding_mode;
158142c2
FB
4611 roundNearestEven = ( roundingMode == float_round_nearest_even );
4612 if ( roundingPrecision == 80 ) goto precision80;
4613 if ( roundingPrecision == 64 ) {
e9321124
AB
4614 roundIncrement = UINT64_C(0x0000000000000400);
4615 roundMask = UINT64_C(0x00000000000007FF);
158142c2
FB
4616 }
4617 else if ( roundingPrecision == 32 ) {
e9321124
AB
4618 roundIncrement = UINT64_C(0x0000008000000000);
4619 roundMask = UINT64_C(0x000000FFFFFFFFFF);
158142c2
FB
4620 }
4621 else {
4622 goto precision80;
4623 }
4624 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
4625 switch (roundingMode) {
4626 case float_round_nearest_even:
f9288a76 4627 case float_round_ties_away:
dc355b76
PM
4628 break;
4629 case float_round_to_zero:
4630 roundIncrement = 0;
4631 break;
4632 case float_round_up:
4633 roundIncrement = zSign ? 0 : roundMask;
4634 break;
4635 case float_round_down:
4636 roundIncrement = zSign ? roundMask : 0;
4637 break;
4638 default:
4639 abort();
158142c2
FB
4640 }
4641 roundBits = zSig0 & roundMask;
bb98fe42 4642 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4643 if ( ( 0x7FFE < zExp )
4644 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4645 ) {
4646 goto overflow;
4647 }
4648 if ( zExp <= 0 ) {
a2f2d288 4649 if (status->flush_to_zero) {
ff32e16e 4650 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4651 return packFloatx80(zSign, 0, 0);
4652 }
a828b373
RH
4653 isTiny = status->tininess_before_rounding
4654 || (zExp < 0 )
4655 || (zSig0 <= zSig0 + roundIncrement);
158142c2
FB
4656 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4657 zExp = 0;
4658 roundBits = zSig0 & roundMask;
ff32e16e
PM
4659 if (isTiny && roundBits) {
4660 float_raise(float_flag_underflow, status);
4661 }
a2f2d288 4662 if (roundBits) {
d82f3b2d 4663 float_raise(float_flag_inexact, status);
a2f2d288 4664 }
158142c2 4665 zSig0 += roundIncrement;
bb98fe42 4666 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4667 roundIncrement = roundMask + 1;
4668 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4669 roundMask |= roundIncrement;
4670 }
4671 zSig0 &= ~ roundMask;
4672 return packFloatx80( zSign, zExp, zSig0 );
4673 }
4674 }
a2f2d288 4675 if (roundBits) {
d82f3b2d 4676 float_raise(float_flag_inexact, status);
a2f2d288 4677 }
158142c2
FB
4678 zSig0 += roundIncrement;
4679 if ( zSig0 < roundIncrement ) {
4680 ++zExp;
e9321124 4681 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4682 }
4683 roundIncrement = roundMask + 1;
4684 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4685 roundMask |= roundIncrement;
4686 }
4687 zSig0 &= ~ roundMask;
4688 if ( zSig0 == 0 ) zExp = 0;
4689 return packFloatx80( zSign, zExp, zSig0 );
4690 precision80:
dc355b76
PM
4691 switch (roundingMode) {
4692 case float_round_nearest_even:
f9288a76 4693 case float_round_ties_away:
dc355b76
PM
4694 increment = ((int64_t)zSig1 < 0);
4695 break;
4696 case float_round_to_zero:
4697 increment = 0;
4698 break;
4699 case float_round_up:
4700 increment = !zSign && zSig1;
4701 break;
4702 case float_round_down:
4703 increment = zSign && zSig1;
4704 break;
4705 default:
4706 abort();
158142c2 4707 }
bb98fe42 4708 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4709 if ( ( 0x7FFE < zExp )
4710 || ( ( zExp == 0x7FFE )
e9321124 4711 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
4712 && increment
4713 )
4714 ) {
4715 roundMask = 0;
4716 overflow:
ff32e16e 4717 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4718 if ( ( roundingMode == float_round_to_zero )
4719 || ( zSign && ( roundingMode == float_round_up ) )
4720 || ( ! zSign && ( roundingMode == float_round_down ) )
4721 ) {
4722 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4723 }
0f605c88
LV
4724 return packFloatx80(zSign,
4725 floatx80_infinity_high,
4726 floatx80_infinity_low);
158142c2
FB
4727 }
4728 if ( zExp <= 0 ) {
a828b373
RH
4729 isTiny = status->tininess_before_rounding
4730 || (zExp < 0)
4731 || !increment
4732 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4733 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4734 zExp = 0;
ff32e16e
PM
4735 if (isTiny && zSig1) {
4736 float_raise(float_flag_underflow, status);
4737 }
a2f2d288 4738 if (zSig1) {
d82f3b2d 4739 float_raise(float_flag_inexact, status);
a2f2d288 4740 }
dc355b76
PM
4741 switch (roundingMode) {
4742 case float_round_nearest_even:
f9288a76 4743 case float_round_ties_away:
dc355b76
PM
4744 increment = ((int64_t)zSig1 < 0);
4745 break;
4746 case float_round_to_zero:
4747 increment = 0;
4748 break;
4749 case float_round_up:
4750 increment = !zSign && zSig1;
4751 break;
4752 case float_round_down:
4753 increment = zSign && zSig1;
4754 break;
4755 default:
4756 abort();
158142c2
FB
4757 }
4758 if ( increment ) {
4759 ++zSig0;
40662886
PMD
4760 if (!(zSig1 << 1) && roundNearestEven) {
4761 zSig0 &= ~1;
4762 }
bb98fe42 4763 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4764 }
4765 return packFloatx80( zSign, zExp, zSig0 );
4766 }
4767 }
a2f2d288 4768 if (zSig1) {
d82f3b2d 4769 float_raise(float_flag_inexact, status);
a2f2d288 4770 }
158142c2
FB
4771 if ( increment ) {
4772 ++zSig0;
4773 if ( zSig0 == 0 ) {
4774 ++zExp;
e9321124 4775 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4776 }
4777 else {
40662886
PMD
4778 if (!(zSig1 << 1) && roundNearestEven) {
4779 zSig0 &= ~1;
4780 }
158142c2
FB
4781 }
4782 }
4783 else {
4784 if ( zSig0 == 0 ) zExp = 0;
4785 }
4786 return packFloatx80( zSign, zExp, zSig0 );
4787
4788}
4789
4790/*----------------------------------------------------------------------------
4791| Takes an abstract floating-point value having sign `zSign', exponent
4792| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4793| and returns the proper extended double-precision floating-point value
4794| corresponding to the abstract input. This routine is just like
4795| `roundAndPackFloatx80' except that the input significand does not have to be
4796| normalized.
4797*----------------------------------------------------------------------------*/
4798
88857aca 4799floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
c120391c 4800 bool zSign, int32_t zExp,
88857aca
LV
4801 uint64_t zSig0, uint64_t zSig1,
4802 float_status *status)
158142c2 4803{
8f506c70 4804 int8_t shiftCount;
158142c2
FB
4805
4806 if ( zSig0 == 0 ) {
4807 zSig0 = zSig1;
4808 zSig1 = 0;
4809 zExp -= 64;
4810 }
0019d5c3 4811 shiftCount = clz64(zSig0);
158142c2
FB
4812 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4813 zExp -= shiftCount;
ff32e16e
PM
4814 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4815 zSig0, zSig1, status);
158142c2
FB
4816
4817}
4818
158142c2
FB
4819/*----------------------------------------------------------------------------
4820| Returns the least-significant 64 fraction bits of the quadruple-precision
4821| floating-point value `a'.
4822*----------------------------------------------------------------------------*/
4823
a49db98d 4824static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4825{
4826
4827 return a.low;
4828
4829}
4830
4831/*----------------------------------------------------------------------------
4832| Returns the most-significant 48 fraction bits of the quadruple-precision
4833| floating-point value `a'.
4834*----------------------------------------------------------------------------*/
4835
a49db98d 4836static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4837{
4838
e9321124 4839 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
158142c2
FB
4840
4841}
4842
4843/*----------------------------------------------------------------------------
4844| Returns the exponent bits of the quadruple-precision floating-point value
4845| `a'.
4846*----------------------------------------------------------------------------*/
4847
f4014512 4848static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4849{
4850
4851 return ( a.high>>48 ) & 0x7FFF;
4852
4853}
4854
4855/*----------------------------------------------------------------------------
4856| Returns the sign bit of the quadruple-precision floating-point value `a'.
4857*----------------------------------------------------------------------------*/
4858
c120391c 4859static inline bool extractFloat128Sign(float128 a)
158142c2 4860{
c120391c 4861 return a.high >> 63;
158142c2
FB
4862}
4863
4864/*----------------------------------------------------------------------------
4865| Normalizes the subnormal quadruple-precision floating-point value
4866| represented by the denormalized significand formed by the concatenation of
4867| `aSig0' and `aSig1'. The normalized exponent is stored at the location
4868| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4869| significand are stored at the location pointed to by `zSig0Ptr', and the
4870| least significant 64 bits of the normalized significand are stored at the
4871| location pointed to by `zSig1Ptr'.
4872*----------------------------------------------------------------------------*/
4873
4874static void
4875 normalizeFloat128Subnormal(
bb98fe42
AF
4876 uint64_t aSig0,
4877 uint64_t aSig1,
f4014512 4878 int32_t *zExpPtr,
bb98fe42
AF
4879 uint64_t *zSig0Ptr,
4880 uint64_t *zSig1Ptr
158142c2
FB
4881 )
4882{
8f506c70 4883 int8_t shiftCount;
158142c2
FB
4884
4885 if ( aSig0 == 0 ) {
0019d5c3 4886 shiftCount = clz64(aSig1) - 15;
158142c2
FB
4887 if ( shiftCount < 0 ) {
4888 *zSig0Ptr = aSig1>>( - shiftCount );
4889 *zSig1Ptr = aSig1<<( shiftCount & 63 );
4890 }
4891 else {
4892 *zSig0Ptr = aSig1<<shiftCount;
4893 *zSig1Ptr = 0;
4894 }
4895 *zExpPtr = - shiftCount - 63;
4896 }
4897 else {
0019d5c3 4898 shiftCount = clz64(aSig0) - 15;
158142c2
FB
4899 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
4900 *zExpPtr = 1 - shiftCount;
4901 }
4902
4903}
4904
4905/*----------------------------------------------------------------------------
4906| Packs the sign `zSign', the exponent `zExp', and the significand formed
4907| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4908| floating-point value, returning the result. After being shifted into the
4909| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4910| added together to form the most significant 32 bits of the result. This
4911| means that any integer portion of `zSig0' will be added into the exponent.
4912| Since a properly normalized significand will have an integer portion equal
4913| to 1, the `zExp' input should be 1 less than the desired result exponent
4914| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4915| significand.
4916*----------------------------------------------------------------------------*/
4917
a49db98d 4918static inline float128
c120391c 4919packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
158142c2
FB
4920{
4921 float128 z;
4922
4923 z.low = zSig1;
c120391c 4924 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
158142c2 4925 return z;
158142c2
FB
4926}
4927
4928/*----------------------------------------------------------------------------
4929| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4930| and extended significand formed by the concatenation of `zSig0', `zSig1',
4931| and `zSig2', and returns the proper quadruple-precision floating-point value
4932| corresponding to the abstract input. Ordinarily, the abstract value is
4933| simply rounded and packed into the quadruple-precision format, with the
4934| inexact exception raised if the abstract input cannot be represented
4935| exactly. However, if the abstract value is too large, the overflow and
4936| inexact exceptions are raised and an infinity or maximal finite value is
4937| returned. If the abstract value is too small, the input value is rounded to
4938| a subnormal number, and the underflow and inexact exceptions are raised if
4939| the abstract input cannot be represented exactly as a subnormal quadruple-
4940| precision floating-point number.
4941| The input significand must be normalized or smaller. If the input
4942| significand is not normalized, `zExp' must be 0; in that case, the result
4943| returned is a subnormal number, and it must not require rounding. In the
4944| usual case that the input significand is normalized, `zExp' must be 1 less
4945| than the ``true'' floating-point exponent. The handling of underflow and
4946| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4947*----------------------------------------------------------------------------*/
4948
c120391c 4949static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4950 uint64_t zSig0, uint64_t zSig1,
4951 uint64_t zSig2, float_status *status)
158142c2 4952{
8f506c70 4953 int8_t roundingMode;
c120391c 4954 bool roundNearestEven, increment, isTiny;
158142c2 4955
a2f2d288 4956 roundingMode = status->float_rounding_mode;
158142c2 4957 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4958 switch (roundingMode) {
4959 case float_round_nearest_even:
f9288a76 4960 case float_round_ties_away:
dc355b76
PM
4961 increment = ((int64_t)zSig2 < 0);
4962 break;
4963 case float_round_to_zero:
4964 increment = 0;
4965 break;
4966 case float_round_up:
4967 increment = !zSign && zSig2;
4968 break;
4969 case float_round_down:
4970 increment = zSign && zSig2;
4971 break;
9ee6f678
BR
4972 case float_round_to_odd:
4973 increment = !(zSig1 & 0x1) && zSig2;
4974 break;
dc355b76
PM
4975 default:
4976 abort();
158142c2 4977 }
bb98fe42 4978 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
4979 if ( ( 0x7FFD < zExp )
4980 || ( ( zExp == 0x7FFD )
4981 && eq128(
e9321124
AB
4982 UINT64_C(0x0001FFFFFFFFFFFF),
4983 UINT64_C(0xFFFFFFFFFFFFFFFF),
158142c2
FB
4984 zSig0,
4985 zSig1
4986 )
4987 && increment
4988 )
4989 ) {
ff32e16e 4990 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4991 if ( ( roundingMode == float_round_to_zero )
4992 || ( zSign && ( roundingMode == float_round_up ) )
4993 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4994 || (roundingMode == float_round_to_odd)
158142c2
FB
4995 ) {
4996 return
4997 packFloat128(
4998 zSign,
4999 0x7FFE,
e9321124
AB
5000 UINT64_C(0x0000FFFFFFFFFFFF),
5001 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
5002 );
5003 }
5004 return packFloat128( zSign, 0x7FFF, 0, 0 );
5005 }
5006 if ( zExp < 0 ) {
a2f2d288 5007 if (status->flush_to_zero) {
ff32e16e 5008 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
5009 return packFloat128(zSign, 0, 0, 0);
5010 }
a828b373
RH
5011 isTiny = status->tininess_before_rounding
5012 || (zExp < -1)
5013 || !increment
5014 || lt128(zSig0, zSig1,
5015 UINT64_C(0x0001FFFFFFFFFFFF),
5016 UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
5017 shift128ExtraRightJamming(
5018 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
5019 zExp = 0;
ff32e16e
PM
5020 if (isTiny && zSig2) {
5021 float_raise(float_flag_underflow, status);
5022 }
dc355b76
PM
5023 switch (roundingMode) {
5024 case float_round_nearest_even:
f9288a76 5025 case float_round_ties_away:
dc355b76
PM
5026 increment = ((int64_t)zSig2 < 0);
5027 break;
5028 case float_round_to_zero:
5029 increment = 0;
5030 break;
5031 case float_round_up:
5032 increment = !zSign && zSig2;
5033 break;
5034 case float_round_down:
5035 increment = zSign && zSig2;
5036 break;
9ee6f678
BR
5037 case float_round_to_odd:
5038 increment = !(zSig1 & 0x1) && zSig2;
5039 break;
dc355b76
PM
5040 default:
5041 abort();
158142c2
FB
5042 }
5043 }
5044 }
a2f2d288 5045 if (zSig2) {
d82f3b2d 5046 float_raise(float_flag_inexact, status);
a2f2d288 5047 }
158142c2
FB
5048 if ( increment ) {
5049 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
40662886
PMD
5050 if ((zSig2 + zSig2 == 0) && roundNearestEven) {
5051 zSig1 &= ~1;
5052 }
158142c2
FB
5053 }
5054 else {
5055 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
5056 }
5057 return packFloat128( zSign, zExp, zSig0, zSig1 );
5058
5059}
5060
5061/*----------------------------------------------------------------------------
5062| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
5063| and significand formed by the concatenation of `zSig0' and `zSig1', and
5064| returns the proper quadruple-precision floating-point value corresponding
5065| to the abstract input. This routine is just like `roundAndPackFloat128'
5066| except that the input significand has fewer bits and does not have to be
5067| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
5068| point exponent.
5069*----------------------------------------------------------------------------*/
5070
c120391c 5071static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
5072 uint64_t zSig0, uint64_t zSig1,
5073 float_status *status)
158142c2 5074{
8f506c70 5075 int8_t shiftCount;
bb98fe42 5076 uint64_t zSig2;
158142c2
FB
5077
5078 if ( zSig0 == 0 ) {
5079 zSig0 = zSig1;
5080 zSig1 = 0;
5081 zExp -= 64;
5082 }
0019d5c3 5083 shiftCount = clz64(zSig0) - 15;
158142c2
FB
5084 if ( 0 <= shiftCount ) {
5085 zSig2 = 0;
5086 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5087 }
5088 else {
5089 shift128ExtraRightJamming(
5090 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
5091 }
5092 zExp -= shiftCount;
ff32e16e 5093 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
5094
5095}
5096
158142c2 5097
158142c2
FB
5098/*----------------------------------------------------------------------------
5099| Returns the result of converting the 32-bit two's complement integer `a'
5100| to the extended double-precision floating-point format. The conversion
5101| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5102| Arithmetic.
5103*----------------------------------------------------------------------------*/
5104
e5a41ffa 5105floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2 5106{
c120391c 5107 bool zSign;
3a87d009 5108 uint32_t absA;
8f506c70 5109 int8_t shiftCount;
bb98fe42 5110 uint64_t zSig;
158142c2
FB
5111
5112 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5113 zSign = ( a < 0 );
5114 absA = zSign ? - a : a;
0019d5c3 5115 shiftCount = clz32(absA) + 32;
158142c2
FB
5116 zSig = absA;
5117 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
5118
5119}
5120
158142c2
FB
5121/*----------------------------------------------------------------------------
5122| Returns the result of converting the 32-bit two's complement integer `a' to
5123| the quadruple-precision floating-point format. The conversion is performed
5124| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5125*----------------------------------------------------------------------------*/
5126
e5a41ffa 5127float128 int32_to_float128(int32_t a, float_status *status)
158142c2 5128{
c120391c 5129 bool zSign;
3a87d009 5130 uint32_t absA;
8f506c70 5131 int8_t shiftCount;
bb98fe42 5132 uint64_t zSig0;
158142c2
FB
5133
5134 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5135 zSign = ( a < 0 );
5136 absA = zSign ? - a : a;
0019d5c3 5137 shiftCount = clz32(absA) + 17;
158142c2
FB
5138 zSig0 = absA;
5139 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
5140
5141}
5142
158142c2
FB
5143/*----------------------------------------------------------------------------
5144| Returns the result of converting the 64-bit two's complement integer `a'
5145| to the extended double-precision floating-point format. The conversion
5146| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5147| Arithmetic.
5148*----------------------------------------------------------------------------*/
5149
e5a41ffa 5150floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2 5151{
c120391c 5152 bool zSign;
182f42fd 5153 uint64_t absA;
8f506c70 5154 int8_t shiftCount;
158142c2
FB
5155
5156 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
5157 zSign = ( a < 0 );
5158 absA = zSign ? - a : a;
0019d5c3 5159 shiftCount = clz64(absA);
158142c2
FB
5160 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
5161
5162}
5163
158142c2
FB
5164/*----------------------------------------------------------------------------
5165| Returns the result of converting the 64-bit two's complement integer `a' to
5166| the quadruple-precision floating-point format. The conversion is performed
5167| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5168*----------------------------------------------------------------------------*/
5169
e5a41ffa 5170float128 int64_to_float128(int64_t a, float_status *status)
158142c2 5171{
c120391c 5172 bool zSign;
182f42fd 5173 uint64_t absA;
8f506c70 5174 int8_t shiftCount;
f4014512 5175 int32_t zExp;
bb98fe42 5176 uint64_t zSig0, zSig1;
158142c2
FB
5177
5178 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5179 zSign = ( a < 0 );
5180 absA = zSign ? - a : a;
0019d5c3 5181 shiftCount = clz64(absA) + 49;
158142c2
FB
5182 zExp = 0x406E - shiftCount;
5183 if ( 64 <= shiftCount ) {
5184 zSig1 = 0;
5185 zSig0 = absA;
5186 shiftCount -= 64;
5187 }
5188 else {
5189 zSig1 = absA;
5190 zSig0 = 0;
5191 }
5192 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5193 return packFloat128( zSign, zExp, zSig0, zSig1 );
5194
5195}
5196
6bb8e0f1
PM
5197/*----------------------------------------------------------------------------
5198| Returns the result of converting the 64-bit unsigned integer `a'
5199| to the quadruple-precision floating-point format. The conversion is performed
5200| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5201*----------------------------------------------------------------------------*/
5202
e5a41ffa 5203float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
5204{
5205 if (a == 0) {
5206 return float128_zero;
5207 }
6603d506 5208 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
5209}
5210
158142c2
FB
5211/*----------------------------------------------------------------------------
5212| Returns the result of converting the single-precision floating-point value
5213| `a' to the extended double-precision floating-point format. The conversion
5214| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5215| Arithmetic.
5216*----------------------------------------------------------------------------*/
5217
e5a41ffa 5218floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2 5219{
c120391c 5220 bool aSign;
0c48262d 5221 int aExp;
bb98fe42 5222 uint32_t aSig;
158142c2 5223
ff32e16e 5224 a = float32_squash_input_denormal(a, status);
158142c2
FB
5225 aSig = extractFloat32Frac( a );
5226 aExp = extractFloat32Exp( a );
5227 aSign = extractFloat32Sign( a );
5228 if ( aExp == 0xFF ) {
ff32e16e 5229 if (aSig) {
7537c2b4
JM
5230 floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
5231 status);
5232 return floatx80_silence_nan(res, status);
ff32e16e 5233 }
0f605c88
LV
5234 return packFloatx80(aSign,
5235 floatx80_infinity_high,
5236 floatx80_infinity_low);
158142c2
FB
5237 }
5238 if ( aExp == 0 ) {
5239 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5240 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5241 }
5242 aSig |= 0x00800000;
bb98fe42 5243 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
5244
5245}
5246
158142c2
FB
5247/*----------------------------------------------------------------------------
5248| Returns the remainder of the single-precision floating-point value `a'
5249| with respect to the corresponding value `b'. The operation is performed
5250| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5251*----------------------------------------------------------------------------*/
5252
e5a41ffa 5253float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 5254{
c120391c 5255 bool aSign, zSign;
0c48262d 5256 int aExp, bExp, expDiff;
bb98fe42
AF
5257 uint32_t aSig, bSig;
5258 uint32_t q;
5259 uint64_t aSig64, bSig64, q64;
5260 uint32_t alternateASig;
5261 int32_t sigMean;
ff32e16e
PM
5262 a = float32_squash_input_denormal(a, status);
5263 b = float32_squash_input_denormal(b, status);
158142c2
FB
5264
5265 aSig = extractFloat32Frac( a );
5266 aExp = extractFloat32Exp( a );
5267 aSign = extractFloat32Sign( a );
5268 bSig = extractFloat32Frac( b );
5269 bExp = extractFloat32Exp( b );
158142c2
FB
5270 if ( aExp == 0xFF ) {
5271 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 5272 return propagateFloat32NaN(a, b, status);
158142c2 5273 }
ff32e16e 5274 float_raise(float_flag_invalid, status);
af39bc8c 5275 return float32_default_nan(status);
158142c2
FB
5276 }
5277 if ( bExp == 0xFF ) {
ff32e16e
PM
5278 if (bSig) {
5279 return propagateFloat32NaN(a, b, status);
5280 }
158142c2
FB
5281 return a;
5282 }
5283 if ( bExp == 0 ) {
5284 if ( bSig == 0 ) {
ff32e16e 5285 float_raise(float_flag_invalid, status);
af39bc8c 5286 return float32_default_nan(status);
158142c2
FB
5287 }
5288 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
5289 }
5290 if ( aExp == 0 ) {
5291 if ( aSig == 0 ) return a;
5292 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5293 }
5294 expDiff = aExp - bExp;
5295 aSig |= 0x00800000;
5296 bSig |= 0x00800000;
5297 if ( expDiff < 32 ) {
5298 aSig <<= 8;
5299 bSig <<= 8;
5300 if ( expDiff < 0 ) {
5301 if ( expDiff < -1 ) return a;
5302 aSig >>= 1;
5303 }
5304 q = ( bSig <= aSig );
5305 if ( q ) aSig -= bSig;
5306 if ( 0 < expDiff ) {
bb98fe42 5307 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
5308 q >>= 32 - expDiff;
5309 bSig >>= 2;
5310 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5311 }
5312 else {
5313 aSig >>= 2;
5314 bSig >>= 2;
5315 }
5316 }
5317 else {
5318 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
5319 aSig64 = ( (uint64_t) aSig )<<40;
5320 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
5321 expDiff -= 64;
5322 while ( 0 < expDiff ) {
5323 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5324 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5325 aSig64 = - ( ( bSig * q64 )<<38 );
5326 expDiff -= 62;
5327 }
5328 expDiff += 64;
5329 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5330 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5331 q = q64>>( 64 - expDiff );
5332 bSig <<= 6;
5333 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5334 }
5335 do {
5336 alternateASig = aSig;
5337 ++q;
5338 aSig -= bSig;
bb98fe42 5339 } while ( 0 <= (int32_t) aSig );
158142c2
FB
5340 sigMean = aSig + alternateASig;
5341 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5342 aSig = alternateASig;
5343 }
bb98fe42 5344 zSign = ( (int32_t) aSig < 0 );
158142c2 5345 if ( zSign ) aSig = - aSig;
ff32e16e 5346 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5347}
5348
369be8f6 5349
158142c2 5350
8229c991
AJ
5351/*----------------------------------------------------------------------------
5352| Returns the binary exponential of the single-precision floating-point value
5353| `a'. The operation is performed according to the IEC/IEEE Standard for
5354| Binary Floating-Point Arithmetic.
5355|
5356| Uses the following identities:
5357|
5358| 1. -------------------------------------------------------------------------
5359| x x*ln(2)
5360| 2 = e
5361|
5362| 2. -------------------------------------------------------------------------
5363| 2 3 4 5 n
5364| x x x x x x x
5365| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5366| 1! 2! 3! 4! 5! n!
5367*----------------------------------------------------------------------------*/
5368
5369static const float64 float32_exp2_coefficients[15] =
5370{
d5138cf4
PM
5371 const_float64( 0x3ff0000000000000ll ), /* 1 */
5372 const_float64( 0x3fe0000000000000ll ), /* 2 */
5373 const_float64( 0x3fc5555555555555ll ), /* 3 */
5374 const_float64( 0x3fa5555555555555ll ), /* 4 */
5375 const_float64( 0x3f81111111111111ll ), /* 5 */
5376 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5377 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5378 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5379 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5380 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5381 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5382 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5383 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5384 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5385 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
5386};
5387
e5a41ffa 5388float32 float32_exp2(float32 a, float_status *status)
8229c991 5389{
c120391c 5390 bool aSign;
0c48262d 5391 int aExp;
bb98fe42 5392 uint32_t aSig;
8229c991
AJ
5393 float64 r, x, xn;
5394 int i;
ff32e16e 5395 a = float32_squash_input_denormal(a, status);
8229c991
AJ
5396
5397 aSig = extractFloat32Frac( a );
5398 aExp = extractFloat32Exp( a );
5399 aSign = extractFloat32Sign( a );
5400
5401 if ( aExp == 0xFF) {
ff32e16e
PM
5402 if (aSig) {
5403 return propagateFloat32NaN(a, float32_zero, status);
5404 }
8229c991
AJ
5405 return (aSign) ? float32_zero : a;
5406 }
5407 if (aExp == 0) {
5408 if (aSig == 0) return float32_one;
5409 }
5410
ff32e16e 5411 float_raise(float_flag_inexact, status);
8229c991
AJ
5412
5413 /* ******************************* */
5414 /* using float64 for approximation */
5415 /* ******************************* */
ff32e16e
PM
5416 x = float32_to_float64(a, status);
5417 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
5418
5419 xn = x;
5420 r = float64_one;
5421 for (i = 0 ; i < 15 ; i++) {
5422 float64 f;
5423
ff32e16e
PM
5424 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5425 r = float64_add(r, f, status);
8229c991 5426
ff32e16e 5427 xn = float64_mul(xn, x, status);
8229c991
AJ
5428 }
5429
5430 return float64_to_float32(r, status);
5431}
5432
374dfc33
AJ
5433/*----------------------------------------------------------------------------
5434| Returns the binary log of the single-precision floating-point value `a'.
5435| The operation is performed according to the IEC/IEEE Standard for Binary
5436| Floating-Point Arithmetic.
5437*----------------------------------------------------------------------------*/
e5a41ffa 5438float32 float32_log2(float32 a, float_status *status)
374dfc33 5439{
c120391c 5440 bool aSign, zSign;
0c48262d 5441 int aExp;
bb98fe42 5442 uint32_t aSig, zSig, i;
374dfc33 5443
ff32e16e 5444 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
5445 aSig = extractFloat32Frac( a );
5446 aExp = extractFloat32Exp( a );
5447 aSign = extractFloat32Sign( a );
5448
5449 if ( aExp == 0 ) {
5450 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5451 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5452 }
5453 if ( aSign ) {
ff32e16e 5454 float_raise(float_flag_invalid, status);
af39bc8c 5455 return float32_default_nan(status);
374dfc33
AJ
5456 }
5457 if ( aExp == 0xFF ) {
ff32e16e
PM
5458 if (aSig) {
5459 return propagateFloat32NaN(a, float32_zero, status);
5460 }
374dfc33
AJ
5461 return a;
5462 }
5463
5464 aExp -= 0x7F;
5465 aSig |= 0x00800000;
5466 zSign = aExp < 0;
5467 zSig = aExp << 23;
5468
5469 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 5470 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
5471 if ( aSig & 0x01000000 ) {
5472 aSig >>= 1;
5473 zSig |= i;
5474 }
5475 }
5476
5477 if ( zSign )
5478 zSig = -zSig;
5479
ff32e16e 5480 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
5481}
5482
158142c2 5483/*----------------------------------------------------------------------------
158142c2
FB
5484| Returns the result of converting the double-precision floating-point value
5485| `a' to the extended double-precision floating-point format. The conversion
5486| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5487| Arithmetic.
5488*----------------------------------------------------------------------------*/
5489
e5a41ffa 5490floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2 5491{
c120391c 5492 bool aSign;
0c48262d 5493 int aExp;
bb98fe42 5494 uint64_t aSig;
158142c2 5495
ff32e16e 5496 a = float64_squash_input_denormal(a, status);
158142c2
FB
5497 aSig = extractFloat64Frac( a );
5498 aExp = extractFloat64Exp( a );
5499 aSign = extractFloat64Sign( a );
5500 if ( aExp == 0x7FF ) {
ff32e16e 5501 if (aSig) {
7537c2b4
JM
5502 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5503 status);
5504 return floatx80_silence_nan(res, status);
ff32e16e 5505 }
0f605c88
LV
5506 return packFloatx80(aSign,
5507 floatx80_infinity_high,
5508 floatx80_infinity_low);
158142c2
FB
5509 }
5510 if ( aExp == 0 ) {
5511 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5512 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5513 }
5514 return
5515 packFloatx80(
e9321124 5516 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
158142c2
FB
5517
5518}
5519
158142c2
FB
5520/*----------------------------------------------------------------------------
5521| Returns the remainder of the double-precision floating-point value `a'
5522| with respect to the corresponding value `b'. The operation is performed
5523| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5524*----------------------------------------------------------------------------*/
5525
e5a41ffa 5526float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5527{
c120391c 5528 bool aSign, zSign;
0c48262d 5529 int aExp, bExp, expDiff;
bb98fe42
AF
5530 uint64_t aSig, bSig;
5531 uint64_t q, alternateASig;
5532 int64_t sigMean;
158142c2 5533
ff32e16e
PM
5534 a = float64_squash_input_denormal(a, status);
5535 b = float64_squash_input_denormal(b, status);
158142c2
FB
5536 aSig = extractFloat64Frac( a );
5537 aExp = extractFloat64Exp( a );
5538 aSign = extractFloat64Sign( a );
5539 bSig = extractFloat64Frac( b );
5540 bExp = extractFloat64Exp( b );
158142c2
FB
5541 if ( aExp == 0x7FF ) {
5542 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5543 return propagateFloat64NaN(a, b, status);
158142c2 5544 }
ff32e16e 5545 float_raise(float_flag_invalid, status);
af39bc8c 5546 return float64_default_nan(status);
158142c2
FB
5547 }
5548 if ( bExp == 0x7FF ) {
ff32e16e
PM
5549 if (bSig) {
5550 return propagateFloat64NaN(a, b, status);
5551 }
158142c2
FB
5552 return a;
5553 }
5554 if ( bExp == 0 ) {
5555 if ( bSig == 0 ) {
ff32e16e 5556 float_raise(float_flag_invalid, status);
af39bc8c 5557 return float64_default_nan(status);
158142c2
FB
5558 }
5559 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5560 }
5561 if ( aExp == 0 ) {
5562 if ( aSig == 0 ) return a;
5563 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5564 }
5565 expDiff = aExp - bExp;
e9321124
AB
5566 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5567 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
158142c2
FB
5568 if ( expDiff < 0 ) {
5569 if ( expDiff < -1 ) return a;
5570 aSig >>= 1;
5571 }
5572 q = ( bSig <= aSig );
5573 if ( q ) aSig -= bSig;
5574 expDiff -= 64;
5575 while ( 0 < expDiff ) {
5576 q = estimateDiv128To64( aSig, 0, bSig );
5577 q = ( 2 < q ) ? q - 2 : 0;
5578 aSig = - ( ( bSig>>2 ) * q );
5579 expDiff -= 62;
5580 }
5581 expDiff += 64;
5582 if ( 0 < expDiff ) {
5583 q = estimateDiv128To64( aSig, 0, bSig );
5584 q = ( 2 < q ) ? q - 2 : 0;
5585 q >>= 64 - expDiff;
5586 bSig >>= 2;
5587 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5588 }
5589 else {
5590 aSig >>= 2;
5591 bSig >>= 2;
5592 }
5593 do {
5594 alternateASig = aSig;
5595 ++q;
5596 aSig -= bSig;
bb98fe42 5597 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5598 sigMean = aSig + alternateASig;
5599 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5600 aSig = alternateASig;
5601 }
bb98fe42 5602 zSign = ( (int64_t) aSig < 0 );
158142c2 5603 if ( zSign ) aSig = - aSig;
ff32e16e 5604 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5605
5606}
5607
374dfc33
AJ
5608/*----------------------------------------------------------------------------
5609| Returns the binary log of the double-precision floating-point value `a'.
5610| The operation is performed according to the IEC/IEEE Standard for Binary
5611| Floating-Point Arithmetic.
5612*----------------------------------------------------------------------------*/
e5a41ffa 5613float64 float64_log2(float64 a, float_status *status)
374dfc33 5614{
c120391c 5615 bool aSign, zSign;
0c48262d 5616 int aExp;
bb98fe42 5617 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5618 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5619
5620 aSig = extractFloat64Frac( a );
5621 aExp = extractFloat64Exp( a );
5622 aSign = extractFloat64Sign( a );
5623
5624 if ( aExp == 0 ) {
5625 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5626 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5627 }
5628 if ( aSign ) {
ff32e16e 5629 float_raise(float_flag_invalid, status);
af39bc8c 5630 return float64_default_nan(status);
374dfc33
AJ
5631 }
5632 if ( aExp == 0x7FF ) {
ff32e16e
PM
5633 if (aSig) {
5634 return propagateFloat64NaN(a, float64_zero, status);
5635 }
374dfc33
AJ
5636 return a;
5637 }
5638
5639 aExp -= 0x3FF;
e9321124 5640 aSig |= UINT64_C(0x0010000000000000);
374dfc33 5641 zSign = aExp < 0;
bb98fe42 5642 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5643 for (i = 1LL << 51; i > 0; i >>= 1) {
5644 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5645 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
e9321124 5646 if ( aSig & UINT64_C(0x0020000000000000) ) {
374dfc33
AJ
5647 aSig >>= 1;
5648 zSig |= i;
5649 }
5650 }
5651
5652 if ( zSign )
5653 zSig = -zSig;
ff32e16e 5654 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5655}
5656
158142c2
FB
5657/*----------------------------------------------------------------------------
5658| Returns the result of converting the extended double-precision floating-
5659| point value `a' to the 32-bit two's complement integer format. The
5660| conversion is performed according to the IEC/IEEE Standard for Binary
5661| Floating-Point Arithmetic---which means in particular that the conversion
5662| is rounded according to the current rounding mode. If `a' is a NaN, the
5663| largest positive integer is returned. Otherwise, if the conversion
5664| overflows, the largest integer with the same sign as `a' is returned.
5665*----------------------------------------------------------------------------*/
5666
f4014512 5667int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2 5668{
c120391c 5669 bool aSign;
f4014512 5670 int32_t aExp, shiftCount;
bb98fe42 5671 uint64_t aSig;
158142c2 5672
d1eb8f2a
AD
5673 if (floatx80_invalid_encoding(a)) {
5674 float_raise(float_flag_invalid, status);
5675 return 1 << 31;
5676 }
158142c2
FB
5677 aSig = extractFloatx80Frac( a );
5678 aExp = extractFloatx80Exp( a );
5679 aSign = extractFloatx80Sign( a );
bb98fe42 5680 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5681 shiftCount = 0x4037 - aExp;
5682 if ( shiftCount <= 0 ) shiftCount = 1;
5683 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5684 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5685
5686}
5687
5688/*----------------------------------------------------------------------------
5689| Returns the result of converting the extended double-precision floating-
5690| point value `a' to the 32-bit two's complement integer format. The
5691| conversion is performed according to the IEC/IEEE Standard for Binary
5692| Floating-Point Arithmetic, except that the conversion is always rounded
5693| toward zero. If `a' is a NaN, the largest positive integer is returned.
5694| Otherwise, if the conversion overflows, the largest integer with the same
5695| sign as `a' is returned.
5696*----------------------------------------------------------------------------*/
5697
f4014512 5698int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2 5699{
c120391c 5700 bool aSign;
f4014512 5701 int32_t aExp, shiftCount;
bb98fe42 5702 uint64_t aSig, savedASig;
b3a6a2e0 5703 int32_t z;
158142c2 5704
d1eb8f2a
AD
5705 if (floatx80_invalid_encoding(a)) {
5706 float_raise(float_flag_invalid, status);
5707 return 1 << 31;
5708 }
158142c2
FB
5709 aSig = extractFloatx80Frac( a );
5710 aExp = extractFloatx80Exp( a );
5711 aSign = extractFloatx80Sign( a );
5712 if ( 0x401E < aExp ) {
bb98fe42 5713 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5714 goto invalid;
5715 }
5716 else if ( aExp < 0x3FFF ) {
a2f2d288 5717 if (aExp || aSig) {
d82f3b2d 5718 float_raise(float_flag_inexact, status);
a2f2d288 5719 }
158142c2
FB
5720 return 0;
5721 }
5722 shiftCount = 0x403E - aExp;
5723 savedASig = aSig;
5724 aSig >>= shiftCount;
5725 z = aSig;
5726 if ( aSign ) z = - z;
5727 if ( ( z < 0 ) ^ aSign ) {
5728 invalid:
ff32e16e 5729 float_raise(float_flag_invalid, status);
bb98fe42 5730 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5731 }
5732 if ( ( aSig<<shiftCount ) != savedASig ) {
d82f3b2d 5733 float_raise(float_flag_inexact, status);
158142c2
FB
5734 }
5735 return z;
5736
5737}
5738
5739/*----------------------------------------------------------------------------
5740| Returns the result of converting the extended double-precision floating-
5741| point value `a' to the 64-bit two's complement integer format. The
5742| conversion is performed according to the IEC/IEEE Standard for Binary
5743| Floating-Point Arithmetic---which means in particular that the conversion
5744| is rounded according to the current rounding mode. If `a' is a NaN,
5745| the largest positive integer is returned. Otherwise, if the conversion
5746| overflows, the largest integer with the same sign as `a' is returned.
5747*----------------------------------------------------------------------------*/
5748
f42c2224 5749int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2 5750{
c120391c 5751 bool aSign;
f4014512 5752 int32_t aExp, shiftCount;
bb98fe42 5753 uint64_t aSig, aSigExtra;
158142c2 5754
d1eb8f2a
AD
5755 if (floatx80_invalid_encoding(a)) {
5756 float_raise(float_flag_invalid, status);
5757 return 1ULL << 63;
5758 }
158142c2
FB
5759 aSig = extractFloatx80Frac( a );
5760 aExp = extractFloatx80Exp( a );
5761 aSign = extractFloatx80Sign( a );
5762 shiftCount = 0x403E - aExp;
5763 if ( shiftCount <= 0 ) {
5764 if ( shiftCount ) {
ff32e16e 5765 float_raise(float_flag_invalid, status);
0f605c88 5766 if (!aSign || floatx80_is_any_nan(a)) {
2c217da0 5767 return INT64_MAX;
158142c2 5768 }
2c217da0 5769 return INT64_MIN;
158142c2
FB
5770 }
5771 aSigExtra = 0;
5772 }
5773 else {
5774 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5775 }
ff32e16e 5776 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5777
5778}
5779
5780/*----------------------------------------------------------------------------
5781| Returns the result of converting the extended double-precision floating-
5782| point value `a' to the 64-bit two's complement integer format. The
5783| conversion is performed according to the IEC/IEEE Standard for Binary
5784| Floating-Point Arithmetic, except that the conversion is always rounded
5785| toward zero. If `a' is a NaN, the largest positive integer is returned.
5786| Otherwise, if the conversion overflows, the largest integer with the same
5787| sign as `a' is returned.
5788*----------------------------------------------------------------------------*/
5789
f42c2224 5790int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2 5791{
c120391c 5792 bool aSign;
f4014512 5793 int32_t aExp, shiftCount;
bb98fe42 5794 uint64_t aSig;
f42c2224 5795 int64_t z;
158142c2 5796
d1eb8f2a
AD
5797 if (floatx80_invalid_encoding(a)) {
5798 float_raise(float_flag_invalid, status);
5799 return 1ULL << 63;
5800 }
158142c2
FB
5801 aSig = extractFloatx80Frac( a );
5802 aExp = extractFloatx80Exp( a );
5803 aSign = extractFloatx80Sign( a );
5804 shiftCount = aExp - 0x403E;
5805 if ( 0 <= shiftCount ) {
e9321124 5806 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
158142c2 5807 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5808 float_raise(float_flag_invalid, status);
158142c2 5809 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
2c217da0 5810 return INT64_MAX;
158142c2
FB
5811 }
5812 }
2c217da0 5813 return INT64_MIN;
158142c2
FB
5814 }
5815 else if ( aExp < 0x3FFF ) {
a2f2d288 5816 if (aExp | aSig) {
d82f3b2d 5817 float_raise(float_flag_inexact, status);
a2f2d288 5818 }
158142c2
FB
5819 return 0;
5820 }
5821 z = aSig>>( - shiftCount );
bb98fe42 5822 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
d82f3b2d 5823 float_raise(float_flag_inexact, status);
158142c2
FB
5824 }
5825 if ( aSign ) z = - z;
5826 return z;
5827
5828}
5829
5830/*----------------------------------------------------------------------------
5831| Returns the result of converting the extended double-precision floating-
5832| point value `a' to the single-precision floating-point format. The
5833| conversion is performed according to the IEC/IEEE Standard for Binary
5834| Floating-Point Arithmetic.
5835*----------------------------------------------------------------------------*/
5836
e5a41ffa 5837float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2 5838{
c120391c 5839 bool aSign;
f4014512 5840 int32_t aExp;
bb98fe42 5841 uint64_t aSig;
158142c2 5842
d1eb8f2a
AD
5843 if (floatx80_invalid_encoding(a)) {
5844 float_raise(float_flag_invalid, status);
5845 return float32_default_nan(status);
5846 }
158142c2
FB
5847 aSig = extractFloatx80Frac( a );
5848 aExp = extractFloatx80Exp( a );
5849 aSign = extractFloatx80Sign( a );
5850 if ( aExp == 0x7FFF ) {
bb98fe42 5851 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5852 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5853 status);
5854 return float32_silence_nan(res, status);
158142c2
FB
5855 }
5856 return packFloat32( aSign, 0xFF, 0 );
5857 }
5858 shift64RightJamming( aSig, 33, &aSig );
5859 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5860 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5861
5862}
5863
5864/*----------------------------------------------------------------------------
5865| Returns the result of converting the extended double-precision floating-
5866| point value `a' to the double-precision floating-point format. The
5867| conversion is performed according to the IEC/IEEE Standard for Binary
5868| Floating-Point Arithmetic.
5869*----------------------------------------------------------------------------*/
5870
e5a41ffa 5871float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2 5872{
c120391c 5873 bool aSign;
f4014512 5874 int32_t aExp;
bb98fe42 5875 uint64_t aSig, zSig;
158142c2 5876
d1eb8f2a
AD
5877 if (floatx80_invalid_encoding(a)) {
5878 float_raise(float_flag_invalid, status);
5879 return float64_default_nan(status);
5880 }
158142c2
FB
5881 aSig = extractFloatx80Frac( a );
5882 aExp = extractFloatx80Exp( a );
5883 aSign = extractFloatx80Sign( a );
5884 if ( aExp == 0x7FFF ) {
bb98fe42 5885 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5886 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5887 status);
5888 return float64_silence_nan(res, status);
158142c2
FB
5889 }
5890 return packFloat64( aSign, 0x7FF, 0 );
5891 }
5892 shift64RightJamming( aSig, 1, &zSig );
5893 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5894 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5895
5896}
5897
158142c2
FB
5898/*----------------------------------------------------------------------------
5899| Returns the result of converting the extended double-precision floating-
5900| point value `a' to the quadruple-precision floating-point format. The
5901| conversion is performed according to the IEC/IEEE Standard for Binary
5902| Floating-Point Arithmetic.
5903*----------------------------------------------------------------------------*/
5904
e5a41ffa 5905float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2 5906{
c120391c 5907 bool aSign;
0c48262d 5908 int aExp;
bb98fe42 5909 uint64_t aSig, zSig0, zSig1;
158142c2 5910
d1eb8f2a
AD
5911 if (floatx80_invalid_encoding(a)) {
5912 float_raise(float_flag_invalid, status);
5913 return float128_default_nan(status);
5914 }
158142c2
FB
5915 aSig = extractFloatx80Frac( a );
5916 aExp = extractFloatx80Exp( a );
5917 aSign = extractFloatx80Sign( a );
bb98fe42 5918 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5919 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5920 status);
5921 return float128_silence_nan(res, status);
158142c2
FB
5922 }
5923 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5924 return packFloat128( aSign, aExp, zSig0, zSig1 );
5925
5926}
5927
0f721292
LV
5928/*----------------------------------------------------------------------------
5929| Rounds the extended double-precision floating-point value `a'
5930| to the precision provided by floatx80_rounding_precision and returns the
5931| result as an extended double-precision floating-point value.
5932| The operation is performed according to the IEC/IEEE Standard for Binary
5933| Floating-Point Arithmetic.
5934*----------------------------------------------------------------------------*/
5935
5936floatx80 floatx80_round(floatx80 a, float_status *status)
5937{
5938 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5939 extractFloatx80Sign(a),
5940 extractFloatx80Exp(a),
5941 extractFloatx80Frac(a), 0, status);
5942}
5943
158142c2
FB
5944/*----------------------------------------------------------------------------
5945| Rounds the extended double-precision floating-point value `a' to an integer,
5946| and returns the result as an extended quadruple-precision floating-point
5947| value. The operation is performed according to the IEC/IEEE Standard for
5948| Binary Floating-Point Arithmetic.
5949*----------------------------------------------------------------------------*/
5950
e5a41ffa 5951floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2 5952{
c120391c 5953 bool aSign;
f4014512 5954 int32_t aExp;
bb98fe42 5955 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5956 floatx80 z;
5957
d1eb8f2a
AD
5958 if (floatx80_invalid_encoding(a)) {
5959 float_raise(float_flag_invalid, status);
5960 return floatx80_default_nan(status);
5961 }
158142c2
FB
5962 aExp = extractFloatx80Exp( a );
5963 if ( 0x403E <= aExp ) {
bb98fe42 5964 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5965 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5966 }
5967 return a;
5968 }
5969 if ( aExp < 0x3FFF ) {
5970 if ( ( aExp == 0 )
9ecaf5cc 5971 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
158142c2
FB
5972 return a;
5973 }
d82f3b2d 5974 float_raise(float_flag_inexact, status);
158142c2 5975 aSign = extractFloatx80Sign( a );
a2f2d288 5976 switch (status->float_rounding_mode) {
158142c2 5977 case float_round_nearest_even:
bb98fe42 5978 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5979 ) {
5980 return
e9321124 5981 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5982 }
5983 break;
f9288a76
PM
5984 case float_round_ties_away:
5985 if (aExp == 0x3FFE) {
e9321124 5986 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
f9288a76
PM
5987 }
5988 break;
158142c2
FB
5989 case float_round_down:
5990 return
5991 aSign ?
e9321124 5992 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
158142c2
FB
5993 : packFloatx80( 0, 0, 0 );
5994 case float_round_up:
5995 return
5996 aSign ? packFloatx80( 1, 0, 0 )
e9321124 5997 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
3dede407
RH
5998
5999 case float_round_to_zero:
6000 break;
6001 default:
6002 g_assert_not_reached();
158142c2
FB
6003 }
6004 return packFloatx80( aSign, 0, 0 );
6005 }
6006 lastBitMask = 1;
6007 lastBitMask <<= 0x403E - aExp;
6008 roundBitsMask = lastBitMask - 1;
6009 z = a;
a2f2d288 6010 switch (status->float_rounding_mode) {
dc355b76 6011 case float_round_nearest_even:
158142c2 6012 z.low += lastBitMask>>1;
dc355b76
PM
6013 if ((z.low & roundBitsMask) == 0) {
6014 z.low &= ~lastBitMask;
6015 }
6016 break;
f9288a76
PM
6017 case float_round_ties_away:
6018 z.low += lastBitMask >> 1;
6019 break;
dc355b76
PM
6020 case float_round_to_zero:
6021 break;
6022 case float_round_up:
6023 if (!extractFloatx80Sign(z)) {
6024 z.low += roundBitsMask;
6025 }
6026 break;
6027 case float_round_down:
6028 if (extractFloatx80Sign(z)) {
158142c2
FB
6029 z.low += roundBitsMask;
6030 }
dc355b76
PM
6031 break;
6032 default:
6033 abort();
158142c2
FB
6034 }
6035 z.low &= ~ roundBitsMask;
6036 if ( z.low == 0 ) {
6037 ++z.high;
e9321124 6038 z.low = UINT64_C(0x8000000000000000);
158142c2 6039 }
a2f2d288 6040 if (z.low != a.low) {
d82f3b2d 6041 float_raise(float_flag_inexact, status);
a2f2d288 6042 }
158142c2
FB
6043 return z;
6044
6045}
6046
6047/*----------------------------------------------------------------------------
6048| Returns the result of adding the absolute values of the extended double-
6049| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
6050| negated before being returned. `zSign' is ignored if the result is a NaN.
6051| The addition is performed according to the IEC/IEEE Standard for Binary
6052| Floating-Point Arithmetic.
6053*----------------------------------------------------------------------------*/
6054
c120391c 6055static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 6056 float_status *status)
158142c2 6057{
f4014512 6058 int32_t aExp, bExp, zExp;
bb98fe42 6059 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 6060 int32_t expDiff;
158142c2
FB
6061
6062 aSig = extractFloatx80Frac( a );
6063 aExp = extractFloatx80Exp( a );
6064 bSig = extractFloatx80Frac( b );
6065 bExp = extractFloatx80Exp( b );
6066 expDiff = aExp - bExp;
6067 if ( 0 < expDiff ) {
6068 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6069 if ((uint64_t)(aSig << 1)) {
6070 return propagateFloatx80NaN(a, b, status);
6071 }
158142c2
FB
6072 return a;
6073 }
6074 if ( bExp == 0 ) --expDiff;
6075 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
6076 zExp = aExp;
6077 }
6078 else if ( expDiff < 0 ) {
6079 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6080 if ((uint64_t)(bSig << 1)) {
6081 return propagateFloatx80NaN(a, b, status);
6082 }
0f605c88
LV
6083 return packFloatx80(zSign,
6084 floatx80_infinity_high,
6085 floatx80_infinity_low);
158142c2
FB
6086 }
6087 if ( aExp == 0 ) ++expDiff;
6088 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
6089 zExp = bExp;
6090 }
6091 else {
6092 if ( aExp == 0x7FFF ) {
bb98fe42 6093 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 6094 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6095 }
6096 return a;
6097 }
6098 zSig1 = 0;
6099 zSig0 = aSig + bSig;
6100 if ( aExp == 0 ) {
41602807
JM
6101 if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
6102 /* At least one of the values is a pseudo-denormal,
6103 * and there is a carry out of the result. */
6104 zExp = 1;
6105 goto shiftRight1;
6106 }
2f311075
RH
6107 if (zSig0 == 0) {
6108 return packFloatx80(zSign, 0, 0);
6109 }
158142c2
FB
6110 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
6111 goto roundAndPack;
6112 }
6113 zExp = aExp;
6114 goto shiftRight1;
6115 }
6116 zSig0 = aSig + bSig;
bb98fe42 6117 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
6118 shiftRight1:
6119 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
e9321124 6120 zSig0 |= UINT64_C(0x8000000000000000);
158142c2
FB
6121 ++zExp;
6122 roundAndPack:
a2f2d288 6123 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6124 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6125}
6126
6127/*----------------------------------------------------------------------------
6128| Returns the result of subtracting the absolute values of the extended
6129| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
6130| difference is negated before being returned. `zSign' is ignored if the
6131| result is a NaN. The subtraction is performed according to the IEC/IEEE
6132| Standard for Binary Floating-Point Arithmetic.
6133*----------------------------------------------------------------------------*/
6134
c120391c 6135static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 6136 float_status *status)
158142c2 6137{
f4014512 6138 int32_t aExp, bExp, zExp;
bb98fe42 6139 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 6140 int32_t expDiff;
158142c2
FB
6141
6142 aSig = extractFloatx80Frac( a );
6143 aExp = extractFloatx80Exp( a );
6144 bSig = extractFloatx80Frac( b );
6145 bExp = extractFloatx80Exp( b );
6146 expDiff = aExp - bExp;
6147 if ( 0 < expDiff ) goto aExpBigger;
6148 if ( expDiff < 0 ) goto bExpBigger;
6149 if ( aExp == 0x7FFF ) {
bb98fe42 6150 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 6151 return propagateFloatx80NaN(a, b, status);
158142c2 6152 }
ff32e16e 6153 float_raise(float_flag_invalid, status);
af39bc8c 6154 return floatx80_default_nan(status);
158142c2
FB
6155 }
6156 if ( aExp == 0 ) {
6157 aExp = 1;
6158 bExp = 1;
6159 }
6160 zSig1 = 0;
6161 if ( bSig < aSig ) goto aBigger;
6162 if ( aSig < bSig ) goto bBigger;
a2f2d288 6163 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
6164 bExpBigger:
6165 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6166 if ((uint64_t)(bSig << 1)) {
6167 return propagateFloatx80NaN(a, b, status);
6168 }
0f605c88
LV
6169 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
6170 floatx80_infinity_low);
158142c2
FB
6171 }
6172 if ( aExp == 0 ) ++expDiff;
6173 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
6174 bBigger:
6175 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
6176 zExp = bExp;
6177 zSign ^= 1;
6178 goto normalizeRoundAndPack;
6179 aExpBigger:
6180 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6181 if ((uint64_t)(aSig << 1)) {
6182 return propagateFloatx80NaN(a, b, status);
6183 }
158142c2
FB
6184 return a;
6185 }
6186 if ( bExp == 0 ) --expDiff;
6187 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
6188 aBigger:
6189 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
6190 zExp = aExp;
6191 normalizeRoundAndPack:
a2f2d288 6192 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6193 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6194}
6195
6196/*----------------------------------------------------------------------------
6197| Returns the result of adding the extended double-precision floating-point
6198| values `a' and `b'. The operation is performed according to the IEC/IEEE
6199| Standard for Binary Floating-Point Arithmetic.
6200*----------------------------------------------------------------------------*/
6201
e5a41ffa 6202floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2 6203{
c120391c 6204 bool aSign, bSign;
158142c2 6205
d1eb8f2a
AD
6206 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6207 float_raise(float_flag_invalid, status);
6208 return floatx80_default_nan(status);
6209 }
158142c2
FB
6210 aSign = extractFloatx80Sign( a );
6211 bSign = extractFloatx80Sign( b );
6212 if ( aSign == bSign ) {
ff32e16e 6213 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6214 }
6215 else {
ff32e16e 6216 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6217 }
6218
6219}
6220
6221/*----------------------------------------------------------------------------
6222| Returns the result of subtracting the extended double-precision floating-
6223| point values `a' and `b'. The operation is performed according to the
6224| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6225*----------------------------------------------------------------------------*/
6226
e5a41ffa 6227floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2 6228{
c120391c 6229 bool aSign, bSign;
158142c2 6230
d1eb8f2a
AD
6231 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6232 float_raise(float_flag_invalid, status);
6233 return floatx80_default_nan(status);
6234 }
158142c2
FB
6235 aSign = extractFloatx80Sign( a );
6236 bSign = extractFloatx80Sign( b );
6237 if ( aSign == bSign ) {
ff32e16e 6238 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6239 }
6240 else {
ff32e16e 6241 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6242 }
6243
6244}
6245
6246/*----------------------------------------------------------------------------
6247| Returns the result of multiplying the extended double-precision floating-
6248| point values `a' and `b'. The operation is performed according to the
6249| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6250*----------------------------------------------------------------------------*/
6251
e5a41ffa 6252floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2 6253{
c120391c 6254 bool aSign, bSign, zSign;
f4014512 6255 int32_t aExp, bExp, zExp;
bb98fe42 6256 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 6257
d1eb8f2a
AD
6258 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6259 float_raise(float_flag_invalid, status);
6260 return floatx80_default_nan(status);
6261 }
158142c2
FB
6262 aSig = extractFloatx80Frac( a );
6263 aExp = extractFloatx80Exp( a );
6264 aSign = extractFloatx80Sign( a );
6265 bSig = extractFloatx80Frac( b );
6266 bExp = extractFloatx80Exp( b );
6267 bSign = extractFloatx80Sign( b );
6268 zSign = aSign ^ bSign;
6269 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6270 if ( (uint64_t) ( aSig<<1 )
6271 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6272 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6273 }
6274 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
6275 return packFloatx80(zSign, floatx80_infinity_high,
6276 floatx80_infinity_low);
158142c2
FB
6277 }
6278 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6279 if ((uint64_t)(bSig << 1)) {
6280 return propagateFloatx80NaN(a, b, status);
6281 }
158142c2
FB
6282 if ( ( aExp | aSig ) == 0 ) {
6283 invalid:
ff32e16e 6284 float_raise(float_flag_invalid, status);
af39bc8c 6285 return floatx80_default_nan(status);
158142c2 6286 }
0f605c88
LV
6287 return packFloatx80(zSign, floatx80_infinity_high,
6288 floatx80_infinity_low);
158142c2
FB
6289 }
6290 if ( aExp == 0 ) {
6291 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6292 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6293 }
6294 if ( bExp == 0 ) {
6295 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6296 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6297 }
6298 zExp = aExp + bExp - 0x3FFE;
6299 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 6300 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
6301 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6302 --zExp;
6303 }
a2f2d288 6304 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6305 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6306}
6307
6308/*----------------------------------------------------------------------------
6309| Returns the result of dividing the extended double-precision floating-point
6310| value `a' by the corresponding value `b'. The operation is performed
6311| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6312*----------------------------------------------------------------------------*/
6313
e5a41ffa 6314floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2 6315{
c120391c 6316 bool aSign, bSign, zSign;
f4014512 6317 int32_t aExp, bExp, zExp;
bb98fe42
AF
6318 uint64_t aSig, bSig, zSig0, zSig1;
6319 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6320
d1eb8f2a
AD
6321 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6322 float_raise(float_flag_invalid, status);
6323 return floatx80_default_nan(status);
6324 }
158142c2
FB
6325 aSig = extractFloatx80Frac( a );
6326 aExp = extractFloatx80Exp( a );
6327 aSign = extractFloatx80Sign( a );
6328 bSig = extractFloatx80Frac( b );
6329 bExp = extractFloatx80Exp( b );
6330 bSign = extractFloatx80Sign( b );
6331 zSign = aSign ^ bSign;
6332 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6333 if ((uint64_t)(aSig << 1)) {
6334 return propagateFloatx80NaN(a, b, status);
6335 }
158142c2 6336 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6337 if ((uint64_t)(bSig << 1)) {
6338 return propagateFloatx80NaN(a, b, status);
6339 }
158142c2
FB
6340 goto invalid;
6341 }
0f605c88
LV
6342 return packFloatx80(zSign, floatx80_infinity_high,
6343 floatx80_infinity_low);
158142c2
FB
6344 }
6345 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6346 if ((uint64_t)(bSig << 1)) {
6347 return propagateFloatx80NaN(a, b, status);
6348 }
158142c2
FB
6349 return packFloatx80( zSign, 0, 0 );
6350 }
6351 if ( bExp == 0 ) {
6352 if ( bSig == 0 ) {
6353 if ( ( aExp | aSig ) == 0 ) {
6354 invalid:
ff32e16e 6355 float_raise(float_flag_invalid, status);
af39bc8c 6356 return floatx80_default_nan(status);
158142c2 6357 }
ff32e16e 6358 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6359 return packFloatx80(zSign, floatx80_infinity_high,
6360 floatx80_infinity_low);
158142c2
FB
6361 }
6362 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6363 }
6364 if ( aExp == 0 ) {
6365 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6366 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6367 }
6368 zExp = aExp - bExp + 0x3FFE;
6369 rem1 = 0;
6370 if ( bSig <= aSig ) {
6371 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6372 ++zExp;
6373 }
6374 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6375 mul64To128( bSig, zSig0, &term0, &term1 );
6376 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 6377 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6378 --zSig0;
6379 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6380 }
6381 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 6382 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6383 mul64To128( bSig, zSig1, &term1, &term2 );
6384 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6385 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6386 --zSig1;
6387 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6388 }
6389 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6390 }
a2f2d288 6391 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6392 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6393}
6394
6395/*----------------------------------------------------------------------------
6396| Returns the remainder of the extended double-precision floating-point value
6397| `a' with respect to the corresponding value `b'. The operation is performed
6b8b0136
JM
6398| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6399| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
445810ec
JM
6400| the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6401| the absolute value of the integer quotient.
158142c2
FB
6402*----------------------------------------------------------------------------*/
6403
445810ec 6404floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6b8b0136 6405 float_status *status)
158142c2 6406{
c120391c 6407 bool aSign, zSign;
b662495d 6408 int32_t aExp, bExp, expDiff, aExpOrig;
bb98fe42
AF
6409 uint64_t aSig0, aSig1, bSig;
6410 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6411
445810ec 6412 *quotient = 0;
d1eb8f2a
AD
6413 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6414 float_raise(float_flag_invalid, status);
6415 return floatx80_default_nan(status);
6416 }
158142c2 6417 aSig0 = extractFloatx80Frac( a );
b662495d 6418 aExpOrig = aExp = extractFloatx80Exp( a );
158142c2
FB
6419 aSign = extractFloatx80Sign( a );
6420 bSig = extractFloatx80Frac( b );
6421 bExp = extractFloatx80Exp( b );
158142c2 6422 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6423 if ( (uint64_t) ( aSig0<<1 )
6424 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6425 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6426 }
6427 goto invalid;
6428 }
6429 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6430 if ((uint64_t)(bSig << 1)) {
6431 return propagateFloatx80NaN(a, b, status);
6432 }
b662495d
JM
6433 if (aExp == 0 && aSig0 >> 63) {
6434 /*
6435 * Pseudo-denormal argument must be returned in normalized
6436 * form.
6437 */
6438 return packFloatx80(aSign, 1, aSig0);
6439 }
158142c2
FB
6440 return a;
6441 }
6442 if ( bExp == 0 ) {
6443 if ( bSig == 0 ) {
6444 invalid:
ff32e16e 6445 float_raise(float_flag_invalid, status);
af39bc8c 6446 return floatx80_default_nan(status);
158142c2
FB
6447 }
6448 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6449 }
6450 if ( aExp == 0 ) {
499a2f7b 6451 if ( aSig0 == 0 ) return a;
158142c2
FB
6452 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6453 }
158142c2
FB
6454 zSign = aSign;
6455 expDiff = aExp - bExp;
6456 aSig1 = 0;
6457 if ( expDiff < 0 ) {
b662495d
JM
6458 if ( mod || expDiff < -1 ) {
6459 if (aExp == 1 && aExpOrig == 0) {
6460 /*
6461 * Pseudo-denormal argument must be returned in
6462 * normalized form.
6463 */
6464 return packFloatx80(aSign, aExp, aSig0);
6465 }
6466 return a;
6467 }
158142c2
FB
6468 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6469 expDiff = 0;
6470 }
445810ec 6471 *quotient = q = ( bSig <= aSig0 );
158142c2
FB
6472 if ( q ) aSig0 -= bSig;
6473 expDiff -= 64;
6474 while ( 0 < expDiff ) {
6475 q = estimateDiv128To64( aSig0, aSig1, bSig );
6476 q = ( 2 < q ) ? q - 2 : 0;
6477 mul64To128( bSig, q, &term0, &term1 );
6478 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6479 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6480 expDiff -= 62;
445810ec
JM
6481 *quotient <<= 62;
6482 *quotient += q;
158142c2
FB
6483 }
6484 expDiff += 64;
6485 if ( 0 < expDiff ) {
6486 q = estimateDiv128To64( aSig0, aSig1, bSig );
6487 q = ( 2 < q ) ? q - 2 : 0;
6488 q >>= 64 - expDiff;
6489 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6490 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6491 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6492 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6493 ++q;
6494 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6495 }
445810ec
JM
6496 if (expDiff < 64) {
6497 *quotient <<= expDiff;
6498 } else {
6499 *quotient = 0;
6500 }
6501 *quotient += q;
158142c2
FB
6502 }
6503 else {
6504 term1 = 0;
6505 term0 = bSig;
6506 }
6b8b0136
JM
6507 if (!mod) {
6508 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6509 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6510 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6511 && ( q & 1 ) )
6512 ) {
6513 aSig0 = alternateASig0;
6514 aSig1 = alternateASig1;
6515 zSign = ! zSign;
445810ec 6516 ++*quotient;
6b8b0136 6517 }
158142c2
FB
6518 }
6519 return
6520 normalizeRoundAndPackFloatx80(
ff32e16e 6521 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6522
6523}
6524
6b8b0136
JM
6525/*----------------------------------------------------------------------------
6526| Returns the remainder of the extended double-precision floating-point value
6527| `a' with respect to the corresponding value `b'. The operation is performed
6528| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6529*----------------------------------------------------------------------------*/
6530
6531floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6532{
445810ec
JM
6533 uint64_t quotient;
6534 return floatx80_modrem(a, b, false, &quotient, status);
6b8b0136
JM
6535}
6536
6537/*----------------------------------------------------------------------------
6538| Returns the remainder of the extended double-precision floating-point value
6539| `a' with respect to the corresponding value `b', with the quotient truncated
6540| toward zero.
6541*----------------------------------------------------------------------------*/
6542
6543floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6544{
445810ec
JM
6545 uint64_t quotient;
6546 return floatx80_modrem(a, b, true, &quotient, status);
6b8b0136
JM
6547}
6548
158142c2
FB
6549/*----------------------------------------------------------------------------
6550| Returns the square root of the extended double-precision floating-point
6551| value `a'. The operation is performed according to the IEC/IEEE Standard
6552| for Binary Floating-Point Arithmetic.
6553*----------------------------------------------------------------------------*/
6554
e5a41ffa 6555floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2 6556{
c120391c 6557 bool aSign;
f4014512 6558 int32_t aExp, zExp;
bb98fe42
AF
6559 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6560 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6561
d1eb8f2a
AD
6562 if (floatx80_invalid_encoding(a)) {
6563 float_raise(float_flag_invalid, status);
6564 return floatx80_default_nan(status);
6565 }
158142c2
FB
6566 aSig0 = extractFloatx80Frac( a );
6567 aExp = extractFloatx80Exp( a );
6568 aSign = extractFloatx80Sign( a );
6569 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6570 if ((uint64_t)(aSig0 << 1)) {
6571 return propagateFloatx80NaN(a, a, status);
6572 }
158142c2
FB
6573 if ( ! aSign ) return a;
6574 goto invalid;
6575 }
6576 if ( aSign ) {
6577 if ( ( aExp | aSig0 ) == 0 ) return a;
6578 invalid:
ff32e16e 6579 float_raise(float_flag_invalid, status);
af39bc8c 6580 return floatx80_default_nan(status);
158142c2
FB
6581 }
6582 if ( aExp == 0 ) {
6583 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6584 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6585 }
6586 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6587 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6588 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6589 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6590 doubleZSig0 = zSig0<<1;
6591 mul64To128( zSig0, zSig0, &term0, &term1 );
6592 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6593 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6594 --zSig0;
6595 doubleZSig0 -= 2;
6596 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6597 }
6598 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
e9321124 6599 if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
158142c2
FB
6600 if ( zSig1 == 0 ) zSig1 = 1;
6601 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6602 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6603 mul64To128( zSig1, zSig1, &term2, &term3 );
6604 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6605 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6606 --zSig1;
6607 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6608 term3 |= 1;
6609 term2 |= doubleZSig0;
6610 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6611 }
6612 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6613 }
6614 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6615 zSig0 |= doubleZSig0;
a2f2d288
PM
6616 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6617 0, zExp, zSig0, zSig1, status);
158142c2
FB
6618}
6619
6620/*----------------------------------------------------------------------------
158142c2
FB
6621| Returns the result of converting the quadruple-precision floating-point
6622| value `a' to the 32-bit two's complement integer format. The conversion
6623| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6624| Arithmetic---which means in particular that the conversion is rounded
6625| according to the current rounding mode. If `a' is a NaN, the largest
6626| positive integer is returned. Otherwise, if the conversion overflows, the
6627| largest integer with the same sign as `a' is returned.
6628*----------------------------------------------------------------------------*/
6629
f4014512 6630int32_t float128_to_int32(float128 a, float_status *status)
158142c2 6631{
c120391c 6632 bool aSign;
f4014512 6633 int32_t aExp, shiftCount;
bb98fe42 6634 uint64_t aSig0, aSig1;
158142c2
FB
6635
6636 aSig1 = extractFloat128Frac1( a );
6637 aSig0 = extractFloat128Frac0( a );
6638 aExp = extractFloat128Exp( a );
6639 aSign = extractFloat128Sign( a );
6640 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
e9321124 6641 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6642 aSig0 |= ( aSig1 != 0 );
6643 shiftCount = 0x4028 - aExp;
6644 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 6645 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
6646
6647}
6648
6649/*----------------------------------------------------------------------------
6650| Returns the result of converting the quadruple-precision floating-point
6651| value `a' to the 32-bit two's complement integer format. The conversion
6652| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6653| Arithmetic, except that the conversion is always rounded toward zero. If
6654| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
6655| conversion overflows, the largest integer with the same sign as `a' is
6656| returned.
6657*----------------------------------------------------------------------------*/
6658
f4014512 6659int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2 6660{
c120391c 6661 bool aSign;
f4014512 6662 int32_t aExp, shiftCount;
bb98fe42 6663 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 6664 int32_t z;
158142c2
FB
6665
6666 aSig1 = extractFloat128Frac1( a );
6667 aSig0 = extractFloat128Frac0( a );
6668 aExp = extractFloat128Exp( a );
6669 aSign = extractFloat128Sign( a );
6670 aSig0 |= ( aSig1 != 0 );
6671 if ( 0x401E < aExp ) {
6672 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6673 goto invalid;
6674 }
6675 else if ( aExp < 0x3FFF ) {
a2f2d288 6676 if (aExp || aSig0) {
d82f3b2d 6677 float_raise(float_flag_inexact, status);
a2f2d288 6678 }
158142c2
FB
6679 return 0;
6680 }
e9321124 6681 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6682 shiftCount = 0x402F - aExp;
6683 savedASig = aSig0;
6684 aSig0 >>= shiftCount;
6685 z = aSig0;
6686 if ( aSign ) z = - z;
6687 if ( ( z < 0 ) ^ aSign ) {
6688 invalid:
ff32e16e 6689 float_raise(float_flag_invalid, status);
2c217da0 6690 return aSign ? INT32_MIN : INT32_MAX;
158142c2
FB
6691 }
6692 if ( ( aSig0<<shiftCount ) != savedASig ) {
d82f3b2d 6693 float_raise(float_flag_inexact, status);
158142c2
FB
6694 }
6695 return z;
6696
6697}
6698
6699/*----------------------------------------------------------------------------
6700| Returns the result of converting the quadruple-precision floating-point
6701| value `a' to the 64-bit two's complement integer format. The conversion
6702| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6703| Arithmetic---which means in particular that the conversion is rounded
6704| according to the current rounding mode. If `a' is a NaN, the largest
6705| positive integer is returned. Otherwise, if the conversion overflows, the
6706| largest integer with the same sign as `a' is returned.
6707*----------------------------------------------------------------------------*/
6708
f42c2224 6709int64_t float128_to_int64(float128 a, float_status *status)
158142c2 6710{
c120391c 6711 bool aSign;
f4014512 6712 int32_t aExp, shiftCount;
bb98fe42 6713 uint64_t aSig0, aSig1;
158142c2
FB
6714
6715 aSig1 = extractFloat128Frac1( a );
6716 aSig0 = extractFloat128Frac0( a );
6717 aExp = extractFloat128Exp( a );
6718 aSign = extractFloat128Sign( a );
e9321124 6719 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6720 shiftCount = 0x402F - aExp;
6721 if ( shiftCount <= 0 ) {
6722 if ( 0x403E < aExp ) {
ff32e16e 6723 float_raise(float_flag_invalid, status);
158142c2
FB
6724 if ( ! aSign
6725 || ( ( aExp == 0x7FFF )
e9321124 6726 && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
158142c2
FB
6727 )
6728 ) {
2c217da0 6729 return INT64_MAX;
158142c2 6730 }
2c217da0 6731 return INT64_MIN;
158142c2
FB
6732 }
6733 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6734 }
6735 else {
6736 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6737 }
ff32e16e 6738 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
6739
6740}
6741
6742/*----------------------------------------------------------------------------
6743| Returns the result of converting the quadruple-precision floating-point
6744| value `a' to the 64-bit two's complement integer format. The conversion
6745| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6746| Arithmetic, except that the conversion is always rounded toward zero.
6747| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
6748| the conversion overflows, the largest integer with the same sign as `a' is
6749| returned.
6750*----------------------------------------------------------------------------*/
6751
f42c2224 6752int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2 6753{
c120391c 6754 bool aSign;
f4014512 6755 int32_t aExp, shiftCount;
bb98fe42 6756 uint64_t aSig0, aSig1;
f42c2224 6757 int64_t z;
158142c2
FB
6758
6759 aSig1 = extractFloat128Frac1( a );
6760 aSig0 = extractFloat128Frac0( a );
6761 aExp = extractFloat128Exp( a );
6762 aSign = extractFloat128Sign( a );
e9321124 6763 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6764 shiftCount = aExp - 0x402F;
6765 if ( 0 < shiftCount ) {
6766 if ( 0x403E <= aExp ) {
e9321124
AB
6767 aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
6768 if ( ( a.high == UINT64_C(0xC03E000000000000) )
6769 && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
a2f2d288 6770 if (aSig1) {
d82f3b2d 6771 float_raise(float_flag_inexact, status);
a2f2d288 6772 }
158142c2
FB
6773 }
6774 else {
ff32e16e 6775 float_raise(float_flag_invalid, status);
158142c2 6776 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
2c217da0 6777 return INT64_MAX;
158142c2
FB
6778 }
6779 }
2c217da0 6780 return INT64_MIN;
158142c2
FB
6781 }
6782 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 6783 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
d82f3b2d 6784 float_raise(float_flag_inexact, status);
158142c2
FB
6785 }
6786 }
6787 else {
6788 if ( aExp < 0x3FFF ) {
6789 if ( aExp | aSig0 | aSig1 ) {
d82f3b2d 6790 float_raise(float_flag_inexact, status);
158142c2
FB
6791 }
6792 return 0;
6793 }
6794 z = aSig0>>( - shiftCount );
6795 if ( aSig1
bb98fe42 6796 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
d82f3b2d 6797 float_raise(float_flag_inexact, status);
158142c2
FB
6798 }
6799 }
6800 if ( aSign ) z = - z;
6801 return z;
6802
6803}
6804
2e6d8568
BR
6805/*----------------------------------------------------------------------------
6806| Returns the result of converting the quadruple-precision floating-point value
6807| `a' to the 64-bit unsigned integer format. The conversion is
6808| performed according to the IEC/IEEE Standard for Binary Floating-Point
6809| Arithmetic---which means in particular that the conversion is rounded
6810| according to the current rounding mode. If `a' is a NaN, the largest
6811| positive integer is returned. If the conversion overflows, the
6812| largest unsigned integer is returned. If 'a' is negative, the value is
6813| rounded and zero is returned; negative values that do not round to zero
6814| will raise the inexact exception.
6815*----------------------------------------------------------------------------*/
6816
6817uint64_t float128_to_uint64(float128 a, float_status *status)
6818{
c120391c 6819 bool aSign;
2e6d8568
BR
6820 int aExp;
6821 int shiftCount;
6822 uint64_t aSig0, aSig1;
6823
6824 aSig0 = extractFloat128Frac0(a);
6825 aSig1 = extractFloat128Frac1(a);
6826 aExp = extractFloat128Exp(a);
6827 aSign = extractFloat128Sign(a);
6828 if (aSign && (aExp > 0x3FFE)) {
6829 float_raise(float_flag_invalid, status);
6830 if (float128_is_any_nan(a)) {
2c217da0 6831 return UINT64_MAX;
2e6d8568
BR
6832 } else {
6833 return 0;
6834 }
6835 }
6836 if (aExp) {
2c217da0 6837 aSig0 |= UINT64_C(0x0001000000000000);
2e6d8568
BR
6838 }
6839 shiftCount = 0x402F - aExp;
6840 if (shiftCount <= 0) {
6841 if (0x403E < aExp) {
6842 float_raise(float_flag_invalid, status);
2c217da0 6843 return UINT64_MAX;
2e6d8568
BR
6844 }
6845 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6846 } else {
6847 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6848 }
6849 return roundAndPackUint64(aSign, aSig0, aSig1, status);
6850}
6851
6852uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6853{
6854 uint64_t v;
6855 signed char current_rounding_mode = status->float_rounding_mode;
6856
6857 set_float_rounding_mode(float_round_to_zero, status);
6858 v = float128_to_uint64(a, status);
6859 set_float_rounding_mode(current_rounding_mode, status);
6860
6861 return v;
6862}
6863
158142c2
FB
6864/*----------------------------------------------------------------------------
6865| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
6866| value `a' to the 32-bit unsigned integer format. The conversion
6867| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6868| Arithmetic except that the conversion is always rounded toward zero.
6869| If `a' is a NaN, the largest positive integer is returned. Otherwise,
6870| if the conversion overflows, the largest unsigned integer is returned.
6871| If 'a' is negative, the value is rounded and zero is returned; negative
6872| values that do not round to zero will raise the inexact exception.
6873*----------------------------------------------------------------------------*/
6874
6875uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6876{
6877 uint64_t v;
6878 uint32_t res;
6879 int old_exc_flags = get_float_exception_flags(status);
6880
6881 v = float128_to_uint64_round_to_zero(a, status);
6882 if (v > 0xffffffff) {
6883 res = 0xffffffff;
6884 } else {
6885 return v;
6886 }
6887 set_float_exception_flags(old_exc_flags, status);
e45de992
DH
6888 float_raise(float_flag_invalid, status);
6889 return res;
6890}
6891
6892/*----------------------------------------------------------------------------
6893| Returns the result of converting the quadruple-precision floating-point value
6894| `a' to the 32-bit unsigned integer format. The conversion is
6895| performed according to the IEC/IEEE Standard for Binary Floating-Point
6896| Arithmetic---which means in particular that the conversion is rounded
6897| according to the current rounding mode. If `a' is a NaN, the largest
6898| positive integer is returned. If the conversion overflows, the
6899| largest unsigned integer is returned. If 'a' is negative, the value is
6900| rounded and zero is returned; negative values that do not round to zero
6901| will raise the inexact exception.
6902*----------------------------------------------------------------------------*/
6903
6904uint32_t float128_to_uint32(float128 a, float_status *status)
6905{
6906 uint64_t v;
6907 uint32_t res;
6908 int old_exc_flags = get_float_exception_flags(status);
6909
6910 v = float128_to_uint64(a, status);
6911 if (v > 0xffffffff) {
6912 res = 0xffffffff;
6913 } else {
6914 return v;
6915 }
6916 set_float_exception_flags(old_exc_flags, status);
fd425037
BR
6917 float_raise(float_flag_invalid, status);
6918 return res;
6919}
6920
158142c2
FB
6921/*----------------------------------------------------------------------------
6922| Returns the result of converting the quadruple-precision floating-point
6923| value `a' to the extended double-precision floating-point format. The
6924| conversion is performed according to the IEC/IEEE Standard for Binary
6925| Floating-Point Arithmetic.
6926*----------------------------------------------------------------------------*/
6927
e5a41ffa 6928floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2 6929{
c120391c 6930 bool aSign;
f4014512 6931 int32_t aExp;
bb98fe42 6932 uint64_t aSig0, aSig1;
158142c2
FB
6933
6934 aSig1 = extractFloat128Frac1( a );
6935 aSig0 = extractFloat128Frac0( a );
6936 aExp = extractFloat128Exp( a );
6937 aSign = extractFloat128Sign( a );
6938 if ( aExp == 0x7FFF ) {
6939 if ( aSig0 | aSig1 ) {
7537c2b4
JM
6940 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6941 status);
6942 return floatx80_silence_nan(res, status);
158142c2 6943 }
0f605c88
LV
6944 return packFloatx80(aSign, floatx80_infinity_high,
6945 floatx80_infinity_low);
158142c2
FB
6946 }
6947 if ( aExp == 0 ) {
6948 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6949 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6950 }
6951 else {
e9321124 6952 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6953 }
6954 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6955 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6956
6957}
6958
158142c2
FB
6959/*----------------------------------------------------------------------------
6960| Rounds the quadruple-precision floating-point value `a' to an integer, and
6961| returns the result as a quadruple-precision floating-point value. The
6962| operation is performed according to the IEC/IEEE Standard for Binary
6963| Floating-Point Arithmetic.
6964*----------------------------------------------------------------------------*/
6965
e5a41ffa 6966float128 float128_round_to_int(float128 a, float_status *status)
158142c2 6967{
c120391c 6968 bool aSign;
f4014512 6969 int32_t aExp;
bb98fe42 6970 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
6971 float128 z;
6972
6973 aExp = extractFloat128Exp( a );
6974 if ( 0x402F <= aExp ) {
6975 if ( 0x406F <= aExp ) {
6976 if ( ( aExp == 0x7FFF )
6977 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6978 ) {
ff32e16e 6979 return propagateFloat128NaN(a, a, status);
158142c2
FB
6980 }
6981 return a;
6982 }
6983 lastBitMask = 1;
6984 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6985 roundBitsMask = lastBitMask - 1;
6986 z = a;
a2f2d288 6987 switch (status->float_rounding_mode) {
dc355b76 6988 case float_round_nearest_even:
158142c2
FB
6989 if ( lastBitMask ) {
6990 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6991 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6992 }
6993 else {
bb98fe42 6994 if ( (int64_t) z.low < 0 ) {
158142c2 6995 ++z.high;
bb98fe42 6996 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
6997 }
6998 }
dc355b76 6999 break;
f9288a76
PM
7000 case float_round_ties_away:
7001 if (lastBitMask) {
7002 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
7003 } else {
7004 if ((int64_t) z.low < 0) {
7005 ++z.high;
7006 }
7007 }
7008 break;
dc355b76
PM
7009 case float_round_to_zero:
7010 break;
7011 case float_round_up:
7012 if (!extractFloat128Sign(z)) {
7013 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7014 }
7015 break;
7016 case float_round_down:
7017 if (extractFloat128Sign(z)) {
7018 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 7019 }
dc355b76 7020 break;
5d64abb3
RH
7021 case float_round_to_odd:
7022 /*
7023 * Note that if lastBitMask == 0, the last bit is the lsb
7024 * of high, and roundBitsMask == -1.
7025 */
7026 if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
7027 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7028 }
7029 break;
dc355b76
PM
7030 default:
7031 abort();
158142c2
FB
7032 }
7033 z.low &= ~ roundBitsMask;
7034 }
7035 else {
7036 if ( aExp < 0x3FFF ) {
bb98fe42 7037 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
d82f3b2d 7038 float_raise(float_flag_inexact, status);
158142c2 7039 aSign = extractFloat128Sign( a );
a2f2d288 7040 switch (status->float_rounding_mode) {
5d64abb3 7041 case float_round_nearest_even:
158142c2
FB
7042 if ( ( aExp == 0x3FFE )
7043 && ( extractFloat128Frac0( a )
7044 | extractFloat128Frac1( a ) )
7045 ) {
7046 return packFloat128( aSign, 0x3FFF, 0, 0 );
7047 }
7048 break;
f9288a76
PM
7049 case float_round_ties_away:
7050 if (aExp == 0x3FFE) {
7051 return packFloat128(aSign, 0x3FFF, 0, 0);
7052 }
7053 break;
5d64abb3 7054 case float_round_down:
158142c2
FB
7055 return
7056 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
7057 : packFloat128( 0, 0, 0, 0 );
5d64abb3 7058 case float_round_up:
158142c2
FB
7059 return
7060 aSign ? packFloat128( 1, 0, 0, 0 )
7061 : packFloat128( 0, 0x3FFF, 0, 0 );
5d64abb3
RH
7062
7063 case float_round_to_odd:
7064 return packFloat128(aSign, 0x3FFF, 0, 0);
3dede407
RH
7065
7066 case float_round_to_zero:
7067 break;
158142c2
FB
7068 }
7069 return packFloat128( aSign, 0, 0, 0 );
7070 }
7071 lastBitMask = 1;
7072 lastBitMask <<= 0x402F - aExp;
7073 roundBitsMask = lastBitMask - 1;
7074 z.low = 0;
7075 z.high = a.high;
a2f2d288 7076 switch (status->float_rounding_mode) {
dc355b76 7077 case float_round_nearest_even:
158142c2
FB
7078 z.high += lastBitMask>>1;
7079 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
7080 z.high &= ~ lastBitMask;
7081 }
dc355b76 7082 break;
f9288a76
PM
7083 case float_round_ties_away:
7084 z.high += lastBitMask>>1;
7085 break;
dc355b76
PM
7086 case float_round_to_zero:
7087 break;
7088 case float_round_up:
7089 if (!extractFloat128Sign(z)) {
158142c2
FB
7090 z.high |= ( a.low != 0 );
7091 z.high += roundBitsMask;
7092 }
dc355b76
PM
7093 break;
7094 case float_round_down:
7095 if (extractFloat128Sign(z)) {
7096 z.high |= (a.low != 0);
7097 z.high += roundBitsMask;
7098 }
7099 break;
5d64abb3
RH
7100 case float_round_to_odd:
7101 if ((z.high & lastBitMask) == 0) {
7102 z.high |= (a.low != 0);
7103 z.high += roundBitsMask;
7104 }
7105 break;
dc355b76
PM
7106 default:
7107 abort();
158142c2
FB
7108 }
7109 z.high &= ~ roundBitsMask;
7110 }
7111 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
d82f3b2d 7112 float_raise(float_flag_inexact, status);
158142c2
FB
7113 }
7114 return z;
7115
7116}
7117
158142c2
FB
7118/*----------------------------------------------------------------------------
7119| Returns the remainder of the quadruple-precision floating-point value `a'
7120| with respect to the corresponding value `b'. The operation is performed
7121| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7122*----------------------------------------------------------------------------*/
7123
e5a41ffa 7124float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 7125{
c120391c 7126 bool aSign, zSign;
f4014512 7127 int32_t aExp, bExp, expDiff;
bb98fe42
AF
7128 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7129 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
7130 int64_t sigMean0;
158142c2
FB
7131
7132 aSig1 = extractFloat128Frac1( a );
7133 aSig0 = extractFloat128Frac0( a );
7134 aExp = extractFloat128Exp( a );
7135 aSign = extractFloat128Sign( a );
7136 bSig1 = extractFloat128Frac1( b );
7137 bSig0 = extractFloat128Frac0( b );
7138 bExp = extractFloat128Exp( b );
158142c2
FB
7139 if ( aExp == 0x7FFF ) {
7140 if ( ( aSig0 | aSig1 )
7141 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7142 return propagateFloat128NaN(a, b, status);
158142c2
FB
7143 }
7144 goto invalid;
7145 }
7146 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7147 if (bSig0 | bSig1) {
7148 return propagateFloat128NaN(a, b, status);
7149 }
158142c2
FB
7150 return a;
7151 }
7152 if ( bExp == 0 ) {
7153 if ( ( bSig0 | bSig1 ) == 0 ) {
7154 invalid:
ff32e16e 7155 float_raise(float_flag_invalid, status);
af39bc8c 7156 return float128_default_nan(status);
158142c2
FB
7157 }
7158 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7159 }
7160 if ( aExp == 0 ) {
7161 if ( ( aSig0 | aSig1 ) == 0 ) return a;
7162 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7163 }
7164 expDiff = aExp - bExp;
7165 if ( expDiff < -1 ) return a;
7166 shortShift128Left(
e9321124 7167 aSig0 | UINT64_C(0x0001000000000000),
158142c2
FB
7168 aSig1,
7169 15 - ( expDiff < 0 ),
7170 &aSig0,
7171 &aSig1
7172 );
7173 shortShift128Left(
e9321124 7174 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
7175 q = le128( bSig0, bSig1, aSig0, aSig1 );
7176 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7177 expDiff -= 64;
7178 while ( 0 < expDiff ) {
7179 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7180 q = ( 4 < q ) ? q - 4 : 0;
7181 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7182 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
7183 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
7184 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
7185 expDiff -= 61;
7186 }
7187 if ( -64 < expDiff ) {
7188 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7189 q = ( 4 < q ) ? q - 4 : 0;
7190 q >>= - expDiff;
7191 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7192 expDiff += 52;
7193 if ( expDiff < 0 ) {
7194 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7195 }
7196 else {
7197 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7198 }
7199 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7200 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7201 }
7202 else {
7203 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
7204 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7205 }
7206 do {
7207 alternateASig0 = aSig0;
7208 alternateASig1 = aSig1;
7209 ++q;
7210 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 7211 } while ( 0 <= (int64_t) aSig0 );
158142c2 7212 add128(
bb98fe42 7213 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
7214 if ( ( sigMean0 < 0 )
7215 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
7216 aSig0 = alternateASig0;
7217 aSig1 = alternateASig1;
7218 }
bb98fe42 7219 zSign = ( (int64_t) aSig0 < 0 );
158142c2 7220 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
7221 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
7222 status);
158142c2
FB
7223}
7224
7225/*----------------------------------------------------------------------------
7226| Returns the square root of the quadruple-precision floating-point value `a'.
7227| The operation is performed according to the IEC/IEEE Standard for Binary
7228| Floating-Point Arithmetic.
7229*----------------------------------------------------------------------------*/
7230
e5a41ffa 7231float128 float128_sqrt(float128 a, float_status *status)
158142c2 7232{
c120391c 7233 bool aSign;
f4014512 7234 int32_t aExp, zExp;
bb98fe42
AF
7235 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7236 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7237
7238 aSig1 = extractFloat128Frac1( a );
7239 aSig0 = extractFloat128Frac0( a );
7240 aExp = extractFloat128Exp( a );
7241 aSign = extractFloat128Sign( a );
7242 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7243 if (aSig0 | aSig1) {
7244 return propagateFloat128NaN(a, a, status);
7245 }
158142c2
FB
7246 if ( ! aSign ) return a;
7247 goto invalid;
7248 }
7249 if ( aSign ) {
7250 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
7251 invalid:
ff32e16e 7252 float_raise(float_flag_invalid, status);
af39bc8c 7253 return float128_default_nan(status);
158142c2
FB
7254 }
7255 if ( aExp == 0 ) {
7256 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
7257 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7258 }
7259 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
e9321124 7260 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7261 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
7262 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
7263 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
7264 doubleZSig0 = zSig0<<1;
7265 mul64To128( zSig0, zSig0, &term0, &term1 );
7266 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 7267 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7268 --zSig0;
7269 doubleZSig0 -= 2;
7270 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
7271 }
7272 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
7273 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7274 if ( zSig1 == 0 ) zSig1 = 1;
7275 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7276 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7277 mul64To128( zSig1, zSig1, &term2, &term3 );
7278 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7279 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7280 --zSig1;
7281 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7282 term3 |= 1;
7283 term2 |= doubleZSig0;
7284 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7285 }
7286 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7287 }
7288 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 7289 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7290
7291}
7292
71bfd65c
RH
7293static inline FloatRelation
7294floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
7295 float_status *status)
f6714d36 7296{
c120391c 7297 bool aSign, bSign;
f6714d36 7298
d1eb8f2a
AD
7299 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7300 float_raise(float_flag_invalid, status);
7301 return float_relation_unordered;
7302 }
f6714d36
AJ
7303 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7304 ( extractFloatx80Frac( a )<<1 ) ) ||
7305 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7306 ( extractFloatx80Frac( b )<<1 ) )) {
7307 if (!is_quiet ||
af39bc8c
AM
7308 floatx80_is_signaling_nan(a, status) ||
7309 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7310 float_raise(float_flag_invalid, status);
f6714d36
AJ
7311 }
7312 return float_relation_unordered;
7313 }
7314 aSign = extractFloatx80Sign( a );
7315 bSign = extractFloatx80Sign( b );
7316 if ( aSign != bSign ) {
7317
7318 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7319 ( ( a.low | b.low ) == 0 ) ) {
7320 /* zero case */
7321 return float_relation_equal;
7322 } else {
7323 return 1 - (2 * aSign);
7324 }
7325 } else {
be53fa78
JM
7326 /* Normalize pseudo-denormals before comparison. */
7327 if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
7328 ++a.high;
7329 }
7330 if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
7331 ++b.high;
7332 }
f6714d36
AJ
7333 if (a.low == b.low && a.high == b.high) {
7334 return float_relation_equal;
7335 } else {
7336 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7337 }
7338 }
7339}
7340
71bfd65c 7341FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7342{
ff32e16e 7343 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7344}
7345
71bfd65c
RH
7346FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
7347 float_status *status)
f6714d36 7348{
ff32e16e 7349 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7350}
7351
71bfd65c
RH
7352static inline FloatRelation
7353float128_compare_internal(float128 a, float128 b, bool is_quiet,
7354 float_status *status)
1f587329 7355{
c120391c 7356 bool aSign, bSign;
1f587329
BS
7357
7358 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7359 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7360 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7361 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7362 if (!is_quiet ||
af39bc8c
AM
7363 float128_is_signaling_nan(a, status) ||
7364 float128_is_signaling_nan(b, status)) {
ff32e16e 7365 float_raise(float_flag_invalid, status);
1f587329
BS
7366 }
7367 return float_relation_unordered;
7368 }
7369 aSign = extractFloat128Sign( a );
7370 bSign = extractFloat128Sign( b );
7371 if ( aSign != bSign ) {
7372 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7373 /* zero case */
7374 return float_relation_equal;
7375 } else {
7376 return 1 - (2 * aSign);
7377 }
7378 } else {
7379 if (a.low == b.low && a.high == b.high) {
7380 return float_relation_equal;
7381 } else {
7382 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7383 }
7384 }
7385}
7386
71bfd65c 7387FloatRelation float128_compare(float128 a, float128 b, float_status *status)
1f587329 7388{
ff32e16e 7389 return float128_compare_internal(a, b, 0, status);
1f587329
BS
7390}
7391
71bfd65c
RH
7392FloatRelation float128_compare_quiet(float128 a, float128 b,
7393 float_status *status)
1f587329 7394{
ff32e16e 7395 return float128_compare_internal(a, b, 1, status);
1f587329
BS
7396}
7397
e5a41ffa 7398floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb 7399{
c120391c 7400 bool aSign;
326b9e98 7401 int32_t aExp;
bb98fe42 7402 uint64_t aSig;
9ee6e8bb 7403
d1eb8f2a
AD
7404 if (floatx80_invalid_encoding(a)) {
7405 float_raise(float_flag_invalid, status);
7406 return floatx80_default_nan(status);
7407 }
9ee6e8bb
PB
7408 aSig = extractFloatx80Frac( a );
7409 aExp = extractFloatx80Exp( a );
7410 aSign = extractFloatx80Sign( a );
7411
326b9e98
AJ
7412 if ( aExp == 0x7FFF ) {
7413 if ( aSig<<1 ) {
ff32e16e 7414 return propagateFloatx80NaN(a, a, status);
326b9e98 7415 }
9ee6e8bb
PB
7416 return a;
7417 }
326b9e98 7418
3c85c37f
PM
7419 if (aExp == 0) {
7420 if (aSig == 0) {
7421 return a;
7422 }
7423 aExp++;
7424 }
69397542 7425
326b9e98
AJ
7426 if (n > 0x10000) {
7427 n = 0x10000;
7428 } else if (n < -0x10000) {
7429 n = -0x10000;
7430 }
7431
9ee6e8bb 7432 aExp += n;
a2f2d288
PM
7433 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7434 aSign, aExp, aSig, 0, status);
9ee6e8bb 7435}
9ee6e8bb 7436
e5a41ffa 7437float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb 7438{
c120391c 7439 bool aSign;
326b9e98 7440 int32_t aExp;
bb98fe42 7441 uint64_t aSig0, aSig1;
9ee6e8bb
PB
7442
7443 aSig1 = extractFloat128Frac1( a );
7444 aSig0 = extractFloat128Frac0( a );
7445 aExp = extractFloat128Exp( a );
7446 aSign = extractFloat128Sign( a );
7447 if ( aExp == 0x7FFF ) {
326b9e98 7448 if ( aSig0 | aSig1 ) {
ff32e16e 7449 return propagateFloat128NaN(a, a, status);
326b9e98 7450 }
9ee6e8bb
PB
7451 return a;
7452 }
3c85c37f 7453 if (aExp != 0) {
e9321124 7454 aSig0 |= UINT64_C(0x0001000000000000);
3c85c37f 7455 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 7456 return a;
3c85c37f
PM
7457 } else {
7458 aExp++;
7459 }
69397542 7460
326b9e98
AJ
7461 if (n > 0x10000) {
7462 n = 0x10000;
7463 } else if (n < -0x10000) {
7464 n = -0x10000;
7465 }
7466
69397542
PB
7467 aExp += n - 1;
7468 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 7469 , status);
9ee6e8bb
PB
7470
7471}
f6b3b108
EC
7472
7473static void __attribute__((constructor)) softfloat_init(void)
7474{
7475 union_float64 ua, ub, uc, ur;
7476
7477 if (QEMU_NO_HARDFLOAT) {
7478 return;
7479 }
7480 /*
7481 * Test that the host's FMA is not obviously broken. For example,
7482 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
7483 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
7484 */
7485 ua.s = 0x0020000000000001ULL;
7486 ub.s = 0x3ca0000000000000ULL;
7487 uc.s = 0x0020000000000000ULL;
7488 ur.h = fma(ua.h, ub.h, uc.h);
7489 if (ur.s != 0x0020000000000001ULL) {
7490 force_soft_fma = true;
7491 }
7492}