]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
softfloat: Move mul_floats to softfloat-parts.c.inc
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
d82f3b2d 135 float_raise(float_flag_input_denormal, s); \
a94b7839
EC
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
/*
 * Select, per (number of inputs, float size) combination, whether the
 * generated hardfloat functions classify operands with fpclassify() (1) or
 * with the float32/64_* primitives (0).
 *
 * The single-precision variants never use fpclassify(); the double-precision
 * variants use it only on x86_64 hosts.
 */
#define QEMU_HARDFLOAT_1F32_USE_FP 0
#define QEMU_HARDFLOAT_2F32_USE_FP 0
#define QEMU_HARDFLOAT_3F32_USE_FP 0

#ifdef __x86_64__
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205
/*
 * QEMU_HARDFLOAT_USE_ISINF selects isinf() (1) over
 * float{32,64}_is_infinity (0) when !USE_FP.
 * On x86_64/aarch64 isinf() can yield a ~6% speedup; on power64 it reduces
 * fp-bench performance by up to 50%, so it is only enabled on the former.
 */
#if !defined(__x86_64__) && !defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF 0
#else
# define QEMU_HARDFLOAT_USE_ISINF 1
#endif
217
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.  Hardfloat is also disabled (with a build warning) under
 * -ffast-math.
 *
 * QEMU_NO_HARDFLOAT is always defined, to 0 or 1.  QEMU_SOFTFLOAT_ATTR
 * additionally carries noinline only when hardfloat is enabled.
 */
#ifdef __FAST_MATH__
# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
#endif

#if !defined(TARGET_PPC) && !defined(__FAST_MATH__)
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#else
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
a94b7839
EC
342static inline float32
343float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
b240c9c4 345 f32_check_fn pre, f32_check_fn post)
a94b7839
EC
346{
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
a94b7839
EC
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 363 float_raise(float_flag_overflow, s);
b240c9c4
RH
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
a94b7839
EC
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371}
372
373static inline float64
374float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
b240c9c4 376 f64_check_fn pre, f64_check_fn post)
a94b7839
EC
377{
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
a94b7839
EC
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 394 float_raise(float_flag_overflow, s);
b240c9c4
RH
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
a94b7839
EC
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402}
403
d97544c9
AB
404/*----------------------------------------------------------------------------
405| Returns the fraction bits of the single-precision floating-point value `a'.
406*----------------------------------------------------------------------------*/
407
408static inline uint32_t extractFloat32Frac(float32 a)
409{
410 return float32_val(a) & 0x007FFFFF;
411}
412
413/*----------------------------------------------------------------------------
414| Returns the exponent bits of the single-precision floating-point value `a'.
415*----------------------------------------------------------------------------*/
416
417static inline int extractFloat32Exp(float32 a)
418{
419 return (float32_val(a) >> 23) & 0xFF;
420}
421
422/*----------------------------------------------------------------------------
423| Returns the sign bit of the single-precision floating-point value `a'.
424*----------------------------------------------------------------------------*/
425
c120391c 426static inline bool extractFloat32Sign(float32 a)
d97544c9
AB
427{
428 return float32_val(a) >> 31;
429}
430
431/*----------------------------------------------------------------------------
432| Returns the fraction bits of the double-precision floating-point value `a'.
433*----------------------------------------------------------------------------*/
434
435static inline uint64_t extractFloat64Frac(float64 a)
436{
e9321124 437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
d97544c9
AB
438}
439
440/*----------------------------------------------------------------------------
441| Returns the exponent bits of the double-precision floating-point value `a'.
442*----------------------------------------------------------------------------*/
443
444static inline int extractFloat64Exp(float64 a)
445{
446 return (float64_val(a) >> 52) & 0x7FF;
447}
448
449/*----------------------------------------------------------------------------
450| Returns the sign bit of the double-precision floating-point value `a'.
451*----------------------------------------------------------------------------*/
452
c120391c 453static inline bool extractFloat64Sign(float64 a)
d97544c9
AB
454{
455 return float64_val(a) >> 63;
456}
457
a90119b5
AB
/*
 * Classification of a floating point number.  The NaN classes are ordered
 * last, so that cls >= float_class_qnan is true for any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified = 0,
    float_class_zero         = 1,
    float_class_normal       = 2,
    float_class_inf          = 3,
    float_class_qnan         = 4,  /* all NaNs from here */
    float_class_snan         = 5,
} FloatClass;

/* One-bit mask for a FloatClass value, so sets of classes can be or'ed. */
#define float_cmask(bit)  (1u << (bit))

enum {
    /* Single-class masks. */
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    /* Combined masks. */
    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};

484
485
247d1f21
RH
486/* Simple helpers for checking if, or what kind of, NaN we have */
487static inline __attribute__((unused)) bool is_nan(FloatClass c)
488{
489 return unlikely(c >= float_class_qnan);
490}
491
492static inline __attribute__((unused)) bool is_snan(FloatClass c)
493{
494 return c == float_class_snan;
495}
496
497static inline __attribute__((unused)) bool is_qnan(FloatClass c)
498{
499 return c == float_class_qnan;
500}
501
a90119b5 502/*
0018b1f4
RH
503 * Structure holding all of the decomposed parts of a float.
504 * The exponent is unbiased and the fraction is normalized.
a90119b5 505 *
0018b1f4
RH
506 * The fraction words are stored in big-endian word ordering,
507 * so that truncation from a larger format to a smaller format
508 * can be done simply by ignoring subsequent elements.
a90119b5
AB
509 */
510
511typedef struct {
a90119b5
AB
512 FloatClass cls;
513 bool sign;
4109b9ea
RH
514 int32_t exp;
515 union {
516 /* Routines that know the structure may reference the singular name. */
517 uint64_t frac;
518 /*
519 * Routines expanded with multiple structures reference "hi" and "lo"
520 * depending on the operation. In FloatParts64, "hi" and "lo" are
521 * both the same word and aliased here.
522 */
523 uint64_t frac_hi;
524 uint64_t frac_lo;
525 };
f8155c1d 526} FloatParts64;
a90119b5 527
0018b1f4
RH
528typedef struct {
529 FloatClass cls;
530 bool sign;
531 int32_t exp;
532 uint64_t frac_hi;
533 uint64_t frac_lo;
534} FloatParts128;
535
aca84527
RH
536typedef struct {
537 FloatClass cls;
538 bool sign;
539 int32_t exp;
540 uint64_t frac_hi;
541 uint64_t frac_hm; /* high-middle */
542 uint64_t frac_lm; /* low-middle */
543 uint64_t frac_lo;
544} FloatParts256;
545
/*
 * Position of the binary point, and the bit just above it, within the most
 * significant fraction word of each FloatPartsN.
 */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
a90119b5
AB
549
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/*
 * Expand fields based on the size of exponent (E) and fraction (F).
 * Both arguments are parenthesized on every use so that an expression
 * argument cannot be re-associated by operator precedence.
 */
#define FLOAT_PARAMS(E, F)                                   \
    .exp_size       = (E),                                   \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                 \
    .exp_max        = (1 << (E)) - 1,                        \
    .frac_size      = (F),                                   \
    .frac_shift     = (-(F) - 1) & 63,                       \
    .frac_lsb       = 1ull << ((-(F) - 1) & 63),             \
    .frac_lsbm1     = 1ull << ((-(F) - 2) & 63),             \
    .round_mask     = (1ull << ((-(F) - 1) & 63)) - 1,       \
    .roundeven_mask = (2ull << ((-(F) - 1) & 63)) - 1

static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};
612
6fff2167 613/* Unpack a float to parts, but do not canonicalize. */
d8fdd172 614static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
6fff2167 615{
d8fdd172
RH
616 const int f_size = fmt->frac_size;
617 const int e_size = fmt->exp_size;
6fff2167 618
d8fdd172 619 *r = (FloatParts64) {
6fff2167 620 .cls = float_class_unclassified,
d8fdd172
RH
621 .sign = extract64(raw, f_size + e_size, 1),
622 .exp = extract64(raw, f_size, e_size),
623 .frac = extract64(raw, 0, f_size)
6fff2167
AB
624 };
625}
626
3dddb203 627static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
6fff2167 628{
3dddb203 629 unpack_raw64(p, &float16_params, f);
6fff2167
AB
630}
631
3dddb203 632static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
8282310d 633{
3dddb203 634 unpack_raw64(p, &bfloat16_params, f);
8282310d
LZ
635}
636
3dddb203 637static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
6fff2167 638{
3dddb203 639 unpack_raw64(p, &float32_params, f);
6fff2167
AB
640}
641
3dddb203 642static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
6fff2167 643{
3dddb203 644 unpack_raw64(p, &float64_params, f);
6fff2167
AB
645}
646
0018b1f4
RH
647static void float128_unpack_raw(FloatParts128 *p, float128 f)
648{
649 const int f_size = float128_params.frac_size - 64;
650 const int e_size = float128_params.exp_size;
651
652 *p = (FloatParts128) {
653 .cls = float_class_unclassified,
654 .sign = extract64(f.high, f_size + e_size, 1),
655 .exp = extract64(f.high, f_size, e_size),
656 .frac_hi = extract64(f.high, 0, f_size),
657 .frac_lo = f.low,
658 };
659}
660
6fff2167 661/* Pack a float from parts, but do not canonicalize. */
9e4af58c 662static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
6fff2167 663{
9e4af58c
RH
664 const int f_size = fmt->frac_size;
665 const int e_size = fmt->exp_size;
666 uint64_t ret;
667
668 ret = (uint64_t)p->sign << (f_size + e_size);
669 ret = deposit64(ret, f_size, e_size, p->exp);
670 ret = deposit64(ret, 0, f_size, p->frac);
671 return ret;
6fff2167
AB
672}
673
71fd178e 674static inline float16 float16_pack_raw(const FloatParts64 *p)
6fff2167 675{
71fd178e 676 return make_float16(pack_raw64(p, &float16_params));
6fff2167
AB
677}
678
71fd178e 679static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
8282310d 680{
71fd178e 681 return pack_raw64(p, &bfloat16_params);
8282310d
LZ
682}
683
71fd178e 684static inline float32 float32_pack_raw(const FloatParts64 *p)
6fff2167 685{
71fd178e 686 return make_float32(pack_raw64(p, &float32_params));
6fff2167
AB
687}
688
71fd178e 689static inline float64 float64_pack_raw(const FloatParts64 *p)
6fff2167 690{
71fd178e 691 return make_float64(pack_raw64(p, &float64_params));
6fff2167
AB
692}
693
0018b1f4
RH
694static float128 float128_pack_raw(const FloatParts128 *p)
695{
696 const int f_size = float128_params.frac_size - 64;
697 const int e_size = float128_params.exp_size;
698 uint64_t hi;
699
700 hi = (uint64_t)p->sign << (f_size + e_size);
701 hi = deposit64(hi, f_size, e_size, p->exp);
702 hi = deposit64(hi, 0, f_size, p->frac_hi);
703 return make_float128(hi, p->frac_lo);
704}
705
0664335a
RH
706/*----------------------------------------------------------------------------
707| Functions and definitions to determine: (1) whether tininess for underflow
708| is detected before or after rounding by default, (2) what (if anything)
709| happens when exceptions are raised, (3) how signaling NaNs are distinguished
710| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
711| are propagated from function inputs to output. These details are target-
712| specific.
713*----------------------------------------------------------------------------*/
139c1837 714#include "softfloat-specialize.c.inc"
0664335a 715
0018b1f4
RH
716#define PARTS_GENERIC_64_128(NAME, P) \
717 QEMU_GENERIC(P, (FloatParts128 *, parts128_##NAME), parts64_##NAME)
718
e9034ea8 719#define parts_default_nan(P, S) PARTS_GENERIC_64_128(default_nan, P)(P, S)
0018b1f4
RH
720#define parts_silence_nan(P, S) PARTS_GENERIC_64_128(silence_nan, P)(P, S)
721
7c45bad8
RH
722static void parts64_return_nan(FloatParts64 *a, float_status *s);
723static void parts128_return_nan(FloatParts128 *a, float_status *s);
724
725#define parts_return_nan(P, S) PARTS_GENERIC_64_128(return_nan, P)(P, S)
0018b1f4 726
22c355f4
RH
727static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
728 float_status *s);
729static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
730 float_status *s);
731
732#define parts_pick_nan(A, B, S) PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
733
979582d0
RH
734static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
735 FloatParts64 *c, float_status *s,
736 int ab_mask, int abc_mask);
737static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
738 FloatParts128 *b,
739 FloatParts128 *c,
740 float_status *s,
741 int ab_mask, int abc_mask);
742
743#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
744 PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
745
d46975bc
RH
746static void parts64_canonicalize(FloatParts64 *p, float_status *status,
747 const FloatFmt *fmt);
748static void parts128_canonicalize(FloatParts128 *p, float_status *status,
749 const FloatFmt *fmt);
750
751#define parts_canonicalize(A, S, F) \
752 PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
753
ee6959f2
RH
754static void parts64_uncanon(FloatParts64 *p, float_status *status,
755 const FloatFmt *fmt);
756static void parts128_uncanon(FloatParts128 *p, float_status *status,
757 const FloatFmt *fmt);
758
759#define parts_uncanon(A, S, F) \
760 PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
761
da10a907
RH
762static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
763static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
764
765#define parts_add_normal(A, B) \
766 PARTS_GENERIC_64_128(add_normal, A)(A, B)
767
768static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
769static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
770
771#define parts_sub_normal(A, B) \
772 PARTS_GENERIC_64_128(sub_normal, A)(A, B)
773
774static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
775 float_status *s, bool subtract);
776static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
777 float_status *s, bool subtract);
778
779#define parts_addsub(A, B, S, Z) \
780 PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
781
aca84527
RH
782static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
783 float_status *s);
784static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
785 float_status *s);
786
787#define parts_mul(A, B, S) \
788 PARTS_GENERIC_64_128(mul, A)(A, B, S)
789
0018b1f4
RH
790/*
791 * Helper functions for softfloat-parts.c.inc, per-size operations.
792 */
793
22c355f4
RH
/*
 * Size dispatch for the frac_* helpers: expands to frac128_<NAME> when
 * QEMU_GENERIC matches P against FloatParts128 *, and to frac64_<NAME>
 * otherwise.
 */
#define FRAC_GENERIC_64_128(NAME, P) \
    QEMU_GENERIC(P, (FloatParts128 *, frac128_##NAME), frac64_##NAME)
796
da10a907
RH
797static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
798{
799 return uadd64_overflow(a->frac, b->frac, &r->frac);
800}
801
802static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
803{
804 bool c = 0;
805 r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
806 r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
807 return c;
808}
809
810#define frac_add(R, A, B) FRAC_GENERIC_64_128(add, R)(R, A, B)
811
ee6959f2
RH
812static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
813{
814 return uadd64_overflow(a->frac, c, &r->frac);
815}
816
817static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
818{
819 c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
820 return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
821}
822
823#define frac_addi(R, A, C) FRAC_GENERIC_64_128(addi, R)(R, A, C)
824
825static void frac64_allones(FloatParts64 *a)
826{
827 a->frac = -1;
828}
829
830static void frac128_allones(FloatParts128 *a)
831{
832 a->frac_hi = a->frac_lo = -1;
833}
834
835#define frac_allones(A) FRAC_GENERIC_64_128(allones, A)(A)
836
22c355f4
RH
837static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
838{
839 return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
840}
841
842static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
843{
844 uint64_t ta = a->frac_hi, tb = b->frac_hi;
845 if (ta == tb) {
846 ta = a->frac_lo, tb = b->frac_lo;
847 if (ta == tb) {
848 return 0;
849 }
850 }
851 return ta < tb ? -1 : 1;
852}
853
854#define frac_cmp(A, B) FRAC_GENERIC_64_128(cmp, A)(A, B)
855
d46975bc 856static void frac64_clear(FloatParts64 *a)
0018b1f4 857{
d46975bc
RH
858 a->frac = 0;
859}
860
861static void frac128_clear(FloatParts128 *a)
862{
863 a->frac_hi = a->frac_lo = 0;
0018b1f4
RH
864}
865
d46975bc 866#define frac_clear(A) FRAC_GENERIC_64_128(clear, A)(A)
0018b1f4 867
d46975bc 868static bool frac64_eqz(FloatParts64 *a)
0018b1f4 869{
d46975bc
RH
870 return a->frac == 0;
871}
872
873static bool frac128_eqz(FloatParts128 *a)
874{
875 return (a->frac_hi | a->frac_lo) == 0;
0018b1f4
RH
876}
877
d46975bc 878#define frac_eqz(A) FRAC_GENERIC_64_128(eqz, A)(A)
0fc07cad 879
aca84527
RH
880static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
881{
882 mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
883}
884
885static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
886{
887 mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
888 &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
889}
890
891#define frac_mulw(R, A, B) FRAC_GENERIC_64_128(mulw, A)(R, A, B)
892
da10a907
RH
893static void frac64_neg(FloatParts64 *a)
894{
895 a->frac = -a->frac;
896}
897
898static void frac128_neg(FloatParts128 *a)
899{
900 bool c = 0;
901 a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
902 a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
903}
904
905#define frac_neg(A) FRAC_GENERIC_64_128(neg, A)(A)
906
d46975bc 907static int frac64_normalize(FloatParts64 *a)
6fff2167 908{
d46975bc
RH
909 if (a->frac) {
910 int shift = clz64(a->frac);
911 a->frac <<= shift;
912 return shift;
913 }
914 return 64;
915}
916
917static int frac128_normalize(FloatParts128 *a)
918{
919 if (a->frac_hi) {
920 int shl = clz64(a->frac_hi);
921 if (shl) {
922 int shr = 64 - shl;
923 a->frac_hi = (a->frac_hi << shl) | (a->frac_lo >> shr);
924 a->frac_lo = (a->frac_lo << shl);
6fff2167 925 }
d46975bc
RH
926 return shl;
927 } else if (a->frac_lo) {
928 int shl = clz64(a->frac_lo);
929 a->frac_hi = (a->frac_lo << shl);
930 a->frac_lo = 0;
931 return shl + 64;
6fff2167 932 }
d46975bc 933 return 128;
6fff2167
AB
934}
935
d46975bc
RH
936#define frac_normalize(A) FRAC_GENERIC_64_128(normalize, A)(A)
937
938static void frac64_shl(FloatParts64 *a, int c)
939{
940 a->frac <<= c;
941}
942
943static void frac128_shl(FloatParts128 *a, int c)
944{
945 shift128Left(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
946}
947
948#define frac_shl(A, C) FRAC_GENERIC_64_128(shl, A)(A, C)
949
950static void frac64_shr(FloatParts64 *a, int c)
951{
952 a->frac >>= c;
953}
954
955static void frac128_shr(FloatParts128 *a, int c)
956{
957 shift128Right(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
958}
959
960#define frac_shr(A, C) FRAC_GENERIC_64_128(shr, A)(A, C)
961
ee6959f2 962static void frac64_shrjam(FloatParts64 *a, int c)
6fff2167 963{
ee6959f2
RH
964 shift64RightJamming(a->frac, c, &a->frac);
965}
6fff2167 966
ee6959f2
RH
967static void frac128_shrjam(FloatParts128 *a, int c)
968{
969 shift128RightJamming(a->frac_hi, a->frac_lo, c, &a->frac_hi, &a->frac_lo);
6fff2167
AB
970}
971
ee6959f2 972#define frac_shrjam(A, C) FRAC_GENERIC_64_128(shrjam, A)(A, C)
d446830a 973
da10a907
RH
974static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
975{
976 return usub64_overflow(a->frac, b->frac, &r->frac);
977}
7c45bad8 978
da10a907
RH
979static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
980{
981 bool c = 0;
982 r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
983 r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
984 return c;
985}
986
987#define frac_sub(R, A, B) FRAC_GENERIC_64_128(sub, R)(R, A, B)
988
aca84527
RH
989static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
990{
991 r->frac = a->frac_hi | (a->frac_lo != 0);
992}
993
994static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
995{
996 r->frac_hi = a->frac_hi;
997 r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
998}
999
1000#define frac_truncjam(R, A) FRAC_GENERIC_64_128(truncjam, R)(R, A)
1001
da10a907
RH
/*
 * Instantiate the included parts templates twice.
 *
 * The templates are parameterised by N (the working width) and W (the
 * widened width).  partsN(NAME), FloatPartsN and FloatPartsW are built
 * with glue(), which is defined outside this excerpt (presumably token
 * pasting, yielding names of the form parts<N>_NAME and FloatParts<N>).
 */
1002#define partsN(NAME) glue(glue(glue(parts,N),_),NAME)
1003#define FloatPartsN glue(FloatParts,N)
aca84527 1004#define FloatPartsW glue(FloatParts,W)
da10a907
RH
1005
/* First instantiation: N = 64, W = 128. */
1006#define N 64
aca84527 1007#define W 128
da10a907
RH
1008
1009#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1010#include "softfloat-parts.c.inc"
1011
/* Second instantiation: N = 128, W = 256. */
da10a907 1012#undef N
aca84527 1013#undef W
da10a907 1014#define N 128
aca84527 1015#define W 256
7c45bad8 1016
da10a907 1017#include "softfloat-parts-addsub.c.inc"
7c45bad8
RH
1018#include "softfloat-parts.c.inc"
1019
/* Drop the template parameters and helper macros again. */
da10a907 1020#undef N
aca84527 1021#undef W
7c45bad8
RH
1022#undef partsN
1023#undef FloatPartsN
aca84527 1024#undef FloatPartsW
7c45bad8 1025
aaffb7bf
RH
1026/*
1027 * Pack/unpack routines with a specific FloatFmt.
1028 */
1029
98e256fc
RH
1030static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1031 float_status *s, const FloatFmt *params)
aaffb7bf 1032{
98e256fc 1033 float16_unpack_raw(p, f);
d46975bc 1034 parts_canonicalize(p, s, params);
aaffb7bf
RH
1035}
1036
98e256fc
RH
1037static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1038 float_status *s)
aaffb7bf 1039{
98e256fc 1040 float16a_unpack_canonical(p, f, s, &float16_params);
aaffb7bf
RH
1041}
1042
98e256fc
RH
1043static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1044 float_status *s)
aaffb7bf 1045{
98e256fc 1046 bfloat16_unpack_raw(p, f);
d46975bc 1047 parts_canonicalize(p, s, &bfloat16_params);
aaffb7bf
RH
1048}
1049
e293e927
RH
1050static float16 float16a_round_pack_canonical(FloatParts64 *p,
1051 float_status *s,
aaffb7bf
RH
1052 const FloatFmt *params)
1053{
ee6959f2 1054 parts_uncanon(p, s, params);
e293e927 1055 return float16_pack_raw(p);
aaffb7bf
RH
1056}
1057
e293e927
RH
1058static float16 float16_round_pack_canonical(FloatParts64 *p,
1059 float_status *s)
aaffb7bf
RH
1060{
1061 return float16a_round_pack_canonical(p, s, &float16_params);
1062}
1063
e293e927
RH
1064static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1065 float_status *s)
aaffb7bf 1066{
ee6959f2 1067 parts_uncanon(p, s, &bfloat16_params);
e293e927 1068 return bfloat16_pack_raw(p);
aaffb7bf
RH
1069}
1070
98e256fc
RH
1071static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1072 float_status *s)
aaffb7bf 1073{
98e256fc 1074 float32_unpack_raw(p, f);
d46975bc 1075 parts_canonicalize(p, s, &float32_params);
aaffb7bf
RH
1076}
1077
e293e927
RH
1078static float32 float32_round_pack_canonical(FloatParts64 *p,
1079 float_status *s)
aaffb7bf 1080{
ee6959f2 1081 parts_uncanon(p, s, &float32_params);
e293e927 1082 return float32_pack_raw(p);
aaffb7bf
RH
1083}
1084
98e256fc
RH
1085static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1086 float_status *s)
aaffb7bf 1087{
98e256fc 1088 float64_unpack_raw(p, f);
d46975bc 1089 parts_canonicalize(p, s, &float64_params);
aaffb7bf
RH
1090}
1091
e293e927
RH
1092static float64 float64_round_pack_canonical(FloatParts64 *p,
1093 float_status *s)
aaffb7bf 1094{
ee6959f2 1095 parts_uncanon(p, s, &float64_params);
e293e927 1096 return float64_pack_raw(p);
aaffb7bf
RH
1097}
1098
3ff49e56
RH
1099static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1100 float_status *s)
1101{
1102 float128_unpack_raw(p, f);
1103 parts_canonicalize(p, s, &float128_params);
1104}
1105
1106static float128 float128_round_pack_canonical(FloatParts128 *p,
1107 float_status *s)
1108{
1109 parts_uncanon(p, s, &float128_params);
1110 return float128_pack_raw(p);
1111}
1112
6fff2167 1113/*
da10a907 1114 * Addition and subtraction
6fff2167
AB
1115 */
1116
da10a907
RH
1117static float16 QEMU_FLATTEN
1118float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
6fff2167 1119{
da10a907 1120 FloatParts64 pa, pb, *pr;
98e256fc
RH
1121
1122 float16_unpack_canonical(&pa, a, status);
1123 float16_unpack_canonical(&pb, b, status);
da10a907 1124 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1125
da10a907 1126 return float16_round_pack_canonical(pr, status);
6fff2167
AB
1127}
1128
da10a907 1129float16 float16_add(float16 a, float16 b, float_status *status)
1b615d48 1130{
da10a907
RH
1131 return float16_addsub(a, b, status, false);
1132}
1b615d48 1133
da10a907
RH
1134float16 float16_sub(float16 a, float16 b, float_status *status)
1135{
1136 return float16_addsub(a, b, status, true);
1b615d48
EC
1137}
1138
1139static float32 QEMU_SOFTFLOAT_ATTR
da10a907 1140soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
6fff2167 1141{
da10a907 1142 FloatParts64 pa, pb, *pr;
98e256fc
RH
1143
1144 float32_unpack_canonical(&pa, a, status);
1145 float32_unpack_canonical(&pb, b, status);
da10a907 1146 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1147
da10a907 1148 return float32_round_pack_canonical(pr, status);
6fff2167
AB
1149}
1150
da10a907 1151static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1b615d48 1152{
da10a907 1153 return soft_f32_addsub(a, b, status, false);
1b615d48
EC
1154}
1155
da10a907 1156static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1b615d48 1157{
da10a907 1158 return soft_f32_addsub(a, b, status, true);
1b615d48
EC
1159}
1160
1161static float64 QEMU_SOFTFLOAT_ATTR
da10a907 1162soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
6fff2167 1163{
da10a907 1164 FloatParts64 pa, pb, *pr;
98e256fc
RH
1165
1166 float64_unpack_canonical(&pa, a, status);
1167 float64_unpack_canonical(&pb, b, status);
da10a907 1168 pr = parts_addsub(&pa, &pb, status, subtract);
6fff2167 1169
da10a907 1170 return float64_round_pack_canonical(pr, status);
6fff2167
AB
1171}
1172
da10a907 1173static float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1174{
da10a907 1175 return soft_f64_addsub(a, b, status, false);
1b615d48 1176}
6fff2167 1177
da10a907 1178static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1b615d48 1179{
da10a907 1180 return soft_f64_addsub(a, b, status, true);
6fff2167
AB
1181}
1182
/* Host-FPU single-precision addition. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;

    return sum;
}
6fff2167 1187
1b615d48
EC
/* Host-FPU single-precision subtraction. */
static float hard_f32_sub(float a, float b)
{
    float diff = a - b;

    return diff;
}
1192
/* Host-FPU double-precision addition. */
static double hard_f64_add(double a, double b)
{
    double sum = a + b;

    return sum;
}
6fff2167 1197
1b615d48
EC
/* Host-FPU double-precision subtraction. */
static double hard_f64_sub(double a, double b)
{
    double diff = a - b;

    return diff;
}
1202
b240c9c4 1203static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1b615d48
EC
1204{
1205 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1206 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1207 }
1208 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1209}
1210
b240c9c4 1211static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1b615d48
EC
1212{
1213 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1214 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1215 } else {
1216 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1217 }
1218}
1219
1220static float32 float32_addsub(float32 a, float32 b, float_status *s,
1221 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1222{
1223 return float32_gen2(a, b, s, hard, soft,
b240c9c4 1224 f32_is_zon2, f32_addsubmul_post);
1b615d48
EC
1225}
1226
1227static float64 float64_addsub(float64 a, float64 b, float_status *s,
1228 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1229{
1230 return float64_gen2(a, b, s, hard, soft,
b240c9c4 1231 f64_is_zon2, f64_addsubmul_post);
1b615d48
EC
1232}
1233
1234float32 QEMU_FLATTEN
1235float32_add(float32 a, float32 b, float_status *s)
1236{
1237 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1238}
1239
1240float32 QEMU_FLATTEN
1241float32_sub(float32 a, float32 b, float_status *s)
1242{
1243 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1244}
1245
1246float64 QEMU_FLATTEN
1247float64_add(float64 a, float64 b, float_status *s)
1248{
1249 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1250}
1251
1252float64 QEMU_FLATTEN
1253float64_sub(float64 a, float64 b, float_status *s)
1254{
1255 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1256}
1257
da10a907
RH
1258static bfloat16 QEMU_FLATTEN
1259bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
8282310d 1260{
da10a907 1261 FloatParts64 pa, pb, *pr;
98e256fc
RH
1262
1263 bfloat16_unpack_canonical(&pa, a, status);
1264 bfloat16_unpack_canonical(&pb, b, status);
da10a907 1265 pr = parts_addsub(&pa, &pb, status, subtract);
8282310d 1266
da10a907 1267 return bfloat16_round_pack_canonical(pr, status);
8282310d
LZ
1268}
1269
da10a907 1270bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1271{
da10a907
RH
1272 return bfloat16_addsub(a, b, status, false);
1273}
8282310d 1274
da10a907
RH
1275bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1276{
1277 return bfloat16_addsub(a, b, status, true);
8282310d
LZ
1278}
1279
3ff49e56
RH
1280static float128 QEMU_FLATTEN
1281float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1282{
1283 FloatParts128 pa, pb, *pr;
1284
1285 float128_unpack_canonical(&pa, a, status);
1286 float128_unpack_canonical(&pb, b, status);
1287 pr = parts_addsub(&pa, &pb, status, subtract);
1288
1289 return float128_round_pack_canonical(pr, status);
1290}
1291
1292float128 float128_add(float128 a, float128 b, float_status *status)
1293{
1294 return float128_addsub(a, b, status, false);
1295}
1296
1297float128 float128_sub(float128 a, float128 b, float_status *status)
1298{
1299 return float128_addsub(a, b, status, true);
1300}
1301
74d707e2 1302/*
aca84527 1303 * Multiplication
74d707e2
AB
1304 */
1305
97ff87c0 1306float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2 1307{
aca84527 1308 FloatParts64 pa, pb, *pr;
98e256fc
RH
1309
1310 float16_unpack_canonical(&pa, a, status);
1311 float16_unpack_canonical(&pb, b, status);
aca84527 1312 pr = parts_mul(&pa, &pb, status);
74d707e2 1313
aca84527 1314 return float16_round_pack_canonical(pr, status);
74d707e2
AB
1315}
1316
2dfabc86
EC
1317static float32 QEMU_SOFTFLOAT_ATTR
1318soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2 1319{
aca84527 1320 FloatParts64 pa, pb, *pr;
98e256fc
RH
1321
1322 float32_unpack_canonical(&pa, a, status);
1323 float32_unpack_canonical(&pb, b, status);
aca84527 1324 pr = parts_mul(&pa, &pb, status);
74d707e2 1325
aca84527 1326 return float32_round_pack_canonical(pr, status);
74d707e2
AB
1327}
1328
2dfabc86
EC
1329static float64 QEMU_SOFTFLOAT_ATTR
1330soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2 1331{
aca84527 1332 FloatParts64 pa, pb, *pr;
98e256fc
RH
1333
1334 float64_unpack_canonical(&pa, a, status);
1335 float64_unpack_canonical(&pb, b, status);
aca84527 1336 pr = parts_mul(&pa, &pb, status);
74d707e2 1337
aca84527 1338 return float64_round_pack_canonical(pr, status);
74d707e2
AB
1339}
1340
2dfabc86
EC
/* Host-FPU single-precision multiplication. */
static float hard_f32_mul(float a, float b)
{
    float product = a * b;

    return product;
}
1345
/* Host-FPU double-precision multiplication. */
static double hard_f64_mul(double a, double b)
{
    double product = a * b;

    return product;
}
1350
2dfabc86
EC
1351float32 QEMU_FLATTEN
1352float32_mul(float32 a, float32 b, float_status *s)
1353{
1354 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
b240c9c4 1355 f32_is_zon2, f32_addsubmul_post);
2dfabc86
EC
1356}
1357
1358float64 QEMU_FLATTEN
1359float64_mul(float64 a, float64 b, float_status *s)
1360{
1361 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
b240c9c4 1362 f64_is_zon2, f64_addsubmul_post);
2dfabc86
EC
1363}
1364
aca84527
RH
1365bfloat16 QEMU_FLATTEN
1366bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
8282310d 1367{
aca84527 1368 FloatParts64 pa, pb, *pr;
98e256fc
RH
1369
1370 bfloat16_unpack_canonical(&pa, a, status);
1371 bfloat16_unpack_canonical(&pb, b, status);
aca84527 1372 pr = parts_mul(&pa, &pb, status);
8282310d 1373
aca84527
RH
1374 return bfloat16_round_pack_canonical(pr, status);
1375}
1376
1377float128 QEMU_FLATTEN
1378float128_mul(float128 a, float128 b, float_status *status)
1379{
1380 FloatParts128 pa, pb, *pr;
1381
1382 float128_unpack_canonical(&pa, a, status);
1383 float128_unpack_canonical(&pb, b, status);
1384 pr = parts_mul(&pa, &pb, status);
1385
1386 return float128_round_pack_canonical(pr, status);
8282310d
LZ
1387}
1388
d446830a
AB
1389/*
1390 * Returns the result of multiplying the floating-point values `a' and
1391 * `b' then adding 'c', with no intermediate rounding step after the
1392 * multiplication. The operation is performed according to the
1393 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
1394 * The flags argument allows the caller to select negation of the
1395 * addend, the intermediate product, or the final result. (The
1396 * difference between this and having the caller do a separate
1397 * negation is that negating externally will flip the sign bit on
1398 * NaNs.)
1399 */
1400
/*
 * Fused multiply-add on decomposed operands: (a * b) + c.
 *
 * a, b:  the multiplicands.
 * c:     the addend.
 * flags: float_muladd_negate_c, float_muladd_negate_product,
 *        float_muladd_negate_result and float_muladd_halve_result are
 *        the bits examined here.
 * s:     status; float_flag_invalid is raised for inf * 0 and for
 *        inf - inf, and the rounding mode decides the sign of an exact
 *        zero result.
 *
 * Returns the decomposed result.  The 128-bit intermediate is collapsed
 * into a 64-bit fraction whose lsb is sticky; no rounding happens here.
 */
f8155c1d 1401static FloatParts64 muladd_floats(FloatParts64 a, FloatParts64 b, FloatParts64 c,
d446830a
AB
1402 int flags, float_status *s)
1403{
134eda00 1404 bool inf_zero, p_sign;
d446830a
AB
1405 bool sign_flip = flags & float_muladd_negate_result;
1406 FloatClass p_class;
1407 uint64_t hi, lo;
1408 int p_exp;
134eda00
RH
1409 int ab_mask, abc_mask;
1410
1411 ab_mask = float_cmask(a.cls) | float_cmask(b.cls);
1412 abc_mask = float_cmask(c.cls) | ab_mask;
1413 inf_zero = ab_mask == float_cmask_infzero;
d446830a
AB
1414
1415 /* It is implementation-defined whether the cases of (0,inf,qnan)
1416 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
1417 * they return if they do), so we have to hand this information
1418 * off to the target-specific pick-a-NaN routine.
1419 */
134eda00 1420 if (unlikely(abc_mask & float_cmask_anynan)) {
979582d0 1421 return *parts_pick_nan_muladd(&a, &b, &c, s, ab_mask, abc_mask);
d446830a
AB
1422 }
1423
/* No NaN operand: inf * 0 is invalid and yields the default NaN. */
1424 if (inf_zero) {
d82f3b2d 1425 float_raise(float_flag_invalid, s);
0fc07cad
RH
1426 parts_default_nan(&a, s);
1427 return a;
d446830a
AB
1428 }
1429
1430 if (flags & float_muladd_negate_c) {
1431 c.sign ^= 1;
1432 }
1433
1434 p_sign = a.sign ^ b.sign;
1435
1436 if (flags & float_muladd_negate_product) {
1437 p_sign ^= 1;
1438 }
1439
/* Classify the product a * b from the classes of its operands. */
134eda00 1440 if (ab_mask & float_cmask_inf) {
d446830a 1441 p_class = float_class_inf;
134eda00 1442 } else if (ab_mask & float_cmask_zero) {
d446830a
AB
1443 p_class = float_class_zero;
1444 } else {
1445 p_class = float_class_normal;
1446 }
1447
/*
 * Infinite addend: an infinite product of the opposite sign is inf - inf
 * (invalid, default NaN); otherwise the addend is the result.
 */
1448 if (c.cls == float_class_inf) {
1449 if (p_class == float_class_inf && p_sign != c.sign) {
d82f3b2d 1450 float_raise(float_flag_invalid, s);
0fc07cad 1451 parts_default_nan(&c, s);
d446830a 1452 } else {
9793c1e2 1453 c.sign ^= sign_flip;
d446830a 1454 }
0fc07cad 1455 return c;
d446830a
AB
1456 }
1457
1458 if (p_class == float_class_inf) {
1459 a.cls = float_class_inf;
1460 a.sign = p_sign ^ sign_flip;
1461 return a;
1462 }
1463
/*
 * Zero product: the result is the addend.  When the addend is zero too
 * and the signs disagree, the sign is negative only for round-down.
 */
1464 if (p_class == float_class_zero) {
1465 if (c.cls == float_class_zero) {
1466 if (p_sign != c.sign) {
1467 p_sign = s->float_rounding_mode == float_round_down;
1468 }
1469 c.sign = p_sign;
1470 } else if (flags & float_muladd_halve_result) {
1471 c.exp -= 1;
1472 }
1473 c.sign ^= sign_flip;
1474 return c;
1475 }
1476
1477 /* a & b should be normals now... */
1478 assert(a.cls == float_class_normal &&
1479 b.cls == float_class_normal);
1480
1481 p_exp = a.exp + b.exp;
1482
d446830a 1483 mul64To128(a.frac, b.frac, &hi, &lo);
d446830a 1484
e99c4373
RH
1485 /* Renormalize to the msb. */
1486 if (hi & DECOMPOSED_IMPLICIT_BIT) {
d446830a 1487 p_exp += 1;
e99c4373
RH
1488 } else {
1489 shortShift128Left(hi, lo, 1, &hi, &lo);
d446830a
AB
1490 }
1491
1492 /* + add/sub */
e99c4373 1493 if (c.cls != float_class_zero) {
d446830a
AB
1494 int exp_diff = p_exp - c.exp;
1495 if (p_sign == c.sign) {
1496 /* Addition */
1497 if (exp_diff <= 0) {
e99c4373 1498 shift64RightJamming(hi, -exp_diff, &hi);
d446830a 1499 p_exp = c.exp;
e99c4373
RH
1500 if (uadd64_overflow(hi, c.frac, &hi)) {
1501 shift64RightJamming(hi, 1, &hi);
1502 hi |= DECOMPOSED_IMPLICIT_BIT;
1503 p_exp += 1;
1504 }
d446830a 1505 } else {
e99c4373
RH
1506 uint64_t c_hi, c_lo, over;
1507 shift128RightJamming(c.frac, 0, exp_diff, &c_hi, &c_lo);
1508 add192(0, hi, lo, 0, c_hi, c_lo, &over, &hi, &lo);
1509 if (over) {
1510 shift64RightJamming(hi, 1, &hi);
1511 hi |= DECOMPOSED_IMPLICIT_BIT;
1512 p_exp += 1;
1513 }
d446830a 1514 }
d446830a
AB
1515 } else {
1516 /* Subtraction */
e99c4373 1517 uint64_t c_hi = c.frac, c_lo = 0;
d446830a
AB
1518
1519 if (exp_diff <= 0) {
1520 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1521 if (exp_diff == 0
1522 &&
1523 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1524 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1525 } else {
1526 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1527 p_sign ^= 1;
1528 p_exp = c.exp;
1529 }
1530 } else {
1531 shift128RightJamming(c_hi, c_lo,
1532 exp_diff,
1533 &c_hi, &c_lo);
1534 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1535 }
1536
1537 if (hi == 0 && lo == 0) {
1538 a.cls = float_class_zero;
1539 a.sign = s->float_rounding_mode == float_round_down;
1540 a.sign ^= sign_flip;
1541 return a;
1542 } else {
1543 int shift;
1544 if (hi != 0) {
1545 shift = clz64(hi);
1546 } else {
1547 shift = clz64(lo) + 64;
1548 }
1549 /* Renormalize after cancellation: shift the 128-bit
difference left by its leading-zero count so that its top
set bit lands in the msb of hi, and lower the exponent
by that same count. */
e99c4373
RH
1555 shift128Left(hi, lo, shift, &hi, &lo);
1556 p_exp -= shift;
d446830a
AB
1557 }
1558 }
1559 }
/* Fold any remaining low-word bits into the sticky lsb of hi. */
e99c4373 1560 hi |= (lo != 0);
d446830a
AB
1561
1562 if (flags & float_muladd_halve_result) {
1563 p_exp -= 1;
1564 }
1565
1566 /* finally prepare our result */
1567 a.cls = float_class_normal;
1568 a.sign = p_sign ^ sign_flip;
1569 a.exp = p_exp;
e99c4373 1570 a.frac = hi;
d446830a
AB
1571
1572 return a;
1573}
1574
97ff87c0 1575float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
d446830a
AB
1576 int flags, float_status *status)
1577{
98e256fc
RH
1578 FloatParts64 pa, pb, pc, pr;
1579
1580 float16_unpack_canonical(&pa, a, status);
1581 float16_unpack_canonical(&pb, b, status);
1582 float16_unpack_canonical(&pc, c, status);
1583 pr = muladd_floats(pa, pb, pc, flags, status);
d446830a 1584
e293e927 1585 return float16_round_pack_canonical(&pr, status);
d446830a
AB
1586}
1587
ccf770ba
EC
1588static float32 QEMU_SOFTFLOAT_ATTR
1589soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1590 float_status *status)
d446830a 1591{
98e256fc
RH
1592 FloatParts64 pa, pb, pc, pr;
1593
1594 float32_unpack_canonical(&pa, a, status);
1595 float32_unpack_canonical(&pb, b, status);
1596 float32_unpack_canonical(&pc, c, status);
1597 pr = muladd_floats(pa, pb, pc, flags, status);
d446830a 1598
e293e927 1599 return float32_round_pack_canonical(&pr, status);
d446830a
AB
1600}
1601
ccf770ba
EC
1602static float64 QEMU_SOFTFLOAT_ATTR
1603soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1604 float_status *status)
d446830a 1605{
98e256fc
RH
1606 FloatParts64 pa, pb, pc, pr;
1607
1608 float64_unpack_canonical(&pa, a, status);
1609 float64_unpack_canonical(&pb, b, status);
1610 float64_unpack_canonical(&pc, c, status);
1611 pr = muladd_floats(pa, pb, pc, flags, status);
d446830a 1612
e293e927 1613 return float64_round_pack_canonical(&pr, status);
d446830a
AB
1614}
1615
f6b3b108
EC
/*
 * When true, float32_muladd() and float64_muladd() bypass the host fma
 * path and fall back to their softfloat implementation.
 */
1616static bool force_soft_fma;
1617
ccf770ba
EC
/*
 * float32 fused multiply-add: (xa * xb) + xc as modified by @flags.
 *
 * The host fmaf() is used only when can_use_fpu() allows it, the result
 * is not to be halved, the input-flushed operands pass f32_is_zon3() and
 * force_soft_fma is clear.  Every other case, and any host result whose
 * magnitude is at or below FLT_MIN, is computed by soft_f32_muladd().
 */
1618float32 QEMU_FLATTEN
1619float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1620{
1621 union_float32 ua, ub, uc, ur;
1622
1623 ua.s = xa;
1624 ub.s = xb;
1625 uc.s = xc;
1626
1627 if (unlikely(!can_use_fpu(s))) {
1628 goto soft;
1629 }
1630 if (unlikely(flags & float_muladd_halve_result)) {
1631 goto soft;
1632 }
1633
/* From here on a soft fallback sees the flushed operands. */
1634 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1635 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1636 goto soft;
1637 }
f6b3b108
EC
1638
1639 if (unlikely(force_soft_fma)) {
1640 goto soft;
1641 }
1642
ccf770ba
EC
1643 /*
1644 * When (a || b) == 0, there's no need to check for under/over flow,
1645 * since we know the addend is (normal || 0) and the product is 0.
1646 */
1647 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1648 union_float32 up;
1649 bool prod_sign;
1650
1651 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1652 prod_sign ^= !!(flags & float_muladd_negate_product);
1653 up.s = float32_set_sign(float32_zero, prod_sign);
1654
1655 if (flags & float_muladd_negate_c) {
1656 uc.h = -uc.h;
1657 }
1658 ur.h = up.h + uc.h;
1659 } else {
/* Saved so the soft fallback receives the operands before negation. */
896f51fb
KC
1660 union_float32 ua_orig = ua;
1661 union_float32 uc_orig = uc;
1662
ccf770ba
EC
1663 if (flags & float_muladd_negate_product) {
1664 ua.h = -ua.h;
1665 }
1666 if (flags & float_muladd_negate_c) {
1667 uc.h = -uc.h;
1668 }
1669
1670 ur.h = fmaf(ua.h, ub.h, uc.h);
1671
1672 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 1673 float_raise(float_flag_overflow, s);
ccf770ba 1674 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
/* Undo the negations: soft_f32_muladd() applies @flags itself. */
896f51fb
KC
1675 ua = ua_orig;
1676 uc = uc_orig;
ccf770ba
EC
1677 goto soft;
1678 }
1679 }
1680 if (flags & float_muladd_negate_result) {
1681 return float32_chs(ur.s);
1682 }
1683 return ur.s;
1684
1685 soft:
1686 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1687}
1688
1689float64 QEMU_FLATTEN
1690float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1691{
1692 union_float64 ua, ub, uc, ur;
1693
1694 ua.s = xa;
1695 ub.s = xb;
1696 uc.s = xc;
1697
1698 if (unlikely(!can_use_fpu(s))) {
1699 goto soft;
1700 }
1701 if (unlikely(flags & float_muladd_halve_result)) {
1702 goto soft;
1703 }
1704
1705 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1706 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1707 goto soft;
1708 }
f6b3b108
EC
1709
1710 if (unlikely(force_soft_fma)) {
1711 goto soft;
1712 }
1713
ccf770ba
EC
1714 /*
1715 * When (a || b) == 0, there's no need to check for under/over flow,
1716 * since we know the addend is (normal || 0) and the product is 0.
1717 */
1718 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1719 union_float64 up;
1720 bool prod_sign;
1721
1722 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1723 prod_sign ^= !!(flags & float_muladd_negate_product);
1724 up.s = float64_set_sign(float64_zero, prod_sign);
1725
1726 if (flags & float_muladd_negate_c) {
1727 uc.h = -uc.h;
1728 }
1729 ur.h = up.h + uc.h;
1730 } else {
896f51fb
KC
1731 union_float64 ua_orig = ua;
1732 union_float64 uc_orig = uc;
1733
ccf770ba
EC
1734 if (flags & float_muladd_negate_product) {
1735 ua.h = -ua.h;
1736 }
1737 if (flags & float_muladd_negate_c) {
1738 uc.h = -uc.h;
1739 }
1740
1741 ur.h = fma(ua.h, ub.h, uc.h);
1742
1743 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 1744 float_raise(float_flag_overflow, s);
ccf770ba 1745 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
1746 ua = ua_orig;
1747 uc = uc_orig;
ccf770ba
EC
1748 goto soft;
1749 }
1750 }
1751 if (flags & float_muladd_negate_result) {
1752 return float64_chs(ur.s);
1753 }
1754 return ur.s;
1755
1756 soft:
1757 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1758}
1759
8282310d
LZ
1760/*
1761 * Returns the result of multiplying the bfloat16 values `a'
1762 * and `b' then adding 'c', with no intermediate rounding step after the
1763 * multiplication.
1764 */
1765
1766bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1767 int flags, float_status *status)
1768{
98e256fc
RH
1769 FloatParts64 pa, pb, pc, pr;
1770
1771 bfloat16_unpack_canonical(&pa, a, status);
1772 bfloat16_unpack_canonical(&pb, b, status);
1773 bfloat16_unpack_canonical(&pc, c, status);
1774 pr = muladd_floats(pa, pb, pc, flags, status);
8282310d 1775
e293e927 1776 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
1777}
1778
cf07323d
AB
1779/*
1780 * Returns the result of dividing the floating-point value `a' by the
1781 * corresponding value `b'. The operation is performed according to
1782 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1783 */
1784
/*
 * Divide decomposed operand a by b.
 *
 * Two normal operands produce a quotient fraction whose lsb is set when
 * the division leaves a remainder.  Otherwise the cases are tried in
 * order: any NaN (parts_pick_nan), 0/0 or inf/inf (invalid, default NaN),
 * inf or 0 dividend (returned with the combined sign), zero divisor
 * (divbyzero, infinity), infinite divisor (zero).
 *
 * Returns the decomposed result; flags are raised on @s as listed above.
 */
f8155c1d 1785static FloatParts64 div_floats(FloatParts64 a, FloatParts64 b, float_status *s)
cf07323d
AB
1786{
1787 bool sign = a.sign ^ b.sign;
1788
1789 if (a.cls == float_class_normal && b.cls == float_class_normal) {
5dfbc9e4 1790 uint64_t n0, n1, q, r;
cf07323d 1791 int exp = a.exp - b.exp;
5dfbc9e4
RH
1792
1793 /*
1794 * We want a 2*N / N-bit division to produce exactly an N-bit
1795 * result, so that we do not lose any precision and so that we
1796 * do not have to renormalize afterward. If A.frac < B.frac,
1797 * then division would produce an (N-1)-bit result; shift A left
1798 * by one to produce an N-bit result, and decrement the
1799 * exponent to match.
1800 *
1801 * The udiv_qrnnd algorithm that we're using requires normalization,
e99c4373 1802 * i.e. the msb of the denominator must be set, which is already true.
5dfbc9e4 1803 */
cf07323d
AB
1804 if (a.frac < b.frac) {
1805 exp -= 1;
5dfbc9e4 1806 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
e99c4373
RH
1807 } else {
1808 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT, &n1, &n0);
cf07323d 1809 }
e99c4373 1810 q = udiv_qrnnd(&r, n1, n0, b.frac);
5dfbc9e4 1811
e99c4373 1812 /* Set lsb if there is a remainder, to set inexact. */
5dfbc9e4 1813 a.frac = q | (r != 0);
cf07323d
AB
1814 a.sign = sign;
1815 a.exp = exp;
1816 return a;
1817 }
1818 /* handle all the NaN cases */
1819 if (is_nan(a.cls) || is_nan(b.cls)) {
22c355f4 1820 return *parts_pick_nan(&a, &b, s);
cf07323d
AB
1821 }
1822 /* 0/0 or Inf/Inf */
1823 if (a.cls == b.cls
1824 &&
1825 (a.cls == float_class_inf || a.cls == float_class_zero)) {
d82f3b2d 1826 float_raise(float_flag_invalid, s);
0fc07cad
RH
1827 parts_default_nan(&a, s);
1828 return a;
cf07323d 1829 }
9cb4e398
AB
1830 /* Inf / x or 0 / x */
1831 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1832 a.sign = sign;
1833 return a;
1834 }
cf07323d
AB
1835 /* Div 0 => Inf */
1836 if (b.cls == float_class_zero) {
d82f3b2d 1837 float_raise(float_flag_divbyzero, s);
cf07323d
AB
1838 a.cls = float_class_inf;
1839 a.sign = sign;
1840 return a;
1841 }
cf07323d
AB
1842 /* Div by Inf */
1843 if (b.cls == float_class_inf) {
1844 a.cls = float_class_zero;
1845 a.sign = sign;
1846 return a;
1847 }
/* Every class combination has been handled above. */
1848 g_assert_not_reached();
1849}
1850
1851float16 float16_div(float16 a, float16 b, float_status *status)
1852{
98e256fc
RH
1853 FloatParts64 pa, pb, pr;
1854
1855 float16_unpack_canonical(&pa, a, status);
1856 float16_unpack_canonical(&pb, b, status);
1857 pr = div_floats(pa, pb, status);
cf07323d 1858
e293e927 1859 return float16_round_pack_canonical(&pr, status);
cf07323d
AB
1860}
1861
4a629561
EC
1862static float32 QEMU_SOFTFLOAT_ATTR
1863soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d 1864{
98e256fc
RH
1865 FloatParts64 pa, pb, pr;
1866
1867 float32_unpack_canonical(&pa, a, status);
1868 float32_unpack_canonical(&pb, b, status);
1869 pr = div_floats(pa, pb, status);
cf07323d 1870
e293e927 1871 return float32_round_pack_canonical(&pr, status);
cf07323d
AB
1872}
1873
4a629561
EC
1874static float64 QEMU_SOFTFLOAT_ATTR
1875soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d 1876{
98e256fc
RH
1877 FloatParts64 pa, pb, pr;
1878
1879 float64_unpack_canonical(&pa, a, status);
1880 float64_unpack_canonical(&pb, b, status);
1881 pr = div_floats(pa, pb, status);
cf07323d 1882
e293e927 1883 return float64_round_pack_canonical(&pr, status);
cf07323d
AB
1884}
1885
4a629561
EC
/* Host-FPU single-precision division. */
static float hard_f32_div(float a, float b)
{
    float quotient = a / b;

    return quotient;
}
1890
/* Host-FPU double-precision division. */
static double hard_f64_div(double a, double b)
{
    double quotient = a / b;

    return quotient;
}
1895
1896static bool f32_div_pre(union_float32 a, union_float32 b)
1897{
1898 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1899 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1900 fpclassify(b.h) == FP_NORMAL;
1901 }
1902 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1903}
1904
1905static bool f64_div_pre(union_float64 a, union_float64 b)
1906{
1907 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1908 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1909 fpclassify(b.h) == FP_NORMAL;
1910 }
1911 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1912}
1913
1914static bool f32_div_post(union_float32 a, union_float32 b)
1915{
1916 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1917 return fpclassify(a.h) != FP_ZERO;
1918 }
1919 return !float32_is_zero(a.s);
1920}
1921
1922static bool f64_div_post(union_float64 a, union_float64 b)
1923{
1924 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1925 return fpclassify(a.h) != FP_ZERO;
1926 }
1927 return !float64_is_zero(a.s);
1928}
1929
1930float32 QEMU_FLATTEN
1931float32_div(float32 a, float32 b, float_status *s)
1932{
1933 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
b240c9c4 1934 f32_div_pre, f32_div_post);
4a629561
EC
1935}
1936
1937float64 QEMU_FLATTEN
1938float64_div(float64 a, float64 b, float_status *s)
1939{
1940 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
b240c9c4 1941 f64_div_pre, f64_div_post);
4a629561
EC
1942}
1943
8282310d
LZ
1944/*
1945 * Returns the result of dividing the bfloat16
1946 * value `a' by the corresponding value `b'.
1947 */
1948
1949bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
1950{
98e256fc
RH
1951 FloatParts64 pa, pb, pr;
1952
1953 bfloat16_unpack_canonical(&pa, a, status);
1954 bfloat16_unpack_canonical(&pb, b, status);
1955 pr = div_floats(pa, pb, status);
8282310d 1956
e293e927 1957 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
1958}
1959
6fed16b2
AB
1960/*
1961 * Float to Float conversions
1962 *
1963 * Returns the result of converting one float format to another. The
1964 * conversion is performed according to the IEC/IEEE Standard for
1965 * Binary Floating-Point Arithmetic.
1966 *
1967 * The float_to_float helper only needs to take care of raising
1968 * invalid exceptions and handling the conversion on NaNs.
1969 */
1970
f8155c1d 1971static FloatParts64 float_to_float(FloatParts64 a, const FloatFmt *dstf,
6fed16b2
AB
1972 float_status *s)
1973{
1974 if (dstf->arm_althp) {
1975 switch (a.cls) {
1976 case float_class_qnan:
1977 case float_class_snan:
1978 /* There is no NaN in the destination format. Raise Invalid
1979 * and return a zero with the sign of the input NaN.
1980 */
d82f3b2d 1981 float_raise(float_flag_invalid, s);
6fed16b2
AB
1982 a.cls = float_class_zero;
1983 a.frac = 0;
1984 a.exp = 0;
1985 break;
1986
1987 case float_class_inf:
1988 /* There is no Inf in the destination format. Raise Invalid
1989 * and return the maximum normal with the correct sign.
1990 */
d82f3b2d 1991 float_raise(float_flag_invalid, s);
6fed16b2
AB
1992 a.cls = float_class_normal;
1993 a.exp = dstf->exp_max;
1994 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1995 break;
1996
1997 default:
1998 break;
1999 }
2000 } else if (is_nan(a.cls)) {
7c45bad8 2001 parts_return_nan(&a, s);
6fed16b2
AB
2002 }
2003 return a;
2004}
2005
2006float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2007{
2008 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
98e256fc
RH
2009 FloatParts64 pa, pr;
2010
2011 float16a_unpack_canonical(&pa, a, s, fmt16);
2012 pr = float_to_float(pa, &float32_params, s);
e293e927 2013 return float32_round_pack_canonical(&pr, s);
6fed16b2
AB
2014}
2015
2016float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2017{
2018 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
98e256fc
RH
2019 FloatParts64 pa, pr;
2020
2021 float16a_unpack_canonical(&pa, a, s, fmt16);
2022 pr = float_to_float(pa, &float64_params, s);
e293e927 2023 return float64_round_pack_canonical(&pr, s);
6fed16b2
AB
2024}
2025
2026float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2027{
2028 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
98e256fc
RH
2029 FloatParts64 pa, pr;
2030
2031 float32_unpack_canonical(&pa, a, s);
2032 pr = float_to_float(pa, fmt16, s);
e293e927 2033 return float16a_round_pack_canonical(&pr, s, fmt16);
6fed16b2
AB
2034}
2035
21381dcf
MK
2036static float64 QEMU_SOFTFLOAT_ATTR
2037soft_float32_to_float64(float32 a, float_status *s)
6fed16b2 2038{
98e256fc
RH
2039 FloatParts64 pa, pr;
2040
2041 float32_unpack_canonical(&pa, a, s);
2042 pr = float_to_float(pa, &float64_params, s);
e293e927 2043 return float64_round_pack_canonical(&pr, s);
6fed16b2
AB
2044}
2045
21381dcf
MK
2046float64 float32_to_float64(float32 a, float_status *s)
2047{
2048 if (likely(float32_is_normal(a))) {
2049 /* Widening conversion can never produce inexact results. */
2050 union_float32 uf;
2051 union_float64 ud;
2052 uf.s = a;
2053 ud.h = uf.h;
2054 return ud.s;
2055 } else if (float32_is_zero(a)) {
2056 return float64_set_sign(float64_zero, float32_is_neg(a));
2057 } else {
2058 return soft_float32_to_float64(a, s);
2059 }
2060}
2061
6fed16b2
AB
2062float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2063{
2064 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
98e256fc
RH
2065 FloatParts64 pa, pr;
2066
2067 float64_unpack_canonical(&pa, a, s);
2068 pr = float_to_float(pa, fmt16, s);
e293e927 2069 return float16a_round_pack_canonical(&pr, s, fmt16);
6fed16b2
AB
2070}
2071
2072float32 float64_to_float32(float64 a, float_status *s)
2073{
98e256fc
RH
2074 FloatParts64 pa, pr;
2075
2076 float64_unpack_canonical(&pa, a, s);
2077 pr = float_to_float(pa, &float32_params, s);
e293e927 2078 return float32_round_pack_canonical(&pr, s);
6fed16b2
AB
2079}
2080
34f0c0a9
LZ
2081float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2082{
98e256fc
RH
2083 FloatParts64 pa, pr;
2084
2085 bfloat16_unpack_canonical(&pa, a, s);
2086 pr = float_to_float(pa, &float32_params, s);
e293e927 2087 return float32_round_pack_canonical(&pr, s);
34f0c0a9
LZ
2088}
2089
2090float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2091{
98e256fc
RH
2092 FloatParts64 pa, pr;
2093
2094 bfloat16_unpack_canonical(&pa, a, s);
2095 pr = float_to_float(pa, &float64_params, s);
e293e927 2096 return float64_round_pack_canonical(&pr, s);
34f0c0a9
LZ
2097}
2098
2099bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2100{
98e256fc
RH
2101 FloatParts64 pa, pr;
2102
2103 float32_unpack_canonical(&pa, a, s);
2104 pr = float_to_float(pa, &bfloat16_params, s);
e293e927 2105 return bfloat16_round_pack_canonical(&pr, s);
34f0c0a9
LZ
2106}
2107
2108bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2109{
98e256fc
RH
2110 FloatParts64 pa, pr;
2111
2112 float64_unpack_canonical(&pa, a, s);
2113 pr = float_to_float(pa, &bfloat16_params, s);
e293e927 2114 return bfloat16_round_pack_canonical(&pr, s);
34f0c0a9
LZ
2115}
2116
dbe4d53a
AB
2117/*
2118 * Rounds the floating-point value `a' to an integer, and returns the
2119 * result as a floating-point value. The operation is performed
2120 * according to the IEC/IEEE Standard for Binary Floating-Point
2121 * Arithmetic.
2122 */
2123
f8155c1d 2124static FloatParts64 round_to_int(FloatParts64 a, FloatRoundMode rmode,
2f6c74be 2125 int scale, float_status *s)
dbe4d53a 2126{
2f6c74be
RH
2127 switch (a.cls) {
2128 case float_class_qnan:
2129 case float_class_snan:
7c45bad8
RH
2130 parts_return_nan(&a, s);
2131 break;
dbe4d53a 2132
dbe4d53a
AB
2133 case float_class_zero:
2134 case float_class_inf:
dbe4d53a
AB
2135 /* already "integral" */
2136 break;
2f6c74be 2137
dbe4d53a 2138 case float_class_normal:
2f6c74be
RH
2139 scale = MIN(MAX(scale, -0x10000), 0x10000);
2140 a.exp += scale;
2141
dbe4d53a
AB
2142 if (a.exp >= DECOMPOSED_BINARY_POINT) {
2143 /* already integral */
2144 break;
2145 }
2146 if (a.exp < 0) {
2147 bool one;
2148 /* all fractional */
d82f3b2d 2149 float_raise(float_flag_inexact, s);
2f6c74be 2150 switch (rmode) {
dbe4d53a
AB
2151 case float_round_nearest_even:
2152 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
2153 break;
2154 case float_round_ties_away:
2155 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
2156 break;
2157 case float_round_to_zero:
2158 one = false;
2159 break;
2160 case float_round_up:
2161 one = !a.sign;
2162 break;
2163 case float_round_down:
2164 one = a.sign;
2165 break;
5d64abb3
RH
2166 case float_round_to_odd:
2167 one = true;
2168 break;
dbe4d53a
AB
2169 default:
2170 g_assert_not_reached();
2171 }
2172
2173 if (one) {
2174 a.frac = DECOMPOSED_IMPLICIT_BIT;
2175 a.exp = 0;
2176 } else {
2177 a.cls = float_class_zero;
2178 }
2179 } else {
2180 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
2181 uint64_t frac_lsbm1 = frac_lsb >> 1;
2182 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
2183 uint64_t rnd_mask = rnd_even_mask >> 1;
2184 uint64_t inc;
2185
2f6c74be 2186 switch (rmode) {
dbe4d53a
AB
2187 case float_round_nearest_even:
2188 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
2189 break;
2190 case float_round_ties_away:
2191 inc = frac_lsbm1;
2192 break;
2193 case float_round_to_zero:
2194 inc = 0;
2195 break;
2196 case float_round_up:
2197 inc = a.sign ? 0 : rnd_mask;
2198 break;
2199 case float_round_down:
2200 inc = a.sign ? rnd_mask : 0;
2201 break;
5d64abb3
RH
2202 case float_round_to_odd:
2203 inc = a.frac & frac_lsb ? 0 : rnd_mask;
2204 break;
dbe4d53a
AB
2205 default:
2206 g_assert_not_reached();
2207 }
2208
2209 if (a.frac & rnd_mask) {
d82f3b2d 2210 float_raise(float_flag_inexact, s);
e99c4373 2211 if (uadd64_overflow(a.frac, inc, &a.frac)) {
dbe4d53a 2212 a.frac >>= 1;
e99c4373 2213 a.frac |= DECOMPOSED_IMPLICIT_BIT;
dbe4d53a
AB
2214 a.exp++;
2215 }
e99c4373 2216 a.frac &= ~rnd_mask;
dbe4d53a
AB
2217 }
2218 }
2219 break;
2220 default:
2221 g_assert_not_reached();
2222 }
2223 return a;
2224}
2225
2226float16 float16_round_to_int(float16 a, float_status *s)
2227{
98e256fc
RH
2228 FloatParts64 pa, pr;
2229
2230 float16_unpack_canonical(&pa, a, s);
2231 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
e293e927 2232 return float16_round_pack_canonical(&pr, s);
dbe4d53a
AB
2233}
2234
2235float32 float32_round_to_int(float32 a, float_status *s)
2236{
98e256fc
RH
2237 FloatParts64 pa, pr;
2238
2239 float32_unpack_canonical(&pa, a, s);
2240 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
e293e927 2241 return float32_round_pack_canonical(&pr, s);
dbe4d53a
AB
2242}
2243
2244float64 float64_round_to_int(float64 a, float_status *s)
2245{
98e256fc
RH
2246 FloatParts64 pa, pr;
2247
2248 float64_unpack_canonical(&pa, a, s);
2249 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
e293e927 2250 return float64_round_pack_canonical(&pr, s);
dbe4d53a
AB
2251}
2252
34f0c0a9
LZ
2253/*
2254 * Rounds the bfloat16 value `a' to an integer, and returns the
2255 * result as a bfloat16 value.
2256 */
2257
2258bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2259{
98e256fc
RH
2260 FloatParts64 pa, pr;
2261
2262 bfloat16_unpack_canonical(&pa, a, s);
2263 pr = round_to_int(pa, s->float_rounding_mode, 0, s);
e293e927 2264 return bfloat16_round_pack_canonical(&pr, s);
34f0c0a9
LZ
2265}
2266
ab52f973
AB
2267/*
2268 * Returns the result of converting the floating-point value `a' to
2269 * the two's complement integer format. The conversion is performed
2270 * according to the IEC/IEEE Standard for Binary Floating-Point
2271 * Arithmetic---which means in particular that the conversion is
2272 * rounded according to the current rounding mode. If `a' is a NaN,
2273 * the largest positive integer is returned. Otherwise, if the
2274 * conversion overflows, the largest integer with the same sign as `a'
2275 * is returned.
2276*/
2277
f8155c1d 2278static int64_t round_to_int_and_pack(FloatParts64 in, FloatRoundMode rmode,
3dede407 2279 int scale, int64_t min, int64_t max,
ab52f973
AB
2280 float_status *s)
2281{
2282 uint64_t r;
2283 int orig_flags = get_float_exception_flags(s);
f8155c1d 2284 FloatParts64 p = round_to_int(in, rmode, scale, s);
ab52f973
AB
2285
2286 switch (p.cls) {
2287 case float_class_snan:
2288 case float_class_qnan:
801bc563 2289 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2290 return max;
2291 case float_class_inf:
801bc563 2292 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2293 return p.sign ? min : max;
2294 case float_class_zero:
2295 return 0;
2296 case float_class_normal:
e99c4373 2297 if (p.exp <= DECOMPOSED_BINARY_POINT) {
ab52f973 2298 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
ab52f973
AB
2299 } else {
2300 r = UINT64_MAX;
2301 }
2302 if (p.sign) {
33358375 2303 if (r <= -(uint64_t) min) {
ab52f973
AB
2304 return -r;
2305 } else {
2306 s->float_exception_flags = orig_flags | float_flag_invalid;
2307 return min;
2308 }
2309 } else {
33358375 2310 if (r <= max) {
ab52f973
AB
2311 return r;
2312 } else {
2313 s->float_exception_flags = orig_flags | float_flag_invalid;
2314 return max;
2315 }
2316 }
2317 default:
2318 g_assert_not_reached();
2319 }
2320}
2321
0d93d8ec
FC
2322int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2323 float_status *s)
2324{
98e256fc
RH
2325 FloatParts64 p;
2326
2327 float16_unpack_canonical(&p, a, s);
2328 return round_to_int_and_pack(p, rmode, scale, INT8_MIN, INT8_MAX, s);
0d93d8ec
FC
2329}
2330
3dede407 2331int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2332 float_status *s)
2333{
98e256fc
RH
2334 FloatParts64 p;
2335
2336 float16_unpack_canonical(&p, a, s);
2337 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2338}
2339
3dede407 2340int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2341 float_status *s)
2342{
98e256fc
RH
2343 FloatParts64 p;
2344
2345 float16_unpack_canonical(&p, a, s);
2346 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2347}
2348
3dede407 2349int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2350 float_status *s)
2351{
98e256fc
RH
2352 FloatParts64 p;
2353
2354 float16_unpack_canonical(&p, a, s);
2355 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2356}
2357
3dede407 2358int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2359 float_status *s)
2360{
98e256fc
RH
2361 FloatParts64 p;
2362
2363 float32_unpack_canonical(&p, a, s);
2364 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2365}
2366
3dede407 2367int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2368 float_status *s)
2369{
98e256fc
RH
2370 FloatParts64 p;
2371
2372 float32_unpack_canonical(&p, a, s);
2373 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2374}
2375
3dede407 2376int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2377 float_status *s)
2378{
98e256fc
RH
2379 FloatParts64 p;
2380
2381 float32_unpack_canonical(&p, a, s);
2382 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2383}
2384
3dede407 2385int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2386 float_status *s)
2387{
98e256fc
RH
2388 FloatParts64 p;
2389
2390 float64_unpack_canonical(&p, a, s);
2391 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
2f6c74be
RH
2392}
2393
3dede407 2394int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2395 float_status *s)
2396{
98e256fc
RH
2397 FloatParts64 p;
2398
2399 float64_unpack_canonical(&p, a, s);
2400 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
2f6c74be
RH
2401}
2402
3dede407 2403int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2404 float_status *s)
2405{
98e256fc
RH
2406 FloatParts64 p;
2407
2408 float64_unpack_canonical(&p, a, s);
2409 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
2f6c74be
RH
2410}
2411
0d93d8ec
FC
2412int8_t float16_to_int8(float16 a, float_status *s)
2413{
2414 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2415}
2416
2f6c74be
RH
2417int16_t float16_to_int16(float16 a, float_status *s)
2418{
2419 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2420}
2421
2422int32_t float16_to_int32(float16 a, float_status *s)
2423{
2424 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2425}
2426
2427int64_t float16_to_int64(float16 a, float_status *s)
2428{
2429 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2430}
2431
2432int16_t float32_to_int16(float32 a, float_status *s)
2433{
2434 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2435}
2436
2437int32_t float32_to_int32(float32 a, float_status *s)
2438{
2439 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2440}
2441
2442int64_t float32_to_int64(float32 a, float_status *s)
2443{
2444 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2445}
2446
2447int16_t float64_to_int16(float64 a, float_status *s)
2448{
2449 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2450}
2451
2452int32_t float64_to_int32(float64 a, float_status *s)
2453{
2454 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2455}
2456
2457int64_t float64_to_int64(float64 a, float_status *s)
2458{
2459 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2460}
2461
2462int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2463{
2464 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2465}
2466
2467int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2468{
2469 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2470}
2471
2472int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2473{
2474 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2475}
2476
2f6c74be
RH
2477int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2478{
2479 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2480}
ab52f973 2481
2f6c74be
RH
2482int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2483{
2484 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2485}
2486
2487int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2488{
2489 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2490}
2491
2492int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2493{
2494 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2495}
ab52f973 2496
2f6c74be
RH
2497int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2498{
2499 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2500}
ab52f973 2501
2f6c74be
RH
2502int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2503{
2504 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2505}
ab52f973 2506
34f0c0a9
LZ
2507/*
2508 * Returns the result of converting the floating-point value `a' to
2509 * the two's complement integer format.
2510 */
2511
2512int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2513 float_status *s)
2514{
98e256fc
RH
2515 FloatParts64 p;
2516
2517 bfloat16_unpack_canonical(&p, a, s);
2518 return round_to_int_and_pack(p, rmode, scale, INT16_MIN, INT16_MAX, s);
34f0c0a9
LZ
2519}
2520
2521int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2522 float_status *s)
2523{
98e256fc
RH
2524 FloatParts64 p;
2525
2526 bfloat16_unpack_canonical(&p, a, s);
2527 return round_to_int_and_pack(p, rmode, scale, INT32_MIN, INT32_MAX, s);
34f0c0a9
LZ
2528}
2529
2530int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2531 float_status *s)
2532{
98e256fc
RH
2533 FloatParts64 p;
2534
2535 bfloat16_unpack_canonical(&p, a, s);
2536 return round_to_int_and_pack(p, rmode, scale, INT64_MIN, INT64_MAX, s);
34f0c0a9
LZ
2537}
2538
2539int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2540{
2541 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2542}
2543
2544int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2545{
2546 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2547}
2548
2549int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2550{
2551 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2552}
2553
2554int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2555{
2556 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2557}
2558
2559int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2560{
2561 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2562}
2563
2564int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2565{
2566 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2567}
2568
ab52f973
AB
2569/*
2570 * Returns the result of converting the floating-point value `a' to
2571 * the unsigned integer format. The conversion is performed according
2572 * to the IEC/IEEE Standard for Binary Floating-Point
2573 * Arithmetic---which means in particular that the conversion is
2574 * rounded according to the current rounding mode. If `a' is a NaN,
2575 * the largest unsigned integer is returned. Otherwise, if the
2576 * conversion overflows, the largest unsigned integer is returned. If
2577 * the 'a' is negative, the result is rounded and zero is returned;
2578 * values that do not round to zero will raise the inexact exception
2579 * flag.
2580 */
2581
f8155c1d 2582static uint64_t round_to_uint_and_pack(FloatParts64 in, FloatRoundMode rmode,
3dede407
RH
2583 int scale, uint64_t max,
2584 float_status *s)
ab52f973
AB
2585{
2586 int orig_flags = get_float_exception_flags(s);
f8155c1d 2587 FloatParts64 p = round_to_int(in, rmode, scale, s);
2f6c74be 2588 uint64_t r;
ab52f973
AB
2589
2590 switch (p.cls) {
2591 case float_class_snan:
2592 case float_class_qnan:
2593 s->float_exception_flags = orig_flags | float_flag_invalid;
2594 return max;
2595 case float_class_inf:
801bc563 2596 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2597 return p.sign ? 0 : max;
2598 case float_class_zero:
2599 return 0;
2600 case float_class_normal:
ab52f973
AB
2601 if (p.sign) {
2602 s->float_exception_flags = orig_flags | float_flag_invalid;
2603 return 0;
2604 }
2605
e99c4373 2606 if (p.exp <= DECOMPOSED_BINARY_POINT) {
ab52f973 2607 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
ab52f973
AB
2608 } else {
2609 s->float_exception_flags = orig_flags | float_flag_invalid;
2610 return max;
2611 }
2612
2613 /* For uint64 this will never trip, but if p.exp is too large
2614 * to shift a decomposed fraction we shall have exited via the
2615 * 3rd leg above.
2616 */
2617 if (r > max) {
2618 s->float_exception_flags = orig_flags | float_flag_invalid;
2619 return max;
ab52f973 2620 }
2f6c74be 2621 return r;
ab52f973
AB
2622 default:
2623 g_assert_not_reached();
2624 }
2625}
2626
0d93d8ec
FC
2627uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2628 float_status *s)
2629{
98e256fc
RH
2630 FloatParts64 p;
2631
2632 float16_unpack_canonical(&p, a, s);
2633 return round_to_uint_and_pack(p, rmode, scale, UINT8_MAX, s);
0d93d8ec
FC
2634}
2635
3dede407 2636uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2637 float_status *s)
2638{
98e256fc
RH
2639 FloatParts64 p;
2640
2641 float16_unpack_canonical(&p, a, s);
2642 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2643}
2644
3dede407 2645uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2646 float_status *s)
2647{
98e256fc
RH
2648 FloatParts64 p;
2649
2650 float16_unpack_canonical(&p, a, s);
2651 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2652}
2653
3dede407 2654uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2655 float_status *s)
2656{
98e256fc
RH
2657 FloatParts64 p;
2658
2659 float16_unpack_canonical(&p, a, s);
2660 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2661}
2662
3dede407 2663uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2664 float_status *s)
2665{
98e256fc
RH
2666 FloatParts64 p;
2667
2668 float32_unpack_canonical(&p, a, s);
2669 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2670}
2671
3dede407 2672uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2673 float_status *s)
2674{
98e256fc
RH
2675 FloatParts64 p;
2676
2677 float32_unpack_canonical(&p, a, s);
2678 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2679}
2680
3dede407 2681uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2682 float_status *s)
2683{
98e256fc
RH
2684 FloatParts64 p;
2685
2686 float32_unpack_canonical(&p, a, s);
2687 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2688}
2689
3dede407 2690uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2691 float_status *s)
2692{
98e256fc
RH
2693 FloatParts64 p;
2694
2695 float64_unpack_canonical(&p, a, s);
2696 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
2f6c74be
RH
2697}
2698
3dede407 2699uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2700 float_status *s)
2701{
98e256fc
RH
2702 FloatParts64 p;
2703
2704 float64_unpack_canonical(&p, a, s);
2705 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
2f6c74be
RH
2706}
2707
3dede407 2708uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2709 float_status *s)
2710{
98e256fc
RH
2711 FloatParts64 p;
2712
2713 float64_unpack_canonical(&p, a, s);
2714 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
2f6c74be
RH
2715}
2716
0d93d8ec
FC
2717uint8_t float16_to_uint8(float16 a, float_status *s)
2718{
2719 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2720}
2721
2f6c74be
RH
2722uint16_t float16_to_uint16(float16 a, float_status *s)
2723{
2724 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2725}
2726
2727uint32_t float16_to_uint32(float16 a, float_status *s)
2728{
2729 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2730}
2731
2732uint64_t float16_to_uint64(float16 a, float_status *s)
2733{
2734 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2735}
2736
2737uint16_t float32_to_uint16(float32 a, float_status *s)
2738{
2739 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2740}
2741
2742uint32_t float32_to_uint32(float32 a, float_status *s)
2743{
2744 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2745}
2746
2747uint64_t float32_to_uint64(float32 a, float_status *s)
2748{
2749 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2750}
2751
2752uint16_t float64_to_uint16(float64 a, float_status *s)
2753{
2754 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2755}
2756
2757uint32_t float64_to_uint32(float64 a, float_status *s)
2758{
2759 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2760}
2761
2762uint64_t float64_to_uint64(float64 a, float_status *s)
2763{
2764 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2765}
2766
2767uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2768{
2769 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2770}
2771
2772uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2773{
2774 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2775}
2776
2777uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2778{
2779 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2780}
2781
2782uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2783{
2784 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2785}
2786
2787uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2788{
2789 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2790}
2791
2792uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2793{
2794 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2795}
2796
2797uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2798{
2799 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2800}
2801
2802uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2803{
2804 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2805}
2806
2807uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2808{
2809 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2810}
ab52f973 2811
34f0c0a9
LZ
2812/*
2813 * Returns the result of converting the bfloat16 value `a' to
2814 * the unsigned integer format.
2815 */
2816
2817uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2818 int scale, float_status *s)
2819{
98e256fc
RH
2820 FloatParts64 p;
2821
2822 bfloat16_unpack_canonical(&p, a, s);
2823 return round_to_uint_and_pack(p, rmode, scale, UINT16_MAX, s);
34f0c0a9
LZ
2824}
2825
2826uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2827 int scale, float_status *s)
2828{
98e256fc
RH
2829 FloatParts64 p;
2830
2831 bfloat16_unpack_canonical(&p, a, s);
2832 return round_to_uint_and_pack(p, rmode, scale, UINT32_MAX, s);
34f0c0a9
LZ
2833}
2834
2835uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
2836 int scale, float_status *s)
2837{
98e256fc
RH
2838 FloatParts64 p;
2839
2840 bfloat16_unpack_canonical(&p, a, s);
2841 return round_to_uint_and_pack(p, rmode, scale, UINT64_MAX, s);
34f0c0a9
LZ
2842}
2843
2844uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
2845{
2846 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2847}
2848
2849uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
2850{
2851 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2852}
2853
2854uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
2855{
2856 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2857}
2858
2859uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
2860{
2861 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2862}
2863
2864uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
2865{
2866 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2867}
2868
2869uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
2870{
2871 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2872}
2873
c02e1fb8
AB
2874/*
2875 * Integer to float conversions
2876 *
2877 * Returns the result of converting the two's complement integer `a'
2878 * to the floating-point format. The conversion is performed according
2879 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2880 */
2881
f8155c1d 2882static FloatParts64 int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 2883{
f8155c1d 2884 FloatParts64 r = { .sign = false };
2abdfe24 2885
c02e1fb8
AB
2886 if (a == 0) {
2887 r.cls = float_class_zero;
c02e1fb8 2888 } else {
2abdfe24
RH
2889 uint64_t f = a;
2890 int shift;
2891
2892 r.cls = float_class_normal;
c02e1fb8 2893 if (a < 0) {
2abdfe24 2894 f = -f;
c02e1fb8 2895 r.sign = true;
c02e1fb8 2896 }
e99c4373 2897 shift = clz64(f);
2abdfe24
RH
2898 scale = MIN(MAX(scale, -0x10000), 0x10000);
2899
2900 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
e99c4373 2901 r.frac = f << shift;
c02e1fb8
AB
2902 }
2903
2904 return r;
2905}
2906
2abdfe24 2907float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2908{
f8155c1d 2909 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 2910 return float16_round_pack_canonical(&pa, status);
c02e1fb8
AB
2911}
2912
2abdfe24
RH
2913float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2914{
2915 return int64_to_float16_scalbn(a, scale, status);
2916}
2917
2918float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2919{
2920 return int64_to_float16_scalbn(a, scale, status);
2921}
2922
2923float16 int64_to_float16(int64_t a, float_status *status)
2924{
2925 return int64_to_float16_scalbn(a, 0, status);
2926}
2927
c02e1fb8
AB
2928float16 int32_to_float16(int32_t a, float_status *status)
2929{
2abdfe24 2930 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2931}
2932
2933float16 int16_to_float16(int16_t a, float_status *status)
2934{
2abdfe24 2935 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2936}
2937
0d93d8ec
FC
2938float16 int8_to_float16(int8_t a, float_status *status)
2939{
2940 return int64_to_float16_scalbn(a, 0, status);
2941}
2942
2abdfe24 2943float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2944{
f8155c1d 2945 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 2946 return float32_round_pack_canonical(&pa, status);
c02e1fb8
AB
2947}
2948
2abdfe24
RH
2949float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
2950{
2951 return int64_to_float32_scalbn(a, scale, status);
2952}
2953
2954float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
2955{
2956 return int64_to_float32_scalbn(a, scale, status);
2957}
2958
2959float32 int64_to_float32(int64_t a, float_status *status)
2960{
2961 return int64_to_float32_scalbn(a, 0, status);
2962}
2963
c02e1fb8
AB
2964float32 int32_to_float32(int32_t a, float_status *status)
2965{
2abdfe24 2966 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2967}
2968
2969float32 int16_to_float32(int16_t a, float_status *status)
2970{
2abdfe24 2971 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2972}
2973
2abdfe24 2974float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2975{
f8155c1d 2976 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 2977 return float64_round_pack_canonical(&pa, status);
c02e1fb8
AB
2978}
2979
2abdfe24
RH
2980float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
2981{
2982 return int64_to_float64_scalbn(a, scale, status);
2983}
2984
2985float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
2986{
2987 return int64_to_float64_scalbn(a, scale, status);
2988}
2989
2990float64 int64_to_float64(int64_t a, float_status *status)
2991{
2992 return int64_to_float64_scalbn(a, 0, status);
2993}
2994
c02e1fb8
AB
2995float64 int32_to_float64(int32_t a, float_status *status)
2996{
2abdfe24 2997 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2998}
2999
3000float64 int16_to_float64(int16_t a, float_status *status)
3001{
2abdfe24 3002 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3003}
3004
34f0c0a9
LZ
3005/*
3006 * Returns the result of converting the two's complement integer `a'
3007 * to the bfloat16 format.
3008 */
3009
3010bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3011{
f8155c1d 3012 FloatParts64 pa = int_to_float(a, scale, status);
e293e927 3013 return bfloat16_round_pack_canonical(&pa, status);
34f0c0a9
LZ
3014}
3015
3016bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3017{
3018 return int64_to_bfloat16_scalbn(a, scale, status);
3019}
3020
3021bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3022{
3023 return int64_to_bfloat16_scalbn(a, scale, status);
3024}
3025
3026bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3027{
3028 return int64_to_bfloat16_scalbn(a, 0, status);
3029}
3030
3031bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3032{
3033 return int64_to_bfloat16_scalbn(a, 0, status);
3034}
3035
3036bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3037{
3038 return int64_to_bfloat16_scalbn(a, 0, status);
3039}
c02e1fb8
AB
3040
3041/*
3042 * Unsigned Integer to float conversions
3043 *
3044 * Returns the result of converting the unsigned integer `a' to the
3045 * floating-point format. The conversion is performed according to the
3046 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3047 */
3048
f8155c1d 3049static FloatParts64 uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 3050{
f8155c1d 3051 FloatParts64 r = { .sign = false };
e99c4373 3052 int shift;
c02e1fb8
AB
3053
3054 if (a == 0) {
3055 r.cls = float_class_zero;
3056 } else {
2abdfe24 3057 scale = MIN(MAX(scale, -0x10000), 0x10000);
e99c4373 3058 shift = clz64(a);
c02e1fb8 3059 r.cls = float_class_normal;
e99c4373
RH
3060 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
3061 r.frac = a << shift;
c02e1fb8
AB
3062 }
3063
3064 return r;
3065}
3066
2abdfe24 3067float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3068{
f8155c1d 3069 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3070 return float16_round_pack_canonical(&pa, status);
c02e1fb8
AB
3071}
3072
2abdfe24
RH
3073float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3074{
3075 return uint64_to_float16_scalbn(a, scale, status);
3076}
3077
3078float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3079{
3080 return uint64_to_float16_scalbn(a, scale, status);
3081}
3082
3083float16 uint64_to_float16(uint64_t a, float_status *status)
3084{
3085 return uint64_to_float16_scalbn(a, 0, status);
3086}
3087
c02e1fb8
AB
3088float16 uint32_to_float16(uint32_t a, float_status *status)
3089{
2abdfe24 3090 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3091}
3092
3093float16 uint16_to_float16(uint16_t a, float_status *status)
3094{
2abdfe24 3095 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
3096}
3097
0d93d8ec
FC
3098float16 uint8_to_float16(uint8_t a, float_status *status)
3099{
3100 return uint64_to_float16_scalbn(a, 0, status);
3101}
3102
2abdfe24 3103float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3104{
f8155c1d 3105 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3106 return float32_round_pack_canonical(&pa, status);
c02e1fb8
AB
3107}
3108
2abdfe24
RH
3109float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3110{
3111 return uint64_to_float32_scalbn(a, scale, status);
3112}
3113
3114float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3115{
3116 return uint64_to_float32_scalbn(a, scale, status);
3117}
3118
3119float32 uint64_to_float32(uint64_t a, float_status *status)
3120{
3121 return uint64_to_float32_scalbn(a, 0, status);
3122}
3123
c02e1fb8
AB
3124float32 uint32_to_float32(uint32_t a, float_status *status)
3125{
2abdfe24 3126 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3127}
3128
3129float32 uint16_to_float32(uint16_t a, float_status *status)
3130{
2abdfe24 3131 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3132}
3133
2abdfe24 3134float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3135{
f8155c1d 3136 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3137 return float64_round_pack_canonical(&pa, status);
c02e1fb8
AB
3138}
3139
2abdfe24
RH
3140float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3141{
3142 return uint64_to_float64_scalbn(a, scale, status);
3143}
3144
3145float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3146{
3147 return uint64_to_float64_scalbn(a, scale, status);
3148}
3149
3150float64 uint64_to_float64(uint64_t a, float_status *status)
3151{
3152 return uint64_to_float64_scalbn(a, 0, status);
3153}
3154
c02e1fb8
AB
3155float64 uint32_to_float64(uint32_t a, float_status *status)
3156{
2abdfe24 3157 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3158}
3159
3160float64 uint16_to_float64(uint16_t a, float_status *status)
3161{
2abdfe24 3162 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3163}
3164
34f0c0a9
LZ
3165/*
3166 * Returns the result of converting the unsigned integer `a' to the
3167 * bfloat16 format.
3168 */
3169
3170bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3171{
f8155c1d 3172 FloatParts64 pa = uint_to_float(a, scale, status);
e293e927 3173 return bfloat16_round_pack_canonical(&pa, status);
34f0c0a9
LZ
3174}
3175
3176bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3177{
3178 return uint64_to_bfloat16_scalbn(a, scale, status);
3179}
3180
3181bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3182{
3183 return uint64_to_bfloat16_scalbn(a, scale, status);
3184}
3185
3186bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3187{
3188 return uint64_to_bfloat16_scalbn(a, 0, status);
3189}
3190
3191bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3192{
3193 return uint64_to_bfloat16_scalbn(a, 0, status);
3194}
3195
3196bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3197{
3198 return uint64_to_bfloat16_scalbn(a, 0, status);
3199}
3200
89360067
AB
3201/* Float Min/Max */
3202/* min() and max() functions. These can't be implemented as
3203 * 'compare and pick one input' because that would mishandle
3204 * NaNs and +0 vs -0.
3205 *
3206 * minnum() and maxnum() functions. These are similar to the min()
3207 * and max() functions but if one of the arguments is a QNaN and
3208 * the other is numerical then the numerical argument is returned.
3209 * SNaNs will get quietened before being returned.
3210 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3211 * and maxNum() operations. min() and max() are the typical min/max
3212 * semantics provided by many CPUs which predate that specification.
3213 *
3214 * minnummag() and maxnummag() functions correspond to minNumMag()
3215 * and maxNumMag() from IEEE 754-2008.
3216 */
f8155c1d 3217static FloatParts64 minmax_floats(FloatParts64 a, FloatParts64 b, bool ismin,
89360067
AB
3218 bool ieee, bool ismag, float_status *s)
3219{
3220 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
3221 if (ieee) {
3222 /* Takes two floating-point values `a' and `b', one of
3223 * which is a NaN, and returns the appropriate NaN
3224 * result. If either `a' or `b' is a signaling NaN,
3225 * the invalid exception is raised.
3226 */
3227 if (is_snan(a.cls) || is_snan(b.cls)) {
22c355f4 3228 return *parts_pick_nan(&a, &b, s);
89360067
AB
3229 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
3230 return b;
3231 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
3232 return a;
3233 }
3234 }
22c355f4 3235 return *parts_pick_nan(&a, &b, s);
89360067
AB
3236 } else {
3237 int a_exp, b_exp;
89360067
AB
3238
3239 switch (a.cls) {
3240 case float_class_normal:
3241 a_exp = a.exp;
3242 break;
3243 case float_class_inf:
3244 a_exp = INT_MAX;
3245 break;
3246 case float_class_zero:
3247 a_exp = INT_MIN;
3248 break;
3249 default:
3250 g_assert_not_reached();
3251 break;
3252 }
3253 switch (b.cls) {
3254 case float_class_normal:
3255 b_exp = b.exp;
3256 break;
3257 case float_class_inf:
3258 b_exp = INT_MAX;
3259 break;
3260 case float_class_zero:
3261 b_exp = INT_MIN;
3262 break;
3263 default:
3264 g_assert_not_reached();
3265 break;
3266 }
3267
6245327a
EC
3268 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
3269 bool a_less = a_exp < b_exp;
3270 if (a_exp == b_exp) {
3271 a_less = a.frac < b.frac;
3272 }
3273 return a_less ^ ismin ? b : a;
89360067
AB
3274 }
3275
6245327a 3276 if (a.sign == b.sign) {
89360067
AB
3277 bool a_less = a_exp < b_exp;
3278 if (a_exp == b_exp) {
3279 a_less = a.frac < b.frac;
3280 }
6245327a 3281 return a.sign ^ a_less ^ ismin ? b : a;
89360067 3282 } else {
6245327a 3283 return a.sign ^ ismin ? b : a;
89360067
AB
3284 }
3285 }
3286}
3287
3288#define MINMAX(sz, name, ismin, isiee, ismag) \
3289float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
3290 float_status *s) \
3291{ \
98e256fc
RH
3292 FloatParts64 pa, pb, pr; \
3293 float ## sz ## _unpack_canonical(&pa, a, s); \
3294 float ## sz ## _unpack_canonical(&pb, b, s); \
3295 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
e293e927 3296 return float ## sz ## _round_pack_canonical(&pr, s); \
89360067
AB
3297}
3298
3299MINMAX(16, min, true, false, false)
3300MINMAX(16, minnum, true, true, false)
3301MINMAX(16, minnummag, true, true, true)
3302MINMAX(16, max, false, false, false)
3303MINMAX(16, maxnum, false, true, false)
3304MINMAX(16, maxnummag, false, true, true)
3305
3306MINMAX(32, min, true, false, false)
3307MINMAX(32, minnum, true, true, false)
3308MINMAX(32, minnummag, true, true, true)
3309MINMAX(32, max, false, false, false)
3310MINMAX(32, maxnum, false, true, false)
3311MINMAX(32, maxnummag, false, true, true)
3312
3313MINMAX(64, min, true, false, false)
3314MINMAX(64, minnum, true, true, false)
3315MINMAX(64, minnummag, true, true, true)
3316MINMAX(64, max, false, false, false)
3317MINMAX(64, maxnum, false, true, false)
3318MINMAX(64, maxnummag, false, true, true)
3319
3320#undef MINMAX
3321
8282310d
LZ
3322#define BF16_MINMAX(name, ismin, isiee, ismag) \
3323bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
3324{ \
98e256fc
RH
3325 FloatParts64 pa, pb, pr; \
3326 bfloat16_unpack_canonical(&pa, a, s); \
3327 bfloat16_unpack_canonical(&pb, b, s); \
3328 pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
e293e927 3329 return bfloat16_round_pack_canonical(&pr, s); \
8282310d
LZ
3330}
3331
3332BF16_MINMAX(min, true, false, false)
3333BF16_MINMAX(minnum, true, true, false)
3334BF16_MINMAX(minnummag, true, true, true)
3335BF16_MINMAX(max, false, false, false)
3336BF16_MINMAX(maxnum, false, true, false)
3337BF16_MINMAX(maxnummag, false, true, true)
3338
3339#undef BF16_MINMAX
3340
0c4c9092 3341/* Floating point compare */
f8155c1d 3342static FloatRelation compare_floats(FloatParts64 a, FloatParts64 b, bool is_quiet,
71bfd65c 3343 float_status *s)
0c4c9092
AB
3344{
3345 if (is_nan(a.cls) || is_nan(b.cls)) {
3346 if (!is_quiet ||
3347 a.cls == float_class_snan ||
3348 b.cls == float_class_snan) {
d82f3b2d 3349 float_raise(float_flag_invalid, s);
0c4c9092
AB
3350 }
3351 return float_relation_unordered;
3352 }
3353
3354 if (a.cls == float_class_zero) {
3355 if (b.cls == float_class_zero) {
3356 return float_relation_equal;
3357 }
3358 return b.sign ? float_relation_greater : float_relation_less;
3359 } else if (b.cls == float_class_zero) {
3360 return a.sign ? float_relation_less : float_relation_greater;
3361 }
3362
3363 /* The only really important thing about infinity is its sign. If
3364 * both are infinities the sign marks the smallest of the two.
3365 */
3366 if (a.cls == float_class_inf) {
3367 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3368 return float_relation_equal;
3369 }
3370 return a.sign ? float_relation_less : float_relation_greater;
3371 } else if (b.cls == float_class_inf) {
3372 return b.sign ? float_relation_greater : float_relation_less;
3373 }
3374
3375 if (a.sign != b.sign) {
3376 return a.sign ? float_relation_less : float_relation_greater;
3377 }
3378
3379 if (a.exp == b.exp) {
3380 if (a.frac == b.frac) {
3381 return float_relation_equal;
3382 }
3383 if (a.sign) {
3384 return a.frac > b.frac ?
3385 float_relation_less : float_relation_greater;
3386 } else {
3387 return a.frac > b.frac ?
3388 float_relation_greater : float_relation_less;
3389 }
3390 } else {
3391 if (a.sign) {
3392 return a.exp > b.exp ? float_relation_less : float_relation_greater;
3393 } else {
3394 return a.exp > b.exp ? float_relation_greater : float_relation_less;
3395 }
3396 }
3397}
3398
d9fe9db9
EC
3399#define COMPARE(name, attr, sz) \
3400static int attr \
3401name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
0c4c9092 3402{ \
98e256fc
RH
3403 FloatParts64 pa, pb; \
3404 float ## sz ## _unpack_canonical(&pa, a, s); \
3405 float ## sz ## _unpack_canonical(&pb, b, s); \
d9fe9db9 3406 return compare_floats(pa, pb, is_quiet, s); \
0c4c9092
AB
3407}
3408
d9fe9db9
EC
3409COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
3410COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
3411COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
0c4c9092
AB
3412
3413#undef COMPARE
3414
71bfd65c 3415FloatRelation float16_compare(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3416{
3417 return soft_f16_compare(a, b, false, s);
3418}
3419
71bfd65c 3420FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3421{
3422 return soft_f16_compare(a, b, true, s);
3423}
3424
71bfd65c 3425static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3426f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
3427{
3428 union_float32 ua, ub;
3429
3430 ua.s = xa;
3431 ub.s = xb;
3432
3433 if (QEMU_NO_HARDFLOAT) {
3434 goto soft;
3435 }
3436
3437 float32_input_flush2(&ua.s, &ub.s, s);
3438 if (isgreaterequal(ua.h, ub.h)) {
3439 if (isgreater(ua.h, ub.h)) {
3440 return float_relation_greater;
3441 }
3442 return float_relation_equal;
3443 }
3444 if (likely(isless(ua.h, ub.h))) {
3445 return float_relation_less;
3446 }
3447 /* The only condition remaining is unordered.
3448 * Fall through to set flags.
3449 */
3450 soft:
3451 return soft_f32_compare(ua.s, ub.s, is_quiet, s);
3452}
3453
71bfd65c 3454FloatRelation float32_compare(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3455{
3456 return f32_compare(a, b, false, s);
3457}
3458
71bfd65c 3459FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3460{
3461 return f32_compare(a, b, true, s);
3462}
3463
71bfd65c 3464static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3465f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3466{
3467 union_float64 ua, ub;
3468
3469 ua.s = xa;
3470 ub.s = xb;
3471
3472 if (QEMU_NO_HARDFLOAT) {
3473 goto soft;
3474 }
3475
3476 float64_input_flush2(&ua.s, &ub.s, s);
3477 if (isgreaterequal(ua.h, ub.h)) {
3478 if (isgreater(ua.h, ub.h)) {
3479 return float_relation_greater;
3480 }
3481 return float_relation_equal;
3482 }
3483 if (likely(isless(ua.h, ub.h))) {
3484 return float_relation_less;
3485 }
3486 /* The only condition remaining is unordered.
3487 * Fall through to set flags.
3488 */
3489 soft:
3490 return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3491}
3492
71bfd65c 3493FloatRelation float64_compare(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3494{
3495 return f64_compare(a, b, false, s);
3496}
3497
71bfd65c 3498FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3499{
3500 return f64_compare(a, b, true, s);
3501}
3502
8282310d
LZ
3503static FloatRelation QEMU_FLATTEN
3504soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
3505{
98e256fc
RH
3506 FloatParts64 pa, pb;
3507
3508 bfloat16_unpack_canonical(&pa, a, s);
3509 bfloat16_unpack_canonical(&pb, b, s);
8282310d
LZ
3510 return compare_floats(pa, pb, is_quiet, s);
3511}
3512
3513FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3514{
3515 return soft_bf16_compare(a, b, false, s);
3516}
3517
3518FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3519{
3520 return soft_bf16_compare(a, b, true, s);
3521}
3522
0bfc9f19 3523/* Multiply A by 2 raised to the power N. */
f8155c1d 3524static FloatParts64 scalbn_decomposed(FloatParts64 a, int n, float_status *s)
0bfc9f19
AB
3525{
3526 if (unlikely(is_nan(a.cls))) {
7c45bad8 3527 parts_return_nan(&a, s);
0bfc9f19
AB
3528 }
3529 if (a.cls == float_class_normal) {
f8155c1d 3530 /* The largest float type (even though not supported by FloatParts64)
ce8d4082
RH
3531 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3532 * still allows rounding to infinity, without allowing overflow
f8155c1d 3533 * within the int32_t that backs FloatParts64.exp.
ce8d4082
RH
3534 */
3535 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
3536 a.exp += n;
3537 }
3538 return a;
3539}
3540
3541float16 float16_scalbn(float16 a, int n, float_status *status)
3542{
98e256fc
RH
3543 FloatParts64 pa, pr;
3544
3545 float16_unpack_canonical(&pa, a, status);
3546 pr = scalbn_decomposed(pa, n, status);
e293e927 3547 return float16_round_pack_canonical(&pr, status);
0bfc9f19
AB
3548}
3549
3550float32 float32_scalbn(float32 a, int n, float_status *status)
3551{
98e256fc
RH
3552 FloatParts64 pa, pr;
3553
3554 float32_unpack_canonical(&pa, a, status);
3555 pr = scalbn_decomposed(pa, n, status);
e293e927 3556 return float32_round_pack_canonical(&pr, status);
0bfc9f19
AB
3557}
3558
3559float64 float64_scalbn(float64 a, int n, float_status *status)
3560{
98e256fc
RH
3561 FloatParts64 pa, pr;
3562
3563 float64_unpack_canonical(&pa, a, status);
3564 pr = scalbn_decomposed(pa, n, status);
e293e927 3565 return float64_round_pack_canonical(&pr, status);
0bfc9f19
AB
3566}
3567
8282310d
LZ
3568bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3569{
98e256fc
RH
3570 FloatParts64 pa, pr;
3571
3572 bfloat16_unpack_canonical(&pa, a, status);
3573 pr = scalbn_decomposed(pa, n, status);
e293e927 3574 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
3575}
3576
c13bb2da
AB
3577/*
3578 * Square Root
3579 *
3580 * The old softfloat code did an approximation step before zeroing in
3581 * on the final result. However, for simplicity we just compute the
3582 * square root by iterating down from the implicit bit to enough extra
3583 * bits to ensure we get a correctly rounded result.
3584 *
3585 * This does mean however the calculation is slower than before,
3586 * especially for 64 bit floats.
3587 */
3588
f8155c1d 3589static FloatParts64 sqrt_float(FloatParts64 a, float_status *s, const FloatFmt *p)
c13bb2da
AB
3590{
3591 uint64_t a_frac, r_frac, s_frac;
3592 int bit, last_bit;
3593
3594 if (is_nan(a.cls)) {
7c45bad8
RH
3595 parts_return_nan(&a, s);
3596 return a;
c13bb2da
AB
3597 }
3598 if (a.cls == float_class_zero) {
3599 return a; /* sqrt(+-0) = +-0 */
3600 }
3601 if (a.sign) {
d82f3b2d 3602 float_raise(float_flag_invalid, s);
0fc07cad
RH
3603 parts_default_nan(&a, s);
3604 return a;
c13bb2da
AB
3605 }
3606 if (a.cls == float_class_inf) {
3607 return a; /* sqrt(+inf) = +inf */
3608 }
3609
3610 assert(a.cls == float_class_normal);
3611
3612 /* We need two overflow bits at the top. Adding room for that is a
3613 * right shift. If the exponent is odd, we can discard the low bit
3614 * by multiplying the fraction by 2; that's a left shift. Combine
e99c4373 3615 * those and we shift right by 1 if the exponent is odd, otherwise 2.
c13bb2da 3616 */
e99c4373 3617 a_frac = a.frac >> (2 - (a.exp & 1));
c13bb2da
AB
3618 a.exp >>= 1;
3619
3620 /* Bit-by-bit computation of sqrt. */
3621 r_frac = 0;
3622 s_frac = 0;
3623
3624 /* Iterate from implicit bit down to the 3 extra bits to compute a
e99c4373
RH
3625 * properly rounded result. Remember we've inserted two more bits
3626 * at the top, so these positions are two less.
c13bb2da 3627 */
e99c4373 3628 bit = DECOMPOSED_BINARY_POINT - 2;
c13bb2da
AB
3629 last_bit = MAX(p->frac_shift - 4, 0);
3630 do {
3631 uint64_t q = 1ULL << bit;
3632 uint64_t t_frac = s_frac + q;
3633 if (t_frac <= a_frac) {
3634 s_frac = t_frac + q;
3635 a_frac -= t_frac;
3636 r_frac += q;
3637 }
3638 a_frac <<= 1;
3639 } while (--bit >= last_bit);
3640
3641 /* Undo the right shift done above. If there is any remaining
3642 * fraction, the result is inexact. Set the sticky bit.
3643 */
e99c4373 3644 a.frac = (r_frac << 2) + (a_frac != 0);
c13bb2da
AB
3645
3646 return a;
3647}
3648
97ff87c0 3649float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da 3650{
98e256fc
RH
3651 FloatParts64 pa, pr;
3652
3653 float16_unpack_canonical(&pa, a, status);
3654 pr = sqrt_float(pa, status, &float16_params);
e293e927 3655 return float16_round_pack_canonical(&pr, status);
c13bb2da
AB
3656}
3657
f131bae8
EC
3658static float32 QEMU_SOFTFLOAT_ATTR
3659soft_f32_sqrt(float32 a, float_status *status)
c13bb2da 3660{
98e256fc
RH
3661 FloatParts64 pa, pr;
3662
3663 float32_unpack_canonical(&pa, a, status);
3664 pr = sqrt_float(pa, status, &float32_params);
e293e927 3665 return float32_round_pack_canonical(&pr, status);
c13bb2da
AB
3666}
3667
f131bae8
EC
3668static float64 QEMU_SOFTFLOAT_ATTR
3669soft_f64_sqrt(float64 a, float_status *status)
c13bb2da 3670{
98e256fc
RH
3671 FloatParts64 pa, pr;
3672
3673 float64_unpack_canonical(&pa, a, status);
3674 pr = sqrt_float(pa, status, &float64_params);
e293e927 3675 return float64_round_pack_canonical(&pr, status);
c13bb2da
AB
3676}
3677
f131bae8
EC
3678float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3679{
3680 union_float32 ua, ur;
3681
3682 ua.s = xa;
3683 if (unlikely(!can_use_fpu(s))) {
3684 goto soft;
3685 }
3686
3687 float32_input_flush1(&ua.s, s);
3688 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3689 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3690 fpclassify(ua.h) == FP_ZERO) ||
3691 signbit(ua.h))) {
3692 goto soft;
3693 }
3694 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3695 float32_is_neg(ua.s))) {
3696 goto soft;
3697 }
3698 ur.h = sqrtf(ua.h);
3699 return ur.s;
3700
3701 soft:
3702 return soft_f32_sqrt(ua.s, s);
3703}
3704
3705float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3706{
3707 union_float64 ua, ur;
3708
3709 ua.s = xa;
3710 if (unlikely(!can_use_fpu(s))) {
3711 goto soft;
3712 }
3713
3714 float64_input_flush1(&ua.s, s);
3715 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3716 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3717 fpclassify(ua.h) == FP_ZERO) ||
3718 signbit(ua.h))) {
3719 goto soft;
3720 }
3721 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3722 float64_is_neg(ua.s))) {
3723 goto soft;
3724 }
3725 ur.h = sqrt(ua.h);
3726 return ur.s;
3727
3728 soft:
3729 return soft_f64_sqrt(ua.s, s);
3730}
3731
8282310d
LZ
3732bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3733{
98e256fc
RH
3734 FloatParts64 pa, pr;
3735
3736 bfloat16_unpack_canonical(&pa, a, status);
3737 pr = sqrt_float(pa, status, &bfloat16_params);
e293e927 3738 return bfloat16_round_pack_canonical(&pr, status);
8282310d
LZ
3739}
3740
0218a16e
RH
3741/*----------------------------------------------------------------------------
3742| The pattern for a default generated NaN.
3743*----------------------------------------------------------------------------*/
3744
3745float16 float16_default_nan(float_status *status)
3746{
0fc07cad
RH
3747 FloatParts64 p;
3748
3749 parts_default_nan(&p, status);
0218a16e 3750 p.frac >>= float16_params.frac_shift;
71fd178e 3751 return float16_pack_raw(&p);
0218a16e
RH
3752}
3753
3754float32 float32_default_nan(float_status *status)
3755{
0fc07cad
RH
3756 FloatParts64 p;
3757
3758 parts_default_nan(&p, status);
0218a16e 3759 p.frac >>= float32_params.frac_shift;
71fd178e 3760 return float32_pack_raw(&p);
0218a16e
RH
3761}
3762
3763float64 float64_default_nan(float_status *status)
3764{
0fc07cad
RH
3765 FloatParts64 p;
3766
3767 parts_default_nan(&p, status);
0218a16e 3768 p.frac >>= float64_params.frac_shift;
71fd178e 3769 return float64_pack_raw(&p);
0218a16e
RH
3770}
3771
3772float128 float128_default_nan(float_status *status)
3773{
e9034ea8 3774 FloatParts128 p;
0218a16e 3775
0fc07cad 3776 parts_default_nan(&p, status);
e9034ea8
RH
3777 frac_shr(&p, float128_params.frac_shift);
3778 return float128_pack_raw(&p);
0218a16e 3779}
c13bb2da 3780
8282310d
LZ
3781bfloat16 bfloat16_default_nan(float_status *status)
3782{
0fc07cad
RH
3783 FloatParts64 p;
3784
3785 parts_default_nan(&p, status);
8282310d 3786 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3787 return bfloat16_pack_raw(&p);
8282310d
LZ
3788}
3789
158142c2 3790/*----------------------------------------------------------------------------
377ed926
RH
3791| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3792*----------------------------------------------------------------------------*/
3793
3794float16 float16_silence_nan(float16 a, float_status *status)
3795{
3dddb203
RH
3796 FloatParts64 p;
3797
3798 float16_unpack_raw(&p, a);
377ed926 3799 p.frac <<= float16_params.frac_shift;
92ff426d 3800 parts_silence_nan(&p, status);
377ed926 3801 p.frac >>= float16_params.frac_shift;
71fd178e 3802 return float16_pack_raw(&p);
377ed926
RH
3803}
3804
3805float32 float32_silence_nan(float32 a, float_status *status)
3806{
3dddb203
RH
3807 FloatParts64 p;
3808
3809 float32_unpack_raw(&p, a);
377ed926 3810 p.frac <<= float32_params.frac_shift;
92ff426d 3811 parts_silence_nan(&p, status);
377ed926 3812 p.frac >>= float32_params.frac_shift;
71fd178e 3813 return float32_pack_raw(&p);
377ed926
RH
3814}
3815
3816float64 float64_silence_nan(float64 a, float_status *status)
3817{
3dddb203
RH
3818 FloatParts64 p;
3819
3820 float64_unpack_raw(&p, a);
377ed926 3821 p.frac <<= float64_params.frac_shift;
92ff426d 3822 parts_silence_nan(&p, status);
377ed926 3823 p.frac >>= float64_params.frac_shift;
71fd178e 3824 return float64_pack_raw(&p);
377ed926
RH
3825}
3826
8282310d
LZ
3827bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3828{
3dddb203
RH
3829 FloatParts64 p;
3830
3831 bfloat16_unpack_raw(&p, a);
8282310d 3832 p.frac <<= bfloat16_params.frac_shift;
92ff426d 3833 parts_silence_nan(&p, status);
8282310d 3834 p.frac >>= bfloat16_params.frac_shift;
71fd178e 3835 return bfloat16_pack_raw(&p);
8282310d 3836}
e6b405fe 3837
0018b1f4
RH
3838float128 float128_silence_nan(float128 a, float_status *status)
3839{
3840 FloatParts128 p;
3841
3842 float128_unpack_raw(&p, a);
3843 frac_shl(&p, float128_params.frac_shift);
3844 parts_silence_nan(&p, status);
3845 frac_shr(&p, float128_params.frac_shift);
3846 return float128_pack_raw(&p);
3847}
3848
e6b405fe
AB
3849/*----------------------------------------------------------------------------
3850| If `a' is denormal and we are in flush-to-zero mode then set the
3851| input-denormal exception and return zero. Otherwise just return the value.
3852*----------------------------------------------------------------------------*/
3853
f8155c1d 3854static bool parts_squash_denormal(FloatParts64 p, float_status *status)
e6b405fe
AB
3855{
3856 if (p.exp == 0 && p.frac != 0) {
3857 float_raise(float_flag_input_denormal, status);
3858 return true;
3859 }
3860
3861 return false;
3862}
3863
3864float16 float16_squash_input_denormal(float16 a, float_status *status)
3865{
3866 if (status->flush_inputs_to_zero) {
3dddb203
RH
3867 FloatParts64 p;
3868
3869 float16_unpack_raw(&p, a);
e6b405fe
AB
3870 if (parts_squash_denormal(p, status)) {
3871 return float16_set_sign(float16_zero, p.sign);
3872 }
3873 }
3874 return a;
3875}
3876
3877float32 float32_squash_input_denormal(float32 a, float_status *status)
3878{
3879 if (status->flush_inputs_to_zero) {
3dddb203
RH
3880 FloatParts64 p;
3881
3882 float32_unpack_raw(&p, a);
e6b405fe
AB
3883 if (parts_squash_denormal(p, status)) {
3884 return float32_set_sign(float32_zero, p.sign);
3885 }
3886 }
3887 return a;
3888}
3889
3890float64 float64_squash_input_denormal(float64 a, float_status *status)
3891{
3892 if (status->flush_inputs_to_zero) {
3dddb203
RH
3893 FloatParts64 p;
3894
3895 float64_unpack_raw(&p, a);
e6b405fe
AB
3896 if (parts_squash_denormal(p, status)) {
3897 return float64_set_sign(float64_zero, p.sign);
3898 }
3899 }
3900 return a;
3901}
3902
8282310d
LZ
3903bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3904{
3905 if (status->flush_inputs_to_zero) {
3dddb203
RH
3906 FloatParts64 p;
3907
3908 bfloat16_unpack_raw(&p, a);
8282310d
LZ
3909 if (parts_squash_denormal(p, status)) {
3910 return bfloat16_set_sign(bfloat16_zero, p.sign);
3911 }
3912 }
3913 return a;
3914}
3915
377ed926 3916/*----------------------------------------------------------------------------
158142c2
FB
3917| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3918| and 7, and returns the properly rounded 32-bit integer corresponding to the
3919| input. If `zSign' is 1, the input is negated before being converted to an
3920| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3921| is simply rounded to an integer, with the inexact exception raised if the
3922| input cannot be represented exactly as an integer. However, if the fixed-
3923| point input is too large, the invalid exception is raised and the largest
3924| positive or negative integer is returned.
3925*----------------------------------------------------------------------------*/
3926
c120391c
RH
3927static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
3928 float_status *status)
158142c2 3929{
8f506c70 3930 int8_t roundingMode;
c120391c 3931 bool roundNearestEven;
8f506c70 3932 int8_t roundIncrement, roundBits;
760e1416 3933 int32_t z;
158142c2 3934
a2f2d288 3935 roundingMode = status->float_rounding_mode;
158142c2 3936 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3937 switch (roundingMode) {
3938 case float_round_nearest_even:
f9288a76 3939 case float_round_ties_away:
dc355b76
PM
3940 roundIncrement = 0x40;
3941 break;
3942 case float_round_to_zero:
3943 roundIncrement = 0;
3944 break;
3945 case float_round_up:
3946 roundIncrement = zSign ? 0 : 0x7f;
3947 break;
3948 case float_round_down:
3949 roundIncrement = zSign ? 0x7f : 0;
3950 break;
5d64abb3
RH
3951 case float_round_to_odd:
3952 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
3953 break;
dc355b76
PM
3954 default:
3955 abort();
158142c2
FB
3956 }
3957 roundBits = absZ & 0x7F;
3958 absZ = ( absZ + roundIncrement )>>7;
40662886
PMD
3959 if (!(roundBits ^ 0x40) && roundNearestEven) {
3960 absZ &= ~1;
3961 }
158142c2
FB
3962 z = absZ;
3963 if ( zSign ) z = - z;
3964 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 3965 float_raise(float_flag_invalid, status);
2c217da0 3966 return zSign ? INT32_MIN : INT32_MAX;
158142c2 3967 }
a2f2d288 3968 if (roundBits) {
d82f3b2d 3969 float_raise(float_flag_inexact, status);
a2f2d288 3970 }
158142c2
FB
3971 return z;
3972
3973}
3974
3975/*----------------------------------------------------------------------------
3976| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3977| `absZ1', with binary point between bits 63 and 64 (between the input words),
3978| and returns the properly rounded 64-bit integer corresponding to the input.
3979| If `zSign' is 1, the input is negated before being converted to an integer.
3980| Ordinarily, the fixed-point input is simply rounded to an integer, with
3981| the inexact exception raised if the input cannot be represented exactly as
3982| an integer. However, if the fixed-point input is too large, the invalid
3983| exception is raised and the largest positive or negative integer is
3984| returned.
3985*----------------------------------------------------------------------------*/
3986
c120391c 3987static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 3988 float_status *status)
158142c2 3989{
8f506c70 3990 int8_t roundingMode;
c120391c 3991 bool roundNearestEven, increment;
760e1416 3992 int64_t z;
158142c2 3993
a2f2d288 3994 roundingMode = status->float_rounding_mode;
158142c2 3995 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3996 switch (roundingMode) {
3997 case float_round_nearest_even:
f9288a76 3998 case float_round_ties_away:
dc355b76
PM
3999 increment = ((int64_t) absZ1 < 0);
4000 break;
4001 case float_round_to_zero:
4002 increment = 0;
4003 break;
4004 case float_round_up:
4005 increment = !zSign && absZ1;
4006 break;
4007 case float_round_down:
4008 increment = zSign && absZ1;
4009 break;
5d64abb3
RH
4010 case float_round_to_odd:
4011 increment = !(absZ0 & 1) && absZ1;
4012 break;
dc355b76
PM
4013 default:
4014 abort();
158142c2
FB
4015 }
4016 if ( increment ) {
4017 ++absZ0;
4018 if ( absZ0 == 0 ) goto overflow;
40662886
PMD
4019 if (!(absZ1 << 1) && roundNearestEven) {
4020 absZ0 &= ~1;
4021 }
158142c2
FB
4022 }
4023 z = absZ0;
4024 if ( zSign ) z = - z;
4025 if ( z && ( ( z < 0 ) ^ zSign ) ) {
4026 overflow:
ff32e16e 4027 float_raise(float_flag_invalid, status);
2c217da0 4028 return zSign ? INT64_MIN : INT64_MAX;
158142c2 4029 }
a2f2d288 4030 if (absZ1) {
d82f3b2d 4031 float_raise(float_flag_inexact, status);
a2f2d288 4032 }
158142c2
FB
4033 return z;
4034
4035}
4036
fb3ea83a
TM
4037/*----------------------------------------------------------------------------
4038| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
4039| `absZ1', with binary point between bits 63 and 64 (between the input words),
4040| and returns the properly rounded 64-bit unsigned integer corresponding to the
4041| input. Ordinarily, the fixed-point input is simply rounded to an integer,
4042| with the inexact exception raised if the input cannot be represented exactly
4043| as an integer. However, if the fixed-point input is too large, the invalid
4044| exception is raised and the largest unsigned integer is returned.
4045*----------------------------------------------------------------------------*/
4046
c120391c 4047static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
e5a41ffa 4048 uint64_t absZ1, float_status *status)
fb3ea83a 4049{
8f506c70 4050 int8_t roundingMode;
c120391c 4051 bool roundNearestEven, increment;
fb3ea83a 4052
a2f2d288 4053 roundingMode = status->float_rounding_mode;
fb3ea83a 4054 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
4055 switch (roundingMode) {
4056 case float_round_nearest_even:
f9288a76 4057 case float_round_ties_away:
dc355b76
PM
4058 increment = ((int64_t)absZ1 < 0);
4059 break;
4060 case float_round_to_zero:
4061 increment = 0;
4062 break;
4063 case float_round_up:
4064 increment = !zSign && absZ1;
4065 break;
4066 case float_round_down:
4067 increment = zSign && absZ1;
4068 break;
5d64abb3
RH
4069 case float_round_to_odd:
4070 increment = !(absZ0 & 1) && absZ1;
4071 break;
dc355b76
PM
4072 default:
4073 abort();
fb3ea83a
TM
4074 }
4075 if (increment) {
4076 ++absZ0;
4077 if (absZ0 == 0) {
ff32e16e 4078 float_raise(float_flag_invalid, status);
2c217da0 4079 return UINT64_MAX;
fb3ea83a 4080 }
40662886
PMD
4081 if (!(absZ1 << 1) && roundNearestEven) {
4082 absZ0 &= ~1;
4083 }
fb3ea83a
TM
4084 }
4085
4086 if (zSign && absZ0) {
ff32e16e 4087 float_raise(float_flag_invalid, status);
fb3ea83a
TM
4088 return 0;
4089 }
4090
4091 if (absZ1) {
d82f3b2d 4092 float_raise(float_flag_inexact, status);
fb3ea83a
TM
4093 }
4094 return absZ0;
4095}
4096
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision significand `aSig'.  The
| significand is shifted left by (leading-zero count - 8) and stored through
| `zSigPtr'; the matching exponent, 1 minus that shift, is stored through
| `zExpPtr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    const int8_t lead = clz32(aSig) - 8;

    *zSigPtr = aSig << lead;
    *zExpPtr = 1 - lead;
}
4114
158142c2
FB
4115/*----------------------------------------------------------------------------
4116| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4117| and significand `zSig', and returns the proper single-precision floating-
4118| point value corresponding to the abstract input. Ordinarily, the abstract
4119| value is simply rounded and packed into the single-precision format, with
4120| the inexact exception raised if the abstract input cannot be represented
4121| exactly. However, if the abstract value is too large, the overflow and
4122| inexact exceptions are raised and an infinity or maximal finite value is
4123| returned. If the abstract value is too small, the input value is rounded to
4124| a subnormal number, and the underflow and inexact exceptions are raised if
4125| the abstract input cannot be represented exactly as a subnormal single-
4126| precision floating-point number.
4127| The input significand `zSig' has its binary point between bits 30
4128| and 29, which is 7 bits to the left of the usual location. This shifted
4129| significand must be normalized or smaller. If `zSig' is not normalized,
4130| `zExp' must be 0; in that case, the result returned is a subnormal number,
4131| and it must not require rounding. In the usual case that `zSig' is
4132| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4133| The handling of underflow and overflow follows the IEC/IEEE Standard for
4134| Binary Floating-Point Arithmetic.
4135*----------------------------------------------------------------------------*/
4136
c120391c 4137static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4138 float_status *status)
158142c2 4139{
8f506c70 4140 int8_t roundingMode;
c120391c 4141 bool roundNearestEven;
8f506c70 4142 int8_t roundIncrement, roundBits;
c120391c 4143 bool isTiny;
158142c2 4144
a2f2d288 4145 roundingMode = status->float_rounding_mode;
158142c2 4146 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4147 switch (roundingMode) {
4148 case float_round_nearest_even:
f9288a76 4149 case float_round_ties_away:
dc355b76
PM
4150 roundIncrement = 0x40;
4151 break;
4152 case float_round_to_zero:
4153 roundIncrement = 0;
4154 break;
4155 case float_round_up:
4156 roundIncrement = zSign ? 0 : 0x7f;
4157 break;
4158 case float_round_down:
4159 roundIncrement = zSign ? 0x7f : 0;
4160 break;
5d64abb3
RH
4161 case float_round_to_odd:
4162 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4163 break;
dc355b76
PM
4164 default:
4165 abort();
4166 break;
158142c2
FB
4167 }
4168 roundBits = zSig & 0x7F;
bb98fe42 4169 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
4170 if ( ( 0xFD < zExp )
4171 || ( ( zExp == 0xFD )
bb98fe42 4172 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4173 ) {
5d64abb3
RH
4174 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4175 roundIncrement != 0;
ff32e16e 4176 float_raise(float_flag_overflow | float_flag_inexact, status);
5d64abb3 4177 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
158142c2
FB
4178 }
4179 if ( zExp < 0 ) {
a2f2d288 4180 if (status->flush_to_zero) {
ff32e16e 4181 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4182 return packFloat32(zSign, 0, 0);
4183 }
a828b373
RH
4184 isTiny = status->tininess_before_rounding
4185 || (zExp < -1)
4186 || (zSig + roundIncrement < 0x80000000);
158142c2
FB
4187 shift32RightJamming( zSig, - zExp, &zSig );
4188 zExp = 0;
4189 roundBits = zSig & 0x7F;
ff32e16e
PM
4190 if (isTiny && roundBits) {
4191 float_raise(float_flag_underflow, status);
4192 }
5d64abb3
RH
4193 if (roundingMode == float_round_to_odd) {
4194 /*
4195 * For round-to-odd case, the roundIncrement depends on
4196 * zSig which just changed.
4197 */
4198 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4199 }
158142c2
FB
4200 }
4201 }
a2f2d288 4202 if (roundBits) {
d82f3b2d 4203 float_raise(float_flag_inexact, status);
a2f2d288 4204 }
158142c2 4205 zSig = ( zSig + roundIncrement )>>7;
40662886
PMD
4206 if (!(roundBits ^ 0x40) && roundNearestEven) {
4207 zSig &= ~1;
4208 }
158142c2
FB
4209 if ( zSig == 0 ) zExp = 0;
4210 return packFloat32( zSign, zExp, zSig );
4211
4212}
4213
4214/*----------------------------------------------------------------------------
4215| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4216| and significand `zSig', and returns the proper single-precision floating-
4217| point value corresponding to the abstract input. This routine is just like
4218| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4219| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4220| floating-point exponent.
4221*----------------------------------------------------------------------------*/
4222
4223static float32
c120391c 4224 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4225 float_status *status)
158142c2 4226{
8f506c70 4227 int8_t shiftCount;
158142c2 4228
0019d5c3 4229 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
4230 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4231 status);
158142c2
FB
4232
4233}
4234
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision significand `aSig'.  The
| significand is shifted left by (leading-zero count - 11) and stored through
| `zSigPtr'; the matching exponent, 1 minus that shift, is stored through
| `zExpPtr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    const int8_t lead = clz64(aSig) - 11;

    *zSigPtr = aSig << lead;
    *zExpPtr = 1 - lead;
}
4252
4253/*----------------------------------------------------------------------------
4254| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4255| double-precision floating-point value, returning the result. After being
4256| shifted into the proper positions, the three fields are simply added
4257| together to form the result. This means that any integer portion of `zSig'
4258| will be added into the exponent. Since a properly normalized significand
4259| will have an integer portion equal to 1, the `zExp' input should be 1 less
4260| than the desired result exponent whenever `zSig' is a complete, normalized
4261| significand.
4262*----------------------------------------------------------------------------*/
4263
c120391c 4264static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
158142c2
FB
4265{
4266
f090c9d4 4267 return make_float64(
bb98fe42 4268 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
4269
4270}
4271
4272/*----------------------------------------------------------------------------
4273| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4274| and significand `zSig', and returns the proper double-precision floating-
4275| point value corresponding to the abstract input. Ordinarily, the abstract
4276| value is simply rounded and packed into the double-precision format, with
4277| the inexact exception raised if the abstract input cannot be represented
4278| exactly. However, if the abstract value is too large, the overflow and
4279| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
4280| returned. If the abstract value is too small, the input value is rounded to
4281| a subnormal number, and the underflow and inexact exceptions are raised if
4282| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
4283| precision floating-point number.
4284| The input significand `zSig' has its binary point between bits 62
4285| and 61, which is 10 bits to the left of the usual location. This shifted
4286| significand must be normalized or smaller. If `zSig' is not normalized,
4287| `zExp' must be 0; in that case, the result returned is a subnormal number,
4288| and it must not require rounding. In the usual case that `zSig' is
4289| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4290| The handling of underflow and overflow follows the IEC/IEEE Standard for
4291| Binary Floating-Point Arithmetic.
4292*----------------------------------------------------------------------------*/
4293
c120391c 4294static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4295 float_status *status)
158142c2 4296{
8f506c70 4297 int8_t roundingMode;
c120391c 4298 bool roundNearestEven;
0c48262d 4299 int roundIncrement, roundBits;
c120391c 4300 bool isTiny;
158142c2 4301
a2f2d288 4302 roundingMode = status->float_rounding_mode;
158142c2 4303 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4304 switch (roundingMode) {
4305 case float_round_nearest_even:
f9288a76 4306 case float_round_ties_away:
dc355b76
PM
4307 roundIncrement = 0x200;
4308 break;
4309 case float_round_to_zero:
4310 roundIncrement = 0;
4311 break;
4312 case float_round_up:
4313 roundIncrement = zSign ? 0 : 0x3ff;
4314 break;
4315 case float_round_down:
4316 roundIncrement = zSign ? 0x3ff : 0;
4317 break;
9ee6f678
BR
4318 case float_round_to_odd:
4319 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4320 break;
dc355b76
PM
4321 default:
4322 abort();
158142c2
FB
4323 }
4324 roundBits = zSig & 0x3FF;
bb98fe42 4325 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
4326 if ( ( 0x7FD < zExp )
4327 || ( ( zExp == 0x7FD )
bb98fe42 4328 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4329 ) {
9ee6f678
BR
4330 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4331 roundIncrement != 0;
ff32e16e 4332 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 4333 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
4334 }
4335 if ( zExp < 0 ) {
a2f2d288 4336 if (status->flush_to_zero) {
ff32e16e 4337 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4338 return packFloat64(zSign, 0, 0);
4339 }
a828b373
RH
4340 isTiny = status->tininess_before_rounding
4341 || (zExp < -1)
4342 || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
158142c2
FB
4343 shift64RightJamming( zSig, - zExp, &zSig );
4344 zExp = 0;
4345 roundBits = zSig & 0x3FF;
ff32e16e
PM
4346 if (isTiny && roundBits) {
4347 float_raise(float_flag_underflow, status);
4348 }
9ee6f678
BR
4349 if (roundingMode == float_round_to_odd) {
4350 /*
4351 * For round-to-odd case, the roundIncrement depends on
4352 * zSig which just changed.
4353 */
4354 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4355 }
158142c2
FB
4356 }
4357 }
a2f2d288 4358 if (roundBits) {
d82f3b2d 4359 float_raise(float_flag_inexact, status);
a2f2d288 4360 }
158142c2 4361 zSig = ( zSig + roundIncrement )>>10;
40662886
PMD
4362 if (!(roundBits ^ 0x200) && roundNearestEven) {
4363 zSig &= ~1;
4364 }
158142c2
FB
4365 if ( zSig == 0 ) zExp = 0;
4366 return packFloat64( zSign, zExp, zSig );
4367
4368}
4369
4370/*----------------------------------------------------------------------------
4371| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4372| and significand `zSig', and returns the proper double-precision floating-
4373| point value corresponding to the abstract input. This routine is just like
4374| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4375| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4376| floating-point exponent.
4377*----------------------------------------------------------------------------*/
4378
4379static float64
c120391c 4380 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4381 float_status *status)
158142c2 4382{
8f506c70 4383 int8_t shiftCount;
158142c2 4384
0019d5c3 4385 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
4386 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4387 status);
158142c2
FB
4388
4389}
4390
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision significand `aSig'.
| The significand is shifted left by its leading-zero count and stored
| through `zSigPtr'; the matching exponent, 1 minus that shift, is stored
| through `zExpPtr'.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    const int8_t lead = clz64(aSig);

    *zSigPtr = aSig << lead;
    *zExpPtr = 1 - lead;
}
4407
4408/*----------------------------------------------------------------------------
4409| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4410| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4411| and returns the proper extended double-precision floating-point value
4412| corresponding to the abstract input. Ordinarily, the abstract value is
4413| rounded and packed into the extended double-precision format, with the
4414| inexact exception raised if the abstract input cannot be represented
4415| exactly. However, if the abstract value is too large, the overflow and
4416| inexact exceptions are raised and an infinity or maximal finite value is
4417| returned. If the abstract value is too small, the input value is rounded to
4418| a subnormal number, and the underflow and inexact exceptions are raised if
4419| the abstract input cannot be represented exactly as a subnormal extended
4420| double-precision floating-point number.
4421| If `roundingPrecision' is 32 or 64, the result is rounded to the same
4422| number of bits as single or double precision, respectively. Otherwise, the
4423| result is rounded to the full precision of the extended double-precision
4424| format.
4425| The input significand must be normalized or smaller. If the input
4426| significand is not normalized, `zExp' must be 0; in that case, the result
4427| returned is a subnormal number, and it must not require rounding. The
4428| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4429| Floating-Point Arithmetic.
4430*----------------------------------------------------------------------------*/
4431
c120391c 4432floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
88857aca
LV
4433 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4434 float_status *status)
158142c2 4435{
8f506c70 4436 int8_t roundingMode;
c120391c 4437 bool roundNearestEven, increment, isTiny;
f42c2224 4438 int64_t roundIncrement, roundMask, roundBits;
158142c2 4439
a2f2d288 4440 roundingMode = status->float_rounding_mode;
158142c2
FB
4441 roundNearestEven = ( roundingMode == float_round_nearest_even );
4442 if ( roundingPrecision == 80 ) goto precision80;
4443 if ( roundingPrecision == 64 ) {
e9321124
AB
4444 roundIncrement = UINT64_C(0x0000000000000400);
4445 roundMask = UINT64_C(0x00000000000007FF);
158142c2
FB
4446 }
4447 else if ( roundingPrecision == 32 ) {
e9321124
AB
4448 roundIncrement = UINT64_C(0x0000008000000000);
4449 roundMask = UINT64_C(0x000000FFFFFFFFFF);
158142c2
FB
4450 }
4451 else {
4452 goto precision80;
4453 }
4454 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
4455 switch (roundingMode) {
4456 case float_round_nearest_even:
f9288a76 4457 case float_round_ties_away:
dc355b76
PM
4458 break;
4459 case float_round_to_zero:
4460 roundIncrement = 0;
4461 break;
4462 case float_round_up:
4463 roundIncrement = zSign ? 0 : roundMask;
4464 break;
4465 case float_round_down:
4466 roundIncrement = zSign ? roundMask : 0;
4467 break;
4468 default:
4469 abort();
158142c2
FB
4470 }
4471 roundBits = zSig0 & roundMask;
bb98fe42 4472 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4473 if ( ( 0x7FFE < zExp )
4474 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4475 ) {
4476 goto overflow;
4477 }
4478 if ( zExp <= 0 ) {
a2f2d288 4479 if (status->flush_to_zero) {
ff32e16e 4480 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4481 return packFloatx80(zSign, 0, 0);
4482 }
a828b373
RH
4483 isTiny = status->tininess_before_rounding
4484 || (zExp < 0 )
4485 || (zSig0 <= zSig0 + roundIncrement);
158142c2
FB
4486 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4487 zExp = 0;
4488 roundBits = zSig0 & roundMask;
ff32e16e
PM
4489 if (isTiny && roundBits) {
4490 float_raise(float_flag_underflow, status);
4491 }
a2f2d288 4492 if (roundBits) {
d82f3b2d 4493 float_raise(float_flag_inexact, status);
a2f2d288 4494 }
158142c2 4495 zSig0 += roundIncrement;
bb98fe42 4496 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4497 roundIncrement = roundMask + 1;
4498 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4499 roundMask |= roundIncrement;
4500 }
4501 zSig0 &= ~ roundMask;
4502 return packFloatx80( zSign, zExp, zSig0 );
4503 }
4504 }
a2f2d288 4505 if (roundBits) {
d82f3b2d 4506 float_raise(float_flag_inexact, status);
a2f2d288 4507 }
158142c2
FB
4508 zSig0 += roundIncrement;
4509 if ( zSig0 < roundIncrement ) {
4510 ++zExp;
e9321124 4511 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4512 }
4513 roundIncrement = roundMask + 1;
4514 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4515 roundMask |= roundIncrement;
4516 }
4517 zSig0 &= ~ roundMask;
4518 if ( zSig0 == 0 ) zExp = 0;
4519 return packFloatx80( zSign, zExp, zSig0 );
4520 precision80:
dc355b76
PM
4521 switch (roundingMode) {
4522 case float_round_nearest_even:
f9288a76 4523 case float_round_ties_away:
dc355b76
PM
4524 increment = ((int64_t)zSig1 < 0);
4525 break;
4526 case float_round_to_zero:
4527 increment = 0;
4528 break;
4529 case float_round_up:
4530 increment = !zSign && zSig1;
4531 break;
4532 case float_round_down:
4533 increment = zSign && zSig1;
4534 break;
4535 default:
4536 abort();
158142c2 4537 }
bb98fe42 4538 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4539 if ( ( 0x7FFE < zExp )
4540 || ( ( zExp == 0x7FFE )
e9321124 4541 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
4542 && increment
4543 )
4544 ) {
4545 roundMask = 0;
4546 overflow:
ff32e16e 4547 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4548 if ( ( roundingMode == float_round_to_zero )
4549 || ( zSign && ( roundingMode == float_round_up ) )
4550 || ( ! zSign && ( roundingMode == float_round_down ) )
4551 ) {
4552 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4553 }
0f605c88
LV
4554 return packFloatx80(zSign,
4555 floatx80_infinity_high,
4556 floatx80_infinity_low);
158142c2
FB
4557 }
4558 if ( zExp <= 0 ) {
a828b373
RH
4559 isTiny = status->tininess_before_rounding
4560 || (zExp < 0)
4561 || !increment
4562 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4563 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4564 zExp = 0;
ff32e16e
PM
4565 if (isTiny && zSig1) {
4566 float_raise(float_flag_underflow, status);
4567 }
a2f2d288 4568 if (zSig1) {
d82f3b2d 4569 float_raise(float_flag_inexact, status);
a2f2d288 4570 }
dc355b76
PM
4571 switch (roundingMode) {
4572 case float_round_nearest_even:
f9288a76 4573 case float_round_ties_away:
dc355b76
PM
4574 increment = ((int64_t)zSig1 < 0);
4575 break;
4576 case float_round_to_zero:
4577 increment = 0;
4578 break;
4579 case float_round_up:
4580 increment = !zSign && zSig1;
4581 break;
4582 case float_round_down:
4583 increment = zSign && zSig1;
4584 break;
4585 default:
4586 abort();
158142c2
FB
4587 }
4588 if ( increment ) {
4589 ++zSig0;
40662886
PMD
4590 if (!(zSig1 << 1) && roundNearestEven) {
4591 zSig0 &= ~1;
4592 }
bb98fe42 4593 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4594 }
4595 return packFloatx80( zSign, zExp, zSig0 );
4596 }
4597 }
a2f2d288 4598 if (zSig1) {
d82f3b2d 4599 float_raise(float_flag_inexact, status);
a2f2d288 4600 }
158142c2
FB
4601 if ( increment ) {
4602 ++zSig0;
4603 if ( zSig0 == 0 ) {
4604 ++zExp;
e9321124 4605 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4606 }
4607 else {
40662886
PMD
4608 if (!(zSig1 << 1) && roundNearestEven) {
4609 zSig0 &= ~1;
4610 }
158142c2
FB
4611 }
4612 }
4613 else {
4614 if ( zSig0 == 0 ) zExp = 0;
4615 }
4616 return packFloatx80( zSign, zExp, zSig0 );
4617
4618}
4619
4620/*----------------------------------------------------------------------------
4621| Takes an abstract floating-point value having sign `zSign', exponent
4622| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4623| and returns the proper extended double-precision floating-point value
4624| corresponding to the abstract input. This routine is just like
4625| `roundAndPackFloatx80' except that the input significand does not have to be
4626| normalized.
4627*----------------------------------------------------------------------------*/
4628
88857aca 4629floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
c120391c 4630 bool zSign, int32_t zExp,
88857aca
LV
4631 uint64_t zSig0, uint64_t zSig1,
4632 float_status *status)
158142c2 4633{
8f506c70 4634 int8_t shiftCount;
158142c2
FB
4635
4636 if ( zSig0 == 0 ) {
4637 zSig0 = zSig1;
4638 zSig1 = 0;
4639 zExp -= 64;
4640 }
0019d5c3 4641 shiftCount = clz64(zSig0);
158142c2
FB
4642 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4643 zExp -= shiftCount;
ff32e16e
PM
4644 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4645 zSig0, zSig1, status);
158142c2
FB
4646
4647}
4648
158142c2
FB
4649/*----------------------------------------------------------------------------
4650| Returns the least-significant 64 fraction bits of the quadruple-precision
4651| floating-point value `a'.
4652*----------------------------------------------------------------------------*/
4653
a49db98d 4654static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4655{
4656
4657 return a.low;
4658
4659}
4660
4661/*----------------------------------------------------------------------------
4662| Returns the most-significant 48 fraction bits of the quadruple-precision
4663| floating-point value `a'.
4664*----------------------------------------------------------------------------*/
4665
a49db98d 4666static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4667{
4668
e9321124 4669 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
158142c2
FB
4670
4671}
4672
4673/*----------------------------------------------------------------------------
4674| Returns the exponent bits of the quadruple-precision floating-point value
4675| `a'.
4676*----------------------------------------------------------------------------*/
4677
f4014512 4678static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4679{
4680
4681 return ( a.high>>48 ) & 0x7FFF;
4682
4683}
4684
4685/*----------------------------------------------------------------------------
4686| Returns the sign bit of the quadruple-precision floating-point value `a'.
4687*----------------------------------------------------------------------------*/
4688
c120391c 4689static inline bool extractFloat128Sign(float128 a)
158142c2 4690{
c120391c 4691 return a.high >> 63;
158142c2
FB
4692}
4693
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision significand `aSig0':`aSig1'.
| The significand is shifted so that its leading one sits 15 bits below the
| top of the high word; the high and low words of the result are stored
| through `zSig0Ptr' and `zSig1Ptr', and the matching exponent is stored
| through `zExpPtr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shift;

    if (aSig0 != 0) {
        /* Leading one is already in the high word. */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /* High word empty: the low word supplies every significant bit. */
    shift = clz64(aSig1) - 15;
    if (shift >= 0) {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        /* Fewer than 15 leading zeros: split aSig1 across both words. */
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
4734
4735/*----------------------------------------------------------------------------
4736| Packs the sign `zSign', the exponent `zExp', and the significand formed
4737| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4738| floating-point value, returning the result. After being shifted into the
4739| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4740| added together to form the most significant 32 bits of the result. This
4741| means that any integer portion of `zSig0' will be added into the exponent.
4742| Since a properly normalized significand will have an integer portion equal
4743| to 1, the `zExp' input should be 1 less than the desired result exponent
4744| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4745| significand.
4746*----------------------------------------------------------------------------*/
4747
a49db98d 4748static inline float128
c120391c 4749packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
158142c2
FB
4750{
4751 float128 z;
4752
4753 z.low = zSig1;
c120391c 4754 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
158142c2 4755 return z;
158142c2
FB
4756}
4757
4758/*----------------------------------------------------------------------------
4759| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4760| and extended significand formed by the concatenation of `zSig0', `zSig1',
4761| and `zSig2', and returns the proper quadruple-precision floating-point value
4762| corresponding to the abstract input. Ordinarily, the abstract value is
4763| simply rounded and packed into the quadruple-precision format, with the
4764| inexact exception raised if the abstract input cannot be represented
4765| exactly. However, if the abstract value is too large, the overflow and
4766| inexact exceptions are raised and an infinity or maximal finite value is
4767| returned. If the abstract value is too small, the input value is rounded to
4768| a subnormal number, and the underflow and inexact exceptions are raised if
4769| the abstract input cannot be represented exactly as a subnormal quadruple-
4770| precision floating-point number.
4771| The input significand must be normalized or smaller. If the input
4772| significand is not normalized, `zExp' must be 0; in that case, the result
4773| returned is a subnormal number, and it must not require rounding. In the
4774| usual case that the input significand is normalized, `zExp' must be 1 less
4775| than the ``true'' floating-point exponent. The handling of underflow and
4776| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4777*----------------------------------------------------------------------------*/
4778
c120391c 4779static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4780 uint64_t zSig0, uint64_t zSig1,
4781 uint64_t zSig2, float_status *status)
158142c2 4782{
8f506c70 4783 int8_t roundingMode;
c120391c 4784 bool roundNearestEven, increment, isTiny;
158142c2 4785
a2f2d288 4786 roundingMode = status->float_rounding_mode;
158142c2 4787 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4788 switch (roundingMode) {
4789 case float_round_nearest_even:
f9288a76 4790 case float_round_ties_away:
dc355b76
PM
4791 increment = ((int64_t)zSig2 < 0);
4792 break;
4793 case float_round_to_zero:
4794 increment = 0;
4795 break;
4796 case float_round_up:
4797 increment = !zSign && zSig2;
4798 break;
4799 case float_round_down:
4800 increment = zSign && zSig2;
4801 break;
9ee6f678
BR
4802 case float_round_to_odd:
4803 increment = !(zSig1 & 0x1) && zSig2;
4804 break;
dc355b76
PM
4805 default:
4806 abort();
158142c2 4807 }
bb98fe42 4808 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
4809 if ( ( 0x7FFD < zExp )
4810 || ( ( zExp == 0x7FFD )
4811 && eq128(
e9321124
AB
4812 UINT64_C(0x0001FFFFFFFFFFFF),
4813 UINT64_C(0xFFFFFFFFFFFFFFFF),
158142c2
FB
4814 zSig0,
4815 zSig1
4816 )
4817 && increment
4818 )
4819 ) {
ff32e16e 4820 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4821 if ( ( roundingMode == float_round_to_zero )
4822 || ( zSign && ( roundingMode == float_round_up ) )
4823 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4824 || (roundingMode == float_round_to_odd)
158142c2
FB
4825 ) {
4826 return
4827 packFloat128(
4828 zSign,
4829 0x7FFE,
e9321124
AB
4830 UINT64_C(0x0000FFFFFFFFFFFF),
4831 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
4832 );
4833 }
4834 return packFloat128( zSign, 0x7FFF, 0, 0 );
4835 }
4836 if ( zExp < 0 ) {
a2f2d288 4837 if (status->flush_to_zero) {
ff32e16e 4838 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4839 return packFloat128(zSign, 0, 0, 0);
4840 }
a828b373
RH
4841 isTiny = status->tininess_before_rounding
4842 || (zExp < -1)
4843 || !increment
4844 || lt128(zSig0, zSig1,
4845 UINT64_C(0x0001FFFFFFFFFFFF),
4846 UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4847 shift128ExtraRightJamming(
4848 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4849 zExp = 0;
ff32e16e
PM
4850 if (isTiny && zSig2) {
4851 float_raise(float_flag_underflow, status);
4852 }
dc355b76
PM
4853 switch (roundingMode) {
4854 case float_round_nearest_even:
f9288a76 4855 case float_round_ties_away:
dc355b76
PM
4856 increment = ((int64_t)zSig2 < 0);
4857 break;
4858 case float_round_to_zero:
4859 increment = 0;
4860 break;
4861 case float_round_up:
4862 increment = !zSign && zSig2;
4863 break;
4864 case float_round_down:
4865 increment = zSign && zSig2;
4866 break;
9ee6f678
BR
4867 case float_round_to_odd:
4868 increment = !(zSig1 & 0x1) && zSig2;
4869 break;
dc355b76
PM
4870 default:
4871 abort();
158142c2
FB
4872 }
4873 }
4874 }
a2f2d288 4875 if (zSig2) {
d82f3b2d 4876 float_raise(float_flag_inexact, status);
a2f2d288 4877 }
158142c2
FB
4878 if ( increment ) {
4879 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
40662886
PMD
4880 if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4881 zSig1 &= ~1;
4882 }
158142c2
FB
4883 }
4884 else {
4885 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4886 }
4887 return packFloat128( zSign, zExp, zSig0, zSig1 );
4888
4889}
4890
4891/*----------------------------------------------------------------------------
4892| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4893| and significand formed by the concatenation of `zSig0' and `zSig1', and
4894| returns the proper quadruple-precision floating-point value corresponding
4895| to the abstract input. This routine is just like `roundAndPackFloat128'
4896| except that the input significand has fewer bits and does not have to be
4897| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4898| point exponent.
4899*----------------------------------------------------------------------------*/
4900
c120391c 4901static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4902 uint64_t zSig0, uint64_t zSig1,
4903 float_status *status)
158142c2 4904{
8f506c70 4905 int8_t shiftCount;
bb98fe42 4906 uint64_t zSig2;
158142c2
FB
4907
4908 if ( zSig0 == 0 ) {
4909 zSig0 = zSig1;
4910 zSig1 = 0;
4911 zExp -= 64;
4912 }
0019d5c3 4913 shiftCount = clz64(zSig0) - 15;
158142c2
FB
4914 if ( 0 <= shiftCount ) {
4915 zSig2 = 0;
4916 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4917 }
4918 else {
4919 shift128ExtraRightJamming(
4920 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4921 }
4922 zExp -= shiftCount;
ff32e16e 4923 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
4924
4925}
4926
158142c2 4927
158142c2
FB
4928/*----------------------------------------------------------------------------
4929| Returns the result of converting the 32-bit two's complement integer `a'
4930| to the extended double-precision floating-point format. The conversion
4931| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4932| Arithmetic.
4933*----------------------------------------------------------------------------*/
4934
e5a41ffa 4935floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2 4936{
c120391c 4937 bool zSign;
3a87d009 4938 uint32_t absA;
8f506c70 4939 int8_t shiftCount;
bb98fe42 4940 uint64_t zSig;
158142c2
FB
4941
4942 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4943 zSign = ( a < 0 );
4944 absA = zSign ? - a : a;
0019d5c3 4945 shiftCount = clz32(absA) + 32;
158142c2
FB
4946 zSig = absA;
4947 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4948
4949}
4950
158142c2
FB
4951/*----------------------------------------------------------------------------
4952| Returns the result of converting the 32-bit two's complement integer `a' to
4953| the quadruple-precision floating-point format. The conversion is performed
4954| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4955*----------------------------------------------------------------------------*/
4956
e5a41ffa 4957float128 int32_to_float128(int32_t a, float_status *status)
158142c2 4958{
c120391c 4959 bool zSign;
3a87d009 4960 uint32_t absA;
8f506c70 4961 int8_t shiftCount;
bb98fe42 4962 uint64_t zSig0;
158142c2
FB
4963
4964 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4965 zSign = ( a < 0 );
4966 absA = zSign ? - a : a;
0019d5c3 4967 shiftCount = clz32(absA) + 17;
158142c2
FB
4968 zSig0 = absA;
4969 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
4970
4971}
4972
158142c2
FB
4973/*----------------------------------------------------------------------------
4974| Returns the result of converting the 64-bit two's complement integer `a'
4975| to the extended double-precision floating-point format. The conversion
4976| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4977| Arithmetic.
4978*----------------------------------------------------------------------------*/
4979
e5a41ffa 4980floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2 4981{
c120391c 4982 bool zSign;
182f42fd 4983 uint64_t absA;
8f506c70 4984 int8_t shiftCount;
158142c2
FB
4985
4986 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4987 zSign = ( a < 0 );
4988 absA = zSign ? - a : a;
0019d5c3 4989 shiftCount = clz64(absA);
158142c2
FB
4990 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4991
4992}
4993
158142c2
FB
4994/*----------------------------------------------------------------------------
4995| Returns the result of converting the 64-bit two's complement integer `a' to
4996| the quadruple-precision floating-point format. The conversion is performed
4997| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4998*----------------------------------------------------------------------------*/
4999
e5a41ffa 5000float128 int64_to_float128(int64_t a, float_status *status)
158142c2 5001{
c120391c 5002 bool zSign;
182f42fd 5003 uint64_t absA;
8f506c70 5004 int8_t shiftCount;
f4014512 5005 int32_t zExp;
bb98fe42 5006 uint64_t zSig0, zSig1;
158142c2
FB
5007
5008 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
5009 zSign = ( a < 0 );
5010 absA = zSign ? - a : a;
0019d5c3 5011 shiftCount = clz64(absA) + 49;
158142c2
FB
5012 zExp = 0x406E - shiftCount;
5013 if ( 64 <= shiftCount ) {
5014 zSig1 = 0;
5015 zSig0 = absA;
5016 shiftCount -= 64;
5017 }
5018 else {
5019 zSig1 = absA;
5020 zSig0 = 0;
5021 }
5022 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5023 return packFloat128( zSign, zExp, zSig0, zSig1 );
5024
5025}
5026
6bb8e0f1
PM
5027/*----------------------------------------------------------------------------
5028| Returns the result of converting the 64-bit unsigned integer `a'
5029| to the quadruple-precision floating-point format. The conversion is performed
5030| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5031*----------------------------------------------------------------------------*/
5032
e5a41ffa 5033float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
5034{
5035 if (a == 0) {
5036 return float128_zero;
5037 }
6603d506 5038 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
5039}
5040
158142c2
FB
5041/*----------------------------------------------------------------------------
5042| Returns the result of converting the single-precision floating-point value
5043| `a' to the extended double-precision floating-point format. The conversion
5044| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5045| Arithmetic.
5046*----------------------------------------------------------------------------*/
5047
e5a41ffa 5048floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2 5049{
c120391c 5050 bool aSign;
0c48262d 5051 int aExp;
bb98fe42 5052 uint32_t aSig;
158142c2 5053
ff32e16e 5054 a = float32_squash_input_denormal(a, status);
158142c2
FB
5055 aSig = extractFloat32Frac( a );
5056 aExp = extractFloat32Exp( a );
5057 aSign = extractFloat32Sign( a );
5058 if ( aExp == 0xFF ) {
ff32e16e 5059 if (aSig) {
7537c2b4
JM
5060 floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
5061 status);
5062 return floatx80_silence_nan(res, status);
ff32e16e 5063 }
0f605c88
LV
5064 return packFloatx80(aSign,
5065 floatx80_infinity_high,
5066 floatx80_infinity_low);
158142c2
FB
5067 }
5068 if ( aExp == 0 ) {
5069 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5070 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5071 }
5072 aSig |= 0x00800000;
bb98fe42 5073 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
5074
5075}
5076
158142c2
FB
5077/*----------------------------------------------------------------------------
5078| Returns the result of converting the single-precision floating-point value
5079| `a' to the double-precision floating-point format. The conversion is
5080| performed according to the IEC/IEEE Standard for Binary Floating-Point
5081| Arithmetic.
5082*----------------------------------------------------------------------------*/
5083
e5a41ffa 5084float128 float32_to_float128(float32 a, float_status *status)
158142c2 5085{
c120391c 5086 bool aSign;
0c48262d 5087 int aExp;
bb98fe42 5088 uint32_t aSig;
158142c2 5089
ff32e16e 5090 a = float32_squash_input_denormal(a, status);
158142c2
FB
5091 aSig = extractFloat32Frac( a );
5092 aExp = extractFloat32Exp( a );
5093 aSign = extractFloat32Sign( a );
5094 if ( aExp == 0xFF ) {
ff32e16e
PM
5095 if (aSig) {
5096 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
5097 }
158142c2
FB
5098 return packFloat128( aSign, 0x7FFF, 0, 0 );
5099 }
5100 if ( aExp == 0 ) {
5101 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5102 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5103 --aExp;
5104 }
bb98fe42 5105 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
5106
5107}
5108
158142c2
FB
5109/*----------------------------------------------------------------------------
5110| Returns the remainder of the single-precision floating-point value `a'
5111| with respect to the corresponding value `b'. The operation is performed
5112| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5113*----------------------------------------------------------------------------*/
5114
e5a41ffa 5115float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 5116{
c120391c 5117 bool aSign, zSign;
0c48262d 5118 int aExp, bExp, expDiff;
bb98fe42
AF
5119 uint32_t aSig, bSig;
5120 uint32_t q;
5121 uint64_t aSig64, bSig64, q64;
5122 uint32_t alternateASig;
5123 int32_t sigMean;
ff32e16e
PM
5124 a = float32_squash_input_denormal(a, status);
5125 b = float32_squash_input_denormal(b, status);
158142c2
FB
5126
5127 aSig = extractFloat32Frac( a );
5128 aExp = extractFloat32Exp( a );
5129 aSign = extractFloat32Sign( a );
5130 bSig = extractFloat32Frac( b );
5131 bExp = extractFloat32Exp( b );
158142c2
FB
5132 if ( aExp == 0xFF ) {
5133 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 5134 return propagateFloat32NaN(a, b, status);
158142c2 5135 }
ff32e16e 5136 float_raise(float_flag_invalid, status);
af39bc8c 5137 return float32_default_nan(status);
158142c2
FB
5138 }
5139 if ( bExp == 0xFF ) {
ff32e16e
PM
5140 if (bSig) {
5141 return propagateFloat32NaN(a, b, status);
5142 }
158142c2
FB
5143 return a;
5144 }
5145 if ( bExp == 0 ) {
5146 if ( bSig == 0 ) {
ff32e16e 5147 float_raise(float_flag_invalid, status);
af39bc8c 5148 return float32_default_nan(status);
158142c2
FB
5149 }
5150 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
5151 }
5152 if ( aExp == 0 ) {
5153 if ( aSig == 0 ) return a;
5154 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5155 }
5156 expDiff = aExp - bExp;
5157 aSig |= 0x00800000;
5158 bSig |= 0x00800000;
5159 if ( expDiff < 32 ) {
5160 aSig <<= 8;
5161 bSig <<= 8;
5162 if ( expDiff < 0 ) {
5163 if ( expDiff < -1 ) return a;
5164 aSig >>= 1;
5165 }
5166 q = ( bSig <= aSig );
5167 if ( q ) aSig -= bSig;
5168 if ( 0 < expDiff ) {
bb98fe42 5169 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
5170 q >>= 32 - expDiff;
5171 bSig >>= 2;
5172 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5173 }
5174 else {
5175 aSig >>= 2;
5176 bSig >>= 2;
5177 }
5178 }
5179 else {
5180 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
5181 aSig64 = ( (uint64_t) aSig )<<40;
5182 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
5183 expDiff -= 64;
5184 while ( 0 < expDiff ) {
5185 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5186 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5187 aSig64 = - ( ( bSig * q64 )<<38 );
5188 expDiff -= 62;
5189 }
5190 expDiff += 64;
5191 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5192 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5193 q = q64>>( 64 - expDiff );
5194 bSig <<= 6;
5195 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5196 }
5197 do {
5198 alternateASig = aSig;
5199 ++q;
5200 aSig -= bSig;
bb98fe42 5201 } while ( 0 <= (int32_t) aSig );
158142c2
FB
5202 sigMean = aSig + alternateASig;
5203 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5204 aSig = alternateASig;
5205 }
bb98fe42 5206 zSign = ( (int32_t) aSig < 0 );
158142c2 5207 if ( zSign ) aSig = - aSig;
ff32e16e 5208 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5209}
5210
369be8f6 5211
158142c2 5212
8229c991
AJ
5213/*----------------------------------------------------------------------------
5214| Returns the binary exponential of the single-precision floating-point value
5215| `a'. The operation is performed according to the IEC/IEEE Standard for
5216| Binary Floating-Point Arithmetic.
5217|
5218| Uses the following identities:
5219|
5220| 1. -------------------------------------------------------------------------
5221| x x*ln(2)
5222| 2 = e
5223|
5224| 2. -------------------------------------------------------------------------
5225| 2 3 4 5 n
5226| x x x x x x x
5227| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5228| 1! 2! 3! 4! 5! n!
5229*----------------------------------------------------------------------------*/
5230
5231static const float64 float32_exp2_coefficients[15] =
5232{
d5138cf4
PM
5233 const_float64( 0x3ff0000000000000ll ), /* 1 */
5234 const_float64( 0x3fe0000000000000ll ), /* 2 */
5235 const_float64( 0x3fc5555555555555ll ), /* 3 */
5236 const_float64( 0x3fa5555555555555ll ), /* 4 */
5237 const_float64( 0x3f81111111111111ll ), /* 5 */
5238 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5239 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5240 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5241 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5242 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5243 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5244 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5245 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5246 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5247 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
5248};
5249
e5a41ffa 5250float32 float32_exp2(float32 a, float_status *status)
8229c991 5251{
c120391c 5252 bool aSign;
0c48262d 5253 int aExp;
bb98fe42 5254 uint32_t aSig;
8229c991
AJ
5255 float64 r, x, xn;
5256 int i;
ff32e16e 5257 a = float32_squash_input_denormal(a, status);
8229c991
AJ
5258
5259 aSig = extractFloat32Frac( a );
5260 aExp = extractFloat32Exp( a );
5261 aSign = extractFloat32Sign( a );
5262
5263 if ( aExp == 0xFF) {
ff32e16e
PM
5264 if (aSig) {
5265 return propagateFloat32NaN(a, float32_zero, status);
5266 }
8229c991
AJ
5267 return (aSign) ? float32_zero : a;
5268 }
5269 if (aExp == 0) {
5270 if (aSig == 0) return float32_one;
5271 }
5272
ff32e16e 5273 float_raise(float_flag_inexact, status);
8229c991
AJ
5274
5275 /* ******************************* */
5276 /* using float64 for approximation */
5277 /* ******************************* */
ff32e16e
PM
5278 x = float32_to_float64(a, status);
5279 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
5280
5281 xn = x;
5282 r = float64_one;
5283 for (i = 0 ; i < 15 ; i++) {
5284 float64 f;
5285
ff32e16e
PM
5286 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5287 r = float64_add(r, f, status);
8229c991 5288
ff32e16e 5289 xn = float64_mul(xn, x, status);
8229c991
AJ
5290 }
5291
5292 return float64_to_float32(r, status);
5293}
5294
374dfc33
AJ
5295/*----------------------------------------------------------------------------
5296| Returns the binary log of the single-precision floating-point value `a'.
5297| The operation is performed according to the IEC/IEEE Standard for Binary
5298| Floating-Point Arithmetic.
5299*----------------------------------------------------------------------------*/
e5a41ffa 5300float32 float32_log2(float32 a, float_status *status)
374dfc33 5301{
c120391c 5302 bool aSign, zSign;
0c48262d 5303 int aExp;
bb98fe42 5304 uint32_t aSig, zSig, i;
374dfc33 5305
ff32e16e 5306 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
5307 aSig = extractFloat32Frac( a );
5308 aExp = extractFloat32Exp( a );
5309 aSign = extractFloat32Sign( a );
5310
5311 if ( aExp == 0 ) {
5312 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5313 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5314 }
5315 if ( aSign ) {
ff32e16e 5316 float_raise(float_flag_invalid, status);
af39bc8c 5317 return float32_default_nan(status);
374dfc33
AJ
5318 }
5319 if ( aExp == 0xFF ) {
ff32e16e
PM
5320 if (aSig) {
5321 return propagateFloat32NaN(a, float32_zero, status);
5322 }
374dfc33
AJ
5323 return a;
5324 }
5325
5326 aExp -= 0x7F;
5327 aSig |= 0x00800000;
5328 zSign = aExp < 0;
5329 zSig = aExp << 23;
5330
5331 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 5332 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
5333 if ( aSig & 0x01000000 ) {
5334 aSig >>= 1;
5335 zSig |= i;
5336 }
5337 }
5338
5339 if ( zSign )
5340 zSig = -zSig;
5341
ff32e16e 5342 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
5343}
5344
158142c2 5345/*----------------------------------------------------------------------------
158142c2
FB
5346| Returns the result of converting the double-precision floating-point value
5347| `a' to the extended double-precision floating-point format. The conversion
5348| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5349| Arithmetic.
5350*----------------------------------------------------------------------------*/
5351
e5a41ffa 5352floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2 5353{
c120391c 5354 bool aSign;
0c48262d 5355 int aExp;
bb98fe42 5356 uint64_t aSig;
158142c2 5357
ff32e16e 5358 a = float64_squash_input_denormal(a, status);
158142c2
FB
5359 aSig = extractFloat64Frac( a );
5360 aExp = extractFloat64Exp( a );
5361 aSign = extractFloat64Sign( a );
5362 if ( aExp == 0x7FF ) {
ff32e16e 5363 if (aSig) {
7537c2b4
JM
5364 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5365 status);
5366 return floatx80_silence_nan(res, status);
ff32e16e 5367 }
0f605c88
LV
5368 return packFloatx80(aSign,
5369 floatx80_infinity_high,
5370 floatx80_infinity_low);
158142c2
FB
5371 }
5372 if ( aExp == 0 ) {
5373 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5374 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5375 }
5376 return
5377 packFloatx80(
e9321124 5378 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
158142c2
FB
5379
5380}
5381
158142c2
FB
5382/*----------------------------------------------------------------------------
5383| Returns the result of converting the double-precision floating-point value
5384| `a' to the quadruple-precision floating-point format. The conversion is
5385| performed according to the IEC/IEEE Standard for Binary Floating-Point
5386| Arithmetic.
5387*----------------------------------------------------------------------------*/
5388
e5a41ffa 5389float128 float64_to_float128(float64 a, float_status *status)
158142c2 5390{
c120391c 5391 bool aSign;
0c48262d 5392 int aExp;
bb98fe42 5393 uint64_t aSig, zSig0, zSig1;
158142c2 5394
ff32e16e 5395 a = float64_squash_input_denormal(a, status);
158142c2
FB
5396 aSig = extractFloat64Frac( a );
5397 aExp = extractFloat64Exp( a );
5398 aSign = extractFloat64Sign( a );
5399 if ( aExp == 0x7FF ) {
ff32e16e
PM
5400 if (aSig) {
5401 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5402 }
158142c2
FB
5403 return packFloat128( aSign, 0x7FFF, 0, 0 );
5404 }
5405 if ( aExp == 0 ) {
5406 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5407 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5408 --aExp;
5409 }
5410 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
5411 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
5412
5413}
5414
158142c2
FB
5415
5416/*----------------------------------------------------------------------------
5417| Returns the remainder of the double-precision floating-point value `a'
5418| with respect to the corresponding value `b'. The operation is performed
5419| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5420*----------------------------------------------------------------------------*/
5421
e5a41ffa 5422float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5423{
c120391c 5424 bool aSign, zSign;
0c48262d 5425 int aExp, bExp, expDiff;
bb98fe42
AF
5426 uint64_t aSig, bSig;
5427 uint64_t q, alternateASig;
5428 int64_t sigMean;
158142c2 5429
ff32e16e
PM
5430 a = float64_squash_input_denormal(a, status);
5431 b = float64_squash_input_denormal(b, status);
158142c2
FB
5432 aSig = extractFloat64Frac( a );
5433 aExp = extractFloat64Exp( a );
5434 aSign = extractFloat64Sign( a );
5435 bSig = extractFloat64Frac( b );
5436 bExp = extractFloat64Exp( b );
158142c2
FB
5437 if ( aExp == 0x7FF ) {
5438 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5439 return propagateFloat64NaN(a, b, status);
158142c2 5440 }
ff32e16e 5441 float_raise(float_flag_invalid, status);
af39bc8c 5442 return float64_default_nan(status);
158142c2
FB
5443 }
5444 if ( bExp == 0x7FF ) {
ff32e16e
PM
5445 if (bSig) {
5446 return propagateFloat64NaN(a, b, status);
5447 }
158142c2
FB
5448 return a;
5449 }
5450 if ( bExp == 0 ) {
5451 if ( bSig == 0 ) {
ff32e16e 5452 float_raise(float_flag_invalid, status);
af39bc8c 5453 return float64_default_nan(status);
158142c2
FB
5454 }
5455 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5456 }
5457 if ( aExp == 0 ) {
5458 if ( aSig == 0 ) return a;
5459 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5460 }
5461 expDiff = aExp - bExp;
e9321124
AB
5462 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5463 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
158142c2
FB
5464 if ( expDiff < 0 ) {
5465 if ( expDiff < -1 ) return a;
5466 aSig >>= 1;
5467 }
5468 q = ( bSig <= aSig );
5469 if ( q ) aSig -= bSig;
5470 expDiff -= 64;
5471 while ( 0 < expDiff ) {
5472 q = estimateDiv128To64( aSig, 0, bSig );
5473 q = ( 2 < q ) ? q - 2 : 0;
5474 aSig = - ( ( bSig>>2 ) * q );
5475 expDiff -= 62;
5476 }
5477 expDiff += 64;
5478 if ( 0 < expDiff ) {
5479 q = estimateDiv128To64( aSig, 0, bSig );
5480 q = ( 2 < q ) ? q - 2 : 0;
5481 q >>= 64 - expDiff;
5482 bSig >>= 2;
5483 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5484 }
5485 else {
5486 aSig >>= 2;
5487 bSig >>= 2;
5488 }
5489 do {
5490 alternateASig = aSig;
5491 ++q;
5492 aSig -= bSig;
bb98fe42 5493 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5494 sigMean = aSig + alternateASig;
5495 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5496 aSig = alternateASig;
5497 }
bb98fe42 5498 zSign = ( (int64_t) aSig < 0 );
158142c2 5499 if ( zSign ) aSig = - aSig;
ff32e16e 5500 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5501
5502}
5503
374dfc33
AJ
5504/*----------------------------------------------------------------------------
5505| Returns the binary log of the double-precision floating-point value `a'.
5506| The operation is performed according to the IEC/IEEE Standard for Binary
5507| Floating-Point Arithmetic.
5508*----------------------------------------------------------------------------*/
e5a41ffa 5509float64 float64_log2(float64 a, float_status *status)
374dfc33 5510{
c120391c 5511 bool aSign, zSign;
0c48262d 5512 int aExp;
bb98fe42 5513 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5514 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5515
5516 aSig = extractFloat64Frac( a );
5517 aExp = extractFloat64Exp( a );
5518 aSign = extractFloat64Sign( a );
5519
5520 if ( aExp == 0 ) {
5521 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5522 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5523 }
5524 if ( aSign ) {
ff32e16e 5525 float_raise(float_flag_invalid, status);
af39bc8c 5526 return float64_default_nan(status);
374dfc33
AJ
5527 }
5528 if ( aExp == 0x7FF ) {
ff32e16e
PM
5529 if (aSig) {
5530 return propagateFloat64NaN(a, float64_zero, status);
5531 }
374dfc33
AJ
5532 return a;
5533 }
5534
5535 aExp -= 0x3FF;
e9321124 5536 aSig |= UINT64_C(0x0010000000000000);
374dfc33 5537 zSign = aExp < 0;
bb98fe42 5538 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5539 for (i = 1LL << 51; i > 0; i >>= 1) {
5540 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5541 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
e9321124 5542 if ( aSig & UINT64_C(0x0020000000000000) ) {
374dfc33
AJ
5543 aSig >>= 1;
5544 zSig |= i;
5545 }
5546 }
5547
5548 if ( zSign )
5549 zSig = -zSig;
ff32e16e 5550 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5551}
5552
158142c2
FB
5553/*----------------------------------------------------------------------------
5554| Returns the result of converting the extended double-precision floating-
5555| point value `a' to the 32-bit two's complement integer format. The
5556| conversion is performed according to the IEC/IEEE Standard for Binary
5557| Floating-Point Arithmetic---which means in particular that the conversion
5558| is rounded according to the current rounding mode. If `a' is a NaN, the
5559| largest positive integer is returned. Otherwise, if the conversion
5560| overflows, the largest integer with the same sign as `a' is returned.
5561*----------------------------------------------------------------------------*/
5562
f4014512 5563int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2 5564{
c120391c 5565 bool aSign;
f4014512 5566 int32_t aExp, shiftCount;
bb98fe42 5567 uint64_t aSig;
158142c2 5568
d1eb8f2a
AD
5569 if (floatx80_invalid_encoding(a)) {
5570 float_raise(float_flag_invalid, status);
5571 return 1 << 31;
5572 }
158142c2
FB
5573 aSig = extractFloatx80Frac( a );
5574 aExp = extractFloatx80Exp( a );
5575 aSign = extractFloatx80Sign( a );
bb98fe42 5576 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5577 shiftCount = 0x4037 - aExp;
5578 if ( shiftCount <= 0 ) shiftCount = 1;
5579 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5580 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5581
5582}
5583
5584/*----------------------------------------------------------------------------
5585| Returns the result of converting the extended double-precision floating-
5586| point value `a' to the 32-bit two's complement integer format. The
5587| conversion is performed according to the IEC/IEEE Standard for Binary
5588| Floating-Point Arithmetic, except that the conversion is always rounded
5589| toward zero. If `a' is a NaN, the largest positive integer is returned.
5590| Otherwise, if the conversion overflows, the largest integer with the same
5591| sign as `a' is returned.
5592*----------------------------------------------------------------------------*/
5593
f4014512 5594int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2 5595{
c120391c 5596 bool aSign;
f4014512 5597 int32_t aExp, shiftCount;
bb98fe42 5598 uint64_t aSig, savedASig;
b3a6a2e0 5599 int32_t z;
158142c2 5600
d1eb8f2a
AD
5601 if (floatx80_invalid_encoding(a)) {
5602 float_raise(float_flag_invalid, status);
5603 return 1 << 31;
5604 }
158142c2
FB
5605 aSig = extractFloatx80Frac( a );
5606 aExp = extractFloatx80Exp( a );
5607 aSign = extractFloatx80Sign( a );
5608 if ( 0x401E < aExp ) {
bb98fe42 5609 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5610 goto invalid;
5611 }
5612 else if ( aExp < 0x3FFF ) {
a2f2d288 5613 if (aExp || aSig) {
d82f3b2d 5614 float_raise(float_flag_inexact, status);
a2f2d288 5615 }
158142c2
FB
5616 return 0;
5617 }
5618 shiftCount = 0x403E - aExp;
5619 savedASig = aSig;
5620 aSig >>= shiftCount;
5621 z = aSig;
5622 if ( aSign ) z = - z;
5623 if ( ( z < 0 ) ^ aSign ) {
5624 invalid:
ff32e16e 5625 float_raise(float_flag_invalid, status);
bb98fe42 5626 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5627 }
5628 if ( ( aSig<<shiftCount ) != savedASig ) {
d82f3b2d 5629 float_raise(float_flag_inexact, status);
158142c2
FB
5630 }
5631 return z;
5632
5633}
5634
5635/*----------------------------------------------------------------------------
5636| Returns the result of converting the extended double-precision floating-
5637| point value `a' to the 64-bit two's complement integer format. The
5638| conversion is performed according to the IEC/IEEE Standard for Binary
5639| Floating-Point Arithmetic---which means in particular that the conversion
5640| is rounded according to the current rounding mode. If `a' is a NaN,
5641| the largest positive integer is returned. Otherwise, if the conversion
5642| overflows, the largest integer with the same sign as `a' is returned.
5643*----------------------------------------------------------------------------*/
5644
f42c2224 5645int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2 5646{
c120391c 5647 bool aSign;
f4014512 5648 int32_t aExp, shiftCount;
bb98fe42 5649 uint64_t aSig, aSigExtra;
158142c2 5650
d1eb8f2a
AD
5651 if (floatx80_invalid_encoding(a)) {
5652 float_raise(float_flag_invalid, status);
5653 return 1ULL << 63;
5654 }
158142c2
FB
5655 aSig = extractFloatx80Frac( a );
5656 aExp = extractFloatx80Exp( a );
5657 aSign = extractFloatx80Sign( a );
5658 shiftCount = 0x403E - aExp;
5659 if ( shiftCount <= 0 ) {
5660 if ( shiftCount ) {
ff32e16e 5661 float_raise(float_flag_invalid, status);
0f605c88 5662 if (!aSign || floatx80_is_any_nan(a)) {
2c217da0 5663 return INT64_MAX;
158142c2 5664 }
2c217da0 5665 return INT64_MIN;
158142c2
FB
5666 }
5667 aSigExtra = 0;
5668 }
5669 else {
5670 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5671 }
ff32e16e 5672 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5673
5674}
5675
5676/*----------------------------------------------------------------------------
5677| Returns the result of converting the extended double-precision floating-
5678| point value `a' to the 64-bit two's complement integer format. The
5679| conversion is performed according to the IEC/IEEE Standard for Binary
5680| Floating-Point Arithmetic, except that the conversion is always rounded
5681| toward zero. If `a' is a NaN, the largest positive integer is returned.
5682| Otherwise, if the conversion overflows, the largest integer with the same
5683| sign as `a' is returned.
5684*----------------------------------------------------------------------------*/
5685
f42c2224 5686int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2 5687{
c120391c 5688 bool aSign;
f4014512 5689 int32_t aExp, shiftCount;
bb98fe42 5690 uint64_t aSig;
f42c2224 5691 int64_t z;
158142c2 5692
d1eb8f2a
AD
5693 if (floatx80_invalid_encoding(a)) {
5694 float_raise(float_flag_invalid, status);
5695 return 1ULL << 63;
5696 }
158142c2
FB
5697 aSig = extractFloatx80Frac( a );
5698 aExp = extractFloatx80Exp( a );
5699 aSign = extractFloatx80Sign( a );
5700 shiftCount = aExp - 0x403E;
5701 if ( 0 <= shiftCount ) {
e9321124 5702 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
158142c2 5703 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5704 float_raise(float_flag_invalid, status);
158142c2 5705 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
2c217da0 5706 return INT64_MAX;
158142c2
FB
5707 }
5708 }
2c217da0 5709 return INT64_MIN;
158142c2
FB
5710 }
5711 else if ( aExp < 0x3FFF ) {
a2f2d288 5712 if (aExp | aSig) {
d82f3b2d 5713 float_raise(float_flag_inexact, status);
a2f2d288 5714 }
158142c2
FB
5715 return 0;
5716 }
5717 z = aSig>>( - shiftCount );
bb98fe42 5718 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
d82f3b2d 5719 float_raise(float_flag_inexact, status);
158142c2
FB
5720 }
5721 if ( aSign ) z = - z;
5722 return z;
5723
5724}
5725
5726/*----------------------------------------------------------------------------
5727| Returns the result of converting the extended double-precision floating-
5728| point value `a' to the single-precision floating-point format. The
5729| conversion is performed according to the IEC/IEEE Standard for Binary
5730| Floating-Point Arithmetic.
5731*----------------------------------------------------------------------------*/
5732
e5a41ffa 5733float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2 5734{
c120391c 5735 bool aSign;
f4014512 5736 int32_t aExp;
bb98fe42 5737 uint64_t aSig;
158142c2 5738
d1eb8f2a
AD
5739 if (floatx80_invalid_encoding(a)) {
5740 float_raise(float_flag_invalid, status);
5741 return float32_default_nan(status);
5742 }
158142c2
FB
5743 aSig = extractFloatx80Frac( a );
5744 aExp = extractFloatx80Exp( a );
5745 aSign = extractFloatx80Sign( a );
5746 if ( aExp == 0x7FFF ) {
bb98fe42 5747 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5748 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5749 status);
5750 return float32_silence_nan(res, status);
158142c2
FB
5751 }
5752 return packFloat32( aSign, 0xFF, 0 );
5753 }
5754 shift64RightJamming( aSig, 33, &aSig );
5755 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5756 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5757
5758}
5759
5760/*----------------------------------------------------------------------------
5761| Returns the result of converting the extended double-precision floating-
5762| point value `a' to the double-precision floating-point format. The
5763| conversion is performed according to the IEC/IEEE Standard for Binary
5764| Floating-Point Arithmetic.
5765*----------------------------------------------------------------------------*/
5766
e5a41ffa 5767float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2 5768{
c120391c 5769 bool aSign;
f4014512 5770 int32_t aExp;
bb98fe42 5771 uint64_t aSig, zSig;
158142c2 5772
d1eb8f2a
AD
5773 if (floatx80_invalid_encoding(a)) {
5774 float_raise(float_flag_invalid, status);
5775 return float64_default_nan(status);
5776 }
158142c2
FB
5777 aSig = extractFloatx80Frac( a );
5778 aExp = extractFloatx80Exp( a );
5779 aSign = extractFloatx80Sign( a );
5780 if ( aExp == 0x7FFF ) {
bb98fe42 5781 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5782 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5783 status);
5784 return float64_silence_nan(res, status);
158142c2
FB
5785 }
5786 return packFloat64( aSign, 0x7FF, 0 );
5787 }
5788 shift64RightJamming( aSig, 1, &zSig );
5789 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5790 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5791
5792}
5793
158142c2
FB
5794/*----------------------------------------------------------------------------
5795| Returns the result of converting the extended double-precision floating-
5796| point value `a' to the quadruple-precision floating-point format. The
5797| conversion is performed according to the IEC/IEEE Standard for Binary
5798| Floating-Point Arithmetic.
5799*----------------------------------------------------------------------------*/
5800
e5a41ffa 5801float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2 5802{
c120391c 5803 bool aSign;
0c48262d 5804 int aExp;
bb98fe42 5805 uint64_t aSig, zSig0, zSig1;
158142c2 5806
d1eb8f2a
AD
5807 if (floatx80_invalid_encoding(a)) {
5808 float_raise(float_flag_invalid, status);
5809 return float128_default_nan(status);
5810 }
158142c2
FB
5811 aSig = extractFloatx80Frac( a );
5812 aExp = extractFloatx80Exp( a );
5813 aSign = extractFloatx80Sign( a );
bb98fe42 5814 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5815 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5816 status);
5817 return float128_silence_nan(res, status);
158142c2
FB
5818 }
5819 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5820 return packFloat128( aSign, aExp, zSig0, zSig1 );
5821
5822}
5823
0f721292
LV
5824/*----------------------------------------------------------------------------
5825| Rounds the extended double-precision floating-point value `a'
5826| to the precision provided by floatx80_rounding_precision and returns the
5827| result as an extended double-precision floating-point value.
5828| The operation is performed according to the IEC/IEEE Standard for Binary
5829| Floating-Point Arithmetic.
5830*----------------------------------------------------------------------------*/
5831
5832floatx80 floatx80_round(floatx80 a, float_status *status)
5833{
5834 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5835 extractFloatx80Sign(a),
5836 extractFloatx80Exp(a),
5837 extractFloatx80Frac(a), 0, status);
5838}
5839
158142c2
FB
5840/*----------------------------------------------------------------------------
5841| Rounds the extended double-precision floating-point value `a' to an integer,
5842| and returns the result as an extended quadruple-precision floating-point
5843| value. The operation is performed according to the IEC/IEEE Standard for
5844| Binary Floating-Point Arithmetic.
5845*----------------------------------------------------------------------------*/
5846
e5a41ffa 5847floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2 5848{
c120391c 5849 bool aSign;
f4014512 5850 int32_t aExp;
bb98fe42 5851 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5852 floatx80 z;
5853
d1eb8f2a
AD
5854 if (floatx80_invalid_encoding(a)) {
5855 float_raise(float_flag_invalid, status);
5856 return floatx80_default_nan(status);
5857 }
158142c2
FB
5858 aExp = extractFloatx80Exp( a );
5859 if ( 0x403E <= aExp ) {
bb98fe42 5860 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5861 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5862 }
5863 return a;
5864 }
5865 if ( aExp < 0x3FFF ) {
5866 if ( ( aExp == 0 )
9ecaf5cc 5867 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
158142c2
FB
5868 return a;
5869 }
d82f3b2d 5870 float_raise(float_flag_inexact, status);
158142c2 5871 aSign = extractFloatx80Sign( a );
a2f2d288 5872 switch (status->float_rounding_mode) {
158142c2 5873 case float_round_nearest_even:
bb98fe42 5874 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5875 ) {
5876 return
e9321124 5877 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5878 }
5879 break;
f9288a76
PM
5880 case float_round_ties_away:
5881 if (aExp == 0x3FFE) {
e9321124 5882 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
f9288a76
PM
5883 }
5884 break;
158142c2
FB
5885 case float_round_down:
5886 return
5887 aSign ?
e9321124 5888 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
158142c2
FB
5889 : packFloatx80( 0, 0, 0 );
5890 case float_round_up:
5891 return
5892 aSign ? packFloatx80( 1, 0, 0 )
e9321124 5893 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
3dede407
RH
5894
5895 case float_round_to_zero:
5896 break;
5897 default:
5898 g_assert_not_reached();
158142c2
FB
5899 }
5900 return packFloatx80( aSign, 0, 0 );
5901 }
5902 lastBitMask = 1;
5903 lastBitMask <<= 0x403E - aExp;
5904 roundBitsMask = lastBitMask - 1;
5905 z = a;
a2f2d288 5906 switch (status->float_rounding_mode) {
dc355b76 5907 case float_round_nearest_even:
158142c2 5908 z.low += lastBitMask>>1;
dc355b76
PM
5909 if ((z.low & roundBitsMask) == 0) {
5910 z.low &= ~lastBitMask;
5911 }
5912 break;
f9288a76
PM
5913 case float_round_ties_away:
5914 z.low += lastBitMask >> 1;
5915 break;
dc355b76
PM
5916 case float_round_to_zero:
5917 break;
5918 case float_round_up:
5919 if (!extractFloatx80Sign(z)) {
5920 z.low += roundBitsMask;
5921 }
5922 break;
5923 case float_round_down:
5924 if (extractFloatx80Sign(z)) {
158142c2
FB
5925 z.low += roundBitsMask;
5926 }
dc355b76
PM
5927 break;
5928 default:
5929 abort();
158142c2
FB
5930 }
5931 z.low &= ~ roundBitsMask;
5932 if ( z.low == 0 ) {
5933 ++z.high;
e9321124 5934 z.low = UINT64_C(0x8000000000000000);
158142c2 5935 }
a2f2d288 5936 if (z.low != a.low) {
d82f3b2d 5937 float_raise(float_flag_inexact, status);
a2f2d288 5938 }
158142c2
FB
5939 return z;
5940
5941}
5942
5943/*----------------------------------------------------------------------------
5944| Returns the result of adding the absolute values of the extended double-
5945| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5946| negated before being returned. `zSign' is ignored if the result is a NaN.
5947| The addition is performed according to the IEC/IEEE Standard for Binary
5948| Floating-Point Arithmetic.
5949*----------------------------------------------------------------------------*/
5950
c120391c 5951static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5952 float_status *status)
158142c2 5953{
f4014512 5954 int32_t aExp, bExp, zExp;
bb98fe42 5955 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5956 int32_t expDiff;
158142c2
FB
5957
5958 aSig = extractFloatx80Frac( a );
5959 aExp = extractFloatx80Exp( a );
5960 bSig = extractFloatx80Frac( b );
5961 bExp = extractFloatx80Exp( b );
5962 expDiff = aExp - bExp;
5963 if ( 0 < expDiff ) {
5964 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5965 if ((uint64_t)(aSig << 1)) {
5966 return propagateFloatx80NaN(a, b, status);
5967 }
158142c2
FB
5968 return a;
5969 }
5970 if ( bExp == 0 ) --expDiff;
5971 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5972 zExp = aExp;
5973 }
5974 else if ( expDiff < 0 ) {
5975 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5976 if ((uint64_t)(bSig << 1)) {
5977 return propagateFloatx80NaN(a, b, status);
5978 }
0f605c88
LV
5979 return packFloatx80(zSign,
5980 floatx80_infinity_high,
5981 floatx80_infinity_low);
158142c2
FB
5982 }
5983 if ( aExp == 0 ) ++expDiff;
5984 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5985 zExp = bExp;
5986 }
5987 else {
5988 if ( aExp == 0x7FFF ) {
bb98fe42 5989 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5990 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5991 }
5992 return a;
5993 }
5994 zSig1 = 0;
5995 zSig0 = aSig + bSig;
5996 if ( aExp == 0 ) {
41602807
JM
5997 if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
5998 /* At least one of the values is a pseudo-denormal,
5999 * and there is a carry out of the result. */
6000 zExp = 1;
6001 goto shiftRight1;
6002 }
2f311075
RH
6003 if (zSig0 == 0) {
6004 return packFloatx80(zSign, 0, 0);
6005 }
158142c2
FB
6006 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
6007 goto roundAndPack;
6008 }
6009 zExp = aExp;
6010 goto shiftRight1;
6011 }
6012 zSig0 = aSig + bSig;
bb98fe42 6013 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
6014 shiftRight1:
6015 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
e9321124 6016 zSig0 |= UINT64_C(0x8000000000000000);
158142c2
FB
6017 ++zExp;
6018 roundAndPack:
a2f2d288 6019 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6020 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6021}
6022
6023/*----------------------------------------------------------------------------
6024| Returns the result of subtracting the absolute values of the extended
6025| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
6026| difference is negated before being returned. `zSign' is ignored if the
6027| result is a NaN. The subtraction is performed according to the IEC/IEEE
6028| Standard for Binary Floating-Point Arithmetic.
6029*----------------------------------------------------------------------------*/
6030
c120391c 6031static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 6032 float_status *status)
158142c2 6033{
f4014512 6034 int32_t aExp, bExp, zExp;
bb98fe42 6035 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 6036 int32_t expDiff;
158142c2
FB
6037
6038 aSig = extractFloatx80Frac( a );
6039 aExp = extractFloatx80Exp( a );
6040 bSig = extractFloatx80Frac( b );
6041 bExp = extractFloatx80Exp( b );
6042 expDiff = aExp - bExp;
6043 if ( 0 < expDiff ) goto aExpBigger;
6044 if ( expDiff < 0 ) goto bExpBigger;
6045 if ( aExp == 0x7FFF ) {
bb98fe42 6046 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 6047 return propagateFloatx80NaN(a, b, status);
158142c2 6048 }
ff32e16e 6049 float_raise(float_flag_invalid, status);
af39bc8c 6050 return floatx80_default_nan(status);
158142c2
FB
6051 }
6052 if ( aExp == 0 ) {
6053 aExp = 1;
6054 bExp = 1;
6055 }
6056 zSig1 = 0;
6057 if ( bSig < aSig ) goto aBigger;
6058 if ( aSig < bSig ) goto bBigger;
a2f2d288 6059 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
6060 bExpBigger:
6061 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6062 if ((uint64_t)(bSig << 1)) {
6063 return propagateFloatx80NaN(a, b, status);
6064 }
0f605c88
LV
6065 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
6066 floatx80_infinity_low);
158142c2
FB
6067 }
6068 if ( aExp == 0 ) ++expDiff;
6069 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
6070 bBigger:
6071 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
6072 zExp = bExp;
6073 zSign ^= 1;
6074 goto normalizeRoundAndPack;
6075 aExpBigger:
6076 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6077 if ((uint64_t)(aSig << 1)) {
6078 return propagateFloatx80NaN(a, b, status);
6079 }
158142c2
FB
6080 return a;
6081 }
6082 if ( bExp == 0 ) --expDiff;
6083 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
6084 aBigger:
6085 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
6086 zExp = aExp;
6087 normalizeRoundAndPack:
a2f2d288 6088 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6089 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6090}
6091
6092/*----------------------------------------------------------------------------
6093| Returns the result of adding the extended double-precision floating-point
6094| values `a' and `b'. The operation is performed according to the IEC/IEEE
6095| Standard for Binary Floating-Point Arithmetic.
6096*----------------------------------------------------------------------------*/
6097
e5a41ffa 6098floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2 6099{
c120391c 6100 bool aSign, bSign;
158142c2 6101
d1eb8f2a
AD
6102 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6103 float_raise(float_flag_invalid, status);
6104 return floatx80_default_nan(status);
6105 }
158142c2
FB
6106 aSign = extractFloatx80Sign( a );
6107 bSign = extractFloatx80Sign( b );
6108 if ( aSign == bSign ) {
ff32e16e 6109 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6110 }
6111 else {
ff32e16e 6112 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6113 }
6114
6115}
6116
6117/*----------------------------------------------------------------------------
6118| Returns the result of subtracting the extended double-precision floating-
6119| point values `a' and `b'. The operation is performed according to the
6120| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6121*----------------------------------------------------------------------------*/
6122
e5a41ffa 6123floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2 6124{
c120391c 6125 bool aSign, bSign;
158142c2 6126
d1eb8f2a
AD
6127 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6128 float_raise(float_flag_invalid, status);
6129 return floatx80_default_nan(status);
6130 }
158142c2
FB
6131 aSign = extractFloatx80Sign( a );
6132 bSign = extractFloatx80Sign( b );
6133 if ( aSign == bSign ) {
ff32e16e 6134 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6135 }
6136 else {
ff32e16e 6137 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6138 }
6139
6140}
6141
6142/*----------------------------------------------------------------------------
6143| Returns the result of multiplying the extended double-precision floating-
6144| point values `a' and `b'. The operation is performed according to the
6145| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6146*----------------------------------------------------------------------------*/
6147
e5a41ffa 6148floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2 6149{
c120391c 6150 bool aSign, bSign, zSign;
f4014512 6151 int32_t aExp, bExp, zExp;
bb98fe42 6152 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 6153
d1eb8f2a
AD
6154 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6155 float_raise(float_flag_invalid, status);
6156 return floatx80_default_nan(status);
6157 }
158142c2
FB
6158 aSig = extractFloatx80Frac( a );
6159 aExp = extractFloatx80Exp( a );
6160 aSign = extractFloatx80Sign( a );
6161 bSig = extractFloatx80Frac( b );
6162 bExp = extractFloatx80Exp( b );
6163 bSign = extractFloatx80Sign( b );
6164 zSign = aSign ^ bSign;
6165 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6166 if ( (uint64_t) ( aSig<<1 )
6167 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6168 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6169 }
6170 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
6171 return packFloatx80(zSign, floatx80_infinity_high,
6172 floatx80_infinity_low);
158142c2
FB
6173 }
6174 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6175 if ((uint64_t)(bSig << 1)) {
6176 return propagateFloatx80NaN(a, b, status);
6177 }
158142c2
FB
6178 if ( ( aExp | aSig ) == 0 ) {
6179 invalid:
ff32e16e 6180 float_raise(float_flag_invalid, status);
af39bc8c 6181 return floatx80_default_nan(status);
158142c2 6182 }
0f605c88
LV
6183 return packFloatx80(zSign, floatx80_infinity_high,
6184 floatx80_infinity_low);
158142c2
FB
6185 }
6186 if ( aExp == 0 ) {
6187 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6188 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6189 }
6190 if ( bExp == 0 ) {
6191 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6192 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6193 }
6194 zExp = aExp + bExp - 0x3FFE;
6195 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 6196 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
6197 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6198 --zExp;
6199 }
a2f2d288 6200 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6201 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6202}
6203
6204/*----------------------------------------------------------------------------
6205| Returns the result of dividing the extended double-precision floating-point
6206| value `a' by the corresponding value `b'. The operation is performed
6207| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6208*----------------------------------------------------------------------------*/
6209
e5a41ffa 6210floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2 6211{
c120391c 6212 bool aSign, bSign, zSign;
f4014512 6213 int32_t aExp, bExp, zExp;
bb98fe42
AF
6214 uint64_t aSig, bSig, zSig0, zSig1;
6215 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6216
d1eb8f2a
AD
6217 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6218 float_raise(float_flag_invalid, status);
6219 return floatx80_default_nan(status);
6220 }
158142c2
FB
6221 aSig = extractFloatx80Frac( a );
6222 aExp = extractFloatx80Exp( a );
6223 aSign = extractFloatx80Sign( a );
6224 bSig = extractFloatx80Frac( b );
6225 bExp = extractFloatx80Exp( b );
6226 bSign = extractFloatx80Sign( b );
6227 zSign = aSign ^ bSign;
6228 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6229 if ((uint64_t)(aSig << 1)) {
6230 return propagateFloatx80NaN(a, b, status);
6231 }
158142c2 6232 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6233 if ((uint64_t)(bSig << 1)) {
6234 return propagateFloatx80NaN(a, b, status);
6235 }
158142c2
FB
6236 goto invalid;
6237 }
0f605c88
LV
6238 return packFloatx80(zSign, floatx80_infinity_high,
6239 floatx80_infinity_low);
158142c2
FB
6240 }
6241 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6242 if ((uint64_t)(bSig << 1)) {
6243 return propagateFloatx80NaN(a, b, status);
6244 }
158142c2
FB
6245 return packFloatx80( zSign, 0, 0 );
6246 }
6247 if ( bExp == 0 ) {
6248 if ( bSig == 0 ) {
6249 if ( ( aExp | aSig ) == 0 ) {
6250 invalid:
ff32e16e 6251 float_raise(float_flag_invalid, status);
af39bc8c 6252 return floatx80_default_nan(status);
158142c2 6253 }
ff32e16e 6254 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6255 return packFloatx80(zSign, floatx80_infinity_high,
6256 floatx80_infinity_low);
158142c2
FB
6257 }
6258 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6259 }
6260 if ( aExp == 0 ) {
6261 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6262 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6263 }
6264 zExp = aExp - bExp + 0x3FFE;
6265 rem1 = 0;
6266 if ( bSig <= aSig ) {
6267 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6268 ++zExp;
6269 }
6270 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6271 mul64To128( bSig, zSig0, &term0, &term1 );
6272 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 6273 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6274 --zSig0;
6275 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6276 }
6277 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 6278 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6279 mul64To128( bSig, zSig1, &term1, &term2 );
6280 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6281 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6282 --zSig1;
6283 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6284 }
6285 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6286 }
a2f2d288 6287 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6288 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6289}
6290
6291/*----------------------------------------------------------------------------
6292| Returns the remainder of the extended double-precision floating-point value
6293| `a' with respect to the corresponding value `b'. The operation is performed
6b8b0136
JM
6294| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6295| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
445810ec
JM
6296| the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6297| the absolute value of the integer quotient.
158142c2
FB
6298*----------------------------------------------------------------------------*/
6299
445810ec 6300floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6b8b0136 6301 float_status *status)
158142c2 6302{
c120391c 6303 bool aSign, zSign;
b662495d 6304 int32_t aExp, bExp, expDiff, aExpOrig;
bb98fe42
AF
6305 uint64_t aSig0, aSig1, bSig;
6306 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6307
445810ec 6308 *quotient = 0;
d1eb8f2a
AD
6309 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6310 float_raise(float_flag_invalid, status);
6311 return floatx80_default_nan(status);
6312 }
158142c2 6313 aSig0 = extractFloatx80Frac( a );
b662495d 6314 aExpOrig = aExp = extractFloatx80Exp( a );
158142c2
FB
6315 aSign = extractFloatx80Sign( a );
6316 bSig = extractFloatx80Frac( b );
6317 bExp = extractFloatx80Exp( b );
158142c2 6318 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6319 if ( (uint64_t) ( aSig0<<1 )
6320 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6321 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6322 }
6323 goto invalid;
6324 }
6325 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6326 if ((uint64_t)(bSig << 1)) {
6327 return propagateFloatx80NaN(a, b, status);
6328 }
b662495d
JM
6329 if (aExp == 0 && aSig0 >> 63) {
6330 /*
6331 * Pseudo-denormal argument must be returned in normalized
6332 * form.
6333 */
6334 return packFloatx80(aSign, 1, aSig0);
6335 }
158142c2
FB
6336 return a;
6337 }
6338 if ( bExp == 0 ) {
6339 if ( bSig == 0 ) {
6340 invalid:
ff32e16e 6341 float_raise(float_flag_invalid, status);
af39bc8c 6342 return floatx80_default_nan(status);
158142c2
FB
6343 }
6344 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6345 }
6346 if ( aExp == 0 ) {
499a2f7b 6347 if ( aSig0 == 0 ) return a;
158142c2
FB
6348 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6349 }
158142c2
FB
6350 zSign = aSign;
6351 expDiff = aExp - bExp;
6352 aSig1 = 0;
6353 if ( expDiff < 0 ) {
b662495d
JM
6354 if ( mod || expDiff < -1 ) {
6355 if (aExp == 1 && aExpOrig == 0) {
6356 /*
6357 * Pseudo-denormal argument must be returned in
6358 * normalized form.
6359 */
6360 return packFloatx80(aSign, aExp, aSig0);
6361 }
6362 return a;
6363 }
158142c2
FB
6364 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6365 expDiff = 0;
6366 }
445810ec 6367 *quotient = q = ( bSig <= aSig0 );
158142c2
FB
6368 if ( q ) aSig0 -= bSig;
6369 expDiff -= 64;
6370 while ( 0 < expDiff ) {
6371 q = estimateDiv128To64( aSig0, aSig1, bSig );
6372 q = ( 2 < q ) ? q - 2 : 0;
6373 mul64To128( bSig, q, &term0, &term1 );
6374 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6375 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6376 expDiff -= 62;
445810ec
JM
6377 *quotient <<= 62;
6378 *quotient += q;
158142c2
FB
6379 }
6380 expDiff += 64;
6381 if ( 0 < expDiff ) {
6382 q = estimateDiv128To64( aSig0, aSig1, bSig );
6383 q = ( 2 < q ) ? q - 2 : 0;
6384 q >>= 64 - expDiff;
6385 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6386 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6387 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6388 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6389 ++q;
6390 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6391 }
445810ec
JM
6392 if (expDiff < 64) {
6393 *quotient <<= expDiff;
6394 } else {
6395 *quotient = 0;
6396 }
6397 *quotient += q;
158142c2
FB
6398 }
6399 else {
6400 term1 = 0;
6401 term0 = bSig;
6402 }
6b8b0136
JM
6403 if (!mod) {
6404 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6405 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6406 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6407 && ( q & 1 ) )
6408 ) {
6409 aSig0 = alternateASig0;
6410 aSig1 = alternateASig1;
6411 zSign = ! zSign;
445810ec 6412 ++*quotient;
6b8b0136 6413 }
158142c2
FB
6414 }
6415 return
6416 normalizeRoundAndPackFloatx80(
ff32e16e 6417 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6418
6419}
6420
6b8b0136
JM
6421/*----------------------------------------------------------------------------
6422| Returns the remainder of the extended double-precision floating-point value
6423| `a' with respect to the corresponding value `b'. The operation is performed
6424| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6425*----------------------------------------------------------------------------*/
6426
6427floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6428{
445810ec
JM
6429 uint64_t quotient;
6430 return floatx80_modrem(a, b, false, &quotient, status);
6b8b0136
JM
6431}
6432
6433/*----------------------------------------------------------------------------
6434| Returns the remainder of the extended double-precision floating-point value
6435| `a' with respect to the corresponding value `b', with the quotient truncated
6436| toward zero.
6437*----------------------------------------------------------------------------*/
6438
6439floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6440{
445810ec
JM
6441 uint64_t quotient;
6442 return floatx80_modrem(a, b, true, &quotient, status);
6b8b0136
JM
6443}
6444
158142c2
FB
6445/*----------------------------------------------------------------------------
6446| Returns the square root of the extended double-precision floating-point
6447| value `a'. The operation is performed according to the IEC/IEEE Standard
6448| for Binary Floating-Point Arithmetic.
6449*----------------------------------------------------------------------------*/
6450
e5a41ffa 6451floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2 6452{
c120391c 6453 bool aSign;
f4014512 6454 int32_t aExp, zExp;
bb98fe42
AF
6455 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6456 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6457
d1eb8f2a
AD
6458 if (floatx80_invalid_encoding(a)) {
6459 float_raise(float_flag_invalid, status);
6460 return floatx80_default_nan(status);
6461 }
158142c2
FB
6462 aSig0 = extractFloatx80Frac( a );
6463 aExp = extractFloatx80Exp( a );
6464 aSign = extractFloatx80Sign( a );
6465 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6466 if ((uint64_t)(aSig0 << 1)) {
6467 return propagateFloatx80NaN(a, a, status);
6468 }
158142c2
FB
6469 if ( ! aSign ) return a;
6470 goto invalid;
6471 }
6472 if ( aSign ) {
6473 if ( ( aExp | aSig0 ) == 0 ) return a;
6474 invalid:
ff32e16e 6475 float_raise(float_flag_invalid, status);
af39bc8c 6476 return floatx80_default_nan(status);
158142c2
FB
6477 }
6478 if ( aExp == 0 ) {
6479 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6480 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6481 }
6482 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6483 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6484 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6485 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6486 doubleZSig0 = zSig0<<1;
6487 mul64To128( zSig0, zSig0, &term0, &term1 );
6488 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6489 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6490 --zSig0;
6491 doubleZSig0 -= 2;
6492 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6493 }
6494 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
e9321124 6495 if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
158142c2
FB
6496 if ( zSig1 == 0 ) zSig1 = 1;
6497 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6498 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6499 mul64To128( zSig1, zSig1, &term2, &term3 );
6500 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6501 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6502 --zSig1;
6503 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6504 term3 |= 1;
6505 term2 |= doubleZSig0;
6506 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6507 }
6508 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6509 }
6510 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6511 zSig0 |= doubleZSig0;
a2f2d288
PM
6512 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6513 0, zExp, zSig0, zSig1, status);
158142c2
FB
6514}
6515
6516/*----------------------------------------------------------------------------
158142c2
FB
6517| Returns the result of converting the quadruple-precision floating-point
6518| value `a' to the 32-bit two's complement integer format. The conversion
6519| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6520| Arithmetic---which means in particular that the conversion is rounded
6521| according to the current rounding mode. If `a' is a NaN, the largest
6522| positive integer is returned. Otherwise, if the conversion overflows, the
6523| largest integer with the same sign as `a' is returned.
6524*----------------------------------------------------------------------------*/
6525
f4014512 6526int32_t float128_to_int32(float128 a, float_status *status)
158142c2 6527{
c120391c 6528 bool aSign;
f4014512 6529 int32_t aExp, shiftCount;
bb98fe42 6530 uint64_t aSig0, aSig1;
158142c2
FB
6531
6532 aSig1 = extractFloat128Frac1( a );
6533 aSig0 = extractFloat128Frac0( a );
6534 aExp = extractFloat128Exp( a );
6535 aSign = extractFloat128Sign( a );
6536 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
e9321124 6537 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6538 aSig0 |= ( aSig1 != 0 );
6539 shiftCount = 0x4028 - aExp;
6540 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 6541 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
6542
6543}
6544
6545/*----------------------------------------------------------------------------
6546| Returns the result of converting the quadruple-precision floating-point
6547| value `a' to the 32-bit two's complement integer format. The conversion
6548| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6549| Arithmetic, except that the conversion is always rounded toward zero. If
6550| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
6551| conversion overflows, the largest integer with the same sign as `a' is
6552| returned.
6553*----------------------------------------------------------------------------*/
6554
f4014512 6555int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2 6556{
c120391c 6557 bool aSign;
f4014512 6558 int32_t aExp, shiftCount;
bb98fe42 6559 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 6560 int32_t z;
158142c2
FB
6561
6562 aSig1 = extractFloat128Frac1( a );
6563 aSig0 = extractFloat128Frac0( a );
6564 aExp = extractFloat128Exp( a );
6565 aSign = extractFloat128Sign( a );
6566 aSig0 |= ( aSig1 != 0 );
6567 if ( 0x401E < aExp ) {
6568 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6569 goto invalid;
6570 }
6571 else if ( aExp < 0x3FFF ) {
a2f2d288 6572 if (aExp || aSig0) {
d82f3b2d 6573 float_raise(float_flag_inexact, status);
a2f2d288 6574 }
158142c2
FB
6575 return 0;
6576 }
e9321124 6577 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6578 shiftCount = 0x402F - aExp;
6579 savedASig = aSig0;
6580 aSig0 >>= shiftCount;
6581 z = aSig0;
6582 if ( aSign ) z = - z;
6583 if ( ( z < 0 ) ^ aSign ) {
6584 invalid:
ff32e16e 6585 float_raise(float_flag_invalid, status);
2c217da0 6586 return aSign ? INT32_MIN : INT32_MAX;
158142c2
FB
6587 }
6588 if ( ( aSig0<<shiftCount ) != savedASig ) {
d82f3b2d 6589 float_raise(float_flag_inexact, status);
158142c2
FB
6590 }
6591 return z;
6592
6593}
6594
6595/*----------------------------------------------------------------------------
6596| Returns the result of converting the quadruple-precision floating-point
6597| value `a' to the 64-bit two's complement integer format. The conversion
6598| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6599| Arithmetic---which means in particular that the conversion is rounded
6600| according to the current rounding mode. If `a' is a NaN, the largest
6601| positive integer is returned. Otherwise, if the conversion overflows, the
6602| largest integer with the same sign as `a' is returned.
6603*----------------------------------------------------------------------------*/
6604
f42c2224 6605int64_t float128_to_int64(float128 a, float_status *status)
158142c2 6606{
c120391c 6607 bool aSign;
f4014512 6608 int32_t aExp, shiftCount;
bb98fe42 6609 uint64_t aSig0, aSig1;
158142c2
FB
6610
6611 aSig1 = extractFloat128Frac1( a );
6612 aSig0 = extractFloat128Frac0( a );
6613 aExp = extractFloat128Exp( a );
6614 aSign = extractFloat128Sign( a );
e9321124 6615 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6616 shiftCount = 0x402F - aExp;
6617 if ( shiftCount <= 0 ) {
6618 if ( 0x403E < aExp ) {
ff32e16e 6619 float_raise(float_flag_invalid, status);
158142c2
FB
6620 if ( ! aSign
6621 || ( ( aExp == 0x7FFF )
e9321124 6622 && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
158142c2
FB
6623 )
6624 ) {
2c217da0 6625 return INT64_MAX;
158142c2 6626 }
2c217da0 6627 return INT64_MIN;
158142c2
FB
6628 }
6629 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6630 }
6631 else {
6632 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6633 }
ff32e16e 6634 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
6635
6636}
6637
6638/*----------------------------------------------------------------------------
6639| Returns the result of converting the quadruple-precision floating-point
6640| value `a' to the 64-bit two's complement integer format. The conversion
6641| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6642| Arithmetic, except that the conversion is always rounded toward zero.
6643| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
6644| the conversion overflows, the largest integer with the same sign as `a' is
6645| returned.
6646*----------------------------------------------------------------------------*/
6647
f42c2224 6648int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2 6649{
c120391c 6650 bool aSign;
f4014512 6651 int32_t aExp, shiftCount;
bb98fe42 6652 uint64_t aSig0, aSig1;
f42c2224 6653 int64_t z;
158142c2
FB
6654
6655 aSig1 = extractFloat128Frac1( a );
6656 aSig0 = extractFloat128Frac0( a );
6657 aExp = extractFloat128Exp( a );
6658 aSign = extractFloat128Sign( a );
e9321124 6659 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6660 shiftCount = aExp - 0x402F;
6661 if ( 0 < shiftCount ) {
6662 if ( 0x403E <= aExp ) {
e9321124
AB
6663 aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
6664 if ( ( a.high == UINT64_C(0xC03E000000000000) )
6665 && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
a2f2d288 6666 if (aSig1) {
d82f3b2d 6667 float_raise(float_flag_inexact, status);
a2f2d288 6668 }
158142c2
FB
6669 }
6670 else {
ff32e16e 6671 float_raise(float_flag_invalid, status);
158142c2 6672 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
2c217da0 6673 return INT64_MAX;
158142c2
FB
6674 }
6675 }
2c217da0 6676 return INT64_MIN;
158142c2
FB
6677 }
6678 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 6679 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
d82f3b2d 6680 float_raise(float_flag_inexact, status);
158142c2
FB
6681 }
6682 }
6683 else {
6684 if ( aExp < 0x3FFF ) {
6685 if ( aExp | aSig0 | aSig1 ) {
d82f3b2d 6686 float_raise(float_flag_inexact, status);
158142c2
FB
6687 }
6688 return 0;
6689 }
6690 z = aSig0>>( - shiftCount );
6691 if ( aSig1
bb98fe42 6692 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
d82f3b2d 6693 float_raise(float_flag_inexact, status);
158142c2
FB
6694 }
6695 }
6696 if ( aSign ) z = - z;
6697 return z;
6698
6699}
6700
2e6d8568
BR
6701/*----------------------------------------------------------------------------
6702| Returns the result of converting the quadruple-precision floating-point value
6703| `a' to the 64-bit unsigned integer format. The conversion is
6704| performed according to the IEC/IEEE Standard for Binary Floating-Point
6705| Arithmetic---which means in particular that the conversion is rounded
6706| according to the current rounding mode. If `a' is a NaN, the largest
6707| positive integer is returned. If the conversion overflows, the
6708| largest unsigned integer is returned. If 'a' is negative, the value is
6709| rounded and zero is returned; negative values that do not round to zero
6710| will raise the inexact exception.
6711*----------------------------------------------------------------------------*/
6712
6713uint64_t float128_to_uint64(float128 a, float_status *status)
6714{
c120391c 6715 bool aSign;
2e6d8568
BR
6716 int aExp;
6717 int shiftCount;
6718 uint64_t aSig0, aSig1;
6719
6720 aSig0 = extractFloat128Frac0(a);
6721 aSig1 = extractFloat128Frac1(a);
6722 aExp = extractFloat128Exp(a);
6723 aSign = extractFloat128Sign(a);
6724 if (aSign && (aExp > 0x3FFE)) {
6725 float_raise(float_flag_invalid, status);
6726 if (float128_is_any_nan(a)) {
2c217da0 6727 return UINT64_MAX;
2e6d8568
BR
6728 } else {
6729 return 0;
6730 }
6731 }
6732 if (aExp) {
2c217da0 6733 aSig0 |= UINT64_C(0x0001000000000000);
2e6d8568
BR
6734 }
6735 shiftCount = 0x402F - aExp;
6736 if (shiftCount <= 0) {
6737 if (0x403E < aExp) {
6738 float_raise(float_flag_invalid, status);
2c217da0 6739 return UINT64_MAX;
2e6d8568
BR
6740 }
6741 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6742 } else {
6743 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6744 }
6745 return roundAndPackUint64(aSign, aSig0, aSig1, status);
6746}
6747
6748uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6749{
6750 uint64_t v;
6751 signed char current_rounding_mode = status->float_rounding_mode;
6752
6753 set_float_rounding_mode(float_round_to_zero, status);
6754 v = float128_to_uint64(a, status);
6755 set_float_rounding_mode(current_rounding_mode, status);
6756
6757 return v;
6758}
6759
158142c2
FB
6760/*----------------------------------------------------------------------------
6761| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
6762| value `a' to the 32-bit unsigned integer format. The conversion
6763| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6764| Arithmetic except that the conversion is always rounded toward zero.
6765| If `a' is a NaN, the largest positive integer is returned. Otherwise,
6766| if the conversion overflows, the largest unsigned integer is returned.
6767| If 'a' is negative, the value is rounded and zero is returned; negative
6768| values that do not round to zero will raise the inexact exception.
6769*----------------------------------------------------------------------------*/
6770
6771uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6772{
6773 uint64_t v;
6774 uint32_t res;
6775 int old_exc_flags = get_float_exception_flags(status);
6776
6777 v = float128_to_uint64_round_to_zero(a, status);
6778 if (v > 0xffffffff) {
6779 res = 0xffffffff;
6780 } else {
6781 return v;
6782 }
6783 set_float_exception_flags(old_exc_flags, status);
e45de992
DH
6784 float_raise(float_flag_invalid, status);
6785 return res;
6786}
6787
6788/*----------------------------------------------------------------------------
6789| Returns the result of converting the quadruple-precision floating-point value
6790| `a' to the 32-bit unsigned integer format. The conversion is
6791| performed according to the IEC/IEEE Standard for Binary Floating-Point
6792| Arithmetic---which means in particular that the conversion is rounded
6793| according to the current rounding mode. If `a' is a NaN, the largest
6794| positive integer is returned. If the conversion overflows, the
6795| largest unsigned integer is returned. If 'a' is negative, the value is
6796| rounded and zero is returned; negative values that do not round to zero
6797| will raise the inexact exception.
6798*----------------------------------------------------------------------------*/
6799
6800uint32_t float128_to_uint32(float128 a, float_status *status)
6801{
6802 uint64_t v;
6803 uint32_t res;
6804 int old_exc_flags = get_float_exception_flags(status);
6805
6806 v = float128_to_uint64(a, status);
6807 if (v > 0xffffffff) {
6808 res = 0xffffffff;
6809 } else {
6810 return v;
6811 }
6812 set_float_exception_flags(old_exc_flags, status);
fd425037
BR
6813 float_raise(float_flag_invalid, status);
6814 return res;
6815}
6816
6817/*----------------------------------------------------------------------------
6818| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
6819| value `a' to the single-precision floating-point format. The conversion
6820| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6821| Arithmetic.
6822*----------------------------------------------------------------------------*/
6823
e5a41ffa 6824float32 float128_to_float32(float128 a, float_status *status)
158142c2 6825{
c120391c 6826 bool aSign;
f4014512 6827 int32_t aExp;
bb98fe42
AF
6828 uint64_t aSig0, aSig1;
6829 uint32_t zSig;
158142c2
FB
6830
6831 aSig1 = extractFloat128Frac1( a );
6832 aSig0 = extractFloat128Frac0( a );
6833 aExp = extractFloat128Exp( a );
6834 aSign = extractFloat128Sign( a );
6835 if ( aExp == 0x7FFF ) {
6836 if ( aSig0 | aSig1 ) {
ff32e16e 6837 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
6838 }
6839 return packFloat32( aSign, 0xFF, 0 );
6840 }
6841 aSig0 |= ( aSig1 != 0 );
6842 shift64RightJamming( aSig0, 18, &aSig0 );
6843 zSig = aSig0;
6844 if ( aExp || zSig ) {
6845 zSig |= 0x40000000;
6846 aExp -= 0x3F81;
6847 }
ff32e16e 6848 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
6849
6850}
6851
6852/*----------------------------------------------------------------------------
6853| Returns the result of converting the quadruple-precision floating-point
6854| value `a' to the double-precision floating-point format. The conversion
6855| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6856| Arithmetic.
6857*----------------------------------------------------------------------------*/
6858
e5a41ffa 6859float64 float128_to_float64(float128 a, float_status *status)
158142c2 6860{
c120391c 6861 bool aSign;
f4014512 6862 int32_t aExp;
bb98fe42 6863 uint64_t aSig0, aSig1;
158142c2
FB
6864
6865 aSig1 = extractFloat128Frac1( a );
6866 aSig0 = extractFloat128Frac0( a );
6867 aExp = extractFloat128Exp( a );
6868 aSign = extractFloat128Sign( a );
6869 if ( aExp == 0x7FFF ) {
6870 if ( aSig0 | aSig1 ) {
ff32e16e 6871 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
6872 }
6873 return packFloat64( aSign, 0x7FF, 0 );
6874 }
6875 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6876 aSig0 |= ( aSig1 != 0 );
6877 if ( aExp || aSig0 ) {
e9321124 6878 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
6879 aExp -= 0x3C01;
6880 }
ff32e16e 6881 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
6882
6883}
6884
158142c2
FB
6885/*----------------------------------------------------------------------------
6886| Returns the result of converting the quadruple-precision floating-point
6887| value `a' to the extended double-precision floating-point format. The
6888| conversion is performed according to the IEC/IEEE Standard for Binary
6889| Floating-Point Arithmetic.
6890*----------------------------------------------------------------------------*/
6891
e5a41ffa 6892floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2 6893{
c120391c 6894 bool aSign;
f4014512 6895 int32_t aExp;
bb98fe42 6896 uint64_t aSig0, aSig1;
158142c2
FB
6897
6898 aSig1 = extractFloat128Frac1( a );
6899 aSig0 = extractFloat128Frac0( a );
6900 aExp = extractFloat128Exp( a );
6901 aSign = extractFloat128Sign( a );
6902 if ( aExp == 0x7FFF ) {
6903 if ( aSig0 | aSig1 ) {
7537c2b4
JM
6904 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6905 status);
6906 return floatx80_silence_nan(res, status);
158142c2 6907 }
0f605c88
LV
6908 return packFloatx80(aSign, floatx80_infinity_high,
6909 floatx80_infinity_low);
158142c2
FB
6910 }
6911 if ( aExp == 0 ) {
6912 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6913 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6914 }
6915 else {
e9321124 6916 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6917 }
6918 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6919 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6920
6921}
6922
158142c2
FB
6923/*----------------------------------------------------------------------------
6924| Rounds the quadruple-precision floating-point value `a' to an integer, and
6925| returns the result as a quadruple-precision floating-point value. The
6926| operation is performed according to the IEC/IEEE Standard for Binary
6927| Floating-Point Arithmetic.
6928*----------------------------------------------------------------------------*/
6929
e5a41ffa 6930float128 float128_round_to_int(float128 a, float_status *status)
158142c2 6931{
c120391c 6932 bool aSign;
f4014512 6933 int32_t aExp;
bb98fe42 6934 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
6935 float128 z;
6936
6937 aExp = extractFloat128Exp( a );
6938 if ( 0x402F <= aExp ) {
6939 if ( 0x406F <= aExp ) {
6940 if ( ( aExp == 0x7FFF )
6941 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6942 ) {
ff32e16e 6943 return propagateFloat128NaN(a, a, status);
158142c2
FB
6944 }
6945 return a;
6946 }
6947 lastBitMask = 1;
6948 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6949 roundBitsMask = lastBitMask - 1;
6950 z = a;
a2f2d288 6951 switch (status->float_rounding_mode) {
dc355b76 6952 case float_round_nearest_even:
158142c2
FB
6953 if ( lastBitMask ) {
6954 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6955 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6956 }
6957 else {
bb98fe42 6958 if ( (int64_t) z.low < 0 ) {
158142c2 6959 ++z.high;
bb98fe42 6960 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
6961 }
6962 }
dc355b76 6963 break;
f9288a76
PM
6964 case float_round_ties_away:
6965 if (lastBitMask) {
6966 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6967 } else {
6968 if ((int64_t) z.low < 0) {
6969 ++z.high;
6970 }
6971 }
6972 break;
dc355b76
PM
6973 case float_round_to_zero:
6974 break;
6975 case float_round_up:
6976 if (!extractFloat128Sign(z)) {
6977 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6978 }
6979 break;
6980 case float_round_down:
6981 if (extractFloat128Sign(z)) {
6982 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 6983 }
dc355b76 6984 break;
5d64abb3
RH
6985 case float_round_to_odd:
6986 /*
6987 * Note that if lastBitMask == 0, the last bit is the lsb
6988 * of high, and roundBitsMask == -1.
6989 */
6990 if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
6991 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6992 }
6993 break;
dc355b76
PM
6994 default:
6995 abort();
158142c2
FB
6996 }
6997 z.low &= ~ roundBitsMask;
6998 }
6999 else {
7000 if ( aExp < 0x3FFF ) {
bb98fe42 7001 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
d82f3b2d 7002 float_raise(float_flag_inexact, status);
158142c2 7003 aSign = extractFloat128Sign( a );
a2f2d288 7004 switch (status->float_rounding_mode) {
5d64abb3 7005 case float_round_nearest_even:
158142c2
FB
7006 if ( ( aExp == 0x3FFE )
7007 && ( extractFloat128Frac0( a )
7008 | extractFloat128Frac1( a ) )
7009 ) {
7010 return packFloat128( aSign, 0x3FFF, 0, 0 );
7011 }
7012 break;
f9288a76
PM
7013 case float_round_ties_away:
7014 if (aExp == 0x3FFE) {
7015 return packFloat128(aSign, 0x3FFF, 0, 0);
7016 }
7017 break;
5d64abb3 7018 case float_round_down:
158142c2
FB
7019 return
7020 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
7021 : packFloat128( 0, 0, 0, 0 );
5d64abb3 7022 case float_round_up:
158142c2
FB
7023 return
7024 aSign ? packFloat128( 1, 0, 0, 0 )
7025 : packFloat128( 0, 0x3FFF, 0, 0 );
5d64abb3
RH
7026
7027 case float_round_to_odd:
7028 return packFloat128(aSign, 0x3FFF, 0, 0);
3dede407
RH
7029
7030 case float_round_to_zero:
7031 break;
158142c2
FB
7032 }
7033 return packFloat128( aSign, 0, 0, 0 );
7034 }
7035 lastBitMask = 1;
7036 lastBitMask <<= 0x402F - aExp;
7037 roundBitsMask = lastBitMask - 1;
7038 z.low = 0;
7039 z.high = a.high;
a2f2d288 7040 switch (status->float_rounding_mode) {
dc355b76 7041 case float_round_nearest_even:
158142c2
FB
7042 z.high += lastBitMask>>1;
7043 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
7044 z.high &= ~ lastBitMask;
7045 }
dc355b76 7046 break;
f9288a76
PM
7047 case float_round_ties_away:
7048 z.high += lastBitMask>>1;
7049 break;
dc355b76
PM
7050 case float_round_to_zero:
7051 break;
7052 case float_round_up:
7053 if (!extractFloat128Sign(z)) {
158142c2
FB
7054 z.high |= ( a.low != 0 );
7055 z.high += roundBitsMask;
7056 }
dc355b76
PM
7057 break;
7058 case float_round_down:
7059 if (extractFloat128Sign(z)) {
7060 z.high |= (a.low != 0);
7061 z.high += roundBitsMask;
7062 }
7063 break;
5d64abb3
RH
7064 case float_round_to_odd:
7065 if ((z.high & lastBitMask) == 0) {
7066 z.high |= (a.low != 0);
7067 z.high += roundBitsMask;
7068 }
7069 break;
dc355b76
PM
7070 default:
7071 abort();
158142c2
FB
7072 }
7073 z.high &= ~ roundBitsMask;
7074 }
7075 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
d82f3b2d 7076 float_raise(float_flag_inexact, status);
158142c2
FB
7077 }
7078 return z;
7079
7080}
7081
158142c2
FB
7082/*----------------------------------------------------------------------------
7083| Returns the result of dividing the quadruple-precision floating-point value
7084| `a' by the corresponding value `b'. The operation is performed according to
7085| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7086*----------------------------------------------------------------------------*/
7087
e5a41ffa 7088float128 float128_div(float128 a, float128 b, float_status *status)
158142c2 7089{
c120391c 7090 bool aSign, bSign, zSign;
f4014512 7091 int32_t aExp, bExp, zExp;
bb98fe42
AF
7092 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
7093 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7094
7095 aSig1 = extractFloat128Frac1( a );
7096 aSig0 = extractFloat128Frac0( a );
7097 aExp = extractFloat128Exp( a );
7098 aSign = extractFloat128Sign( a );
7099 bSig1 = extractFloat128Frac1( b );
7100 bSig0 = extractFloat128Frac0( b );
7101 bExp = extractFloat128Exp( b );
7102 bSign = extractFloat128Sign( b );
7103 zSign = aSign ^ bSign;
7104 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7105 if (aSig0 | aSig1) {
7106 return propagateFloat128NaN(a, b, status);
7107 }
158142c2 7108 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7109 if (bSig0 | bSig1) {
7110 return propagateFloat128NaN(a, b, status);
7111 }
158142c2
FB
7112 goto invalid;
7113 }
7114 return packFloat128( zSign, 0x7FFF, 0, 0 );
7115 }
7116 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7117 if (bSig0 | bSig1) {
7118 return propagateFloat128NaN(a, b, status);
7119 }
158142c2
FB
7120 return packFloat128( zSign, 0, 0, 0 );
7121 }
7122 if ( bExp == 0 ) {
7123 if ( ( bSig0 | bSig1 ) == 0 ) {
7124 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7125 invalid:
ff32e16e 7126 float_raise(float_flag_invalid, status);
af39bc8c 7127 return float128_default_nan(status);
158142c2 7128 }
ff32e16e 7129 float_raise(float_flag_divbyzero, status);
158142c2
FB
7130 return packFloat128( zSign, 0x7FFF, 0, 0 );
7131 }
7132 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7133 }
7134 if ( aExp == 0 ) {
7135 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7136 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7137 }
7138 zExp = aExp - bExp + 0x3FFD;
7139 shortShift128Left(
e9321124 7140 aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 );
158142c2 7141 shortShift128Left(
e9321124 7142 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
7143 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
7144 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
7145 ++zExp;
7146 }
7147 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
7148 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
7149 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 7150 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7151 --zSig0;
7152 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
7153 }
7154 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
7155 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
7156 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
7157 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7158 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7159 --zSig1;
7160 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
7161 }
7162 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7163 }
7164 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 7165 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7166
7167}
7168
7169/*----------------------------------------------------------------------------
7170| Returns the remainder of the quadruple-precision floating-point value `a'
7171| with respect to the corresponding value `b'. The operation is performed
7172| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7173*----------------------------------------------------------------------------*/
7174
e5a41ffa 7175float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 7176{
c120391c 7177 bool aSign, zSign;
f4014512 7178 int32_t aExp, bExp, expDiff;
bb98fe42
AF
7179 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7180 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
7181 int64_t sigMean0;
158142c2
FB
7182
7183 aSig1 = extractFloat128Frac1( a );
7184 aSig0 = extractFloat128Frac0( a );
7185 aExp = extractFloat128Exp( a );
7186 aSign = extractFloat128Sign( a );
7187 bSig1 = extractFloat128Frac1( b );
7188 bSig0 = extractFloat128Frac0( b );
7189 bExp = extractFloat128Exp( b );
158142c2
FB
7190 if ( aExp == 0x7FFF ) {
7191 if ( ( aSig0 | aSig1 )
7192 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7193 return propagateFloat128NaN(a, b, status);
158142c2
FB
7194 }
7195 goto invalid;
7196 }
7197 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7198 if (bSig0 | bSig1) {
7199 return propagateFloat128NaN(a, b, status);
7200 }
158142c2
FB
7201 return a;
7202 }
7203 if ( bExp == 0 ) {
7204 if ( ( bSig0 | bSig1 ) == 0 ) {
7205 invalid:
ff32e16e 7206 float_raise(float_flag_invalid, status);
af39bc8c 7207 return float128_default_nan(status);
158142c2
FB
7208 }
7209 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7210 }
7211 if ( aExp == 0 ) {
7212 if ( ( aSig0 | aSig1 ) == 0 ) return a;
7213 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7214 }
7215 expDiff = aExp - bExp;
7216 if ( expDiff < -1 ) return a;
7217 shortShift128Left(
e9321124 7218 aSig0 | UINT64_C(0x0001000000000000),
158142c2
FB
7219 aSig1,
7220 15 - ( expDiff < 0 ),
7221 &aSig0,
7222 &aSig1
7223 );
7224 shortShift128Left(
e9321124 7225 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
7226 q = le128( bSig0, bSig1, aSig0, aSig1 );
7227 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7228 expDiff -= 64;
7229 while ( 0 < expDiff ) {
7230 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7231 q = ( 4 < q ) ? q - 4 : 0;
7232 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7233 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
7234 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
7235 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
7236 expDiff -= 61;
7237 }
7238 if ( -64 < expDiff ) {
7239 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7240 q = ( 4 < q ) ? q - 4 : 0;
7241 q >>= - expDiff;
7242 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7243 expDiff += 52;
7244 if ( expDiff < 0 ) {
7245 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7246 }
7247 else {
7248 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7249 }
7250 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7251 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7252 }
7253 else {
7254 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
7255 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7256 }
7257 do {
7258 alternateASig0 = aSig0;
7259 alternateASig1 = aSig1;
7260 ++q;
7261 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 7262 } while ( 0 <= (int64_t) aSig0 );
158142c2 7263 add128(
bb98fe42 7264 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
7265 if ( ( sigMean0 < 0 )
7266 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
7267 aSig0 = alternateASig0;
7268 aSig1 = alternateASig1;
7269 }
bb98fe42 7270 zSign = ( (int64_t) aSig0 < 0 );
158142c2 7271 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
7272 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
7273 status);
158142c2
FB
7274}
7275
7276/*----------------------------------------------------------------------------
7277| Returns the square root of the quadruple-precision floating-point value `a'.
7278| The operation is performed according to the IEC/IEEE Standard for Binary
7279| Floating-Point Arithmetic.
7280*----------------------------------------------------------------------------*/
7281
e5a41ffa 7282float128 float128_sqrt(float128 a, float_status *status)
158142c2 7283{
c120391c 7284 bool aSign;
f4014512 7285 int32_t aExp, zExp;
bb98fe42
AF
7286 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7287 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7288
7289 aSig1 = extractFloat128Frac1( a );
7290 aSig0 = extractFloat128Frac0( a );
7291 aExp = extractFloat128Exp( a );
7292 aSign = extractFloat128Sign( a );
7293 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7294 if (aSig0 | aSig1) {
7295 return propagateFloat128NaN(a, a, status);
7296 }
158142c2
FB
7297 if ( ! aSign ) return a;
7298 goto invalid;
7299 }
7300 if ( aSign ) {
7301 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
7302 invalid:
ff32e16e 7303 float_raise(float_flag_invalid, status);
af39bc8c 7304 return float128_default_nan(status);
158142c2
FB
7305 }
7306 if ( aExp == 0 ) {
7307 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
7308 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7309 }
7310 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
e9321124 7311 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7312 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
7313 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
7314 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
7315 doubleZSig0 = zSig0<<1;
7316 mul64To128( zSig0, zSig0, &term0, &term1 );
7317 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 7318 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7319 --zSig0;
7320 doubleZSig0 -= 2;
7321 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
7322 }
7323 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
7324 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7325 if ( zSig1 == 0 ) zSig1 = 1;
7326 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7327 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7328 mul64To128( zSig1, zSig1, &term2, &term3 );
7329 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7330 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7331 --zSig1;
7332 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7333 term3 |= 1;
7334 term2 |= doubleZSig0;
7335 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7336 }
7337 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7338 }
7339 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 7340 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7341
7342}
7343
71bfd65c
RH
7344static inline FloatRelation
7345floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
7346 float_status *status)
f6714d36 7347{
c120391c 7348 bool aSign, bSign;
f6714d36 7349
d1eb8f2a
AD
7350 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7351 float_raise(float_flag_invalid, status);
7352 return float_relation_unordered;
7353 }
f6714d36
AJ
7354 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7355 ( extractFloatx80Frac( a )<<1 ) ) ||
7356 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7357 ( extractFloatx80Frac( b )<<1 ) )) {
7358 if (!is_quiet ||
af39bc8c
AM
7359 floatx80_is_signaling_nan(a, status) ||
7360 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7361 float_raise(float_flag_invalid, status);
f6714d36
AJ
7362 }
7363 return float_relation_unordered;
7364 }
7365 aSign = extractFloatx80Sign( a );
7366 bSign = extractFloatx80Sign( b );
7367 if ( aSign != bSign ) {
7368
7369 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7370 ( ( a.low | b.low ) == 0 ) ) {
7371 /* zero case */
7372 return float_relation_equal;
7373 } else {
7374 return 1 - (2 * aSign);
7375 }
7376 } else {
be53fa78
JM
7377 /* Normalize pseudo-denormals before comparison. */
7378 if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
7379 ++a.high;
7380 }
7381 if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
7382 ++b.high;
7383 }
f6714d36
AJ
7384 if (a.low == b.low && a.high == b.high) {
7385 return float_relation_equal;
7386 } else {
7387 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7388 }
7389 }
7390}
7391
71bfd65c 7392FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7393{
ff32e16e 7394 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7395}
7396
71bfd65c
RH
7397FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
7398 float_status *status)
f6714d36 7399{
ff32e16e 7400 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7401}
7402
71bfd65c
RH
7403static inline FloatRelation
7404float128_compare_internal(float128 a, float128 b, bool is_quiet,
7405 float_status *status)
1f587329 7406{
c120391c 7407 bool aSign, bSign;
1f587329
BS
7408
7409 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7410 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7411 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7412 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7413 if (!is_quiet ||
af39bc8c
AM
7414 float128_is_signaling_nan(a, status) ||
7415 float128_is_signaling_nan(b, status)) {
ff32e16e 7416 float_raise(float_flag_invalid, status);
1f587329
BS
7417 }
7418 return float_relation_unordered;
7419 }
7420 aSign = extractFloat128Sign( a );
7421 bSign = extractFloat128Sign( b );
7422 if ( aSign != bSign ) {
7423 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7424 /* zero case */
7425 return float_relation_equal;
7426 } else {
7427 return 1 - (2 * aSign);
7428 }
7429 } else {
7430 if (a.low == b.low && a.high == b.high) {
7431 return float_relation_equal;
7432 } else {
7433 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7434 }
7435 }
7436}
7437
71bfd65c 7438FloatRelation float128_compare(float128 a, float128 b, float_status *status)
1f587329 7439{
ff32e16e 7440 return float128_compare_internal(a, b, 0, status);
1f587329
BS
7441}
7442
71bfd65c
RH
7443FloatRelation float128_compare_quiet(float128 a, float128 b,
7444 float_status *status)
1f587329 7445{
ff32e16e 7446 return float128_compare_internal(a, b, 1, status);
1f587329
BS
7447}
7448
e5a41ffa 7449floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb 7450{
c120391c 7451 bool aSign;
326b9e98 7452 int32_t aExp;
bb98fe42 7453 uint64_t aSig;
9ee6e8bb 7454
d1eb8f2a
AD
7455 if (floatx80_invalid_encoding(a)) {
7456 float_raise(float_flag_invalid, status);
7457 return floatx80_default_nan(status);
7458 }
9ee6e8bb
PB
7459 aSig = extractFloatx80Frac( a );
7460 aExp = extractFloatx80Exp( a );
7461 aSign = extractFloatx80Sign( a );
7462
326b9e98
AJ
7463 if ( aExp == 0x7FFF ) {
7464 if ( aSig<<1 ) {
ff32e16e 7465 return propagateFloatx80NaN(a, a, status);
326b9e98 7466 }
9ee6e8bb
PB
7467 return a;
7468 }
326b9e98 7469
3c85c37f
PM
7470 if (aExp == 0) {
7471 if (aSig == 0) {
7472 return a;
7473 }
7474 aExp++;
7475 }
69397542 7476
326b9e98
AJ
7477 if (n > 0x10000) {
7478 n = 0x10000;
7479 } else if (n < -0x10000) {
7480 n = -0x10000;
7481 }
7482
9ee6e8bb 7483 aExp += n;
a2f2d288
PM
7484 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7485 aSign, aExp, aSig, 0, status);
9ee6e8bb 7486}
9ee6e8bb 7487
e5a41ffa 7488float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb 7489{
c120391c 7490 bool aSign;
326b9e98 7491 int32_t aExp;
bb98fe42 7492 uint64_t aSig0, aSig1;
9ee6e8bb
PB
7493
7494 aSig1 = extractFloat128Frac1( a );
7495 aSig0 = extractFloat128Frac0( a );
7496 aExp = extractFloat128Exp( a );
7497 aSign = extractFloat128Sign( a );
7498 if ( aExp == 0x7FFF ) {
326b9e98 7499 if ( aSig0 | aSig1 ) {
ff32e16e 7500 return propagateFloat128NaN(a, a, status);
326b9e98 7501 }
9ee6e8bb
PB
7502 return a;
7503 }
3c85c37f 7504 if (aExp != 0) {
e9321124 7505 aSig0 |= UINT64_C(0x0001000000000000);
3c85c37f 7506 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 7507 return a;
3c85c37f
PM
7508 } else {
7509 aExp++;
7510 }
69397542 7511
326b9e98
AJ
7512 if (n > 0x10000) {
7513 n = 0x10000;
7514 } else if (n < -0x10000) {
7515 n = -0x10000;
7516 }
7517
69397542
PB
7518 aExp += n - 1;
7519 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 7520 , status);
9ee6e8bb
PB
7521
7522}
f6b3b108
EC
7523
7524static void __attribute__((constructor)) softfloat_init(void)
7525{
7526 union_float64 ua, ub, uc, ur;
7527
7528 if (QEMU_NO_HARDFLOAT) {
7529 return;
7530 }
7531 /*
7532 * Test that the host's FMA is not obviously broken. For example,
7533 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
7534 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
7535 */
7536 ua.s = 0x0020000000000001ULL;
7537 ub.s = 0x3ca0000000000000ULL;
7538 uc.s = 0x0020000000000000ULL;
7539 ur.h = fma(ua.h, ub.h, uc.h);
7540 if (ur.s != 0x0020000000000001ULL) {
7541 force_soft_fma = true;
7542 }
7543}