]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
tests/fp: add quad support to the benchmark utility
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, FP instruction semantics are similar but not identical, particularly
 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 * exception flags is not trivial: reading the host's flags register with a
 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is neither fast nor pleasant to work
 * with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is < the minimum normal.
 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 s->float_exception_flags |= float_flag_input_denormal; \
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
/*
 * Select, per (number of inputs, float size) combination, whether the
 * generated hardfloat functions classify operands with fpclassify (1) or
 * with the float32/64_* softfloat primitives (0).
 */

/* The float32 variants use the softfloat primitives on every host. */
#define QEMU_HARDFLOAT_1F32_USE_FP 0
#define QEMU_HARDFLOAT_2F32_USE_FP 0
#define QEMU_HARDFLOAT_3F32_USE_FP 0

/* The float64 variants use fpclassify on x86_64 hosts only. */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205
/*
 * QEMU_HARDFLOAT_USE_ISINF selects isinf() instead of
 * float{32,64}_is_infinity for the infinity check when !USE_FP.
 * On x86_64/aarch64, isinf() can yield a ~6% speedup.
 * On power64 however, isinf() reduces fp-bench performance by up to 50%.
 */
#if !defined(__x86_64__) && !defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF 0
#else
# define QEMU_HARDFLOAT_USE_ISINF 1
#endif
217
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.  Hardfloat is also disabled under -ffast-math.
 *
 * QEMU_SOFTFLOAT_ATTR additionally carries noinline only when hardfloat is
 * enabled.
 */
#if defined(__FAST_MATH__)
# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
  IEEE implementation
#endif

#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
a94b7839
EC
342static inline float32
343float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
b240c9c4 345 f32_check_fn pre, f32_check_fn post)
a94b7839
EC
346{
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
a94b7839
EC
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
363 s->float_exception_flags |= float_flag_overflow;
b240c9c4
RH
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
a94b7839
EC
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371}
372
373static inline float64
374float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
b240c9c4 376 f64_check_fn pre, f64_check_fn post)
a94b7839
EC
377{
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
a94b7839
EC
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
394 s->float_exception_flags |= float_flag_overflow;
b240c9c4
RH
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
a94b7839
EC
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402}
403
d97544c9
AB
404/*----------------------------------------------------------------------------
405| Returns the fraction bits of the single-precision floating-point value `a'.
406*----------------------------------------------------------------------------*/
407
408static inline uint32_t extractFloat32Frac(float32 a)
409{
410 return float32_val(a) & 0x007FFFFF;
411}
412
413/*----------------------------------------------------------------------------
414| Returns the exponent bits of the single-precision floating-point value `a'.
415*----------------------------------------------------------------------------*/
416
417static inline int extractFloat32Exp(float32 a)
418{
419 return (float32_val(a) >> 23) & 0xFF;
420}
421
422/*----------------------------------------------------------------------------
423| Returns the sign bit of the single-precision floating-point value `a'.
424*----------------------------------------------------------------------------*/
425
c120391c 426static inline bool extractFloat32Sign(float32 a)
d97544c9
AB
427{
428 return float32_val(a) >> 31;
429}
430
431/*----------------------------------------------------------------------------
432| Returns the fraction bits of the double-precision floating-point value `a'.
433*----------------------------------------------------------------------------*/
434
435static inline uint64_t extractFloat64Frac(float64 a)
436{
e9321124 437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
d97544c9
AB
438}
439
440/*----------------------------------------------------------------------------
441| Returns the exponent bits of the double-precision floating-point value `a'.
442*----------------------------------------------------------------------------*/
443
444static inline int extractFloat64Exp(float64 a)
445{
446 return (float64_val(a) >> 52) & 0x7FF;
447}
448
449/*----------------------------------------------------------------------------
450| Returns the sign bit of the double-precision floating-point value `a'.
451*----------------------------------------------------------------------------*/
452
c120391c 453static inline bool extractFloat64Sign(float64 a)
d97544c9
AB
454{
455 return float64_val(a) >> 63;
456}
457
a90119b5
AB
/*
 * Classification of a floating point number.  The enumerators are ordered
 * so that every NaN class compares >= float_class_qnan; is_nan() relies on
 * that ordering, so new non-NaN classes must go before float_class_qnan.
 */
typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified = 0,
    float_class_zero         = 1,
    float_class_normal       = 2,
    float_class_inf          = 3,
    float_class_qnan         = 4,  /* all NaNs from here */
    float_class_snan         = 5,
} FloatClass;
471
247d1f21
RH
472/* Simple helpers for checking if, or what kind of, NaN we have */
473static inline __attribute__((unused)) bool is_nan(FloatClass c)
474{
475 return unlikely(c >= float_class_qnan);
476}
477
478static inline __attribute__((unused)) bool is_snan(FloatClass c)
479{
480 return c == float_class_snan;
481}
482
483static inline __attribute__((unused)) bool is_qnan(FloatClass c)
484{
485 return c == float_class_qnan;
486}
487
a90119b5
AB
488/*
489 * Structure holding all of the decomposed parts of a float. The
490 * exponent is unbiased and the fraction is normalized. All
491 * calculations are done with a 64 bit fraction and then rounded as
492 * appropriate for the final format.
493 *
494 * Thanks to the packed FloatClass a decent compiler should be able to
495 * fit the whole structure into registers and avoid using the stack
496 * for parameter passing.
497 */
498
499typedef struct {
500 uint64_t frac;
501 int32_t exp;
502 FloatClass cls;
503 bool sign;
504} FloatParts;
505
/* Bit position of the implicit (integer) bit in a decomposed fraction. */
#define DECOMPOSED_BINARY_POINT    (64 - 2)
/* Mask of the implicit bit itself. */
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
/* Mask of the bit directly above the implicit bit. */
#define DECOMPOSED_OVERFLOW_BIT    (DECOMPOSED_IMPLICIT_BIT << 1)
509
/*
 * Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction:
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;
535
/*
 * Expand to the designated initializers of a FloatFmt, derived from the
 * exponent field size E and the fraction field size F (both in bits).
 */
#define FLOAT_PARAMS(E, F)                                               \
    .exp_size       = (E),                                               \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                             \
    .exp_max        = (1 << (E)) - 1,                                    \
    .frac_size      = (F),                                               \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                     \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),           \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1),     \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1,     \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1
547
548static const FloatFmt float16_params = {
549 FLOAT_PARAMS(5, 10)
550};
551
6fed16b2
AB
552static const FloatFmt float16_params_ahp = {
553 FLOAT_PARAMS(5, 10),
554 .arm_althp = true
555};
556
8282310d
LZ
557static const FloatFmt bfloat16_params = {
558 FLOAT_PARAMS(8, 7)
559};
560
a90119b5
AB
561static const FloatFmt float32_params = {
562 FLOAT_PARAMS(8, 23)
563};
564
565static const FloatFmt float64_params = {
566 FLOAT_PARAMS(11, 52)
567};
568
6fff2167
AB
569/* Unpack a float to parts, but do not canonicalize. */
570static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
571{
572 const int sign_pos = fmt.frac_size + fmt.exp_size;
573
574 return (FloatParts) {
575 .cls = float_class_unclassified,
576 .sign = extract64(raw, sign_pos, 1),
577 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
578 .frac = extract64(raw, 0, fmt.frac_size),
579 };
580}
581
582static inline FloatParts float16_unpack_raw(float16 f)
583{
584 return unpack_raw(float16_params, f);
585}
586
8282310d
LZ
587static inline FloatParts bfloat16_unpack_raw(bfloat16 f)
588{
589 return unpack_raw(bfloat16_params, f);
590}
591
6fff2167
AB
592static inline FloatParts float32_unpack_raw(float32 f)
593{
594 return unpack_raw(float32_params, f);
595}
596
597static inline FloatParts float64_unpack_raw(float64 f)
598{
599 return unpack_raw(float64_params, f);
600}
601
602/* Pack a float from parts, but do not canonicalize. */
603static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
604{
605 const int sign_pos = fmt.frac_size + fmt.exp_size;
606 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
607 return deposit64(ret, sign_pos, 1, p.sign);
608}
609
610static inline float16 float16_pack_raw(FloatParts p)
611{
612 return make_float16(pack_raw(float16_params, p));
613}
614
8282310d
LZ
615static inline bfloat16 bfloat16_pack_raw(FloatParts p)
616{
617 return pack_raw(bfloat16_params, p);
618}
619
6fff2167
AB
620static inline float32 float32_pack_raw(FloatParts p)
621{
622 return make_float32(pack_raw(float32_params, p));
623}
624
625static inline float64 float64_pack_raw(FloatParts p)
626{
627 return make_float64(pack_raw(float64_params, p));
628}
629
0664335a
RH
630/*----------------------------------------------------------------------------
631| Functions and definitions to determine: (1) whether tininess for underflow
632| is detected before or after rounding by default, (2) what (if anything)
633| happens when exceptions are raised, (3) how signaling NaNs are distinguished
634| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
635| are propagated from function inputs to output. These details are target-
636| specific.
637*----------------------------------------------------------------------------*/
139c1837 638#include "softfloat-specialize.c.inc"
0664335a 639
6fff2167 640/* Canonicalize EXP and FRAC, setting CLS. */
f9943c7f
EC
641static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
642 float_status *status)
6fff2167 643{
ca3a3d5a 644 if (part.exp == parm->exp_max && !parm->arm_althp) {
6fff2167
AB
645 if (part.frac == 0) {
646 part.cls = float_class_inf;
647 } else {
94933df0 648 part.frac <<= parm->frac_shift;
298b468e
RH
649 part.cls = (parts_is_snan_frac(part.frac, status)
650 ? float_class_snan : float_class_qnan);
6fff2167
AB
651 }
652 } else if (part.exp == 0) {
653 if (likely(part.frac == 0)) {
654 part.cls = float_class_zero;
655 } else if (status->flush_inputs_to_zero) {
656 float_raise(float_flag_input_denormal, status);
657 part.cls = float_class_zero;
658 part.frac = 0;
659 } else {
660 int shift = clz64(part.frac) - 1;
661 part.cls = float_class_normal;
662 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
663 part.frac <<= shift;
664 }
665 } else {
666 part.cls = float_class_normal;
667 part.exp -= parm->exp_bias;
668 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
669 }
670 return part;
671}
672
673/* Round and uncanonicalize a floating-point number by parts. There
674 * are FRAC_SHIFT bits that may require rounding at the bottom of the
675 * fraction; these bits will be removed. The exponent will be biased
676 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
677 */
678
679static FloatParts round_canonical(FloatParts p, float_status *s,
680 const FloatFmt *parm)
681{
5d64abb3 682 const uint64_t frac_lsb = parm->frac_lsb;
6fff2167
AB
683 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
684 const uint64_t round_mask = parm->round_mask;
685 const uint64_t roundeven_mask = parm->roundeven_mask;
686 const int exp_max = parm->exp_max;
687 const int frac_shift = parm->frac_shift;
688 uint64_t frac, inc;
689 int exp, flags = 0;
690 bool overflow_norm;
691
692 frac = p.frac;
693 exp = p.exp;
694
695 switch (p.cls) {
696 case float_class_normal:
697 switch (s->float_rounding_mode) {
698 case float_round_nearest_even:
699 overflow_norm = false;
700 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
701 break;
702 case float_round_ties_away:
703 overflow_norm = false;
704 inc = frac_lsbm1;
705 break;
706 case float_round_to_zero:
707 overflow_norm = true;
708 inc = 0;
709 break;
710 case float_round_up:
711 inc = p.sign ? 0 : round_mask;
712 overflow_norm = p.sign;
713 break;
714 case float_round_down:
715 inc = p.sign ? round_mask : 0;
716 overflow_norm = !p.sign;
717 break;
5d64abb3
RH
718 case float_round_to_odd:
719 overflow_norm = true;
720 inc = frac & frac_lsb ? 0 : round_mask;
721 break;
6fff2167
AB
722 default:
723 g_assert_not_reached();
724 }
725
726 exp += parm->exp_bias;
727 if (likely(exp > 0)) {
728 if (frac & round_mask) {
729 flags |= float_flag_inexact;
730 frac += inc;
731 if (frac & DECOMPOSED_OVERFLOW_BIT) {
732 frac >>= 1;
733 exp++;
734 }
735 }
736 frac >>= frac_shift;
737
ca3a3d5a
AB
738 if (parm->arm_althp) {
739 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
740 if (unlikely(exp > exp_max)) {
741 /* Overflow. Return the maximum normal. */
742 flags = float_flag_invalid;
743 exp = exp_max;
744 frac = -1;
745 }
746 } else if (unlikely(exp >= exp_max)) {
6fff2167
AB
747 flags |= float_flag_overflow | float_flag_inexact;
748 if (overflow_norm) {
749 exp = exp_max - 1;
750 frac = -1;
751 } else {
752 p.cls = float_class_inf;
753 goto do_inf;
754 }
755 }
756 } else if (s->flush_to_zero) {
757 flags |= float_flag_output_denormal;
758 p.cls = float_class_zero;
759 goto do_zero;
760 } else {
a828b373 761 bool is_tiny = s->tininess_before_rounding
6fff2167
AB
762 || (exp < 0)
763 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
764
765 shift64RightJamming(frac, 1 - exp, &frac);
766 if (frac & round_mask) {
767 /* Need to recompute round-to-even. */
5d64abb3
RH
768 switch (s->float_rounding_mode) {
769 case float_round_nearest_even:
6fff2167
AB
770 inc = ((frac & roundeven_mask) != frac_lsbm1
771 ? frac_lsbm1 : 0);
5d64abb3
RH
772 break;
773 case float_round_to_odd:
774 inc = frac & frac_lsb ? 0 : round_mask;
775 break;
3dede407
RH
776 default:
777 break;
6fff2167
AB
778 }
779 flags |= float_flag_inexact;
780 frac += inc;
781 }
782
783 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
784 frac >>= frac_shift;
785
786 if (is_tiny && (flags & float_flag_inexact)) {
787 flags |= float_flag_underflow;
788 }
789 if (exp == 0 && frac == 0) {
790 p.cls = float_class_zero;
791 }
792 }
793 break;
794
795 case float_class_zero:
796 do_zero:
797 exp = 0;
798 frac = 0;
799 break;
800
801 case float_class_inf:
802 do_inf:
ca3a3d5a 803 assert(!parm->arm_althp);
6fff2167
AB
804 exp = exp_max;
805 frac = 0;
806 break;
807
808 case float_class_qnan:
809 case float_class_snan:
ca3a3d5a 810 assert(!parm->arm_althp);
6fff2167 811 exp = exp_max;
94933df0 812 frac >>= parm->frac_shift;
6fff2167
AB
813 break;
814
815 default:
816 g_assert_not_reached();
817 }
818
819 float_raise(flags, s);
820 p.exp = exp;
821 p.frac = frac;
822 return p;
823}
824
6fed16b2
AB
825/* Explicit FloatFmt version */
826static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
827 const FloatFmt *params)
828{
f9943c7f 829 return sf_canonicalize(float16_unpack_raw(f), params, s);
6fed16b2
AB
830}
831
6fff2167
AB
832static FloatParts float16_unpack_canonical(float16 f, float_status *s)
833{
6fed16b2
AB
834 return float16a_unpack_canonical(f, s, &float16_params);
835}
836
8282310d
LZ
837static FloatParts bfloat16_unpack_canonical(bfloat16 f, float_status *s)
838{
839 return sf_canonicalize(bfloat16_unpack_raw(f), &bfloat16_params, s);
840}
841
6fed16b2
AB
842static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
843 const FloatFmt *params)
844{
845 return float16_pack_raw(round_canonical(p, s, params));
6fff2167
AB
846}
847
848static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
849{
6fed16b2 850 return float16a_round_pack_canonical(p, s, &float16_params);
6fff2167
AB
851}
852
8282310d
LZ
853static bfloat16 bfloat16_round_pack_canonical(FloatParts p, float_status *s)
854{
855 return bfloat16_pack_raw(round_canonical(p, s, &bfloat16_params));
856}
857
6fff2167
AB
858static FloatParts float32_unpack_canonical(float32 f, float_status *s)
859{
f9943c7f 860 return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
6fff2167
AB
861}
862
863static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
864{
0bcfbcbe 865 return float32_pack_raw(round_canonical(p, s, &float32_params));
6fff2167
AB
866}
867
868static FloatParts float64_unpack_canonical(float64 f, float_status *s)
869{
f9943c7f 870 return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
6fff2167
AB
871}
872
873static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
874{
0bcfbcbe 875 return float64_pack_raw(round_canonical(p, s, &float64_params));
6fff2167
AB
876}
877
dbe4d53a
AB
878static FloatParts return_nan(FloatParts a, float_status *s)
879{
880 switch (a.cls) {
881 case float_class_snan:
882 s->float_exception_flags |= float_flag_invalid;
0bcfbcbe 883 a = parts_silence_nan(a, s);
dbe4d53a
AB
884 /* fall through */
885 case float_class_qnan:
886 if (s->default_nan_mode) {
f7e598e2 887 return parts_default_nan(s);
dbe4d53a
AB
888 }
889 break;
890
891 default:
892 g_assert_not_reached();
893 }
894 return a;
895}
896
6fff2167
AB
897static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
898{
899 if (is_snan(a.cls) || is_snan(b.cls)) {
900 s->float_exception_flags |= float_flag_invalid;
901 }
902
903 if (s->default_nan_mode) {
f7e598e2 904 return parts_default_nan(s);
6fff2167 905 } else {
4f251cfd 906 if (pickNaN(a.cls, b.cls,
6fff2167 907 a.frac > b.frac ||
913602e3 908 (a.frac == b.frac && a.sign < b.sign), s)) {
6fff2167
AB
909 a = b;
910 }
0bcfbcbe
RH
911 if (is_snan(a.cls)) {
912 return parts_silence_nan(a, s);
913 }
6fff2167
AB
914 }
915 return a;
916}
917
d446830a
AB
918static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
919 bool inf_zero, float_status *s)
920{
1839189b
PM
921 int which;
922
d446830a
AB
923 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
924 s->float_exception_flags |= float_flag_invalid;
925 }
926
3bd2dec1 927 which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
1839189b 928
d446830a 929 if (s->default_nan_mode) {
1839189b
PM
930 /* Note that this check is after pickNaNMulAdd so that function
931 * has an opportunity to set the Invalid flag.
932 */
f7e598e2 933 which = 3;
1839189b 934 }
d446830a 935
1839189b
PM
936 switch (which) {
937 case 0:
938 break;
939 case 1:
940 a = b;
941 break;
942 case 2:
943 a = c;
944 break;
945 case 3:
f7e598e2 946 return parts_default_nan(s);
1839189b
PM
947 default:
948 g_assert_not_reached();
d446830a 949 }
1839189b 950
0bcfbcbe
RH
951 if (is_snan(a.cls)) {
952 return parts_silence_nan(a, s);
953 }
d446830a
AB
954 return a;
955}
956
6fff2167
AB
957/*
958 * Returns the result of adding or subtracting the values of the
959 * floating-point values `a' and `b'. The operation is performed
960 * according to the IEC/IEEE Standard for Binary Floating-Point
961 * Arithmetic.
962 */
963
964static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
965 float_status *s)
966{
967 bool a_sign = a.sign;
968 bool b_sign = b.sign ^ subtract;
969
970 if (a_sign != b_sign) {
971 /* Subtraction */
972
973 if (a.cls == float_class_normal && b.cls == float_class_normal) {
974 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
975 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
976 a.frac = a.frac - b.frac;
977 } else {
978 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
979 a.frac = b.frac - a.frac;
980 a.exp = b.exp;
981 a_sign ^= 1;
982 }
983
984 if (a.frac == 0) {
985 a.cls = float_class_zero;
986 a.sign = s->float_rounding_mode == float_round_down;
987 } else {
988 int shift = clz64(a.frac) - 1;
989 a.frac = a.frac << shift;
990 a.exp = a.exp - shift;
991 a.sign = a_sign;
992 }
993 return a;
994 }
995 if (is_nan(a.cls) || is_nan(b.cls)) {
996 return pick_nan(a, b, s);
997 }
998 if (a.cls == float_class_inf) {
999 if (b.cls == float_class_inf) {
1000 float_raise(float_flag_invalid, s);
f7e598e2 1001 return parts_default_nan(s);
6fff2167
AB
1002 }
1003 return a;
1004 }
1005 if (a.cls == float_class_zero && b.cls == float_class_zero) {
1006 a.sign = s->float_rounding_mode == float_round_down;
1007 return a;
1008 }
1009 if (a.cls == float_class_zero || b.cls == float_class_inf) {
1010 b.sign = a_sign ^ 1;
1011 return b;
1012 }
1013 if (b.cls == float_class_zero) {
1014 return a;
1015 }
1016 } else {
1017 /* Addition */
1018 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1019 if (a.exp > b.exp) {
1020 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
1021 } else if (a.exp < b.exp) {
1022 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
1023 a.exp = b.exp;
1024 }
1025 a.frac += b.frac;
1026 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
64d450a0 1027 shift64RightJamming(a.frac, 1, &a.frac);
6fff2167
AB
1028 a.exp += 1;
1029 }
1030 return a;
1031 }
1032 if (is_nan(a.cls) || is_nan(b.cls)) {
1033 return pick_nan(a, b, s);
1034 }
1035 if (a.cls == float_class_inf || b.cls == float_class_zero) {
1036 return a;
1037 }
1038 if (b.cls == float_class_inf || a.cls == float_class_zero) {
1039 b.sign = b_sign;
1040 return b;
1041 }
1042 }
1043 g_assert_not_reached();
1044}
1045
1046/*
1047 * Returns the result of adding or subtracting the floating-point
1048 * values `a' and `b'. The operation is performed according to the
1049 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1050 */
1051
97ff87c0 1052float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
6fff2167
AB
1053{
1054 FloatParts pa = float16_unpack_canonical(a, status);
1055 FloatParts pb = float16_unpack_canonical(b, status);
1056 FloatParts pr = addsub_floats(pa, pb, false, status);
1057
1058 return float16_round_pack_canonical(pr, status);
1059}
1060
1b615d48
EC
1061float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
1062{
1063 FloatParts pa = float16_unpack_canonical(a, status);
1064 FloatParts pb = float16_unpack_canonical(b, status);
1065 FloatParts pr = addsub_floats(pa, pb, true, status);
1066
1067 return float16_round_pack_canonical(pr, status);
1068}
1069
1070static float32 QEMU_SOFTFLOAT_ATTR
1071soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
6fff2167
AB
1072{
1073 FloatParts pa = float32_unpack_canonical(a, status);
1074 FloatParts pb = float32_unpack_canonical(b, status);
1b615d48 1075 FloatParts pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1076
1077 return float32_round_pack_canonical(pr, status);
1078}
1079
1b615d48
EC
1080static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
1081{
1082 return soft_f32_addsub(a, b, false, status);
1083}
1084
1085static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1086{
1087 return soft_f32_addsub(a, b, true, status);
1088}
1089
1090static float64 QEMU_SOFTFLOAT_ATTR
1091soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
6fff2167
AB
1092{
1093 FloatParts pa = float64_unpack_canonical(a, status);
1094 FloatParts pb = float64_unpack_canonical(b, status);
1b615d48 1095 FloatParts pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1096
1097 return float64_round_pack_canonical(pr, status);
1098}
1099
1b615d48 1100static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1101{
1b615d48
EC
1102 return soft_f64_addsub(a, b, false, status);
1103}
6fff2167 1104
1b615d48
EC
1105static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1106{
1107 return soft_f64_addsub(a, b, true, status);
6fff2167
AB
1108}
1109
/* Host single-precision addition. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;

    return sum;
}
6fff2167 1114
1b615d48
EC
/* Host single-precision subtraction. */
static float hard_f32_sub(float a, float b)
{
    float diff = a - b;

    return diff;
}
1119
/* Host double-precision addition. */
static double hard_f64_add(double a, double b)
{
    double sum = a + b;

    return sum;
}
6fff2167 1124
1b615d48
EC
/* Host double-precision subtraction. */
static double hard_f64_sub(double a, double b)
{
    double diff = a - b;

    return diff;
}
1129
b240c9c4 1130static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1b615d48
EC
1131{
1132 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1133 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1134 }
1135 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1136}
1137
b240c9c4 1138static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1b615d48
EC
1139{
1140 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1141 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1142 } else {
1143 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1144 }
1145}
1146
1147static float32 float32_addsub(float32 a, float32 b, float_status *s,
1148 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1149{
1150 return float32_gen2(a, b, s, hard, soft,
b240c9c4 1151 f32_is_zon2, f32_addsubmul_post);
1b615d48
EC
1152}
1153
1154static float64 float64_addsub(float64 a, float64 b, float_status *s,
1155 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1156{
1157 return float64_gen2(a, b, s, hard, soft,
b240c9c4 1158 f64_is_zon2, f64_addsubmul_post);
1b615d48
EC
1159}
1160
1161float32 QEMU_FLATTEN
1162float32_add(float32 a, float32 b, float_status *s)
1163{
1164 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1165}
1166
1167float32 QEMU_FLATTEN
1168float32_sub(float32 a, float32 b, float_status *s)
1169{
1170 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1171}
1172
1173float64 QEMU_FLATTEN
1174float64_add(float64 a, float64 b, float_status *s)
1175{
1176 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1177}
1178
1179float64 QEMU_FLATTEN
1180float64_sub(float64 a, float64 b, float_status *s)
1181{
1182 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1183}
1184
8282310d
LZ
1185/*
1186 * Returns the result of adding or subtracting the bfloat16
1187 * values `a' and `b'.
1188 */
1189bfloat16 QEMU_FLATTEN bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1190{
1191 FloatParts pa = bfloat16_unpack_canonical(a, status);
1192 FloatParts pb = bfloat16_unpack_canonical(b, status);
1193 FloatParts pr = addsub_floats(pa, pb, false, status);
1194
1195 return bfloat16_round_pack_canonical(pr, status);
1196}
1197
1198bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1199{
1200 FloatParts pa = bfloat16_unpack_canonical(a, status);
1201 FloatParts pb = bfloat16_unpack_canonical(b, status);
1202 FloatParts pr = addsub_floats(pa, pb, true, status);
1203
1204 return bfloat16_round_pack_canonical(pr, status);
1205}
1206
74d707e2
AB
1207/*
1208 * Returns the result of multiplying the floating-point values `a' and
1209 * `b'. The operation is performed according to the IEC/IEEE Standard
1210 * for Binary Floating-Point Arithmetic.
1211 */
1212
1213static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
1214{
1215 bool sign = a.sign ^ b.sign;
1216
1217 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1218 uint64_t hi, lo;
1219 int exp = a.exp + b.exp;
1220
1221 mul64To128(a.frac, b.frac, &hi, &lo);
1222 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1223 if (lo & DECOMPOSED_OVERFLOW_BIT) {
1224 shift64RightJamming(lo, 1, &lo);
1225 exp += 1;
1226 }
1227
1228 /* Re-use a */
1229 a.exp = exp;
1230 a.sign = sign;
1231 a.frac = lo;
1232 return a;
1233 }
1234 /* handle all the NaN cases */
1235 if (is_nan(a.cls) || is_nan(b.cls)) {
1236 return pick_nan(a, b, s);
1237 }
1238 /* Inf * Zero == NaN */
1239 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
1240 (a.cls == float_class_zero && b.cls == float_class_inf)) {
1241 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1242 return parts_default_nan(s);
74d707e2
AB
1243 }
1244 /* Multiply by 0 or Inf */
1245 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1246 a.sign = sign;
1247 return a;
1248 }
1249 if (b.cls == float_class_inf || b.cls == float_class_zero) {
1250 b.sign = sign;
1251 return b;
1252 }
1253 g_assert_not_reached();
1254}
1255
97ff87c0 1256float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2
AB
1257{
1258 FloatParts pa = float16_unpack_canonical(a, status);
1259 FloatParts pb = float16_unpack_canonical(b, status);
1260 FloatParts pr = mul_floats(pa, pb, status);
1261
1262 return float16_round_pack_canonical(pr, status);
1263}
1264
2dfabc86
EC
1265static float32 QEMU_SOFTFLOAT_ATTR
1266soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2
AB
1267{
1268 FloatParts pa = float32_unpack_canonical(a, status);
1269 FloatParts pb = float32_unpack_canonical(b, status);
1270 FloatParts pr = mul_floats(pa, pb, status);
1271
1272 return float32_round_pack_canonical(pr, status);
1273}
1274
2dfabc86
EC
1275static float64 QEMU_SOFTFLOAT_ATTR
1276soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2
AB
1277{
1278 FloatParts pa = float64_unpack_canonical(a, status);
1279 FloatParts pb = float64_unpack_canonical(b, status);
1280 FloatParts pr = mul_floats(pa, pb, status);
1281
1282 return float64_round_pack_canonical(pr, status);
1283}
1284
2dfabc86
EC
/* Host single-precision multiplication. */
static float hard_f32_mul(float a, float b)
{
    float product = a * b;

    return product;
}
1289
/* Host double-precision multiplication. */
static double hard_f64_mul(double a, double b)
{
    double product = a * b;

    return product;
}
1294
2dfabc86
EC
1295float32 QEMU_FLATTEN
1296float32_mul(float32 a, float32 b, float_status *s)
1297{
1298 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
b240c9c4 1299 f32_is_zon2, f32_addsubmul_post);
2dfabc86
EC
1300}
1301
1302float64 QEMU_FLATTEN
1303float64_mul(float64 a, float64 b, float_status *s)
1304{
1305 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
b240c9c4 1306 f64_is_zon2, f64_addsubmul_post);
2dfabc86
EC
1307}
1308
8282310d
LZ
1309/*
1310 * Returns the result of multiplying the bfloat16
1311 * values `a' and `b'.
1312 */
1313
1314bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1315{
1316 FloatParts pa = bfloat16_unpack_canonical(a, status);
1317 FloatParts pb = bfloat16_unpack_canonical(b, status);
1318 FloatParts pr = mul_floats(pa, pb, status);
1319
1320 return bfloat16_round_pack_canonical(pr, status);
1321}
1322
d446830a
AB
1323/*
1324 * Returns the result of multiplying the floating-point values `a' and
1325 * `b' then adding 'c', with no intermediate rounding step after the
1326 * multiplication. The operation is performed according to the
1327 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
1328 * The flags argument allows the caller to select negation of the
1329 * addend, the intermediate product, or the final result. (The
1330 * difference between this and having the caller do a separate
1331 * negation is that negating externally will flip the sign bit on
1332 * NaNs.)
1333 */
1334
/*
 * Fused multiply-add on canonical parts: computes (a * b) + c.
 *
 * `flags' may contain float_muladd_negate_c (flip the sign of c),
 * float_muladd_negate_product (flip the sign of the product),
 * float_muladd_negate_result (flip the sign of the final result) and
 * float_muladd_halve_result (decrement the result exponent by one).
 *
 * Order of processing: NaN operands, Inf * 0, infinite addend,
 * infinite product, zero product, and finally the normal * normal
 * case, where the product is kept in a 128-bit hi:lo pair while the
 * addend is added or subtracted.
 */
1335static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
1336 int flags, float_status *s)
1337{
/* True exactly when one of a, b is an infinity and the other a zero. */
1338 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
1339 ((1 << float_class_inf) | (1 << float_class_zero));
1340 bool p_sign;
1341 bool sign_flip = flags & float_muladd_negate_result;
1342 FloatClass p_class;
1343 uint64_t hi, lo;
1344 int p_exp;
1345
1346 /* It is implementation-defined whether the cases of (0,inf,qnan)
1347 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
1348 * they return if they do), so we have to hand this information
1349 * off to the target-specific pick-a-NaN routine.
1350 */
1351 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
1352 return pick_nan_muladd(a, b, c, inf_zero, s);
1353 }
1354
/* Inf * 0 with a non-NaN addend: invalid operation, default NaN. */
1355 if (inf_zero) {
1356 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1357 return parts_default_nan(s);
d446830a
AB
1358 }
1359
1360 if (flags & float_muladd_negate_c) {
1361 c.sign ^= 1;
1362 }
1363
1364 p_sign = a.sign ^ b.sign;
1365
1366 if (flags & float_muladd_negate_product) {
1367 p_sign ^= 1;
1368 }
1369
/* Classify the product: infinity wins over zero, otherwise normal. */
1370 if (a.cls == float_class_inf || b.cls == float_class_inf) {
1371 p_class = float_class_inf;
1372 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
1373 p_class = float_class_zero;
1374 } else {
1375 p_class = float_class_normal;
1376 }
1377
/* Infinite addend: invalid only against an opposite-signed infinite product. */
1378 if (c.cls == float_class_inf) {
1379 if (p_class == float_class_inf && p_sign != c.sign) {
1380 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1381 return parts_default_nan(s);
d446830a
AB
1382 } else {
1383 a.cls = float_class_inf;
1384 a.sign = c.sign ^ sign_flip;
f7e598e2 1385 return a;
d446830a 1386 }
d446830a
AB
1387 }
1388
1389 if (p_class == float_class_inf) {
1390 a.cls = float_class_inf;
1391 a.sign = p_sign ^ sign_flip;
1392 return a;
1393 }
1394
/*
 * Zero product: the result is c.  For 0 + 0 with differing signs the
 * sign is negative only when rounding down.  For a non-zero c the
 * halve_result flag is applied directly to c's exponent.
 */
1395 if (p_class == float_class_zero) {
1396 if (c.cls == float_class_zero) {
1397 if (p_sign != c.sign) {
1398 p_sign = s->float_rounding_mode == float_round_down;
1399 }
1400 c.sign = p_sign;
1401 } else if (flags & float_muladd_halve_result) {
1402 c.exp -= 1;
1403 }
1404 c.sign ^= sign_flip;
1405 return c;
1406 }
1407
1408 /* a & b should be normals now... */
1409 assert(a.cls == float_class_normal &&
1410 b.cls == float_class_normal);
1411
1412 p_exp = a.exp + b.exp;
1413
1414 /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
1415 * result.
1416 */
1417 mul64To128(a.frac, b.frac, &hi, &lo);
1418 /* binary point now at bit 124 */
1419
1420 /* check for overflow */
1421 if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
1422 shift128RightJamming(hi, lo, 1, &hi, &lo);
1423 p_exp += 1;
1424 }
1425
1426 /* + add/sub */
1427 if (c.cls == float_class_zero) {
1428 /* move binary point back to 62 */
1429 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1430 } else {
1431 int exp_diff = p_exp - c.exp;
1432 if (p_sign == c.sign) {
1433 /* Addition */
1434 if (exp_diff <= 0) {
/* c has the larger (or equal) exponent: align the product down to c. */
1435 shift128RightJamming(hi, lo,
1436 DECOMPOSED_BINARY_POINT - exp_diff,
1437 &hi, &lo);
1438 lo += c.frac;
1439 p_exp = c.exp;
1440 } else {
1441 uint64_t c_hi, c_lo;
1442 /* shift c to the same binary point as the product (124) */
1443 c_hi = c.frac >> 2;
1444 c_lo = 0;
1445 shift128RightJamming(c_hi, c_lo,
1446 exp_diff,
1447 &c_hi, &c_lo);
1448 add128(hi, lo, c_hi, c_lo, &hi, &lo);
1449 /* move binary point back to 62 */
1450 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1451 }
1452
1453 if (lo & DECOMPOSED_OVERFLOW_BIT) {
1454 shift64RightJamming(lo, 1, &lo);
1455 p_exp += 1;
1456 }
1457
1458 } else {
1459 /* Subtraction */
1460 uint64_t c_hi, c_lo;
1461 /* make C binary point match product at bit 124 */
1462 c_hi = c.frac >> 2;
1463 c_lo = 0;
1464
1465 if (exp_diff <= 0) {
1466 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
/* Only with equal exponents can the product be the larger magnitude here. */
1467 if (exp_diff == 0
1468 &&
1469 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1470 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1471 } else {
/* c is larger: compute c - product and take c's sign and exponent. */
1472 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1473 p_sign ^= 1;
1474 p_exp = c.exp;
1475 }
1476 } else {
1477 shift128RightJamming(c_hi, c_lo,
1478 exp_diff,
1479 &c_hi, &c_lo);
1480 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1481 }
1482
/* Exact cancellation: the zero is negative only when rounding down. */
1483 if (hi == 0 && lo == 0) {
1484 a.cls = float_class_zero;
1485 a.sign = s->float_rounding_mode == float_round_down;
1486 a.sign ^= sign_flip;
1487 return a;
1488 } else {
1489 int shift;
1490 if (hi != 0) {
1491 shift = clz64(hi);
1492 } else {
1493 shift = clz64(lo) + 64;
1494 }
1495 /* Normalizing to a binary point of 124 is the
1496 correct adjust for the exponent. However since we're
1497 shifting, we might as well put the binary point back
1498 at 62 where we really want it. Therefore shift as
1499 if we're leaving 1 bit at the top of the word, but
1500 adjust the exponent as if we're leaving 3 bits. */
1501 shift -= 1;
1502 if (shift >= 64) {
1503 lo = lo << (shift - 64);
1504 } else {
1505 hi = (hi << shift) | (lo >> (64 - shift));
/* Fold the low bits that are shifted out into a sticky bit in lo. */
1506 lo = hi | ((lo << shift) != 0);
1507 }
1508 p_exp -= shift - 2;
1509 }
1510 }
1511 }
1512
1513 if (flags & float_muladd_halve_result) {
1514 p_exp -= 1;
1515 }
1516
1517 /* finally prepare our result */
1518 a.cls = float_class_normal;
1519 a.sign = p_sign ^ sign_flip;
1520 a.exp = p_exp;
1521 a.frac = lo;
1522
1523 return a;
1524}
1525
97ff87c0 1526float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
d446830a
AB
1527 int flags, float_status *status)
1528{
1529 FloatParts pa = float16_unpack_canonical(a, status);
1530 FloatParts pb = float16_unpack_canonical(b, status);
1531 FloatParts pc = float16_unpack_canonical(c, status);
1532 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1533
1534 return float16_round_pack_canonical(pr, status);
1535}
1536
ccf770ba
EC
1537static float32 QEMU_SOFTFLOAT_ATTR
1538soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1539 float_status *status)
d446830a
AB
1540{
1541 FloatParts pa = float32_unpack_canonical(a, status);
1542 FloatParts pb = float32_unpack_canonical(b, status);
1543 FloatParts pc = float32_unpack_canonical(c, status);
1544 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1545
1546 return float32_round_pack_canonical(pr, status);
1547}
1548
ccf770ba
EC
1549static float64 QEMU_SOFTFLOAT_ATTR
1550soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1551 float_status *status)
d446830a
AB
1552{
1553 FloatParts pa = float64_unpack_canonical(a, status);
1554 FloatParts pb = float64_unpack_canonical(b, status);
1555 FloatParts pc = float64_unpack_canonical(c, status);
1556 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1557
1558 return float64_round_pack_canonical(pr, status);
1559}
1560
f6b3b108
EC
/*
 * When true, float32_muladd() and float64_muladd() skip the host
 * fmaf()/fma() fast path and always use the soft implementation.
 * NOTE(review): the code that sets this flag is not visible in this
 * part of the file.
 */
1561static bool force_soft_fma;
1562
ccf770ba
EC
1563float32 QEMU_FLATTEN
1564float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1565{
1566 union_float32 ua, ub, uc, ur;
1567
1568 ua.s = xa;
1569 ub.s = xb;
1570 uc.s = xc;
1571
1572 if (unlikely(!can_use_fpu(s))) {
1573 goto soft;
1574 }
1575 if (unlikely(flags & float_muladd_halve_result)) {
1576 goto soft;
1577 }
1578
1579 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1580 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1581 goto soft;
1582 }
f6b3b108
EC
1583
1584 if (unlikely(force_soft_fma)) {
1585 goto soft;
1586 }
1587
ccf770ba
EC
1588 /*
1589 * When (a || b) == 0, there's no need to check for under/over flow,
1590 * since we know the addend is (normal || 0) and the product is 0.
1591 */
1592 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1593 union_float32 up;
1594 bool prod_sign;
1595
1596 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1597 prod_sign ^= !!(flags & float_muladd_negate_product);
1598 up.s = float32_set_sign(float32_zero, prod_sign);
1599
1600 if (flags & float_muladd_negate_c) {
1601 uc.h = -uc.h;
1602 }
1603 ur.h = up.h + uc.h;
1604 } else {
896f51fb
KC
1605 union_float32 ua_orig = ua;
1606 union_float32 uc_orig = uc;
1607
ccf770ba
EC
1608 if (flags & float_muladd_negate_product) {
1609 ua.h = -ua.h;
1610 }
1611 if (flags & float_muladd_negate_c) {
1612 uc.h = -uc.h;
1613 }
1614
1615 ur.h = fmaf(ua.h, ub.h, uc.h);
1616
1617 if (unlikely(f32_is_inf(ur))) {
1618 s->float_exception_flags |= float_flag_overflow;
1619 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
1620 ua = ua_orig;
1621 uc = uc_orig;
ccf770ba
EC
1622 goto soft;
1623 }
1624 }
1625 if (flags & float_muladd_negate_result) {
1626 return float32_chs(ur.s);
1627 }
1628 return ur.s;
1629
1630 soft:
1631 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1632}
1633
1634float64 QEMU_FLATTEN
1635float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1636{
1637 union_float64 ua, ub, uc, ur;
1638
1639 ua.s = xa;
1640 ub.s = xb;
1641 uc.s = xc;
1642
1643 if (unlikely(!can_use_fpu(s))) {
1644 goto soft;
1645 }
1646 if (unlikely(flags & float_muladd_halve_result)) {
1647 goto soft;
1648 }
1649
1650 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1651 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1652 goto soft;
1653 }
f6b3b108
EC
1654
1655 if (unlikely(force_soft_fma)) {
1656 goto soft;
1657 }
1658
ccf770ba
EC
1659 /*
1660 * When (a || b) == 0, there's no need to check for under/over flow,
1661 * since we know the addend is (normal || 0) and the product is 0.
1662 */
1663 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1664 union_float64 up;
1665 bool prod_sign;
1666
1667 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1668 prod_sign ^= !!(flags & float_muladd_negate_product);
1669 up.s = float64_set_sign(float64_zero, prod_sign);
1670
1671 if (flags & float_muladd_negate_c) {
1672 uc.h = -uc.h;
1673 }
1674 ur.h = up.h + uc.h;
1675 } else {
896f51fb
KC
1676 union_float64 ua_orig = ua;
1677 union_float64 uc_orig = uc;
1678
ccf770ba
EC
1679 if (flags & float_muladd_negate_product) {
1680 ua.h = -ua.h;
1681 }
1682 if (flags & float_muladd_negate_c) {
1683 uc.h = -uc.h;
1684 }
1685
1686 ur.h = fma(ua.h, ub.h, uc.h);
1687
1688 if (unlikely(f64_is_inf(ur))) {
1689 s->float_exception_flags |= float_flag_overflow;
1690 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
1691 ua = ua_orig;
1692 uc = uc_orig;
ccf770ba
EC
1693 goto soft;
1694 }
1695 }
1696 if (flags & float_muladd_negate_result) {
1697 return float64_chs(ur.s);
1698 }
1699 return ur.s;
1700
1701 soft:
1702 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1703}
1704
8282310d
LZ
1705/*
1706 * Returns the result of multiplying the bfloat16 values `a'
1707 * and `b' then adding 'c', with no intermediate rounding step after the
1708 * multiplication.
1709 */
1710
1711bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1712 int flags, float_status *status)
1713{
1714 FloatParts pa = bfloat16_unpack_canonical(a, status);
1715 FloatParts pb = bfloat16_unpack_canonical(b, status);
1716 FloatParts pc = bfloat16_unpack_canonical(c, status);
1717 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1718
1719 return bfloat16_round_pack_canonical(pr, status);
1720}
1721
cf07323d
AB
1722/*
1723 * Returns the result of dividing the floating-point value `a' by the
1724 * corresponding value `b'. The operation is performed according to
1725 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1726 */
1727
1728static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1729{
1730 bool sign = a.sign ^ b.sign;
1731
1732 if (a.cls == float_class_normal && b.cls == float_class_normal) {
5dfbc9e4 1733 uint64_t n0, n1, q, r;
cf07323d 1734 int exp = a.exp - b.exp;
5dfbc9e4
RH
1735
1736 /*
1737 * We want a 2*N / N-bit division to produce exactly an N-bit
1738 * result, so that we do not lose any precision and so that we
1739 * do not have to renormalize afterward. If A.frac < B.frac,
1740 * then division would produce an (N-1)-bit result; shift A left
1741 * by one to produce the an N-bit result, and decrement the
1742 * exponent to match.
1743 *
1744 * The udiv_qrnnd algorithm that we're using requires normalization,
1745 * i.e. the msb of the denominator must be set. Since we know that
1746 * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
1747 * by one (more), and the remainder must be shifted right by one.
1748 */
cf07323d
AB
1749 if (a.frac < b.frac) {
1750 exp -= 1;
5dfbc9e4 1751 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
cf07323d 1752 } else {
5dfbc9e4 1753 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
cf07323d 1754 }
5dfbc9e4
RH
1755 q = udiv_qrnnd(&r, n1, n0, b.frac << 1);
1756
1757 /*
1758 * Set lsb if there is a remainder, to set inexact.
1759 * As mentioned above, to find the actual value of the remainder we
1760 * would need to shift right, but (1) we are only concerned about
1761 * non-zero-ness, and (2) the remainder will always be even because
1762 * both inputs to the division primitive are even.
1763 */
1764 a.frac = q | (r != 0);
cf07323d
AB
1765 a.sign = sign;
1766 a.exp = exp;
1767 return a;
1768 }
1769 /* handle all the NaN cases */
1770 if (is_nan(a.cls) || is_nan(b.cls)) {
1771 return pick_nan(a, b, s);
1772 }
1773 /* 0/0 or Inf/Inf */
1774 if (a.cls == b.cls
1775 &&
1776 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1777 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1778 return parts_default_nan(s);
cf07323d 1779 }
9cb4e398
AB
1780 /* Inf / x or 0 / x */
1781 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1782 a.sign = sign;
1783 return a;
1784 }
cf07323d
AB
1785 /* Div 0 => Inf */
1786 if (b.cls == float_class_zero) {
1787 s->float_exception_flags |= float_flag_divbyzero;
1788 a.cls = float_class_inf;
1789 a.sign = sign;
1790 return a;
1791 }
cf07323d
AB
1792 /* Div by Inf */
1793 if (b.cls == float_class_inf) {
1794 a.cls = float_class_zero;
1795 a.sign = sign;
1796 return a;
1797 }
1798 g_assert_not_reached();
1799}
1800
1801float16 float16_div(float16 a, float16 b, float_status *status)
1802{
1803 FloatParts pa = float16_unpack_canonical(a, status);
1804 FloatParts pb = float16_unpack_canonical(b, status);
1805 FloatParts pr = div_floats(pa, pb, status);
1806
1807 return float16_round_pack_canonical(pr, status);
1808}
1809
4a629561
EC
1810static float32 QEMU_SOFTFLOAT_ATTR
1811soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d
AB
1812{
1813 FloatParts pa = float32_unpack_canonical(a, status);
1814 FloatParts pb = float32_unpack_canonical(b, status);
1815 FloatParts pr = div_floats(pa, pb, status);
1816
1817 return float32_round_pack_canonical(pr, status);
1818}
1819
4a629561
EC
1820static float64 QEMU_SOFTFLOAT_ATTR
1821soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d
AB
1822{
1823 FloatParts pa = float64_unpack_canonical(a, status);
1824 FloatParts pb = float64_unpack_canonical(b, status);
1825 FloatParts pr = div_floats(pa, pb, status);
1826
1827 return float64_round_pack_canonical(pr, status);
1828}
1829
4a629561
EC
/* Host single-precision division. */
static float hard_f32_div(float a, float b)
{
    float quotient = a / b;

    return quotient;
}
1834
/* Host double-precision division. */
static double hard_f64_div(double a, double b)
{
    double quotient = a / b;

    return quotient;
}
1839
1840static bool f32_div_pre(union_float32 a, union_float32 b)
1841{
1842 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1843 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1844 fpclassify(b.h) == FP_NORMAL;
1845 }
1846 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1847}
1848
1849static bool f64_div_pre(union_float64 a, union_float64 b)
1850{
1851 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1852 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1853 fpclassify(b.h) == FP_NORMAL;
1854 }
1855 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1856}
1857
1858static bool f32_div_post(union_float32 a, union_float32 b)
1859{
1860 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1861 return fpclassify(a.h) != FP_ZERO;
1862 }
1863 return !float32_is_zero(a.s);
1864}
1865
1866static bool f64_div_post(union_float64 a, union_float64 b)
1867{
1868 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1869 return fpclassify(a.h) != FP_ZERO;
1870 }
1871 return !float64_is_zero(a.s);
1872}
1873
1874float32 QEMU_FLATTEN
1875float32_div(float32 a, float32 b, float_status *s)
1876{
1877 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
b240c9c4 1878 f32_div_pre, f32_div_post);
4a629561
EC
1879}
1880
1881float64 QEMU_FLATTEN
1882float64_div(float64 a, float64 b, float_status *s)
1883{
1884 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
b240c9c4 1885 f64_div_pre, f64_div_post);
4a629561
EC
1886}
1887
8282310d
LZ
1888/*
1889 * Returns the result of dividing the bfloat16
1890 * value `a' by the corresponding value `b'.
1891 */
1892
1893bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
1894{
1895 FloatParts pa = bfloat16_unpack_canonical(a, status);
1896 FloatParts pb = bfloat16_unpack_canonical(b, status);
1897 FloatParts pr = div_floats(pa, pb, status);
1898
1899 return bfloat16_round_pack_canonical(pr, status);
1900}
1901
6fed16b2
AB
1902/*
1903 * Float to Float conversions
1904 *
1905 * Returns the result of converting one float format to another. The
1906 * conversion is performed according to the IEC/IEEE Standard for
1907 * Binary Floating-Point Arithmetic.
1908 *
1909 * The float_to_float helper only needs to take care of raising
1910 * invalid exceptions and handling the conversion on NaNs.
1911 */
1912
1913static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1914 float_status *s)
1915{
1916 if (dstf->arm_althp) {
1917 switch (a.cls) {
1918 case float_class_qnan:
1919 case float_class_snan:
1920 /* There is no NaN in the destination format. Raise Invalid
1921 * and return a zero with the sign of the input NaN.
1922 */
1923 s->float_exception_flags |= float_flag_invalid;
1924 a.cls = float_class_zero;
1925 a.frac = 0;
1926 a.exp = 0;
1927 break;
1928
1929 case float_class_inf:
1930 /* There is no Inf in the destination format. Raise Invalid
1931 * and return the maximum normal with the correct sign.
1932 */
1933 s->float_exception_flags |= float_flag_invalid;
1934 a.cls = float_class_normal;
1935 a.exp = dstf->exp_max;
1936 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1937 break;
1938
1939 default:
1940 break;
1941 }
1942 } else if (is_nan(a.cls)) {
1943 if (is_snan(a.cls)) {
1944 s->float_exception_flags |= float_flag_invalid;
1945 a = parts_silence_nan(a, s);
1946 }
1947 if (s->default_nan_mode) {
1948 return parts_default_nan(s);
1949 }
1950 }
1951 return a;
1952}
1953
1954float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1955{
1956 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1957 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1958 FloatParts pr = float_to_float(p, &float32_params, s);
1959 return float32_round_pack_canonical(pr, s);
1960}
1961
1962float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1963{
1964 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1965 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1966 FloatParts pr = float_to_float(p, &float64_params, s);
1967 return float64_round_pack_canonical(pr, s);
1968}
1969
1970float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1971{
1972 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1973 FloatParts p = float32_unpack_canonical(a, s);
1974 FloatParts pr = float_to_float(p, fmt16, s);
1975 return float16a_round_pack_canonical(pr, s, fmt16);
1976}
1977
21381dcf
MK
1978static float64 QEMU_SOFTFLOAT_ATTR
1979soft_float32_to_float64(float32 a, float_status *s)
6fed16b2
AB
1980{
1981 FloatParts p = float32_unpack_canonical(a, s);
1982 FloatParts pr = float_to_float(p, &float64_params, s);
1983 return float64_round_pack_canonical(pr, s);
1984}
1985
21381dcf
MK
1986float64 float32_to_float64(float32 a, float_status *s)
1987{
1988 if (likely(float32_is_normal(a))) {
1989 /* Widening conversion can never produce inexact results. */
1990 union_float32 uf;
1991 union_float64 ud;
1992 uf.s = a;
1993 ud.h = uf.h;
1994 return ud.s;
1995 } else if (float32_is_zero(a)) {
1996 return float64_set_sign(float64_zero, float32_is_neg(a));
1997 } else {
1998 return soft_float32_to_float64(a, s);
1999 }
2000}
2001
6fed16b2
AB
2002float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2003{
2004 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2005 FloatParts p = float64_unpack_canonical(a, s);
2006 FloatParts pr = float_to_float(p, fmt16, s);
2007 return float16a_round_pack_canonical(pr, s, fmt16);
2008}
2009
2010float32 float64_to_float32(float64 a, float_status *s)
2011{
2012 FloatParts p = float64_unpack_canonical(a, s);
2013 FloatParts pr = float_to_float(p, &float32_params, s);
2014 return float32_round_pack_canonical(pr, s);
2015}
2016
34f0c0a9
LZ
2017float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2018{
2019 FloatParts p = bfloat16_unpack_canonical(a, s);
2020 FloatParts pr = float_to_float(p, &float32_params, s);
2021 return float32_round_pack_canonical(pr, s);
2022}
2023
2024float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2025{
2026 FloatParts p = bfloat16_unpack_canonical(a, s);
2027 FloatParts pr = float_to_float(p, &float64_params, s);
2028 return float64_round_pack_canonical(pr, s);
2029}
2030
2031bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2032{
2033 FloatParts p = float32_unpack_canonical(a, s);
2034 FloatParts pr = float_to_float(p, &bfloat16_params, s);
2035 return bfloat16_round_pack_canonical(pr, s);
2036}
2037
2038bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2039{
2040 FloatParts p = float64_unpack_canonical(a, s);
2041 FloatParts pr = float_to_float(p, &bfloat16_params, s);
2042 return bfloat16_round_pack_canonical(pr, s);
2043}
2044
dbe4d53a
AB
2045/*
2046 * Rounds the floating-point value `a' to an integer, and returns the
2047 * result as a floating-point value. The operation is performed
2048 * according to the IEC/IEEE Standard for Binary Floating-Point
2049 * Arithmetic.
2050 */
2051
3dede407 2052static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode,
2f6c74be 2053 int scale, float_status *s)
dbe4d53a 2054{
2f6c74be
RH
2055 switch (a.cls) {
2056 case float_class_qnan:
2057 case float_class_snan:
dbe4d53a 2058 return return_nan(a, s);
dbe4d53a 2059
dbe4d53a
AB
2060 case float_class_zero:
2061 case float_class_inf:
dbe4d53a
AB
2062 /* already "integral" */
2063 break;
2f6c74be 2064
dbe4d53a 2065 case float_class_normal:
2f6c74be
RH
2066 scale = MIN(MAX(scale, -0x10000), 0x10000);
2067 a.exp += scale;
2068
dbe4d53a
AB
2069 if (a.exp >= DECOMPOSED_BINARY_POINT) {
2070 /* already integral */
2071 break;
2072 }
2073 if (a.exp < 0) {
2074 bool one;
2075 /* all fractional */
2076 s->float_exception_flags |= float_flag_inexact;
2f6c74be 2077 switch (rmode) {
dbe4d53a
AB
2078 case float_round_nearest_even:
2079 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
2080 break;
2081 case float_round_ties_away:
2082 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
2083 break;
2084 case float_round_to_zero:
2085 one = false;
2086 break;
2087 case float_round_up:
2088 one = !a.sign;
2089 break;
2090 case float_round_down:
2091 one = a.sign;
2092 break;
5d64abb3
RH
2093 case float_round_to_odd:
2094 one = true;
2095 break;
dbe4d53a
AB
2096 default:
2097 g_assert_not_reached();
2098 }
2099
2100 if (one) {
2101 a.frac = DECOMPOSED_IMPLICIT_BIT;
2102 a.exp = 0;
2103 } else {
2104 a.cls = float_class_zero;
2105 }
2106 } else {
2107 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
2108 uint64_t frac_lsbm1 = frac_lsb >> 1;
2109 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
2110 uint64_t rnd_mask = rnd_even_mask >> 1;
2111 uint64_t inc;
2112
2f6c74be 2113 switch (rmode) {
dbe4d53a
AB
2114 case float_round_nearest_even:
2115 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
2116 break;
2117 case float_round_ties_away:
2118 inc = frac_lsbm1;
2119 break;
2120 case float_round_to_zero:
2121 inc = 0;
2122 break;
2123 case float_round_up:
2124 inc = a.sign ? 0 : rnd_mask;
2125 break;
2126 case float_round_down:
2127 inc = a.sign ? rnd_mask : 0;
2128 break;
5d64abb3
RH
2129 case float_round_to_odd:
2130 inc = a.frac & frac_lsb ? 0 : rnd_mask;
2131 break;
dbe4d53a
AB
2132 default:
2133 g_assert_not_reached();
2134 }
2135
2136 if (a.frac & rnd_mask) {
2137 s->float_exception_flags |= float_flag_inexact;
2138 a.frac += inc;
2139 a.frac &= ~rnd_mask;
2140 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
2141 a.frac >>= 1;
2142 a.exp++;
2143 }
2144 }
2145 }
2146 break;
2147 default:
2148 g_assert_not_reached();
2149 }
2150 return a;
2151}
2152
2153float16 float16_round_to_int(float16 a, float_status *s)
2154{
2155 FloatParts pa = float16_unpack_canonical(a, s);
2f6c74be 2156 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2157 return float16_round_pack_canonical(pr, s);
2158}
2159
2160float32 float32_round_to_int(float32 a, float_status *s)
2161{
2162 FloatParts pa = float32_unpack_canonical(a, s);
2f6c74be 2163 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2164 return float32_round_pack_canonical(pr, s);
2165}
2166
2167float64 float64_round_to_int(float64 a, float_status *s)
2168{
2169 FloatParts pa = float64_unpack_canonical(a, s);
2f6c74be 2170 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2171 return float64_round_pack_canonical(pr, s);
2172}
2173
34f0c0a9
LZ
2174/*
2175 * Rounds the bfloat16 value `a' to an integer, and returns the
2176 * result as a bfloat16 value.
2177 */
2178
2179bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2180{
2181 FloatParts pa = bfloat16_unpack_canonical(a, s);
2182 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2183 return bfloat16_round_pack_canonical(pr, s);
2184}
2185
ab52f973
AB
2186/*
2187 * Returns the result of converting the floating-point value `a' to
2188 * the two's complement integer format. The conversion is performed
2189 * according to the IEC/IEEE Standard for Binary Floating-Point
2190 * Arithmetic---which means in particular that the conversion is
2191 * rounded according to the current rounding mode. If `a' is a NaN,
2192 * the largest positive integer is returned. Otherwise, if the
2193 * conversion overflows, the largest integer with the same sign as `a'
2194 * is returned.
2195*/
2196
3dede407
RH
2197static int64_t round_to_int_and_pack(FloatParts in, FloatRoundMode rmode,
2198 int scale, int64_t min, int64_t max,
ab52f973
AB
2199 float_status *s)
2200{
2201 uint64_t r;
2202 int orig_flags = get_float_exception_flags(s);
2f6c74be 2203 FloatParts p = round_to_int(in, rmode, scale, s);
ab52f973
AB
2204
2205 switch (p.cls) {
2206 case float_class_snan:
2207 case float_class_qnan:
801bc563 2208 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2209 return max;
2210 case float_class_inf:
801bc563 2211 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2212 return p.sign ? min : max;
2213 case float_class_zero:
2214 return 0;
2215 case float_class_normal:
2216 if (p.exp < DECOMPOSED_BINARY_POINT) {
2217 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2218 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2219 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2220 } else {
2221 r = UINT64_MAX;
2222 }
2223 if (p.sign) {
33358375 2224 if (r <= -(uint64_t) min) {
ab52f973
AB
2225 return -r;
2226 } else {
2227 s->float_exception_flags = orig_flags | float_flag_invalid;
2228 return min;
2229 }
2230 } else {
33358375 2231 if (r <= max) {
ab52f973
AB
2232 return r;
2233 } else {
2234 s->float_exception_flags = orig_flags | float_flag_invalid;
2235 return max;
2236 }
2237 }
2238 default:
2239 g_assert_not_reached();
2240 }
2241}
2242
0d93d8ec
FC
2243int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2244 float_status *s)
2245{
2246 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2247 rmode, scale, INT8_MIN, INT8_MAX, s);
2248}
2249
3dede407 2250int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2251 float_status *s)
2252{
2253 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2254 rmode, scale, INT16_MIN, INT16_MAX, s);
2255}
2256
3dede407 2257int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2258 float_status *s)
2259{
2260 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2261 rmode, scale, INT32_MIN, INT32_MAX, s);
2262}
2263
3dede407 2264int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2265 float_status *s)
2266{
2267 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2268 rmode, scale, INT64_MIN, INT64_MAX, s);
2269}
2270
3dede407 2271int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2272 float_status *s)
2273{
2274 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2275 rmode, scale, INT16_MIN, INT16_MAX, s);
2276}
2277
3dede407 2278int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2279 float_status *s)
2280{
2281 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2282 rmode, scale, INT32_MIN, INT32_MAX, s);
2283}
2284
3dede407 2285int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2286 float_status *s)
2287{
2288 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2289 rmode, scale, INT64_MIN, INT64_MAX, s);
2290}
2291
3dede407 2292int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2293 float_status *s)
2294{
2295 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2296 rmode, scale, INT16_MIN, INT16_MAX, s);
2297}
2298
3dede407 2299int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2300 float_status *s)
2301{
2302 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2303 rmode, scale, INT32_MIN, INT32_MAX, s);
2304}
2305
3dede407 2306int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2307 float_status *s)
2308{
2309 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2310 rmode, scale, INT64_MIN, INT64_MAX, s);
2311}
2312
0d93d8ec
FC
2313int8_t float16_to_int8(float16 a, float_status *s)
2314{
2315 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2316}
2317
2f6c74be
RH
2318int16_t float16_to_int16(float16 a, float_status *s)
2319{
2320 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2321}
2322
2323int32_t float16_to_int32(float16 a, float_status *s)
2324{
2325 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2326}
2327
2328int64_t float16_to_int64(float16 a, float_status *s)
2329{
2330 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2331}
2332
2333int16_t float32_to_int16(float32 a, float_status *s)
2334{
2335 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2336}
2337
2338int32_t float32_to_int32(float32 a, float_status *s)
2339{
2340 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2341}
2342
2343int64_t float32_to_int64(float32 a, float_status *s)
2344{
2345 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2346}
2347
2348int16_t float64_to_int16(float64 a, float_status *s)
2349{
2350 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2351}
2352
2353int32_t float64_to_int32(float64 a, float_status *s)
2354{
2355 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2356}
2357
2358int64_t float64_to_int64(float64 a, float_status *s)
2359{
2360 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2361}
2362
2363int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2364{
2365 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2366}
2367
2368int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2369{
2370 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2371}
2372
2373int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2374{
2375 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2376}
2377
2f6c74be
RH
2378int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2379{
2380 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2381}
ab52f973 2382
2f6c74be
RH
2383int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2384{
2385 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2386}
2387
2388int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2389{
2390 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2391}
2392
2393int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2394{
2395 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2396}
ab52f973 2397
2f6c74be
RH
2398int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2399{
2400 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2401}
ab52f973 2402
2f6c74be
RH
2403int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2404{
2405 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2406}
ab52f973 2407
34f0c0a9
LZ
2408/*
2409 * Returns the result of converting the floating-point value `a' to
2410 * the two's complement integer format.
2411 */
2412
2413int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2414 float_status *s)
2415{
2416 return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
2417 rmode, scale, INT16_MIN, INT16_MAX, s);
2418}
2419
2420int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2421 float_status *s)
2422{
2423 return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
2424 rmode, scale, INT32_MIN, INT32_MAX, s);
2425}
2426
2427int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2428 float_status *s)
2429{
2430 return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
2431 rmode, scale, INT64_MIN, INT64_MAX, s);
2432}
2433
2434int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2435{
2436 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2437}
2438
2439int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2440{
2441 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2442}
2443
2444int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2445{
2446 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2447}
2448
2449int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2450{
2451 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2452}
2453
2454int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2455{
2456 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2457}
2458
2459int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2460{
2461 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2462}
2463
ab52f973
AB
2464/*
2465 * Returns the result of converting the floating-point value `a' to
2466 * the unsigned integer format. The conversion is performed according
2467 * to the IEC/IEEE Standard for Binary Floating-Point
2468 * Arithmetic---which means in particular that the conversion is
2469 * rounded according to the current rounding mode. If `a' is a NaN,
2470 * the largest unsigned integer is returned. Otherwise, if the
2471 * conversion overflows, the largest unsigned integer is returned. If
2472 * the 'a' is negative, the result is rounded and zero is returned;
2473 * values that do not round to zero will raise the inexact exception
2474 * flag.
2475 */
2476
3dede407
RH
2477static uint64_t round_to_uint_and_pack(FloatParts in, FloatRoundMode rmode,
2478 int scale, uint64_t max,
2479 float_status *s)
ab52f973
AB
2480{
2481 int orig_flags = get_float_exception_flags(s);
2f6c74be
RH
2482 FloatParts p = round_to_int(in, rmode, scale, s);
2483 uint64_t r;
ab52f973
AB
2484
2485 switch (p.cls) {
2486 case float_class_snan:
2487 case float_class_qnan:
2488 s->float_exception_flags = orig_flags | float_flag_invalid;
2489 return max;
2490 case float_class_inf:
801bc563 2491 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2492 return p.sign ? 0 : max;
2493 case float_class_zero:
2494 return 0;
2495 case float_class_normal:
ab52f973
AB
2496 if (p.sign) {
2497 s->float_exception_flags = orig_flags | float_flag_invalid;
2498 return 0;
2499 }
2500
2501 if (p.exp < DECOMPOSED_BINARY_POINT) {
2502 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2503 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2504 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2505 } else {
2506 s->float_exception_flags = orig_flags | float_flag_invalid;
2507 return max;
2508 }
2509
2510 /* For uint64 this will never trip, but if p.exp is too large
2511 * to shift a decomposed fraction we shall have exited via the
2512 * 3rd leg above.
2513 */
2514 if (r > max) {
2515 s->float_exception_flags = orig_flags | float_flag_invalid;
2516 return max;
ab52f973 2517 }
2f6c74be 2518 return r;
ab52f973
AB
2519 default:
2520 g_assert_not_reached();
2521 }
2522}
2523
0d93d8ec
FC
2524uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2525 float_status *s)
2526{
2527 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2528 rmode, scale, UINT8_MAX, s);
2529}
2530
3dede407 2531uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2532 float_status *s)
2533{
2534 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2535 rmode, scale, UINT16_MAX, s);
2536}
2537
3dede407 2538uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2539 float_status *s)
2540{
2541 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2542 rmode, scale, UINT32_MAX, s);
2543}
2544
3dede407 2545uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2546 float_status *s)
2547{
2548 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2549 rmode, scale, UINT64_MAX, s);
2550}
2551
3dede407 2552uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2553 float_status *s)
2554{
2555 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2556 rmode, scale, UINT16_MAX, s);
2557}
2558
3dede407 2559uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2560 float_status *s)
2561{
2562 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2563 rmode, scale, UINT32_MAX, s);
2564}
2565
3dede407 2566uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2567 float_status *s)
2568{
2569 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2570 rmode, scale, UINT64_MAX, s);
2571}
2572
3dede407 2573uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2574 float_status *s)
2575{
2576 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2577 rmode, scale, UINT16_MAX, s);
2578}
2579
3dede407 2580uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2581 float_status *s)
2582{
2583 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2584 rmode, scale, UINT32_MAX, s);
2585}
2586
3dede407 2587uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2588 float_status *s)
2589{
2590 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2591 rmode, scale, UINT64_MAX, s);
2592}
2593
0d93d8ec
FC
2594uint8_t float16_to_uint8(float16 a, float_status *s)
2595{
2596 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2597}
2598
2f6c74be
RH
2599uint16_t float16_to_uint16(float16 a, float_status *s)
2600{
2601 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2602}
2603
2604uint32_t float16_to_uint32(float16 a, float_status *s)
2605{
2606 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2607}
2608
2609uint64_t float16_to_uint64(float16 a, float_status *s)
2610{
2611 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2612}
2613
2614uint16_t float32_to_uint16(float32 a, float_status *s)
2615{
2616 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2617}
2618
2619uint32_t float32_to_uint32(float32 a, float_status *s)
2620{
2621 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2622}
2623
2624uint64_t float32_to_uint64(float32 a, float_status *s)
2625{
2626 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2627}
2628
2629uint16_t float64_to_uint16(float64 a, float_status *s)
2630{
2631 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2632}
2633
2634uint32_t float64_to_uint32(float64 a, float_status *s)
2635{
2636 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2637}
2638
2639uint64_t float64_to_uint64(float64 a, float_status *s)
2640{
2641 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2642}
2643
2644uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2645{
2646 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2647}
2648
2649uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2650{
2651 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2652}
2653
2654uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2655{
2656 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2657}
2658
2659uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2660{
2661 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2662}
2663
2664uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2665{
2666 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2667}
2668
2669uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2670{
2671 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2672}
2673
2674uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2675{
2676 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2677}
2678
2679uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2680{
2681 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2682}
2683
2684uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2685{
2686 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2687}
ab52f973 2688
34f0c0a9
LZ
2689/*
2690 * Returns the result of converting the bfloat16 value `a' to
2691 * the unsigned integer format.
2692 */
2693
2694uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2695 int scale, float_status *s)
2696{
2697 return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
2698 rmode, scale, UINT16_MAX, s);
2699}
2700
2701uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2702 int scale, float_status *s)
2703{
2704 return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
2705 rmode, scale, UINT32_MAX, s);
2706}
2707
2708uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
2709 int scale, float_status *s)
2710{
2711 return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
2712 rmode, scale, UINT64_MAX, s);
2713}
2714
2715uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
2716{
2717 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2718}
2719
2720uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
2721{
2722 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2723}
2724
2725uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
2726{
2727 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2728}
2729
2730uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
2731{
2732 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2733}
2734
2735uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
2736{
2737 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2738}
2739
2740uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
2741{
2742 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2743}
2744
c02e1fb8
AB
2745/*
2746 * Integer to float conversions
2747 *
2748 * Returns the result of converting the two's complement integer `a'
2749 * to the floating-point format. The conversion is performed according
2750 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2751 */
2752
2abdfe24 2753static FloatParts int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 2754{
2abdfe24
RH
2755 FloatParts r = { .sign = false };
2756
c02e1fb8
AB
2757 if (a == 0) {
2758 r.cls = float_class_zero;
c02e1fb8 2759 } else {
2abdfe24
RH
2760 uint64_t f = a;
2761 int shift;
2762
2763 r.cls = float_class_normal;
c02e1fb8 2764 if (a < 0) {
2abdfe24 2765 f = -f;
c02e1fb8 2766 r.sign = true;
c02e1fb8 2767 }
2abdfe24
RH
2768 shift = clz64(f) - 1;
2769 scale = MIN(MAX(scale, -0x10000), 0x10000);
2770
2771 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2772 r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
c02e1fb8
AB
2773 }
2774
2775 return r;
2776}
2777
2abdfe24 2778float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2779{
2abdfe24 2780 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2781 return float16_round_pack_canonical(pa, status);
2782}
2783
2abdfe24
RH
2784float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2785{
2786 return int64_to_float16_scalbn(a, scale, status);
2787}
2788
2789float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2790{
2791 return int64_to_float16_scalbn(a, scale, status);
2792}
2793
2794float16 int64_to_float16(int64_t a, float_status *status)
2795{
2796 return int64_to_float16_scalbn(a, 0, status);
2797}
2798
c02e1fb8
AB
2799float16 int32_to_float16(int32_t a, float_status *status)
2800{
2abdfe24 2801 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2802}
2803
2804float16 int16_to_float16(int16_t a, float_status *status)
2805{
2abdfe24 2806 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2807}
2808
0d93d8ec
FC
2809float16 int8_to_float16(int8_t a, float_status *status)
2810{
2811 return int64_to_float16_scalbn(a, 0, status);
2812}
2813
2abdfe24 2814float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2815{
2abdfe24 2816 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2817 return float32_round_pack_canonical(pa, status);
2818}
2819
2abdfe24
RH
2820float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
2821{
2822 return int64_to_float32_scalbn(a, scale, status);
2823}
2824
2825float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
2826{
2827 return int64_to_float32_scalbn(a, scale, status);
2828}
2829
2830float32 int64_to_float32(int64_t a, float_status *status)
2831{
2832 return int64_to_float32_scalbn(a, 0, status);
2833}
2834
c02e1fb8
AB
2835float32 int32_to_float32(int32_t a, float_status *status)
2836{
2abdfe24 2837 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2838}
2839
2840float32 int16_to_float32(int16_t a, float_status *status)
2841{
2abdfe24 2842 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2843}
2844
2abdfe24 2845float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2846{
2abdfe24 2847 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2848 return float64_round_pack_canonical(pa, status);
2849}
2850
2abdfe24
RH
2851float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
2852{
2853 return int64_to_float64_scalbn(a, scale, status);
2854}
2855
2856float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
2857{
2858 return int64_to_float64_scalbn(a, scale, status);
2859}
2860
2861float64 int64_to_float64(int64_t a, float_status *status)
2862{
2863 return int64_to_float64_scalbn(a, 0, status);
2864}
2865
c02e1fb8
AB
2866float64 int32_to_float64(int32_t a, float_status *status)
2867{
2abdfe24 2868 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2869}
2870
2871float64 int16_to_float64(int16_t a, float_status *status)
2872{
2abdfe24 2873 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2874}
2875
34f0c0a9
LZ
2876/*
2877 * Returns the result of converting the two's complement integer `a'
2878 * to the bfloat16 format.
2879 */
2880
2881bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
2882{
2883 FloatParts pa = int_to_float(a, scale, status);
2884 return bfloat16_round_pack_canonical(pa, status);
2885}
2886
2887bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
2888{
2889 return int64_to_bfloat16_scalbn(a, scale, status);
2890}
2891
2892bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
2893{
2894 return int64_to_bfloat16_scalbn(a, scale, status);
2895}
2896
2897bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
2898{
2899 return int64_to_bfloat16_scalbn(a, 0, status);
2900}
2901
2902bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
2903{
2904 return int64_to_bfloat16_scalbn(a, 0, status);
2905}
2906
2907bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
2908{
2909 return int64_to_bfloat16_scalbn(a, 0, status);
2910}
c02e1fb8
AB
2911
2912/*
2913 * Unsigned Integer to float conversions
2914 *
2915 * Returns the result of converting the unsigned integer `a' to the
2916 * floating-point format. The conversion is performed according to the
2917 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2918 */
2919
2abdfe24 2920static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 2921{
2abdfe24 2922 FloatParts r = { .sign = false };
c02e1fb8
AB
2923
2924 if (a == 0) {
2925 r.cls = float_class_zero;
2926 } else {
2abdfe24 2927 scale = MIN(MAX(scale, -0x10000), 0x10000);
c02e1fb8 2928 r.cls = float_class_normal;
2abdfe24
RH
2929 if ((int64_t)a < 0) {
2930 r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
2931 shift64RightJamming(a, 1, &a);
c02e1fb8
AB
2932 r.frac = a;
2933 } else {
2abdfe24
RH
2934 int shift = clz64(a) - 1;
2935 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2936 r.frac = a << shift;
c02e1fb8
AB
2937 }
2938 }
2939
2940 return r;
2941}
2942
2abdfe24 2943float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2944{
2abdfe24 2945 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2946 return float16_round_pack_canonical(pa, status);
2947}
2948
2abdfe24
RH
2949float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
2950{
2951 return uint64_to_float16_scalbn(a, scale, status);
2952}
2953
2954float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
2955{
2956 return uint64_to_float16_scalbn(a, scale, status);
2957}
2958
2959float16 uint64_to_float16(uint64_t a, float_status *status)
2960{
2961 return uint64_to_float16_scalbn(a, 0, status);
2962}
2963
c02e1fb8
AB
2964float16 uint32_to_float16(uint32_t a, float_status *status)
2965{
2abdfe24 2966 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2967}
2968
2969float16 uint16_to_float16(uint16_t a, float_status *status)
2970{
2abdfe24 2971 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2972}
2973
0d93d8ec
FC
2974float16 uint8_to_float16(uint8_t a, float_status *status)
2975{
2976 return uint64_to_float16_scalbn(a, 0, status);
2977}
2978
2abdfe24 2979float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2980{
2abdfe24 2981 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2982 return float32_round_pack_canonical(pa, status);
2983}
2984
2abdfe24
RH
2985float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
2986{
2987 return uint64_to_float32_scalbn(a, scale, status);
2988}
2989
2990float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
2991{
2992 return uint64_to_float32_scalbn(a, scale, status);
2993}
2994
2995float32 uint64_to_float32(uint64_t a, float_status *status)
2996{
2997 return uint64_to_float32_scalbn(a, 0, status);
2998}
2999
c02e1fb8
AB
3000float32 uint32_to_float32(uint32_t a, float_status *status)
3001{
2abdfe24 3002 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3003}
3004
3005float32 uint16_to_float32(uint16_t a, float_status *status)
3006{
2abdfe24 3007 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
3008}
3009
2abdfe24 3010float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 3011{
2abdfe24 3012 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
3013 return float64_round_pack_canonical(pa, status);
3014}
3015
2abdfe24
RH
3016float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3017{
3018 return uint64_to_float64_scalbn(a, scale, status);
3019}
3020
3021float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3022{
3023 return uint64_to_float64_scalbn(a, scale, status);
3024}
3025
3026float64 uint64_to_float64(uint64_t a, float_status *status)
3027{
3028 return uint64_to_float64_scalbn(a, 0, status);
3029}
3030
c02e1fb8
AB
3031float64 uint32_to_float64(uint32_t a, float_status *status)
3032{
2abdfe24 3033 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3034}
3035
3036float64 uint16_to_float64(uint16_t a, float_status *status)
3037{
2abdfe24 3038 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3039}
3040
34f0c0a9
LZ
3041/*
3042 * Returns the result of converting the unsigned integer `a' to the
3043 * bfloat16 format.
3044 */
3045
3046bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3047{
3048 FloatParts pa = uint_to_float(a, scale, status);
3049 return bfloat16_round_pack_canonical(pa, status);
3050}
3051
3052bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3053{
3054 return uint64_to_bfloat16_scalbn(a, scale, status);
3055}
3056
3057bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3058{
3059 return uint64_to_bfloat16_scalbn(a, scale, status);
3060}
3061
3062bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3063{
3064 return uint64_to_bfloat16_scalbn(a, 0, status);
3065}
3066
3067bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3068{
3069 return uint64_to_bfloat16_scalbn(a, 0, status);
3070}
3071
3072bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3073{
3074 return uint64_to_bfloat16_scalbn(a, 0, status);
3075}
3076
89360067
AB
3077/* Float Min/Max */
3078/* min() and max() functions. These can't be implemented as
3079 * 'compare and pick one input' because that would mishandle
3080 * NaNs and +0 vs -0.
3081 *
3082 * minnum() and maxnum() functions. These are similar to the min()
3083 * and max() functions but if one of the arguments is a QNaN and
3084 * the other is numerical then the numerical argument is returned.
3085 * SNaNs will get quietened before being returned.
3086 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3087 * and maxNum() operations. min() and max() are the typical min/max
3088 * semantics provided by many CPUs which predate that specification.
3089 *
3090 * minnummag() and maxnummag() functions correspond to minNumMag()
3091 * and maxNumMag() from IEEE 754-2008.
3092 */
3093static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
3094 bool ieee, bool ismag, float_status *s)
3095{
3096 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
3097 if (ieee) {
3098 /* Takes two floating-point values `a' and `b', one of
3099 * which is a NaN, and returns the appropriate NaN
3100 * result. If either `a' or `b' is a signaling NaN,
3101 * the invalid exception is raised.
3102 */
3103 if (is_snan(a.cls) || is_snan(b.cls)) {
3104 return pick_nan(a, b, s);
3105 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
3106 return b;
3107 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
3108 return a;
3109 }
3110 }
3111 return pick_nan(a, b, s);
3112 } else {
3113 int a_exp, b_exp;
89360067
AB
3114
3115 switch (a.cls) {
3116 case float_class_normal:
3117 a_exp = a.exp;
3118 break;
3119 case float_class_inf:
3120 a_exp = INT_MAX;
3121 break;
3122 case float_class_zero:
3123 a_exp = INT_MIN;
3124 break;
3125 default:
3126 g_assert_not_reached();
3127 break;
3128 }
3129 switch (b.cls) {
3130 case float_class_normal:
3131 b_exp = b.exp;
3132 break;
3133 case float_class_inf:
3134 b_exp = INT_MAX;
3135 break;
3136 case float_class_zero:
3137 b_exp = INT_MIN;
3138 break;
3139 default:
3140 g_assert_not_reached();
3141 break;
3142 }
3143
6245327a
EC
3144 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
3145 bool a_less = a_exp < b_exp;
3146 if (a_exp == b_exp) {
3147 a_less = a.frac < b.frac;
3148 }
3149 return a_less ^ ismin ? b : a;
89360067
AB
3150 }
3151
6245327a 3152 if (a.sign == b.sign) {
89360067
AB
3153 bool a_less = a_exp < b_exp;
3154 if (a_exp == b_exp) {
3155 a_less = a.frac < b.frac;
3156 }
6245327a 3157 return a.sign ^ a_less ^ ismin ? b : a;
89360067 3158 } else {
6245327a 3159 return a.sign ^ ismin ? b : a;
89360067
AB
3160 }
3161 }
3162}
3163
3164#define MINMAX(sz, name, ismin, isiee, ismag) \
3165float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
3166 float_status *s) \
3167{ \
3168 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
3169 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
3170 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
3171 \
3172 return float ## sz ## _round_pack_canonical(pr, s); \
3173}
3174
3175MINMAX(16, min, true, false, false)
3176MINMAX(16, minnum, true, true, false)
3177MINMAX(16, minnummag, true, true, true)
3178MINMAX(16, max, false, false, false)
3179MINMAX(16, maxnum, false, true, false)
3180MINMAX(16, maxnummag, false, true, true)
3181
3182MINMAX(32, min, true, false, false)
3183MINMAX(32, minnum, true, true, false)
3184MINMAX(32, minnummag, true, true, true)
3185MINMAX(32, max, false, false, false)
3186MINMAX(32, maxnum, false, true, false)
3187MINMAX(32, maxnummag, false, true, true)
3188
3189MINMAX(64, min, true, false, false)
3190MINMAX(64, minnum, true, true, false)
3191MINMAX(64, minnummag, true, true, true)
3192MINMAX(64, max, false, false, false)
3193MINMAX(64, maxnum, false, true, false)
3194MINMAX(64, maxnummag, false, true, true)
3195
3196#undef MINMAX
3197
8282310d
LZ
3198#define BF16_MINMAX(name, ismin, isiee, ismag) \
3199bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
3200{ \
3201 FloatParts pa = bfloat16_unpack_canonical(a, s); \
3202 FloatParts pb = bfloat16_unpack_canonical(b, s); \
3203 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
3204 \
3205 return bfloat16_round_pack_canonical(pr, s); \
3206}
3207
3208BF16_MINMAX(min, true, false, false)
3209BF16_MINMAX(minnum, true, true, false)
3210BF16_MINMAX(minnummag, true, true, true)
3211BF16_MINMAX(max, false, false, false)
3212BF16_MINMAX(maxnum, false, true, false)
3213BF16_MINMAX(maxnummag, false, true, true)
3214
3215#undef BF16_MINMAX
3216
0c4c9092 3217/* Floating point compare */
71bfd65c
RH
3218static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet,
3219 float_status *s)
0c4c9092
AB
3220{
3221 if (is_nan(a.cls) || is_nan(b.cls)) {
3222 if (!is_quiet ||
3223 a.cls == float_class_snan ||
3224 b.cls == float_class_snan) {
3225 s->float_exception_flags |= float_flag_invalid;
3226 }
3227 return float_relation_unordered;
3228 }
3229
3230 if (a.cls == float_class_zero) {
3231 if (b.cls == float_class_zero) {
3232 return float_relation_equal;
3233 }
3234 return b.sign ? float_relation_greater : float_relation_less;
3235 } else if (b.cls == float_class_zero) {
3236 return a.sign ? float_relation_less : float_relation_greater;
3237 }
3238
3239 /* The only really important thing about infinity is its sign. If
3240 * both are infinities the sign marks the smallest of the two.
3241 */
3242 if (a.cls == float_class_inf) {
3243 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3244 return float_relation_equal;
3245 }
3246 return a.sign ? float_relation_less : float_relation_greater;
3247 } else if (b.cls == float_class_inf) {
3248 return b.sign ? float_relation_greater : float_relation_less;
3249 }
3250
3251 if (a.sign != b.sign) {
3252 return a.sign ? float_relation_less : float_relation_greater;
3253 }
3254
3255 if (a.exp == b.exp) {
3256 if (a.frac == b.frac) {
3257 return float_relation_equal;
3258 }
3259 if (a.sign) {
3260 return a.frac > b.frac ?
3261 float_relation_less : float_relation_greater;
3262 } else {
3263 return a.frac > b.frac ?
3264 float_relation_greater : float_relation_less;
3265 }
3266 } else {
3267 if (a.sign) {
3268 return a.exp > b.exp ? float_relation_less : float_relation_greater;
3269 } else {
3270 return a.exp > b.exp ? float_relation_greater : float_relation_less;
3271 }
3272 }
3273}
3274
d9fe9db9
EC
3275#define COMPARE(name, attr, sz) \
3276static int attr \
3277name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
0c4c9092
AB
3278{ \
3279 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
3280 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
d9fe9db9 3281 return compare_floats(pa, pb, is_quiet, s); \
0c4c9092
AB
3282}
3283
d9fe9db9
EC
3284COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
3285COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
3286COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
0c4c9092
AB
3287
3288#undef COMPARE
3289
71bfd65c 3290FloatRelation float16_compare(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3291{
3292 return soft_f16_compare(a, b, false, s);
3293}
3294
71bfd65c 3295FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3296{
3297 return soft_f16_compare(a, b, true, s);
3298}
3299
71bfd65c 3300static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3301f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
3302{
3303 union_float32 ua, ub;
3304
3305 ua.s = xa;
3306 ub.s = xb;
3307
3308 if (QEMU_NO_HARDFLOAT) {
3309 goto soft;
3310 }
3311
3312 float32_input_flush2(&ua.s, &ub.s, s);
3313 if (isgreaterequal(ua.h, ub.h)) {
3314 if (isgreater(ua.h, ub.h)) {
3315 return float_relation_greater;
3316 }
3317 return float_relation_equal;
3318 }
3319 if (likely(isless(ua.h, ub.h))) {
3320 return float_relation_less;
3321 }
3322 /* The only condition remaining is unordered.
3323 * Fall through to set flags.
3324 */
3325 soft:
3326 return soft_f32_compare(ua.s, ub.s, is_quiet, s);
3327}
3328
71bfd65c 3329FloatRelation float32_compare(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3330{
3331 return f32_compare(a, b, false, s);
3332}
3333
71bfd65c 3334FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3335{
3336 return f32_compare(a, b, true, s);
3337}
3338
71bfd65c 3339static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3340f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3341{
3342 union_float64 ua, ub;
3343
3344 ua.s = xa;
3345 ub.s = xb;
3346
3347 if (QEMU_NO_HARDFLOAT) {
3348 goto soft;
3349 }
3350
3351 float64_input_flush2(&ua.s, &ub.s, s);
3352 if (isgreaterequal(ua.h, ub.h)) {
3353 if (isgreater(ua.h, ub.h)) {
3354 return float_relation_greater;
3355 }
3356 return float_relation_equal;
3357 }
3358 if (likely(isless(ua.h, ub.h))) {
3359 return float_relation_less;
3360 }
3361 /* The only condition remaining is unordered.
3362 * Fall through to set flags.
3363 */
3364 soft:
3365 return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3366}
3367
71bfd65c 3368FloatRelation float64_compare(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3369{
3370 return f64_compare(a, b, false, s);
3371}
3372
71bfd65c 3373FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3374{
3375 return f64_compare(a, b, true, s);
3376}
3377
8282310d
LZ
3378static FloatRelation QEMU_FLATTEN
3379soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
3380{
3381 FloatParts pa = bfloat16_unpack_canonical(a, s);
3382 FloatParts pb = bfloat16_unpack_canonical(b, s);
3383 return compare_floats(pa, pb, is_quiet, s);
3384}
3385
3386FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3387{
3388 return soft_bf16_compare(a, b, false, s);
3389}
3390
3391FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3392{
3393 return soft_bf16_compare(a, b, true, s);
3394}
3395
0bfc9f19
AB
3396/* Multiply A by 2 raised to the power N. */
3397static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
3398{
3399 if (unlikely(is_nan(a.cls))) {
3400 return return_nan(a, s);
3401 }
3402 if (a.cls == float_class_normal) {
ce8d4082
RH
3403 /* The largest float type (even though not supported by FloatParts)
3404 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3405 * still allows rounding to infinity, without allowing overflow
3406 * within the int32_t that backs FloatParts.exp.
3407 */
3408 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
3409 a.exp += n;
3410 }
3411 return a;
3412}
3413
3414float16 float16_scalbn(float16 a, int n, float_status *status)
3415{
3416 FloatParts pa = float16_unpack_canonical(a, status);
3417 FloatParts pr = scalbn_decomposed(pa, n, status);
3418 return float16_round_pack_canonical(pr, status);
3419}
3420
3421float32 float32_scalbn(float32 a, int n, float_status *status)
3422{
3423 FloatParts pa = float32_unpack_canonical(a, status);
3424 FloatParts pr = scalbn_decomposed(pa, n, status);
3425 return float32_round_pack_canonical(pr, status);
3426}
3427
3428float64 float64_scalbn(float64 a, int n, float_status *status)
3429{
3430 FloatParts pa = float64_unpack_canonical(a, status);
3431 FloatParts pr = scalbn_decomposed(pa, n, status);
3432 return float64_round_pack_canonical(pr, status);
3433}
3434
8282310d
LZ
3435bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3436{
3437 FloatParts pa = bfloat16_unpack_canonical(a, status);
3438 FloatParts pr = scalbn_decomposed(pa, n, status);
3439 return bfloat16_round_pack_canonical(pr, status);
3440}
3441
c13bb2da
AB
3442/*
3443 * Square Root
3444 *
3445 * The old softfloat code did an approximation step before zeroing in
3446 * on the final result. However, for simplicity we just compute the
3447 * square root by iterating down from the implicit bit to enough extra
3448 * bits to ensure we get a correctly rounded result.
3449 *
3450 * This does mean however the calculation is slower than before,
3451 * especially for 64 bit floats.
3452 */
3453
3454static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
3455{
3456 uint64_t a_frac, r_frac, s_frac;
3457 int bit, last_bit;
3458
3459 if (is_nan(a.cls)) {
3460 return return_nan(a, s);
3461 }
3462 if (a.cls == float_class_zero) {
3463 return a; /* sqrt(+-0) = +-0 */
3464 }
3465 if (a.sign) {
3466 s->float_exception_flags |= float_flag_invalid;
f7e598e2 3467 return parts_default_nan(s);
c13bb2da
AB
3468 }
3469 if (a.cls == float_class_inf) {
3470 return a; /* sqrt(+inf) = +inf */
3471 }
3472
3473 assert(a.cls == float_class_normal);
3474
3475 /* We need two overflow bits at the top. Adding room for that is a
3476 * right shift. If the exponent is odd, we can discard the low bit
3477 * by multiplying the fraction by 2; that's a left shift. Combine
3478 * those and we shift right if the exponent is even.
3479 */
3480 a_frac = a.frac;
3481 if (!(a.exp & 1)) {
3482 a_frac >>= 1;
3483 }
3484 a.exp >>= 1;
3485
3486 /* Bit-by-bit computation of sqrt. */
3487 r_frac = 0;
3488 s_frac = 0;
3489
3490 /* Iterate from implicit bit down to the 3 extra bits to compute a
3491 * properly rounded result. Remember we've inserted one more bit
3492 * at the top, so these positions are one less.
3493 */
3494 bit = DECOMPOSED_BINARY_POINT - 1;
3495 last_bit = MAX(p->frac_shift - 4, 0);
3496 do {
3497 uint64_t q = 1ULL << bit;
3498 uint64_t t_frac = s_frac + q;
3499 if (t_frac <= a_frac) {
3500 s_frac = t_frac + q;
3501 a_frac -= t_frac;
3502 r_frac += q;
3503 }
3504 a_frac <<= 1;
3505 } while (--bit >= last_bit);
3506
3507 /* Undo the right shift done above. If there is any remaining
3508 * fraction, the result is inexact. Set the sticky bit.
3509 */
3510 a.frac = (r_frac << 1) + (a_frac != 0);
3511
3512 return a;
3513}
3514
97ff87c0 3515float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da
AB
3516{
3517 FloatParts pa = float16_unpack_canonical(a, status);
3518 FloatParts pr = sqrt_float(pa, status, &float16_params);
3519 return float16_round_pack_canonical(pr, status);
3520}
3521
f131bae8
EC
3522static float32 QEMU_SOFTFLOAT_ATTR
3523soft_f32_sqrt(float32 a, float_status *status)
c13bb2da
AB
3524{
3525 FloatParts pa = float32_unpack_canonical(a, status);
3526 FloatParts pr = sqrt_float(pa, status, &float32_params);
3527 return float32_round_pack_canonical(pr, status);
3528}
3529
f131bae8
EC
3530static float64 QEMU_SOFTFLOAT_ATTR
3531soft_f64_sqrt(float64 a, float_status *status)
c13bb2da
AB
3532{
3533 FloatParts pa = float64_unpack_canonical(a, status);
3534 FloatParts pr = sqrt_float(pa, status, &float64_params);
3535 return float64_round_pack_canonical(pr, status);
3536}
3537
f131bae8
EC
3538float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3539{
3540 union_float32 ua, ur;
3541
3542 ua.s = xa;
3543 if (unlikely(!can_use_fpu(s))) {
3544 goto soft;
3545 }
3546
3547 float32_input_flush1(&ua.s, s);
3548 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3549 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3550 fpclassify(ua.h) == FP_ZERO) ||
3551 signbit(ua.h))) {
3552 goto soft;
3553 }
3554 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3555 float32_is_neg(ua.s))) {
3556 goto soft;
3557 }
3558 ur.h = sqrtf(ua.h);
3559 return ur.s;
3560
3561 soft:
3562 return soft_f32_sqrt(ua.s, s);
3563}
3564
3565float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3566{
3567 union_float64 ua, ur;
3568
3569 ua.s = xa;
3570 if (unlikely(!can_use_fpu(s))) {
3571 goto soft;
3572 }
3573
3574 float64_input_flush1(&ua.s, s);
3575 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3576 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3577 fpclassify(ua.h) == FP_ZERO) ||
3578 signbit(ua.h))) {
3579 goto soft;
3580 }
3581 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3582 float64_is_neg(ua.s))) {
3583 goto soft;
3584 }
3585 ur.h = sqrt(ua.h);
3586 return ur.s;
3587
3588 soft:
3589 return soft_f64_sqrt(ua.s, s);
3590}
3591
8282310d
LZ
3592bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3593{
3594 FloatParts pa = bfloat16_unpack_canonical(a, status);
3595 FloatParts pr = sqrt_float(pa, status, &bfloat16_params);
3596 return bfloat16_round_pack_canonical(pr, status);
3597}
3598
0218a16e
RH
3599/*----------------------------------------------------------------------------
3600| The pattern for a default generated NaN.
3601*----------------------------------------------------------------------------*/
3602
3603float16 float16_default_nan(float_status *status)
3604{
3605 FloatParts p = parts_default_nan(status);
3606 p.frac >>= float16_params.frac_shift;
3607 return float16_pack_raw(p);
3608}
3609
3610float32 float32_default_nan(float_status *status)
3611{
3612 FloatParts p = parts_default_nan(status);
3613 p.frac >>= float32_params.frac_shift;
3614 return float32_pack_raw(p);
3615}
3616
3617float64 float64_default_nan(float_status *status)
3618{
3619 FloatParts p = parts_default_nan(status);
3620 p.frac >>= float64_params.frac_shift;
3621 return float64_pack_raw(p);
3622}
3623
3624float128 float128_default_nan(float_status *status)
3625{
3626 FloatParts p = parts_default_nan(status);
3627 float128 r;
3628
3629 /* Extrapolate from the choices made by parts_default_nan to fill
3630 * in the quad-floating format. If the low bit is set, assume we
3631 * want to set all non-snan bits.
3632 */
3633 r.low = -(p.frac & 1);
3634 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
e9321124 3635 r.high |= UINT64_C(0x7FFF000000000000);
0218a16e
RH
3636 r.high |= (uint64_t)p.sign << 63;
3637
3638 return r;
3639}
c13bb2da 3640
8282310d
LZ
3641bfloat16 bfloat16_default_nan(float_status *status)
3642{
3643 FloatParts p = parts_default_nan(status);
3644 p.frac >>= bfloat16_params.frac_shift;
3645 return bfloat16_pack_raw(p);
3646}
3647
158142c2 3648/*----------------------------------------------------------------------------
377ed926
RH
3649| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3650*----------------------------------------------------------------------------*/
3651
3652float16 float16_silence_nan(float16 a, float_status *status)
3653{
3654 FloatParts p = float16_unpack_raw(a);
3655 p.frac <<= float16_params.frac_shift;
3656 p = parts_silence_nan(p, status);
3657 p.frac >>= float16_params.frac_shift;
3658 return float16_pack_raw(p);
3659}
3660
3661float32 float32_silence_nan(float32 a, float_status *status)
3662{
3663 FloatParts p = float32_unpack_raw(a);
3664 p.frac <<= float32_params.frac_shift;
3665 p = parts_silence_nan(p, status);
3666 p.frac >>= float32_params.frac_shift;
3667 return float32_pack_raw(p);
3668}
3669
3670float64 float64_silence_nan(float64 a, float_status *status)
3671{
3672 FloatParts p = float64_unpack_raw(a);
3673 p.frac <<= float64_params.frac_shift;
3674 p = parts_silence_nan(p, status);
3675 p.frac >>= float64_params.frac_shift;
3676 return float64_pack_raw(p);
3677}
3678
8282310d
LZ
3679bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3680{
3681 FloatParts p = bfloat16_unpack_raw(a);
3682 p.frac <<= bfloat16_params.frac_shift;
3683 p = parts_silence_nan(p, status);
3684 p.frac >>= bfloat16_params.frac_shift;
3685 return bfloat16_pack_raw(p);
3686}
e6b405fe
AB
3687
3688/*----------------------------------------------------------------------------
3689| If `a' is denormal and we are in flush-to-zero mode then set the
3690| input-denormal exception and return zero. Otherwise just return the value.
3691*----------------------------------------------------------------------------*/
3692
3693static bool parts_squash_denormal(FloatParts p, float_status *status)
3694{
3695 if (p.exp == 0 && p.frac != 0) {
3696 float_raise(float_flag_input_denormal, status);
3697 return true;
3698 }
3699
3700 return false;
3701}
3702
3703float16 float16_squash_input_denormal(float16 a, float_status *status)
3704{
3705 if (status->flush_inputs_to_zero) {
3706 FloatParts p = float16_unpack_raw(a);
3707 if (parts_squash_denormal(p, status)) {
3708 return float16_set_sign(float16_zero, p.sign);
3709 }
3710 }
3711 return a;
3712}
3713
3714float32 float32_squash_input_denormal(float32 a, float_status *status)
3715{
3716 if (status->flush_inputs_to_zero) {
3717 FloatParts p = float32_unpack_raw(a);
3718 if (parts_squash_denormal(p, status)) {
3719 return float32_set_sign(float32_zero, p.sign);
3720 }
3721 }
3722 return a;
3723}
3724
3725float64 float64_squash_input_denormal(float64 a, float_status *status)
3726{
3727 if (status->flush_inputs_to_zero) {
3728 FloatParts p = float64_unpack_raw(a);
3729 if (parts_squash_denormal(p, status)) {
3730 return float64_set_sign(float64_zero, p.sign);
3731 }
3732 }
3733 return a;
3734}
3735
8282310d
LZ
3736bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3737{
3738 if (status->flush_inputs_to_zero) {
3739 FloatParts p = bfloat16_unpack_raw(a);
3740 if (parts_squash_denormal(p, status)) {
3741 return bfloat16_set_sign(bfloat16_zero, p.sign);
3742 }
3743 }
3744 return a;
3745}
3746
377ed926 3747/*----------------------------------------------------------------------------
158142c2
FB
3748| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3749| and 7, and returns the properly rounded 32-bit integer corresponding to the
3750| input. If `zSign' is 1, the input is negated before being converted to an
3751| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3752| is simply rounded to an integer, with the inexact exception raised if the
3753| input cannot be represented exactly as an integer. However, if the fixed-
3754| point input is too large, the invalid exception is raised and the largest
3755| positive or negative integer is returned.
3756*----------------------------------------------------------------------------*/
3757
c120391c
RH
3758static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
3759 float_status *status)
158142c2 3760{
8f506c70 3761 int8_t roundingMode;
c120391c 3762 bool roundNearestEven;
8f506c70 3763 int8_t roundIncrement, roundBits;
760e1416 3764 int32_t z;
158142c2 3765
a2f2d288 3766 roundingMode = status->float_rounding_mode;
158142c2 3767 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3768 switch (roundingMode) {
3769 case float_round_nearest_even:
f9288a76 3770 case float_round_ties_away:
dc355b76
PM
3771 roundIncrement = 0x40;
3772 break;
3773 case float_round_to_zero:
3774 roundIncrement = 0;
3775 break;
3776 case float_round_up:
3777 roundIncrement = zSign ? 0 : 0x7f;
3778 break;
3779 case float_round_down:
3780 roundIncrement = zSign ? 0x7f : 0;
3781 break;
5d64abb3
RH
3782 case float_round_to_odd:
3783 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
3784 break;
dc355b76
PM
3785 default:
3786 abort();
158142c2
FB
3787 }
3788 roundBits = absZ & 0x7F;
3789 absZ = ( absZ + roundIncrement )>>7;
40662886
PMD
3790 if (!(roundBits ^ 0x40) && roundNearestEven) {
3791 absZ &= ~1;
3792 }
158142c2
FB
3793 z = absZ;
3794 if ( zSign ) z = - z;
3795 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 3796 float_raise(float_flag_invalid, status);
2c217da0 3797 return zSign ? INT32_MIN : INT32_MAX;
158142c2 3798 }
a2f2d288
PM
3799 if (roundBits) {
3800 status->float_exception_flags |= float_flag_inexact;
3801 }
158142c2
FB
3802 return z;
3803
3804}
3805
3806/*----------------------------------------------------------------------------
3807| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3808| `absZ1', with binary point between bits 63 and 64 (between the input words),
3809| and returns the properly rounded 64-bit integer corresponding to the input.
3810| If `zSign' is 1, the input is negated before being converted to an integer.
3811| Ordinarily, the fixed-point input is simply rounded to an integer, with
3812| the inexact exception raised if the input cannot be represented exactly as
3813| an integer. However, if the fixed-point input is too large, the invalid
3814| exception is raised and the largest positive or negative integer is
3815| returned.
3816*----------------------------------------------------------------------------*/
3817
c120391c 3818static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 3819 float_status *status)
158142c2 3820{
8f506c70 3821 int8_t roundingMode;
c120391c 3822 bool roundNearestEven, increment;
760e1416 3823 int64_t z;
158142c2 3824
a2f2d288 3825 roundingMode = status->float_rounding_mode;
158142c2 3826 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3827 switch (roundingMode) {
3828 case float_round_nearest_even:
f9288a76 3829 case float_round_ties_away:
dc355b76
PM
3830 increment = ((int64_t) absZ1 < 0);
3831 break;
3832 case float_round_to_zero:
3833 increment = 0;
3834 break;
3835 case float_round_up:
3836 increment = !zSign && absZ1;
3837 break;
3838 case float_round_down:
3839 increment = zSign && absZ1;
3840 break;
5d64abb3
RH
3841 case float_round_to_odd:
3842 increment = !(absZ0 & 1) && absZ1;
3843 break;
dc355b76
PM
3844 default:
3845 abort();
158142c2
FB
3846 }
3847 if ( increment ) {
3848 ++absZ0;
3849 if ( absZ0 == 0 ) goto overflow;
40662886
PMD
3850 if (!(absZ1 << 1) && roundNearestEven) {
3851 absZ0 &= ~1;
3852 }
158142c2
FB
3853 }
3854 z = absZ0;
3855 if ( zSign ) z = - z;
3856 if ( z && ( ( z < 0 ) ^ zSign ) ) {
3857 overflow:
ff32e16e 3858 float_raise(float_flag_invalid, status);
2c217da0 3859 return zSign ? INT64_MIN : INT64_MAX;
158142c2 3860 }
a2f2d288
PM
3861 if (absZ1) {
3862 status->float_exception_flags |= float_flag_inexact;
3863 }
158142c2
FB
3864 return z;
3865
3866}
3867
fb3ea83a
TM
3868/*----------------------------------------------------------------------------
3869| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3870| `absZ1', with binary point between bits 63 and 64 (between the input words),
3871| and returns the properly rounded 64-bit unsigned integer corresponding to the
3872| input. Ordinarily, the fixed-point input is simply rounded to an integer,
3873| with the inexact exception raised if the input cannot be represented exactly
3874| as an integer. However, if the fixed-point input is too large, the invalid
3875| exception is raised and the largest unsigned integer is returned.
3876*----------------------------------------------------------------------------*/
3877
c120391c 3878static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
e5a41ffa 3879 uint64_t absZ1, float_status *status)
fb3ea83a 3880{
8f506c70 3881 int8_t roundingMode;
c120391c 3882 bool roundNearestEven, increment;
fb3ea83a 3883
a2f2d288 3884 roundingMode = status->float_rounding_mode;
fb3ea83a 3885 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
3886 switch (roundingMode) {
3887 case float_round_nearest_even:
f9288a76 3888 case float_round_ties_away:
dc355b76
PM
3889 increment = ((int64_t)absZ1 < 0);
3890 break;
3891 case float_round_to_zero:
3892 increment = 0;
3893 break;
3894 case float_round_up:
3895 increment = !zSign && absZ1;
3896 break;
3897 case float_round_down:
3898 increment = zSign && absZ1;
3899 break;
5d64abb3
RH
3900 case float_round_to_odd:
3901 increment = !(absZ0 & 1) && absZ1;
3902 break;
dc355b76
PM
3903 default:
3904 abort();
fb3ea83a
TM
3905 }
3906 if (increment) {
3907 ++absZ0;
3908 if (absZ0 == 0) {
ff32e16e 3909 float_raise(float_flag_invalid, status);
2c217da0 3910 return UINT64_MAX;
fb3ea83a 3911 }
40662886
PMD
3912 if (!(absZ1 << 1) && roundNearestEven) {
3913 absZ0 &= ~1;
3914 }
fb3ea83a
TM
3915 }
3916
3917 if (zSign && absZ0) {
ff32e16e 3918 float_raise(float_flag_invalid, status);
fb3ea83a
TM
3919 return 0;
3920 }
3921
3922 if (absZ1) {
a2f2d288 3923 status->float_exception_flags |= float_flag_inexact;
fb3ea83a
TM
3924 }
3925 return absZ0;
3926}
3927
158142c2
FB
3928/*----------------------------------------------------------------------------
3929| Normalizes the subnormal single-precision floating-point value represented
3930| by the denormalized significand `aSig'. The normalized exponent and
3931| significand are stored at the locations pointed to by `zExpPtr' and
3932| `zSigPtr', respectively.
3933*----------------------------------------------------------------------------*/
3934
static void
normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* Left shift that brings the leading one of aSig up to bit 23. */
    const int lshift = clz32(aSig) - 8;

    *zExpPtr = 1 - lshift;
    *zSigPtr = aSig << lshift;
}
3945
158142c2
FB
3946/*----------------------------------------------------------------------------
3947| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3948| and significand `zSig', and returns the proper single-precision floating-
3949| point value corresponding to the abstract input. Ordinarily, the abstract
3950| value is simply rounded and packed into the single-precision format, with
3951| the inexact exception raised if the abstract input cannot be represented
3952| exactly. However, if the abstract value is too large, the overflow and
3953| inexact exceptions are raised and an infinity or maximal finite value is
3954| returned. If the abstract value is too small, the input value is rounded to
3955| a subnormal number, and the underflow and inexact exceptions are raised if
3956| the abstract input cannot be represented exactly as a subnormal single-
3957| precision floating-point number.
3958| The input significand `zSig' has its binary point between bits 30
3959| and 29, which is 7 bits to the left of the usual location. This shifted
3960| significand must be normalized or smaller. If `zSig' is not normalized,
3961| `zExp' must be 0; in that case, the result returned is a subnormal number,
3962| and it must not require rounding. In the usual case that `zSig' is
3963| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3964| The handling of underflow and overflow follows the IEC/IEEE Standard for
3965| Binary Floating-Point Arithmetic.
3966*----------------------------------------------------------------------------*/
3967
c120391c 3968static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 3969 float_status *status)
158142c2 3970{
8f506c70 3971 int8_t roundingMode;
c120391c 3972 bool roundNearestEven;
8f506c70 3973 int8_t roundIncrement, roundBits;
c120391c 3974 bool isTiny;
158142c2 3975
a2f2d288 3976 roundingMode = status->float_rounding_mode;
158142c2 3977 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3978 switch (roundingMode) {
3979 case float_round_nearest_even:
f9288a76 3980 case float_round_ties_away:
dc355b76
PM
3981 roundIncrement = 0x40;
3982 break;
3983 case float_round_to_zero:
3984 roundIncrement = 0;
3985 break;
3986 case float_round_up:
3987 roundIncrement = zSign ? 0 : 0x7f;
3988 break;
3989 case float_round_down:
3990 roundIncrement = zSign ? 0x7f : 0;
3991 break;
5d64abb3
RH
3992 case float_round_to_odd:
3993 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
3994 break;
dc355b76
PM
3995 default:
3996 abort();
3997 break;
158142c2
FB
3998 }
3999 roundBits = zSig & 0x7F;
bb98fe42 4000 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
4001 if ( ( 0xFD < zExp )
4002 || ( ( zExp == 0xFD )
bb98fe42 4003 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4004 ) {
5d64abb3
RH
4005 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4006 roundIncrement != 0;
ff32e16e 4007 float_raise(float_flag_overflow | float_flag_inexact, status);
5d64abb3 4008 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
158142c2
FB
4009 }
4010 if ( zExp < 0 ) {
a2f2d288 4011 if (status->flush_to_zero) {
ff32e16e 4012 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4013 return packFloat32(zSign, 0, 0);
4014 }
a828b373
RH
4015 isTiny = status->tininess_before_rounding
4016 || (zExp < -1)
4017 || (zSig + roundIncrement < 0x80000000);
158142c2
FB
4018 shift32RightJamming( zSig, - zExp, &zSig );
4019 zExp = 0;
4020 roundBits = zSig & 0x7F;
ff32e16e
PM
4021 if (isTiny && roundBits) {
4022 float_raise(float_flag_underflow, status);
4023 }
5d64abb3
RH
4024 if (roundingMode == float_round_to_odd) {
4025 /*
4026 * For round-to-odd case, the roundIncrement depends on
4027 * zSig which just changed.
4028 */
4029 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4030 }
158142c2
FB
4031 }
4032 }
a2f2d288
PM
4033 if (roundBits) {
4034 status->float_exception_flags |= float_flag_inexact;
4035 }
158142c2 4036 zSig = ( zSig + roundIncrement )>>7;
40662886
PMD
4037 if (!(roundBits ^ 0x40) && roundNearestEven) {
4038 zSig &= ~1;
4039 }
158142c2
FB
4040 if ( zSig == 0 ) zExp = 0;
4041 return packFloat32( zSign, zExp, zSig );
4042
4043}
4044
4045/*----------------------------------------------------------------------------
4046| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4047| and significand `zSig', and returns the proper single-precision floating-
4048| point value corresponding to the abstract input. This routine is just like
4049| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4050| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4051| floating-point exponent.
4052*----------------------------------------------------------------------------*/
4053
4054static float32
c120391c 4055 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4056 float_status *status)
158142c2 4057{
8f506c70 4058 int8_t shiftCount;
158142c2 4059
0019d5c3 4060 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
4061 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4062 status);
158142c2
FB
4063
4064}
4065
158142c2
FB
4066/*----------------------------------------------------------------------------
4067| Normalizes the subnormal double-precision floating-point value represented
4068| by the denormalized significand `aSig'. The normalized exponent and
4069| significand are stored at the locations pointed to by `zExpPtr' and
4070| `zSigPtr', respectively.
4071*----------------------------------------------------------------------------*/
4072
static void
normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /*
     * With clz64() being the leading-zero count, shifting left by
     * (clz64 - 11) places the most significant set bit of aSig at bit 52.
     */
    const int8_t lshift = clz64(aSig) - 11;
    const uint64_t normalized = aSig << lshift;

    *zSigPtr = normalized;
    /* The exponent is reduced by one for every bit shifted left. */
    *zExpPtr = 1 - lshift;
}
4083
4084/*----------------------------------------------------------------------------
4085| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4086| double-precision floating-point value, returning the result. After being
4087| shifted into the proper positions, the three fields are simply added
4088| together to form the result. This means that any integer portion of `zSig'
4089| will be added into the exponent. Since a properly normalized significand
4090| will have an integer portion equal to 1, the `zExp' input should be 1 less
4091| than the desired result exponent whenever `zSig' is a complete, normalized
4092| significand.
4093*----------------------------------------------------------------------------*/
4094
c120391c 4095static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
158142c2
FB
4096{
4097
f090c9d4 4098 return make_float64(
bb98fe42 4099 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
4100
4101}
4102
4103/*----------------------------------------------------------------------------
4104| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4105| and significand `zSig', and returns the proper double-precision floating-
4106| point value corresponding to the abstract input. Ordinarily, the abstract
4107| value is simply rounded and packed into the double-precision format, with
4108| the inexact exception raised if the abstract input cannot be represented
4109| exactly. However, if the abstract value is too large, the overflow and
4110| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
4111| returned. If the abstract value is too small, the input value is rounded to
4112| a subnormal number, and the underflow and inexact exceptions are raised if
4113| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
4114| precision floating-point number.
4115| The input significand `zSig' has its binary point between bits 62
4116| and 61, which is 10 bits to the left of the usual location. This shifted
4117| significand must be normalized or smaller. If `zSig' is not normalized,
4118| `zExp' must be 0; in that case, the result returned is a subnormal number,
4119| and it must not require rounding. In the usual case that `zSig' is
4120| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4121| The handling of underflow and overflow follows the IEC/IEEE Standard for
4122| Binary Floating-Point Arithmetic.
4123*----------------------------------------------------------------------------*/
4124
c120391c 4125static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4126 float_status *status)
158142c2 4127{
8f506c70 4128 int8_t roundingMode;
c120391c 4129 bool roundNearestEven;
0c48262d 4130 int roundIncrement, roundBits;
c120391c 4131 bool isTiny;
158142c2 4132
a2f2d288 4133 roundingMode = status->float_rounding_mode;
158142c2 4134 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4135 switch (roundingMode) {
4136 case float_round_nearest_even:
f9288a76 4137 case float_round_ties_away:
dc355b76
PM
4138 roundIncrement = 0x200;
4139 break;
4140 case float_round_to_zero:
4141 roundIncrement = 0;
4142 break;
4143 case float_round_up:
4144 roundIncrement = zSign ? 0 : 0x3ff;
4145 break;
4146 case float_round_down:
4147 roundIncrement = zSign ? 0x3ff : 0;
4148 break;
9ee6f678
BR
4149 case float_round_to_odd:
4150 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4151 break;
dc355b76
PM
4152 default:
4153 abort();
158142c2
FB
4154 }
4155 roundBits = zSig & 0x3FF;
bb98fe42 4156 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
4157 if ( ( 0x7FD < zExp )
4158 || ( ( zExp == 0x7FD )
bb98fe42 4159 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4160 ) {
9ee6f678
BR
4161 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4162 roundIncrement != 0;
ff32e16e 4163 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 4164 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
4165 }
4166 if ( zExp < 0 ) {
a2f2d288 4167 if (status->flush_to_zero) {
ff32e16e 4168 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4169 return packFloat64(zSign, 0, 0);
4170 }
a828b373
RH
4171 isTiny = status->tininess_before_rounding
4172 || (zExp < -1)
4173 || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
158142c2
FB
4174 shift64RightJamming( zSig, - zExp, &zSig );
4175 zExp = 0;
4176 roundBits = zSig & 0x3FF;
ff32e16e
PM
4177 if (isTiny && roundBits) {
4178 float_raise(float_flag_underflow, status);
4179 }
9ee6f678
BR
4180 if (roundingMode == float_round_to_odd) {
4181 /*
4182 * For round-to-odd case, the roundIncrement depends on
4183 * zSig which just changed.
4184 */
4185 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4186 }
158142c2
FB
4187 }
4188 }
a2f2d288
PM
4189 if (roundBits) {
4190 status->float_exception_flags |= float_flag_inexact;
4191 }
158142c2 4192 zSig = ( zSig + roundIncrement )>>10;
40662886
PMD
4193 if (!(roundBits ^ 0x200) && roundNearestEven) {
4194 zSig &= ~1;
4195 }
158142c2
FB
4196 if ( zSig == 0 ) zExp = 0;
4197 return packFloat64( zSign, zExp, zSig );
4198
4199}
4200
4201/*----------------------------------------------------------------------------
4202| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4203| and significand `zSig', and returns the proper double-precision floating-
4204| point value corresponding to the abstract input. This routine is just like
4205| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4206| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4207| floating-point exponent.
4208*----------------------------------------------------------------------------*/
4209
4210static float64
c120391c 4211 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4212 float_status *status)
158142c2 4213{
8f506c70 4214 int8_t shiftCount;
158142c2 4215
0019d5c3 4216 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
4217 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4218 status);
158142c2
FB
4219
4220}
4221
158142c2
FB
4222/*----------------------------------------------------------------------------
4223| Normalizes the subnormal extended double-precision floating-point value
4224| represented by the denormalized significand `aSig'. The normalized exponent
4225| and significand are stored at the locations pointed to by `zExpPtr' and
4226| `zSigPtr', respectively.
4227*----------------------------------------------------------------------------*/
4228
88857aca
LV
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /*
     * With clz64() being the leading-zero count, shifting left by that
     * amount places the most significant set bit of aSig at bit 63.
     */
    const int8_t lshift = clz64(aSig);
    const uint64_t normalized = aSig << lshift;

    *zSigPtr = normalized;
    /* The exponent is reduced by one for every bit shifted left. */
    *zExpPtr = 1 - lshift;
}
4238
4239/*----------------------------------------------------------------------------
4240| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4241| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4242| and returns the proper extended double-precision floating-point value
4243| corresponding to the abstract input. Ordinarily, the abstract value is
4244| rounded and packed into the extended double-precision format, with the
4245| inexact exception raised if the abstract input cannot be represented
4246| exactly. However, if the abstract value is too large, the overflow and
4247| inexact exceptions are raised and an infinity or maximal finite value is
4248| returned. If the abstract value is too small, the input value is rounded to
4249| a subnormal number, and the underflow and inexact exceptions are raised if
4250| the abstract input cannot be represented exactly as a subnormal extended
4251| double-precision floating-point number.
4252| If `roundingPrecision' is 32 or 64, the result is rounded to the same
4253| number of bits as single or double precision, respectively. Otherwise, the
4254| result is rounded to the full precision of the extended double-precision
4255| format.
4256| The input significand must be normalized or smaller. If the input
4257| significand is not normalized, `zExp' must be 0; in that case, the result
4258| returned is a subnormal number, and it must not require rounding. The
4259| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4260| Floating-Point Arithmetic.
4261*----------------------------------------------------------------------------*/
4262
c120391c 4263floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
88857aca
LV
4264 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4265 float_status *status)
158142c2 4266{
/*
 * Overview of the control flow below:
 *  - roundingPrecision == 64 or 32 selects a reduced-precision path that
 *    rounds inside zSig0 using roundIncrement/roundMask;
 *  - any other value (including 80) jumps to the `precision80' label, where
 *    zSig0 is kept whole and zSig1 holds the bits below the rounding point.
 * Both paths share the `overflow' label, which lives inside the precision80
 * branch and is entered from the reduced-precision path with a goto.
 * None of the switches in this function has a case for float_round_to_odd,
 * so that mode reaches `default: abort()'.
 */
8f506c70 4267 int8_t roundingMode;
c120391c 4268 bool roundNearestEven, increment, isTiny;
f42c2224 4269 int64_t roundIncrement, roundMask, roundBits;
158142c2 4270
a2f2d288 4271 roundingMode = status->float_rounding_mode;
158142c2
FB
4272 roundNearestEven = ( roundingMode == float_round_nearest_even );
4273 if ( roundingPrecision == 80 ) goto precision80;
/* roundIncrement starts as half of the lowest kept bit; roundMask covers
 * every bit of zSig0 that is discarded at this precision. */
4274 if ( roundingPrecision == 64 ) {
e9321124
AB
4275 roundIncrement = UINT64_C(0x0000000000000400);
4276 roundMask = UINT64_C(0x00000000000007FF);
158142c2
FB
4277 }
4278 else if ( roundingPrecision == 32 ) {
e9321124
AB
4279 roundIncrement = UINT64_C(0x0000008000000000);
4280 roundMask = UINT64_C(0x000000FFFFFFFFFF);
158142c2
FB
4281 }
4282 else {
4283 goto precision80;
4284 }
/* Fold any non-zero zSig1 into bit 0 of zSig0 as a sticky bit. */
4285 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
/* The nearest modes keep the half-way increment chosen above; the other
 * modes replace it with 0 or with the full mask depending on the sign. */
4286 switch (roundingMode) {
4287 case float_round_nearest_even:
f9288a76 4288 case float_round_ties_away:
dc355b76
PM
4289 break;
4290 case float_round_to_zero:
4291 roundIncrement = 0;
4292 break;
4293 case float_round_up:
4294 roundIncrement = zSign ? 0 : roundMask;
4295 break;
4296 case float_round_down:
4297 roundIncrement = zSign ? roundMask : 0;
4298 break;
4299 default:
4300 abort();
158142c2
FB
4301 }
4302 roundBits = zSig0 & roundMask;
/* The unsigned compare is true when zExp <= 0 or zExp >= 0x7FFE. */
bb98fe42 4303 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4304 if ( ( 0x7FFE < zExp )
4305 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4306 ) {
/* Jumps into the precision80 branch; roundMask still holds the
 * reduced-precision mask and is used there to build the result. */
4307 goto overflow;
4308 }
4309 if ( zExp <= 0 ) {
a2f2d288 4310 if (status->flush_to_zero) {
ff32e16e 4311 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4312 return packFloatx80(zSign, 0, 0);
4313 }
a828b373
RH
4314 isTiny = status->tininess_before_rounding
4315 || (zExp < 0 )
4316 || (zSig0 <= zSig0 + roundIncrement);
158142c2
FB
4317 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4318 zExp = 0;
4319 roundBits = zSig0 & roundMask;
ff32e16e
PM
4320 if (isTiny && roundBits) {
4321 float_raise(float_flag_underflow, status);
4322 }
a2f2d288
PM
4323 if (roundBits) {
4324 status->float_exception_flags |= float_flag_inexact;
4325 }
158142c2 4326 zSig0 += roundIncrement;
/* Bit 63 set after rounding: the exponent field becomes 1. */
bb98fe42 4327 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
/* roundIncrement is re-purposed as the lowest kept bit. On an exact tie in
 * nearest-even mode the mask is widened so that bit is cleared as well. */
4328 roundIncrement = roundMask + 1;
4329 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4330 roundMask |= roundIncrement;
4331 }
4332 zSig0 &= ~ roundMask;
4333 return packFloatx80( zSign, zExp, zSig0 );
4334 }
4335 }
a2f2d288
PM
4336 if (roundBits) {
4337 status->float_exception_flags |= float_flag_inexact;
4338 }
158142c2
FB
4339 zSig0 += roundIncrement;
/* Unsigned wrap-around: the addition carried out of bit 63. */
4340 if ( zSig0 < roundIncrement ) {
4341 ++zExp;
e9321124 4342 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4343 }
/* Same tie handling as in the subnormal branch above. */
4344 roundIncrement = roundMask + 1;
4345 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4346 roundMask |= roundIncrement;
4347 }
4348 zSig0 &= ~ roundMask;
4349 if ( zSig0 == 0 ) zExp = 0;
4350 return packFloatx80( zSign, zExp, zSig0 );
/* Full-width path: zSig0 is kept whole and zSig1 decides the rounding. */
4351 precision80:
dc355b76
PM
4352 switch (roundingMode) {
4353 case float_round_nearest_even:
f9288a76 4354 case float_round_ties_away:
dc355b76
PM
/* Increment when the top bit of zSig1 is set. */
4355 increment = ((int64_t)zSig1 < 0);
4356 break;
4357 case float_round_to_zero:
4358 increment = 0;
4359 break;
4360 case float_round_up:
4361 increment = !zSign && zSig1;
4362 break;
4363 case float_round_down:
4364 increment = zSign && zSig1;
4365 break;
4366 default:
4367 abort();
158142c2 4368 }
/* The unsigned compare is true when zExp <= 0 or zExp >= 0x7FFE. */
bb98fe42 4369 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4370 if ( ( 0x7FFE < zExp )
4371 || ( ( zExp == 0x7FFE )
e9321124 4372 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
4373 && increment
4374 )
4375 ) {
/* With roundMask = 0, ~roundMask below is an all-ones significand. */
4376 roundMask = 0;
/* Also entered by goto from the reduced-precision path, in which case
 * roundMask is that path's mask. */
4377 overflow:
ff32e16e 4378 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
/* Modes that do not round away from zero for this sign return exponent
 * 0x7FFE with significand ~roundMask instead of infinity. */
4379 if ( ( roundingMode == float_round_to_zero )
4380 || ( zSign && ( roundingMode == float_round_up ) )
4381 || ( ! zSign && ( roundingMode == float_round_down ) )
4382 ) {
4383 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4384 }
0f605c88
LV
4385 return packFloatx80(zSign,
4386 floatx80_infinity_high,
4387 floatx80_infinity_low);
158142c2
FB
4388 }
4389 if ( zExp <= 0 ) {
a828b373
RH
4390 isTiny = status->tininess_before_rounding
4391 || (zExp < 0)
4392 || !increment
4393 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4394 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4395 zExp = 0;
ff32e16e
PM
4396 if (isTiny && zSig1) {
4397 float_raise(float_flag_underflow, status);
4398 }
a2f2d288
PM
4399 if (zSig1) {
4400 status->float_exception_flags |= float_flag_inexact;
4401 }
dc355b76
PM
/* zSig1 was changed by the shift, so the rounding decision is recomputed. */
4402 switch (roundingMode) {
4403 case float_round_nearest_even:
f9288a76 4404 case float_round_ties_away:
dc355b76
PM
4405 increment = ((int64_t)zSig1 < 0);
4406 break;
4407 case float_round_to_zero:
4408 increment = 0;
4409 break;
4410 case float_round_up:
4411 increment = !zSign && zSig1;
4412 break;
4413 case float_round_down:
4414 increment = zSign && zSig1;
4415 break;
4416 default:
4417 abort();
158142c2
FB
4418 }
4419 if ( increment ) {
4420 ++zSig0;
40662886
PMD
/* zSig1 << 1 == 0 means no bits below the top bit of zSig1 are set; in
 * nearest-even mode the low bit of zSig0 is then cleared. */
4421 if (!(zSig1 << 1) && roundNearestEven) {
4422 zSig0 &= ~1;
4423 }
/* Bit 63 set after rounding: the exponent field becomes 1. */
bb98fe42 4424 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4425 }
4426 return packFloatx80( zSign, zExp, zSig0 );
4427 }
4428 }
a2f2d288
PM
4429 if (zSig1) {
4430 status->float_exception_flags |= float_flag_inexact;
4431 }
158142c2
FB
4432 if ( increment ) {
4433 ++zSig0;
/* zSig0 wrapped to zero: bump the exponent and set only bit 63. */
4434 if ( zSig0 == 0 ) {
4435 ++zExp;
e9321124 4436 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4437 }
4438 else {
40662886
PMD
/* Same nearest-even low-bit clearing as in the subnormal branch. */
4439 if (!(zSig1 << 1) && roundNearestEven) {
4440 zSig0 &= ~1;
4441 }
158142c2
FB
4442 }
4443 }
4444 else {
4445 if ( zSig0 == 0 ) zExp = 0;
4446 }
4447 return packFloatx80( zSign, zExp, zSig0 );
4448
4449}
4450
4451/*----------------------------------------------------------------------------
4452| Takes an abstract floating-point value having sign `zSign', exponent
4453| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4454| and returns the proper extended double-precision floating-point value
4455| corresponding to the abstract input. This routine is just like
4456| `roundAndPackFloatx80' except that the input significand does not have to be
4457| normalized.
4458*----------------------------------------------------------------------------*/
4459
88857aca 4460floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
c120391c 4461 bool zSign, int32_t zExp,
88857aca
LV
4462 uint64_t zSig0, uint64_t zSig1,
4463 float_status *status)
158142c2 4464{
8f506c70 4465 int8_t shiftCount;
158142c2
FB
4466
4467 if ( zSig0 == 0 ) {
4468 zSig0 = zSig1;
4469 zSig1 = 0;
4470 zExp -= 64;
4471 }
0019d5c3 4472 shiftCount = clz64(zSig0);
158142c2
FB
4473 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4474 zExp -= shiftCount;
ff32e16e
PM
4475 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4476 zSig0, zSig1, status);
158142c2
FB
4477
4478}
4479
158142c2
FB
4480/*----------------------------------------------------------------------------
4481| Returns the least-significant 64 fraction bits of the quadruple-precision
4482| floating-point value `a'.
4483*----------------------------------------------------------------------------*/
4484
a49db98d 4485static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4486{
4487
4488 return a.low;
4489
4490}
4491
4492/*----------------------------------------------------------------------------
4493| Returns the most-significant 48 fraction bits of the quadruple-precision
4494| floating-point value `a'.
4495*----------------------------------------------------------------------------*/
4496
a49db98d 4497static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4498{
4499
e9321124 4500 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
158142c2
FB
4501
4502}
4503
4504/*----------------------------------------------------------------------------
4505| Returns the exponent bits of the quadruple-precision floating-point value
4506| `a'.
4507*----------------------------------------------------------------------------*/
4508
f4014512 4509static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4510{
4511
4512 return ( a.high>>48 ) & 0x7FFF;
4513
4514}
4515
4516/*----------------------------------------------------------------------------
4517| Returns the sign bit of the quadruple-precision floating-point value `a'.
4518*----------------------------------------------------------------------------*/
4519
c120391c 4520static inline bool extractFloat128Sign(float128 a)
158142c2 4521{
c120391c 4522 return a.high >> 63;
158142c2
FB
4523}
4524
4525/*----------------------------------------------------------------------------
4526| Normalizes the subnormal quadruple-precision floating-point value
4527| represented by the denormalized significand formed by the concatenation of
4528| `aSig0' and `aSig1'. The normalized exponent is stored at the location
4529| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4530| significand are stored at the location pointed to by `zSig0Ptr', and the
4531| least significant 64 bits of the normalized significand are stored at the
4532| location pointed to by `zSig1Ptr'.
4533*----------------------------------------------------------------------------*/
4534
static void
normalizeFloat128Subnormal(uint64_t aSig0,
                           uint64_t aSig1,
                           int32_t *zExpPtr,
                           uint64_t *zSig0Ptr,
                           uint64_t *zSig1Ptr)
{
    int8_t lshift;

    if (aSig0 != 0) {
        /* The high word is non-zero: a short 128-bit left shift suffices. */
        lshift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, lshift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - lshift;
        return;
    }

    /* Only the low word holds significand bits. */
    lshift = clz64(aSig1) - 15;
    if (lshift >= 0) {
        /* Everything fits in the high output word. */
        *zSig0Ptr = aSig1 << lshift;
        *zSig1Ptr = 0;
    } else {
        /* aSig1 straddles both output words: split it across them. */
        *zSig0Ptr = aSig1 >> (-lshift);
        *zSig1Ptr = aSig1 << (lshift & 63);
    }
    *zExpPtr = -lshift - 63;
}
4565
4566/*----------------------------------------------------------------------------
4567| Packs the sign `zSign', the exponent `zExp', and the significand formed
4568| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4569| floating-point value, returning the result. After being shifted into the
4570| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4571| added together to form the most significant 64 bits of the result. This
4572| means that any integer portion of `zSig0' will be added into the exponent.
4573| Since a properly normalized significand will have an integer portion equal
4574| to 1, the `zExp' input should be 1 less than the desired result exponent
4575| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4576| significand.
4577*----------------------------------------------------------------------------*/
4578
a49db98d 4579static inline float128
c120391c 4580packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
158142c2
FB
4581{
4582 float128 z;
4583
4584 z.low = zSig1;
c120391c 4585 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
158142c2 4586 return z;
158142c2
FB
4587}
4588
4589/*----------------------------------------------------------------------------
4590| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4591| and extended significand formed by the concatenation of `zSig0', `zSig1',
4592| and `zSig2', and returns the proper quadruple-precision floating-point value
4593| corresponding to the abstract input. Ordinarily, the abstract value is
4594| simply rounded and packed into the quadruple-precision format, with the
4595| inexact exception raised if the abstract input cannot be represented
4596| exactly. However, if the abstract value is too large, the overflow and
4597| inexact exceptions are raised and an infinity or maximal finite value is
4598| returned. If the abstract value is too small, the input value is rounded to
4599| a subnormal number, and the underflow and inexact exceptions are raised if
4600| the abstract input cannot be represented exactly as a subnormal quadruple-
4601| precision floating-point number.
4602| The input significand must be normalized or smaller. If the input
4603| significand is not normalized, `zExp' must be 0; in that case, the result
4604| returned is a subnormal number, and it must not require rounding. In the
4605| usual case that the input significand is normalized, `zExp' must be 1 less
4606| than the ``true'' floating-point exponent. The handling of underflow and
4607| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4608*----------------------------------------------------------------------------*/
4609
c120391c 4610static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4611 uint64_t zSig0, uint64_t zSig1,
4612 uint64_t zSig2, float_status *status)
158142c2 4613{
8f506c70 4614 int8_t roundingMode;
c120391c 4615 bool roundNearestEven, increment, isTiny;
158142c2 4616
a2f2d288 4617 roundingMode = status->float_rounding_mode;
158142c2 4618 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4619 switch (roundingMode) {
4620 case float_round_nearest_even:
f9288a76 4621 case float_round_ties_away:
dc355b76
PM
4622 increment = ((int64_t)zSig2 < 0);
4623 break;
4624 case float_round_to_zero:
4625 increment = 0;
4626 break;
4627 case float_round_up:
4628 increment = !zSign && zSig2;
4629 break;
4630 case float_round_down:
4631 increment = zSign && zSig2;
4632 break;
9ee6f678
BR
4633 case float_round_to_odd:
4634 increment = !(zSig1 & 0x1) && zSig2;
4635 break;
dc355b76
PM
4636 default:
4637 abort();
158142c2 4638 }
bb98fe42 4639 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
4640 if ( ( 0x7FFD < zExp )
4641 || ( ( zExp == 0x7FFD )
4642 && eq128(
e9321124
AB
4643 UINT64_C(0x0001FFFFFFFFFFFF),
4644 UINT64_C(0xFFFFFFFFFFFFFFFF),
158142c2
FB
4645 zSig0,
4646 zSig1
4647 )
4648 && increment
4649 )
4650 ) {
ff32e16e 4651 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4652 if ( ( roundingMode == float_round_to_zero )
4653 || ( zSign && ( roundingMode == float_round_up ) )
4654 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4655 || (roundingMode == float_round_to_odd)
158142c2
FB
4656 ) {
4657 return
4658 packFloat128(
4659 zSign,
4660 0x7FFE,
e9321124
AB
4661 UINT64_C(0x0000FFFFFFFFFFFF),
4662 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
4663 );
4664 }
4665 return packFloat128( zSign, 0x7FFF, 0, 0 );
4666 }
4667 if ( zExp < 0 ) {
a2f2d288 4668 if (status->flush_to_zero) {
ff32e16e 4669 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4670 return packFloat128(zSign, 0, 0, 0);
4671 }
a828b373
RH
4672 isTiny = status->tininess_before_rounding
4673 || (zExp < -1)
4674 || !increment
4675 || lt128(zSig0, zSig1,
4676 UINT64_C(0x0001FFFFFFFFFFFF),
4677 UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4678 shift128ExtraRightJamming(
4679 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4680 zExp = 0;
ff32e16e
PM
4681 if (isTiny && zSig2) {
4682 float_raise(float_flag_underflow, status);
4683 }
dc355b76
PM
4684 switch (roundingMode) {
4685 case float_round_nearest_even:
f9288a76 4686 case float_round_ties_away:
dc355b76
PM
4687 increment = ((int64_t)zSig2 < 0);
4688 break;
4689 case float_round_to_zero:
4690 increment = 0;
4691 break;
4692 case float_round_up:
4693 increment = !zSign && zSig2;
4694 break;
4695 case float_round_down:
4696 increment = zSign && zSig2;
4697 break;
9ee6f678
BR
4698 case float_round_to_odd:
4699 increment = !(zSig1 & 0x1) && zSig2;
4700 break;
dc355b76
PM
4701 default:
4702 abort();
158142c2
FB
4703 }
4704 }
4705 }
a2f2d288
PM
4706 if (zSig2) {
4707 status->float_exception_flags |= float_flag_inexact;
4708 }
158142c2
FB
4709 if ( increment ) {
4710 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
40662886
PMD
4711 if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4712 zSig1 &= ~1;
4713 }
158142c2
FB
4714 }
4715 else {
4716 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4717 }
4718 return packFloat128( zSign, zExp, zSig0, zSig1 );
4719
4720}
4721
4722/*----------------------------------------------------------------------------
4723| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4724| and significand formed by the concatenation of `zSig0' and `zSig1', and
4725| returns the proper quadruple-precision floating-point value corresponding
4726| to the abstract input. This routine is just like `roundAndPackFloat128'
4727| except that the input significand has fewer bits and does not have to be
4728| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4729| point exponent.
4730*----------------------------------------------------------------------------*/
4731
c120391c 4732static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4733 uint64_t zSig0, uint64_t zSig1,
4734 float_status *status)
158142c2 4735{
8f506c70 4736 int8_t shiftCount;
bb98fe42 4737 uint64_t zSig2;
158142c2
FB
4738
4739 if ( zSig0 == 0 ) {
4740 zSig0 = zSig1;
4741 zSig1 = 0;
4742 zExp -= 64;
4743 }
0019d5c3 4744 shiftCount = clz64(zSig0) - 15;
158142c2
FB
4745 if ( 0 <= shiftCount ) {
4746 zSig2 = 0;
4747 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4748 }
4749 else {
4750 shift128ExtraRightJamming(
4751 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4752 }
4753 zExp -= shiftCount;
ff32e16e 4754 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
4755
4756}
4757
158142c2 4758
158142c2
FB
4759/*----------------------------------------------------------------------------
4760| Returns the result of converting the 32-bit two's complement integer `a'
4761| to the extended double-precision floating-point format. The conversion
4762| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4763| Arithmetic.
4764*----------------------------------------------------------------------------*/
4765
e5a41ffa 4766floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2 4767{
c120391c 4768 bool zSign;
3a87d009 4769 uint32_t absA;
8f506c70 4770 int8_t shiftCount;
bb98fe42 4771 uint64_t zSig;
158142c2
FB
4772
4773 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4774 zSign = ( a < 0 );
4775 absA = zSign ? - a : a;
0019d5c3 4776 shiftCount = clz32(absA) + 32;
158142c2
FB
4777 zSig = absA;
4778 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4779
4780}
4781
158142c2
FB
4782/*----------------------------------------------------------------------------
4783| Returns the result of converting the 32-bit two's complement integer `a' to
4784| the quadruple-precision floating-point format. The conversion is performed
4785| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4786*----------------------------------------------------------------------------*/
4787
e5a41ffa 4788float128 int32_to_float128(int32_t a, float_status *status)
158142c2 4789{
c120391c 4790 bool zSign;
3a87d009 4791 uint32_t absA;
8f506c70 4792 int8_t shiftCount;
bb98fe42 4793 uint64_t zSig0;
158142c2
FB
4794
4795 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4796 zSign = ( a < 0 );
4797 absA = zSign ? - a : a;
0019d5c3 4798 shiftCount = clz32(absA) + 17;
158142c2
FB
4799 zSig0 = absA;
4800 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
4801
4802}
4803
158142c2
FB
4804/*----------------------------------------------------------------------------
4805| Returns the result of converting the 64-bit two's complement integer `a'
4806| to the extended double-precision floating-point format. The conversion
4807| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4808| Arithmetic.
4809*----------------------------------------------------------------------------*/
4810
e5a41ffa 4811floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2 4812{
c120391c 4813 bool zSign;
182f42fd 4814 uint64_t absA;
8f506c70 4815 int8_t shiftCount;
158142c2
FB
4816
4817 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4818 zSign = ( a < 0 );
4819 absA = zSign ? - a : a;
0019d5c3 4820 shiftCount = clz64(absA);
158142c2
FB
4821 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4822
4823}
4824
158142c2
FB
4825/*----------------------------------------------------------------------------
4826| Returns the result of converting the 64-bit two's complement integer `a' to
4827| the quadruple-precision floating-point format. The conversion is performed
4828| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4829*----------------------------------------------------------------------------*/
4830
e5a41ffa 4831float128 int64_to_float128(int64_t a, float_status *status)
158142c2 4832{
c120391c 4833 bool zSign;
182f42fd 4834 uint64_t absA;
8f506c70 4835 int8_t shiftCount;
f4014512 4836 int32_t zExp;
bb98fe42 4837 uint64_t zSig0, zSig1;
158142c2
FB
4838
4839 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4840 zSign = ( a < 0 );
4841 absA = zSign ? - a : a;
0019d5c3 4842 shiftCount = clz64(absA) + 49;
158142c2
FB
4843 zExp = 0x406E - shiftCount;
4844 if ( 64 <= shiftCount ) {
4845 zSig1 = 0;
4846 zSig0 = absA;
4847 shiftCount -= 64;
4848 }
4849 else {
4850 zSig1 = absA;
4851 zSig0 = 0;
4852 }
4853 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4854 return packFloat128( zSign, zExp, zSig0, zSig1 );
4855
4856}
4857
6bb8e0f1
PM
4858/*----------------------------------------------------------------------------
4859| Returns the result of converting the 64-bit unsigned integer `a'
4860| to the quadruple-precision floating-point format. The conversion is performed
4861| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4862*----------------------------------------------------------------------------*/
4863
e5a41ffa 4864float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
4865{
4866 if (a == 0) {
4867 return float128_zero;
4868 }
6603d506 4869 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
4870}
4871
158142c2
FB
4872/*----------------------------------------------------------------------------
4873| Returns the result of converting the single-precision floating-point value
4874| `a' to the extended double-precision floating-point format. The conversion
4875| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4876| Arithmetic.
4877*----------------------------------------------------------------------------*/
4878
e5a41ffa 4879floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2 4880{
c120391c 4881 bool aSign;
0c48262d 4882 int aExp;
bb98fe42 4883 uint32_t aSig;
158142c2 4884
ff32e16e 4885 a = float32_squash_input_denormal(a, status);
158142c2
FB
4886 aSig = extractFloat32Frac( a );
4887 aExp = extractFloat32Exp( a );
4888 aSign = extractFloat32Sign( a );
4889 if ( aExp == 0xFF ) {
ff32e16e 4890 if (aSig) {
7537c2b4
JM
4891 floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
4892 status);
4893 return floatx80_silence_nan(res, status);
ff32e16e 4894 }
0f605c88
LV
4895 return packFloatx80(aSign,
4896 floatx80_infinity_high,
4897 floatx80_infinity_low);
158142c2
FB
4898 }
4899 if ( aExp == 0 ) {
4900 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4901 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4902 }
4903 aSig |= 0x00800000;
bb98fe42 4904 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
4905
4906}
4907
158142c2
FB
/*----------------------------------------------------------------------------
| Returns the result of converting the single-precision floating-point value
| `a' to the quadruple-precision floating-point format.  The conversion is
| performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic.
*----------------------------------------------------------------------------*/
4914
e5a41ffa 4915float128 float32_to_float128(float32 a, float_status *status)
158142c2 4916{
c120391c 4917 bool aSign;
0c48262d 4918 int aExp;
bb98fe42 4919 uint32_t aSig;
158142c2 4920
ff32e16e 4921 a = float32_squash_input_denormal(a, status);
158142c2
FB
4922 aSig = extractFloat32Frac( a );
4923 aExp = extractFloat32Exp( a );
4924 aSign = extractFloat32Sign( a );
4925 if ( aExp == 0xFF ) {
ff32e16e
PM
4926 if (aSig) {
4927 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
4928 }
158142c2
FB
4929 return packFloat128( aSign, 0x7FFF, 0, 0 );
4930 }
4931 if ( aExp == 0 ) {
4932 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4933 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4934 --aExp;
4935 }
bb98fe42 4936 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
4937
4938}
4939
158142c2
FB
4940/*----------------------------------------------------------------------------
4941| Returns the remainder of the single-precision floating-point value `a'
4942| with respect to the corresponding value `b'. The operation is performed
4943| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4944*----------------------------------------------------------------------------*/
4945
e5a41ffa 4946float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 4947{
c120391c 4948 bool aSign, zSign;
0c48262d 4949 int aExp, bExp, expDiff;
bb98fe42
AF
4950 uint32_t aSig, bSig;
4951 uint32_t q;
4952 uint64_t aSig64, bSig64, q64;
4953 uint32_t alternateASig;
4954 int32_t sigMean;
ff32e16e
PM
4955 a = float32_squash_input_denormal(a, status);
4956 b = float32_squash_input_denormal(b, status);
158142c2
FB
4957
4958 aSig = extractFloat32Frac( a );
4959 aExp = extractFloat32Exp( a );
4960 aSign = extractFloat32Sign( a );
4961 bSig = extractFloat32Frac( b );
4962 bExp = extractFloat32Exp( b );
158142c2
FB
4963 if ( aExp == 0xFF ) {
4964 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 4965 return propagateFloat32NaN(a, b, status);
158142c2 4966 }
ff32e16e 4967 float_raise(float_flag_invalid, status);
af39bc8c 4968 return float32_default_nan(status);
158142c2
FB
4969 }
4970 if ( bExp == 0xFF ) {
ff32e16e
PM
4971 if (bSig) {
4972 return propagateFloat32NaN(a, b, status);
4973 }
158142c2
FB
4974 return a;
4975 }
4976 if ( bExp == 0 ) {
4977 if ( bSig == 0 ) {
ff32e16e 4978 float_raise(float_flag_invalid, status);
af39bc8c 4979 return float32_default_nan(status);
158142c2
FB
4980 }
4981 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
4982 }
4983 if ( aExp == 0 ) {
4984 if ( aSig == 0 ) return a;
4985 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4986 }
4987 expDiff = aExp - bExp;
4988 aSig |= 0x00800000;
4989 bSig |= 0x00800000;
4990 if ( expDiff < 32 ) {
4991 aSig <<= 8;
4992 bSig <<= 8;
4993 if ( expDiff < 0 ) {
4994 if ( expDiff < -1 ) return a;
4995 aSig >>= 1;
4996 }
4997 q = ( bSig <= aSig );
4998 if ( q ) aSig -= bSig;
4999 if ( 0 < expDiff ) {
bb98fe42 5000 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
5001 q >>= 32 - expDiff;
5002 bSig >>= 2;
5003 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5004 }
5005 else {
5006 aSig >>= 2;
5007 bSig >>= 2;
5008 }
5009 }
5010 else {
5011 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
5012 aSig64 = ( (uint64_t) aSig )<<40;
5013 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
5014 expDiff -= 64;
5015 while ( 0 < expDiff ) {
5016 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5017 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5018 aSig64 = - ( ( bSig * q64 )<<38 );
5019 expDiff -= 62;
5020 }
5021 expDiff += 64;
5022 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5023 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5024 q = q64>>( 64 - expDiff );
5025 bSig <<= 6;
5026 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5027 }
5028 do {
5029 alternateASig = aSig;
5030 ++q;
5031 aSig -= bSig;
bb98fe42 5032 } while ( 0 <= (int32_t) aSig );
158142c2
FB
5033 sigMean = aSig + alternateASig;
5034 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5035 aSig = alternateASig;
5036 }
bb98fe42 5037 zSign = ( (int32_t) aSig < 0 );
158142c2 5038 if ( zSign ) aSig = - aSig;
ff32e16e 5039 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5040}
5041
369be8f6 5042
158142c2 5043
8229c991
AJ
5044/*----------------------------------------------------------------------------
5045| Returns the binary exponential of the single-precision floating-point value
5046| `a'. The operation is performed according to the IEC/IEEE Standard for
5047| Binary Floating-Point Arithmetic.
5048|
5049| Uses the following identities:
5050|
5051| 1. -------------------------------------------------------------------------
5052| x x*ln(2)
5053| 2 = e
5054|
5055| 2. -------------------------------------------------------------------------
5056| 2 3 4 5 n
5057| x x x x x x x
5058| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5059| 1! 2! 3! 4! 5! n!
5060*----------------------------------------------------------------------------*/
5061
5062static const float64 float32_exp2_coefficients[15] =
5063{
d5138cf4
PM
5064 const_float64( 0x3ff0000000000000ll ), /* 1 */
5065 const_float64( 0x3fe0000000000000ll ), /* 2 */
5066 const_float64( 0x3fc5555555555555ll ), /* 3 */
5067 const_float64( 0x3fa5555555555555ll ), /* 4 */
5068 const_float64( 0x3f81111111111111ll ), /* 5 */
5069 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5070 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5071 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5072 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5073 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5074 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5075 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5076 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5077 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5078 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
5079};
5080
e5a41ffa 5081float32 float32_exp2(float32 a, float_status *status)
8229c991 5082{
c120391c 5083 bool aSign;
0c48262d 5084 int aExp;
bb98fe42 5085 uint32_t aSig;
8229c991
AJ
5086 float64 r, x, xn;
5087 int i;
ff32e16e 5088 a = float32_squash_input_denormal(a, status);
8229c991
AJ
5089
5090 aSig = extractFloat32Frac( a );
5091 aExp = extractFloat32Exp( a );
5092 aSign = extractFloat32Sign( a );
5093
5094 if ( aExp == 0xFF) {
ff32e16e
PM
5095 if (aSig) {
5096 return propagateFloat32NaN(a, float32_zero, status);
5097 }
8229c991
AJ
5098 return (aSign) ? float32_zero : a;
5099 }
5100 if (aExp == 0) {
5101 if (aSig == 0) return float32_one;
5102 }
5103
ff32e16e 5104 float_raise(float_flag_inexact, status);
8229c991
AJ
5105
5106 /* ******************************* */
5107 /* using float64 for approximation */
5108 /* ******************************* */
ff32e16e
PM
5109 x = float32_to_float64(a, status);
5110 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
5111
5112 xn = x;
5113 r = float64_one;
5114 for (i = 0 ; i < 15 ; i++) {
5115 float64 f;
5116
ff32e16e
PM
5117 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5118 r = float64_add(r, f, status);
8229c991 5119
ff32e16e 5120 xn = float64_mul(xn, x, status);
8229c991
AJ
5121 }
5122
5123 return float64_to_float32(r, status);
5124}
5125
374dfc33
AJ
5126/*----------------------------------------------------------------------------
5127| Returns the binary log of the single-precision floating-point value `a'.
5128| The operation is performed according to the IEC/IEEE Standard for Binary
5129| Floating-Point Arithmetic.
5130*----------------------------------------------------------------------------*/
e5a41ffa 5131float32 float32_log2(float32 a, float_status *status)
374dfc33 5132{
c120391c 5133 bool aSign, zSign;
0c48262d 5134 int aExp;
bb98fe42 5135 uint32_t aSig, zSig, i;
374dfc33 5136
ff32e16e 5137 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
5138 aSig = extractFloat32Frac( a );
5139 aExp = extractFloat32Exp( a );
5140 aSign = extractFloat32Sign( a );
5141
5142 if ( aExp == 0 ) {
5143 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5144 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5145 }
5146 if ( aSign ) {
ff32e16e 5147 float_raise(float_flag_invalid, status);
af39bc8c 5148 return float32_default_nan(status);
374dfc33
AJ
5149 }
5150 if ( aExp == 0xFF ) {
ff32e16e
PM
5151 if (aSig) {
5152 return propagateFloat32NaN(a, float32_zero, status);
5153 }
374dfc33
AJ
5154 return a;
5155 }
5156
5157 aExp -= 0x7F;
5158 aSig |= 0x00800000;
5159 zSign = aExp < 0;
5160 zSig = aExp << 23;
5161
5162 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 5163 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
5164 if ( aSig & 0x01000000 ) {
5165 aSig >>= 1;
5166 zSig |= i;
5167 }
5168 }
5169
5170 if ( zSign )
5171 zSig = -zSig;
5172
ff32e16e 5173 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
5174}
5175
158142c2 5176/*----------------------------------------------------------------------------
158142c2
FB
5177| Returns the result of converting the double-precision floating-point value
5178| `a' to the extended double-precision floating-point format. The conversion
5179| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5180| Arithmetic.
5181*----------------------------------------------------------------------------*/
5182
e5a41ffa 5183floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2 5184{
c120391c 5185 bool aSign;
0c48262d 5186 int aExp;
bb98fe42 5187 uint64_t aSig;
158142c2 5188
ff32e16e 5189 a = float64_squash_input_denormal(a, status);
158142c2
FB
5190 aSig = extractFloat64Frac( a );
5191 aExp = extractFloat64Exp( a );
5192 aSign = extractFloat64Sign( a );
5193 if ( aExp == 0x7FF ) {
ff32e16e 5194 if (aSig) {
7537c2b4
JM
5195 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5196 status);
5197 return floatx80_silence_nan(res, status);
ff32e16e 5198 }
0f605c88
LV
5199 return packFloatx80(aSign,
5200 floatx80_infinity_high,
5201 floatx80_infinity_low);
158142c2
FB
5202 }
5203 if ( aExp == 0 ) {
5204 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5205 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5206 }
5207 return
5208 packFloatx80(
e9321124 5209 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
158142c2
FB
5210
5211}
5212
158142c2
FB
5213/*----------------------------------------------------------------------------
5214| Returns the result of converting the double-precision floating-point value
5215| `a' to the quadruple-precision floating-point format. The conversion is
5216| performed according to the IEC/IEEE Standard for Binary Floating-Point
5217| Arithmetic.
5218*----------------------------------------------------------------------------*/
5219
e5a41ffa 5220float128 float64_to_float128(float64 a, float_status *status)
158142c2 5221{
c120391c 5222 bool aSign;
0c48262d 5223 int aExp;
bb98fe42 5224 uint64_t aSig, zSig0, zSig1;
158142c2 5225
ff32e16e 5226 a = float64_squash_input_denormal(a, status);
158142c2
FB
5227 aSig = extractFloat64Frac( a );
5228 aExp = extractFloat64Exp( a );
5229 aSign = extractFloat64Sign( a );
5230 if ( aExp == 0x7FF ) {
ff32e16e
PM
5231 if (aSig) {
5232 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5233 }
158142c2
FB
5234 return packFloat128( aSign, 0x7FFF, 0, 0 );
5235 }
5236 if ( aExp == 0 ) {
5237 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5238 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5239 --aExp;
5240 }
5241 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
5242 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
5243
5244}
5245
158142c2
FB
5246
5247/*----------------------------------------------------------------------------
5248| Returns the remainder of the double-precision floating-point value `a'
5249| with respect to the corresponding value `b'. The operation is performed
5250| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5251*----------------------------------------------------------------------------*/
5252
e5a41ffa 5253float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5254{
c120391c 5255 bool aSign, zSign;
0c48262d 5256 int aExp, bExp, expDiff;
bb98fe42
AF
5257 uint64_t aSig, bSig;
5258 uint64_t q, alternateASig;
5259 int64_t sigMean;
158142c2 5260
ff32e16e
PM
5261 a = float64_squash_input_denormal(a, status);
5262 b = float64_squash_input_denormal(b, status);
158142c2
FB
5263 aSig = extractFloat64Frac( a );
5264 aExp = extractFloat64Exp( a );
5265 aSign = extractFloat64Sign( a );
5266 bSig = extractFloat64Frac( b );
5267 bExp = extractFloat64Exp( b );
158142c2
FB
5268 if ( aExp == 0x7FF ) {
5269 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5270 return propagateFloat64NaN(a, b, status);
158142c2 5271 }
ff32e16e 5272 float_raise(float_flag_invalid, status);
af39bc8c 5273 return float64_default_nan(status);
158142c2
FB
5274 }
5275 if ( bExp == 0x7FF ) {
ff32e16e
PM
5276 if (bSig) {
5277 return propagateFloat64NaN(a, b, status);
5278 }
158142c2
FB
5279 return a;
5280 }
5281 if ( bExp == 0 ) {
5282 if ( bSig == 0 ) {
ff32e16e 5283 float_raise(float_flag_invalid, status);
af39bc8c 5284 return float64_default_nan(status);
158142c2
FB
5285 }
5286 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5287 }
5288 if ( aExp == 0 ) {
5289 if ( aSig == 0 ) return a;
5290 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5291 }
5292 expDiff = aExp - bExp;
e9321124
AB
5293 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5294 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
158142c2
FB
5295 if ( expDiff < 0 ) {
5296 if ( expDiff < -1 ) return a;
5297 aSig >>= 1;
5298 }
5299 q = ( bSig <= aSig );
5300 if ( q ) aSig -= bSig;
5301 expDiff -= 64;
5302 while ( 0 < expDiff ) {
5303 q = estimateDiv128To64( aSig, 0, bSig );
5304 q = ( 2 < q ) ? q - 2 : 0;
5305 aSig = - ( ( bSig>>2 ) * q );
5306 expDiff -= 62;
5307 }
5308 expDiff += 64;
5309 if ( 0 < expDiff ) {
5310 q = estimateDiv128To64( aSig, 0, bSig );
5311 q = ( 2 < q ) ? q - 2 : 0;
5312 q >>= 64 - expDiff;
5313 bSig >>= 2;
5314 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5315 }
5316 else {
5317 aSig >>= 2;
5318 bSig >>= 2;
5319 }
5320 do {
5321 alternateASig = aSig;
5322 ++q;
5323 aSig -= bSig;
bb98fe42 5324 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5325 sigMean = aSig + alternateASig;
5326 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5327 aSig = alternateASig;
5328 }
bb98fe42 5329 zSign = ( (int64_t) aSig < 0 );
158142c2 5330 if ( zSign ) aSig = - aSig;
ff32e16e 5331 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5332
5333}
5334
374dfc33
AJ
5335/*----------------------------------------------------------------------------
5336| Returns the binary log of the double-precision floating-point value `a'.
5337| The operation is performed according to the IEC/IEEE Standard for Binary
5338| Floating-Point Arithmetic.
5339*----------------------------------------------------------------------------*/
e5a41ffa 5340float64 float64_log2(float64 a, float_status *status)
374dfc33 5341{
c120391c 5342 bool aSign, zSign;
0c48262d 5343 int aExp;
bb98fe42 5344 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5345 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5346
5347 aSig = extractFloat64Frac( a );
5348 aExp = extractFloat64Exp( a );
5349 aSign = extractFloat64Sign( a );
5350
5351 if ( aExp == 0 ) {
5352 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5353 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5354 }
5355 if ( aSign ) {
ff32e16e 5356 float_raise(float_flag_invalid, status);
af39bc8c 5357 return float64_default_nan(status);
374dfc33
AJ
5358 }
5359 if ( aExp == 0x7FF ) {
ff32e16e
PM
5360 if (aSig) {
5361 return propagateFloat64NaN(a, float64_zero, status);
5362 }
374dfc33
AJ
5363 return a;
5364 }
5365
5366 aExp -= 0x3FF;
e9321124 5367 aSig |= UINT64_C(0x0010000000000000);
374dfc33 5368 zSign = aExp < 0;
bb98fe42 5369 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5370 for (i = 1LL << 51; i > 0; i >>= 1) {
5371 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5372 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
e9321124 5373 if ( aSig & UINT64_C(0x0020000000000000) ) {
374dfc33
AJ
5374 aSig >>= 1;
5375 zSig |= i;
5376 }
5377 }
5378
5379 if ( zSign )
5380 zSig = -zSig;
ff32e16e 5381 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5382}
5383
158142c2
FB
5384/*----------------------------------------------------------------------------
5385| Returns the result of converting the extended double-precision floating-
5386| point value `a' to the 32-bit two's complement integer format. The
5387| conversion is performed according to the IEC/IEEE Standard for Binary
5388| Floating-Point Arithmetic---which means in particular that the conversion
5389| is rounded according to the current rounding mode. If `a' is a NaN, the
5390| largest positive integer is returned. Otherwise, if the conversion
5391| overflows, the largest integer with the same sign as `a' is returned.
5392*----------------------------------------------------------------------------*/
5393
f4014512 5394int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2 5395{
c120391c 5396 bool aSign;
f4014512 5397 int32_t aExp, shiftCount;
bb98fe42 5398 uint64_t aSig;
158142c2 5399
d1eb8f2a
AD
5400 if (floatx80_invalid_encoding(a)) {
5401 float_raise(float_flag_invalid, status);
5402 return 1 << 31;
5403 }
158142c2
FB
5404 aSig = extractFloatx80Frac( a );
5405 aExp = extractFloatx80Exp( a );
5406 aSign = extractFloatx80Sign( a );
bb98fe42 5407 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5408 shiftCount = 0x4037 - aExp;
5409 if ( shiftCount <= 0 ) shiftCount = 1;
5410 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5411 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5412
5413}
5414
5415/*----------------------------------------------------------------------------
5416| Returns the result of converting the extended double-precision floating-
5417| point value `a' to the 32-bit two's complement integer format. The
5418| conversion is performed according to the IEC/IEEE Standard for Binary
5419| Floating-Point Arithmetic, except that the conversion is always rounded
5420| toward zero. If `a' is a NaN, the largest positive integer is returned.
5421| Otherwise, if the conversion overflows, the largest integer with the same
5422| sign as `a' is returned.
5423*----------------------------------------------------------------------------*/
5424
f4014512 5425int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2 5426{
c120391c 5427 bool aSign;
f4014512 5428 int32_t aExp, shiftCount;
bb98fe42 5429 uint64_t aSig, savedASig;
b3a6a2e0 5430 int32_t z;
158142c2 5431
d1eb8f2a
AD
5432 if (floatx80_invalid_encoding(a)) {
5433 float_raise(float_flag_invalid, status);
5434 return 1 << 31;
5435 }
158142c2
FB
5436 aSig = extractFloatx80Frac( a );
5437 aExp = extractFloatx80Exp( a );
5438 aSign = extractFloatx80Sign( a );
5439 if ( 0x401E < aExp ) {
bb98fe42 5440 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5441 goto invalid;
5442 }
5443 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5444 if (aExp || aSig) {
5445 status->float_exception_flags |= float_flag_inexact;
5446 }
158142c2
FB
5447 return 0;
5448 }
5449 shiftCount = 0x403E - aExp;
5450 savedASig = aSig;
5451 aSig >>= shiftCount;
5452 z = aSig;
5453 if ( aSign ) z = - z;
5454 if ( ( z < 0 ) ^ aSign ) {
5455 invalid:
ff32e16e 5456 float_raise(float_flag_invalid, status);
bb98fe42 5457 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5458 }
5459 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 5460 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5461 }
5462 return z;
5463
5464}
5465
5466/*----------------------------------------------------------------------------
5467| Returns the result of converting the extended double-precision floating-
5468| point value `a' to the 64-bit two's complement integer format. The
5469| conversion is performed according to the IEC/IEEE Standard for Binary
5470| Floating-Point Arithmetic---which means in particular that the conversion
5471| is rounded according to the current rounding mode. If `a' is a NaN,
5472| the largest positive integer is returned. Otherwise, if the conversion
5473| overflows, the largest integer with the same sign as `a' is returned.
5474*----------------------------------------------------------------------------*/
5475
f42c2224 5476int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2 5477{
c120391c 5478 bool aSign;
f4014512 5479 int32_t aExp, shiftCount;
bb98fe42 5480 uint64_t aSig, aSigExtra;
158142c2 5481
d1eb8f2a
AD
5482 if (floatx80_invalid_encoding(a)) {
5483 float_raise(float_flag_invalid, status);
5484 return 1ULL << 63;
5485 }
158142c2
FB
5486 aSig = extractFloatx80Frac( a );
5487 aExp = extractFloatx80Exp( a );
5488 aSign = extractFloatx80Sign( a );
5489 shiftCount = 0x403E - aExp;
5490 if ( shiftCount <= 0 ) {
5491 if ( shiftCount ) {
ff32e16e 5492 float_raise(float_flag_invalid, status);
0f605c88 5493 if (!aSign || floatx80_is_any_nan(a)) {
2c217da0 5494 return INT64_MAX;
158142c2 5495 }
2c217da0 5496 return INT64_MIN;
158142c2
FB
5497 }
5498 aSigExtra = 0;
5499 }
5500 else {
5501 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5502 }
ff32e16e 5503 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5504
5505}
5506
5507/*----------------------------------------------------------------------------
5508| Returns the result of converting the extended double-precision floating-
5509| point value `a' to the 64-bit two's complement integer format. The
5510| conversion is performed according to the IEC/IEEE Standard for Binary
5511| Floating-Point Arithmetic, except that the conversion is always rounded
5512| toward zero. If `a' is a NaN, the largest positive integer is returned.
5513| Otherwise, if the conversion overflows, the largest integer with the same
5514| sign as `a' is returned.
5515*----------------------------------------------------------------------------*/
5516
f42c2224 5517int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2 5518{
c120391c 5519 bool aSign;
f4014512 5520 int32_t aExp, shiftCount;
bb98fe42 5521 uint64_t aSig;
f42c2224 5522 int64_t z;
158142c2 5523
d1eb8f2a
AD
5524 if (floatx80_invalid_encoding(a)) {
5525 float_raise(float_flag_invalid, status);
5526 return 1ULL << 63;
5527 }
158142c2
FB
5528 aSig = extractFloatx80Frac( a );
5529 aExp = extractFloatx80Exp( a );
5530 aSign = extractFloatx80Sign( a );
5531 shiftCount = aExp - 0x403E;
5532 if ( 0 <= shiftCount ) {
e9321124 5533 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
158142c2 5534 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5535 float_raise(float_flag_invalid, status);
158142c2 5536 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
2c217da0 5537 return INT64_MAX;
158142c2
FB
5538 }
5539 }
2c217da0 5540 return INT64_MIN;
158142c2
FB
5541 }
5542 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5543 if (aExp | aSig) {
5544 status->float_exception_flags |= float_flag_inexact;
5545 }
158142c2
FB
5546 return 0;
5547 }
5548 z = aSig>>( - shiftCount );
bb98fe42 5549 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 5550 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5551 }
5552 if ( aSign ) z = - z;
5553 return z;
5554
5555}
5556
5557/*----------------------------------------------------------------------------
5558| Returns the result of converting the extended double-precision floating-
5559| point value `a' to the single-precision floating-point format. The
5560| conversion is performed according to the IEC/IEEE Standard for Binary
5561| Floating-Point Arithmetic.
5562*----------------------------------------------------------------------------*/
5563
e5a41ffa 5564float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2 5565{
c120391c 5566 bool aSign;
f4014512 5567 int32_t aExp;
bb98fe42 5568 uint64_t aSig;
158142c2 5569
d1eb8f2a
AD
5570 if (floatx80_invalid_encoding(a)) {
5571 float_raise(float_flag_invalid, status);
5572 return float32_default_nan(status);
5573 }
158142c2
FB
5574 aSig = extractFloatx80Frac( a );
5575 aExp = extractFloatx80Exp( a );
5576 aSign = extractFloatx80Sign( a );
5577 if ( aExp == 0x7FFF ) {
bb98fe42 5578 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5579 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5580 status);
5581 return float32_silence_nan(res, status);
158142c2
FB
5582 }
5583 return packFloat32( aSign, 0xFF, 0 );
5584 }
5585 shift64RightJamming( aSig, 33, &aSig );
5586 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5587 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5588
5589}
5590
5591/*----------------------------------------------------------------------------
5592| Returns the result of converting the extended double-precision floating-
5593| point value `a' to the double-precision floating-point format. The
5594| conversion is performed according to the IEC/IEEE Standard for Binary
5595| Floating-Point Arithmetic.
5596*----------------------------------------------------------------------------*/
5597
e5a41ffa 5598float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2 5599{
c120391c 5600 bool aSign;
f4014512 5601 int32_t aExp;
bb98fe42 5602 uint64_t aSig, zSig;
158142c2 5603
d1eb8f2a
AD
5604 if (floatx80_invalid_encoding(a)) {
5605 float_raise(float_flag_invalid, status);
5606 return float64_default_nan(status);
5607 }
158142c2
FB
5608 aSig = extractFloatx80Frac( a );
5609 aExp = extractFloatx80Exp( a );
5610 aSign = extractFloatx80Sign( a );
5611 if ( aExp == 0x7FFF ) {
bb98fe42 5612 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5613 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5614 status);
5615 return float64_silence_nan(res, status);
158142c2
FB
5616 }
5617 return packFloat64( aSign, 0x7FF, 0 );
5618 }
5619 shift64RightJamming( aSig, 1, &zSig );
5620 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5621 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5622
5623}
5624
158142c2
FB
5625/*----------------------------------------------------------------------------
5626| Returns the result of converting the extended double-precision floating-
5627| point value `a' to the quadruple-precision floating-point format. The
5628| conversion is performed according to the IEC/IEEE Standard for Binary
5629| Floating-Point Arithmetic.
5630*----------------------------------------------------------------------------*/
5631
e5a41ffa 5632float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2 5633{
c120391c 5634 bool aSign;
0c48262d 5635 int aExp;
bb98fe42 5636 uint64_t aSig, zSig0, zSig1;
158142c2 5637
d1eb8f2a
AD
5638 if (floatx80_invalid_encoding(a)) {
5639 float_raise(float_flag_invalid, status);
5640 return float128_default_nan(status);
5641 }
158142c2
FB
5642 aSig = extractFloatx80Frac( a );
5643 aExp = extractFloatx80Exp( a );
5644 aSign = extractFloatx80Sign( a );
bb98fe42 5645 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5646 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5647 status);
5648 return float128_silence_nan(res, status);
158142c2
FB
5649 }
5650 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5651 return packFloat128( aSign, aExp, zSig0, zSig1 );
5652
5653}
5654
0f721292
LV
5655/*----------------------------------------------------------------------------
5656| Rounds the extended double-precision floating-point value `a'
5657| to the precision provided by floatx80_rounding_precision and returns the
5658| result as an extended double-precision floating-point value.
5659| The operation is performed according to the IEC/IEEE Standard for Binary
5660| Floating-Point Arithmetic.
5661*----------------------------------------------------------------------------*/
5662
5663floatx80 floatx80_round(floatx80 a, float_status *status)
5664{
5665 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5666 extractFloatx80Sign(a),
5667 extractFloatx80Exp(a),
5668 extractFloatx80Frac(a), 0, status);
5669}
5670
158142c2
FB
5671/*----------------------------------------------------------------------------
5672| Rounds the extended double-precision floating-point value `a' to an integer,
5673| and returns the result as an extended quadruple-precision floating-point
5674| value. The operation is performed according to the IEC/IEEE Standard for
5675| Binary Floating-Point Arithmetic.
5676*----------------------------------------------------------------------------*/
5677
e5a41ffa 5678floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2 5679{
c120391c 5680 bool aSign;
f4014512 5681 int32_t aExp;
bb98fe42 5682 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5683 floatx80 z;
5684
d1eb8f2a
AD
5685 if (floatx80_invalid_encoding(a)) {
5686 float_raise(float_flag_invalid, status);
5687 return floatx80_default_nan(status);
5688 }
158142c2
FB
5689 aExp = extractFloatx80Exp( a );
5690 if ( 0x403E <= aExp ) {
bb98fe42 5691 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5692 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5693 }
5694 return a;
5695 }
5696 if ( aExp < 0x3FFF ) {
5697 if ( ( aExp == 0 )
9ecaf5cc 5698 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
158142c2
FB
5699 return a;
5700 }
a2f2d288 5701 status->float_exception_flags |= float_flag_inexact;
158142c2 5702 aSign = extractFloatx80Sign( a );
a2f2d288 5703 switch (status->float_rounding_mode) {
158142c2 5704 case float_round_nearest_even:
bb98fe42 5705 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5706 ) {
5707 return
e9321124 5708 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5709 }
5710 break;
f9288a76
PM
5711 case float_round_ties_away:
5712 if (aExp == 0x3FFE) {
e9321124 5713 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
f9288a76
PM
5714 }
5715 break;
158142c2
FB
5716 case float_round_down:
5717 return
5718 aSign ?
e9321124 5719 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
158142c2
FB
5720 : packFloatx80( 0, 0, 0 );
5721 case float_round_up:
5722 return
5723 aSign ? packFloatx80( 1, 0, 0 )
e9321124 5724 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
3dede407
RH
5725
5726 case float_round_to_zero:
5727 break;
5728 default:
5729 g_assert_not_reached();
158142c2
FB
5730 }
5731 return packFloatx80( aSign, 0, 0 );
5732 }
5733 lastBitMask = 1;
5734 lastBitMask <<= 0x403E - aExp;
5735 roundBitsMask = lastBitMask - 1;
5736 z = a;
a2f2d288 5737 switch (status->float_rounding_mode) {
dc355b76 5738 case float_round_nearest_even:
158142c2 5739 z.low += lastBitMask>>1;
dc355b76
PM
5740 if ((z.low & roundBitsMask) == 0) {
5741 z.low &= ~lastBitMask;
5742 }
5743 break;
f9288a76
PM
5744 case float_round_ties_away:
5745 z.low += lastBitMask >> 1;
5746 break;
dc355b76
PM
5747 case float_round_to_zero:
5748 break;
5749 case float_round_up:
5750 if (!extractFloatx80Sign(z)) {
5751 z.low += roundBitsMask;
5752 }
5753 break;
5754 case float_round_down:
5755 if (extractFloatx80Sign(z)) {
158142c2
FB
5756 z.low += roundBitsMask;
5757 }
dc355b76
PM
5758 break;
5759 default:
5760 abort();
158142c2
FB
5761 }
5762 z.low &= ~ roundBitsMask;
5763 if ( z.low == 0 ) {
5764 ++z.high;
e9321124 5765 z.low = UINT64_C(0x8000000000000000);
158142c2 5766 }
a2f2d288
PM
5767 if (z.low != a.low) {
5768 status->float_exception_flags |= float_flag_inexact;
5769 }
158142c2
FB
5770 return z;
5771
5772}
5773
5774/*----------------------------------------------------------------------------
5775| Returns the result of adding the absolute values of the extended double-
5776| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5777| negated before being returned. `zSign' is ignored if the result is a NaN.
5778| The addition is performed according to the IEC/IEEE Standard for Binary
5779| Floating-Point Arithmetic.
5780*----------------------------------------------------------------------------*/
5781
c120391c 5782static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5783 float_status *status)
158142c2 5784{
f4014512 5785 int32_t aExp, bExp, zExp;
bb98fe42 5786 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5787 int32_t expDiff;
158142c2
FB
5788
5789 aSig = extractFloatx80Frac( a );
5790 aExp = extractFloatx80Exp( a );
5791 bSig = extractFloatx80Frac( b );
5792 bExp = extractFloatx80Exp( b );
5793 expDiff = aExp - bExp;
5794 if ( 0 < expDiff ) {
5795 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5796 if ((uint64_t)(aSig << 1)) {
5797 return propagateFloatx80NaN(a, b, status);
5798 }
158142c2
FB
5799 return a;
5800 }
5801 if ( bExp == 0 ) --expDiff;
5802 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5803 zExp = aExp;
5804 }
5805 else if ( expDiff < 0 ) {
5806 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5807 if ((uint64_t)(bSig << 1)) {
5808 return propagateFloatx80NaN(a, b, status);
5809 }
0f605c88
LV
5810 return packFloatx80(zSign,
5811 floatx80_infinity_high,
5812 floatx80_infinity_low);
158142c2
FB
5813 }
5814 if ( aExp == 0 ) ++expDiff;
5815 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5816 zExp = bExp;
5817 }
5818 else {
5819 if ( aExp == 0x7FFF ) {
bb98fe42 5820 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5821 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5822 }
5823 return a;
5824 }
5825 zSig1 = 0;
5826 zSig0 = aSig + bSig;
5827 if ( aExp == 0 ) {
41602807
JM
5828 if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
5829 /* At least one of the values is a pseudo-denormal,
5830 * and there is a carry out of the result. */
5831 zExp = 1;
5832 goto shiftRight1;
5833 }
2f311075
RH
5834 if (zSig0 == 0) {
5835 return packFloatx80(zSign, 0, 0);
5836 }
158142c2
FB
5837 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5838 goto roundAndPack;
5839 }
5840 zExp = aExp;
5841 goto shiftRight1;
5842 }
5843 zSig0 = aSig + bSig;
bb98fe42 5844 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5845 shiftRight1:
5846 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
e9321124 5847 zSig0 |= UINT64_C(0x8000000000000000);
158142c2
FB
5848 ++zExp;
5849 roundAndPack:
a2f2d288 5850 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5851 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5852}
5853
5854/*----------------------------------------------------------------------------
5855| Returns the result of subtracting the absolute values of the extended
5856| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5857| difference is negated before being returned. `zSign' is ignored if the
5858| result is a NaN. The subtraction is performed according to the IEC/IEEE
5859| Standard for Binary Floating-Point Arithmetic.
5860*----------------------------------------------------------------------------*/
5861
c120391c 5862static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5863 float_status *status)
158142c2 5864{
f4014512 5865 int32_t aExp, bExp, zExp;
bb98fe42 5866 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5867 int32_t expDiff;
158142c2
FB
5868
5869 aSig = extractFloatx80Frac( a );
5870 aExp = extractFloatx80Exp( a );
5871 bSig = extractFloatx80Frac( b );
5872 bExp = extractFloatx80Exp( b );
5873 expDiff = aExp - bExp;
5874 if ( 0 < expDiff ) goto aExpBigger;
5875 if ( expDiff < 0 ) goto bExpBigger;
5876 if ( aExp == 0x7FFF ) {
bb98fe42 5877 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5878 return propagateFloatx80NaN(a, b, status);
158142c2 5879 }
ff32e16e 5880 float_raise(float_flag_invalid, status);
af39bc8c 5881 return floatx80_default_nan(status);
158142c2
FB
5882 }
5883 if ( aExp == 0 ) {
5884 aExp = 1;
5885 bExp = 1;
5886 }
5887 zSig1 = 0;
5888 if ( bSig < aSig ) goto aBigger;
5889 if ( aSig < bSig ) goto bBigger;
a2f2d288 5890 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5891 bExpBigger:
5892 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5893 if ((uint64_t)(bSig << 1)) {
5894 return propagateFloatx80NaN(a, b, status);
5895 }
0f605c88
LV
5896 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5897 floatx80_infinity_low);
158142c2
FB
5898 }
5899 if ( aExp == 0 ) ++expDiff;
5900 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5901 bBigger:
5902 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5903 zExp = bExp;
5904 zSign ^= 1;
5905 goto normalizeRoundAndPack;
5906 aExpBigger:
5907 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5908 if ((uint64_t)(aSig << 1)) {
5909 return propagateFloatx80NaN(a, b, status);
5910 }
158142c2
FB
5911 return a;
5912 }
5913 if ( bExp == 0 ) --expDiff;
5914 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5915 aBigger:
5916 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5917 zExp = aExp;
5918 normalizeRoundAndPack:
a2f2d288 5919 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5920 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5921}
5922
5923/*----------------------------------------------------------------------------
5924| Returns the result of adding the extended double-precision floating-point
5925| values `a' and `b'. The operation is performed according to the IEC/IEEE
5926| Standard for Binary Floating-Point Arithmetic.
5927*----------------------------------------------------------------------------*/
5928
e5a41ffa 5929floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2 5930{
c120391c 5931 bool aSign, bSign;
158142c2 5932
d1eb8f2a
AD
5933 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5934 float_raise(float_flag_invalid, status);
5935 return floatx80_default_nan(status);
5936 }
158142c2
FB
5937 aSign = extractFloatx80Sign( a );
5938 bSign = extractFloatx80Sign( b );
5939 if ( aSign == bSign ) {
ff32e16e 5940 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5941 }
5942 else {
ff32e16e 5943 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5944 }
5945
5946}
5947
5948/*----------------------------------------------------------------------------
5949| Returns the result of subtracting the extended double-precision floating-
5950| point values `a' and `b'. The operation is performed according to the
5951| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5952*----------------------------------------------------------------------------*/
5953
e5a41ffa 5954floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2 5955{
c120391c 5956 bool aSign, bSign;
158142c2 5957
d1eb8f2a
AD
5958 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5959 float_raise(float_flag_invalid, status);
5960 return floatx80_default_nan(status);
5961 }
158142c2
FB
5962 aSign = extractFloatx80Sign( a );
5963 bSign = extractFloatx80Sign( b );
5964 if ( aSign == bSign ) {
ff32e16e 5965 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5966 }
5967 else {
ff32e16e 5968 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5969 }
5970
5971}
5972
5973/*----------------------------------------------------------------------------
5974| Returns the result of multiplying the extended double-precision floating-
5975| point values `a' and `b'. The operation is performed according to the
5976| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5977*----------------------------------------------------------------------------*/
5978
e5a41ffa 5979floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2 5980{
c120391c 5981 bool aSign, bSign, zSign;
f4014512 5982 int32_t aExp, bExp, zExp;
bb98fe42 5983 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 5984
d1eb8f2a
AD
5985 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5986 float_raise(float_flag_invalid, status);
5987 return floatx80_default_nan(status);
5988 }
158142c2
FB
5989 aSig = extractFloatx80Frac( a );
5990 aExp = extractFloatx80Exp( a );
5991 aSign = extractFloatx80Sign( a );
5992 bSig = extractFloatx80Frac( b );
5993 bExp = extractFloatx80Exp( b );
5994 bSign = extractFloatx80Sign( b );
5995 zSign = aSign ^ bSign;
5996 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5997 if ( (uint64_t) ( aSig<<1 )
5998 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5999 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6000 }
6001 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
6002 return packFloatx80(zSign, floatx80_infinity_high,
6003 floatx80_infinity_low);
158142c2
FB
6004 }
6005 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6006 if ((uint64_t)(bSig << 1)) {
6007 return propagateFloatx80NaN(a, b, status);
6008 }
158142c2
FB
6009 if ( ( aExp | aSig ) == 0 ) {
6010 invalid:
ff32e16e 6011 float_raise(float_flag_invalid, status);
af39bc8c 6012 return floatx80_default_nan(status);
158142c2 6013 }
0f605c88
LV
6014 return packFloatx80(zSign, floatx80_infinity_high,
6015 floatx80_infinity_low);
158142c2
FB
6016 }
6017 if ( aExp == 0 ) {
6018 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6019 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6020 }
6021 if ( bExp == 0 ) {
6022 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6023 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6024 }
6025 zExp = aExp + bExp - 0x3FFE;
6026 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 6027 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
6028 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6029 --zExp;
6030 }
a2f2d288 6031 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6032 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6033}
6034
6035/*----------------------------------------------------------------------------
6036| Returns the result of dividing the extended double-precision floating-point
6037| value `a' by the corresponding value `b'. The operation is performed
6038| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6039*----------------------------------------------------------------------------*/
6040
e5a41ffa 6041floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2 6042{
c120391c 6043 bool aSign, bSign, zSign;
f4014512 6044 int32_t aExp, bExp, zExp;
bb98fe42
AF
6045 uint64_t aSig, bSig, zSig0, zSig1;
6046 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6047
d1eb8f2a
AD
6048 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6049 float_raise(float_flag_invalid, status);
6050 return floatx80_default_nan(status);
6051 }
158142c2
FB
6052 aSig = extractFloatx80Frac( a );
6053 aExp = extractFloatx80Exp( a );
6054 aSign = extractFloatx80Sign( a );
6055 bSig = extractFloatx80Frac( b );
6056 bExp = extractFloatx80Exp( b );
6057 bSign = extractFloatx80Sign( b );
6058 zSign = aSign ^ bSign;
6059 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6060 if ((uint64_t)(aSig << 1)) {
6061 return propagateFloatx80NaN(a, b, status);
6062 }
158142c2 6063 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6064 if ((uint64_t)(bSig << 1)) {
6065 return propagateFloatx80NaN(a, b, status);
6066 }
158142c2
FB
6067 goto invalid;
6068 }
0f605c88
LV
6069 return packFloatx80(zSign, floatx80_infinity_high,
6070 floatx80_infinity_low);
158142c2
FB
6071 }
6072 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6073 if ((uint64_t)(bSig << 1)) {
6074 return propagateFloatx80NaN(a, b, status);
6075 }
158142c2
FB
6076 return packFloatx80( zSign, 0, 0 );
6077 }
6078 if ( bExp == 0 ) {
6079 if ( bSig == 0 ) {
6080 if ( ( aExp | aSig ) == 0 ) {
6081 invalid:
ff32e16e 6082 float_raise(float_flag_invalid, status);
af39bc8c 6083 return floatx80_default_nan(status);
158142c2 6084 }
ff32e16e 6085 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6086 return packFloatx80(zSign, floatx80_infinity_high,
6087 floatx80_infinity_low);
158142c2
FB
6088 }
6089 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6090 }
6091 if ( aExp == 0 ) {
6092 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6093 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6094 }
6095 zExp = aExp - bExp + 0x3FFE;
6096 rem1 = 0;
6097 if ( bSig <= aSig ) {
6098 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6099 ++zExp;
6100 }
6101 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6102 mul64To128( bSig, zSig0, &term0, &term1 );
6103 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 6104 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6105 --zSig0;
6106 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6107 }
6108 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 6109 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6110 mul64To128( bSig, zSig1, &term1, &term2 );
6111 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6112 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6113 --zSig1;
6114 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6115 }
6116 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6117 }
a2f2d288 6118 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6119 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6120}
6121
6122/*----------------------------------------------------------------------------
6123| Returns the remainder of the extended double-precision floating-point value
6124| `a' with respect to the corresponding value `b'. The operation is performed
6b8b0136
JM
6125| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6126| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
445810ec
JM
6127| the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6128| the absolute value of the integer quotient.
158142c2
FB
6129*----------------------------------------------------------------------------*/
6130
445810ec 6131floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6b8b0136 6132 float_status *status)
158142c2 6133{
c120391c 6134 bool aSign, zSign;
b662495d 6135 int32_t aExp, bExp, expDiff, aExpOrig;
bb98fe42
AF
6136 uint64_t aSig0, aSig1, bSig;
6137 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6138
445810ec 6139 *quotient = 0;
d1eb8f2a
AD
6140 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6141 float_raise(float_flag_invalid, status);
6142 return floatx80_default_nan(status);
6143 }
158142c2 6144 aSig0 = extractFloatx80Frac( a );
b662495d 6145 aExpOrig = aExp = extractFloatx80Exp( a );
158142c2
FB
6146 aSign = extractFloatx80Sign( a );
6147 bSig = extractFloatx80Frac( b );
6148 bExp = extractFloatx80Exp( b );
158142c2 6149 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6150 if ( (uint64_t) ( aSig0<<1 )
6151 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6152 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6153 }
6154 goto invalid;
6155 }
6156 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6157 if ((uint64_t)(bSig << 1)) {
6158 return propagateFloatx80NaN(a, b, status);
6159 }
b662495d
JM
6160 if (aExp == 0 && aSig0 >> 63) {
6161 /*
6162 * Pseudo-denormal argument must be returned in normalized
6163 * form.
6164 */
6165 return packFloatx80(aSign, 1, aSig0);
6166 }
158142c2
FB
6167 return a;
6168 }
6169 if ( bExp == 0 ) {
6170 if ( bSig == 0 ) {
6171 invalid:
ff32e16e 6172 float_raise(float_flag_invalid, status);
af39bc8c 6173 return floatx80_default_nan(status);
158142c2
FB
6174 }
6175 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6176 }
6177 if ( aExp == 0 ) {
499a2f7b 6178 if ( aSig0 == 0 ) return a;
158142c2
FB
6179 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6180 }
158142c2
FB
6181 zSign = aSign;
6182 expDiff = aExp - bExp;
6183 aSig1 = 0;
6184 if ( expDiff < 0 ) {
b662495d
JM
6185 if ( mod || expDiff < -1 ) {
6186 if (aExp == 1 && aExpOrig == 0) {
6187 /*
6188 * Pseudo-denormal argument must be returned in
6189 * normalized form.
6190 */
6191 return packFloatx80(aSign, aExp, aSig0);
6192 }
6193 return a;
6194 }
158142c2
FB
6195 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6196 expDiff = 0;
6197 }
445810ec 6198 *quotient = q = ( bSig <= aSig0 );
158142c2
FB
6199 if ( q ) aSig0 -= bSig;
6200 expDiff -= 64;
6201 while ( 0 < expDiff ) {
6202 q = estimateDiv128To64( aSig0, aSig1, bSig );
6203 q = ( 2 < q ) ? q - 2 : 0;
6204 mul64To128( bSig, q, &term0, &term1 );
6205 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6206 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6207 expDiff -= 62;
445810ec
JM
6208 *quotient <<= 62;
6209 *quotient += q;
158142c2
FB
6210 }
6211 expDiff += 64;
6212 if ( 0 < expDiff ) {
6213 q = estimateDiv128To64( aSig0, aSig1, bSig );
6214 q = ( 2 < q ) ? q - 2 : 0;
6215 q >>= 64 - expDiff;
6216 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6217 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6218 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6219 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6220 ++q;
6221 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6222 }
445810ec
JM
6223 if (expDiff < 64) {
6224 *quotient <<= expDiff;
6225 } else {
6226 *quotient = 0;
6227 }
6228 *quotient += q;
158142c2
FB
6229 }
6230 else {
6231 term1 = 0;
6232 term0 = bSig;
6233 }
6b8b0136
JM
6234 if (!mod) {
6235 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6236 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6237 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6238 && ( q & 1 ) )
6239 ) {
6240 aSig0 = alternateASig0;
6241 aSig1 = alternateASig1;
6242 zSign = ! zSign;
445810ec 6243 ++*quotient;
6b8b0136 6244 }
158142c2
FB
6245 }
6246 return
6247 normalizeRoundAndPackFloatx80(
ff32e16e 6248 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6249
6250}
6251
6b8b0136
JM
6252/*----------------------------------------------------------------------------
6253| Returns the remainder of the extended double-precision floating-point value
6254| `a' with respect to the corresponding value `b'. The operation is performed
6255| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6256*----------------------------------------------------------------------------*/
6257
6258floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6259{
445810ec
JM
6260 uint64_t quotient;
6261 return floatx80_modrem(a, b, false, &quotient, status);
6b8b0136
JM
6262}
6263
6264/*----------------------------------------------------------------------------
6265| Returns the remainder of the extended double-precision floating-point value
6266| `a' with respect to the corresponding value `b', with the quotient truncated
6267| toward zero.
6268*----------------------------------------------------------------------------*/
6269
6270floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6271{
445810ec
JM
6272 uint64_t quotient;
6273 return floatx80_modrem(a, b, true, &quotient, status);
6b8b0136
JM
6274}
6275
158142c2
FB
6276/*----------------------------------------------------------------------------
6277| Returns the square root of the extended double-precision floating-point
6278| value `a'. The operation is performed according to the IEC/IEEE Standard
6279| for Binary Floating-Point Arithmetic.
6280*----------------------------------------------------------------------------*/
6281
e5a41ffa 6282floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2 6283{
c120391c 6284 bool aSign;
f4014512 6285 int32_t aExp, zExp;
bb98fe42
AF
6286 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6287 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6288
d1eb8f2a
AD
6289 if (floatx80_invalid_encoding(a)) {
6290 float_raise(float_flag_invalid, status);
6291 return floatx80_default_nan(status);
6292 }
158142c2
FB
6293 aSig0 = extractFloatx80Frac( a );
6294 aExp = extractFloatx80Exp( a );
6295 aSign = extractFloatx80Sign( a );
6296 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6297 if ((uint64_t)(aSig0 << 1)) {
6298 return propagateFloatx80NaN(a, a, status);
6299 }
158142c2
FB
6300 if ( ! aSign ) return a;
6301 goto invalid;
6302 }
6303 if ( aSign ) {
6304 if ( ( aExp | aSig0 ) == 0 ) return a;
6305 invalid:
ff32e16e 6306 float_raise(float_flag_invalid, status);
af39bc8c 6307 return floatx80_default_nan(status);
158142c2
FB
6308 }
6309 if ( aExp == 0 ) {
6310 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6311 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6312 }
6313 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6314 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6315 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6316 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6317 doubleZSig0 = zSig0<<1;
6318 mul64To128( zSig0, zSig0, &term0, &term1 );
6319 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6320 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6321 --zSig0;
6322 doubleZSig0 -= 2;
6323 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6324 }
6325 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
e9321124 6326 if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
158142c2
FB
6327 if ( zSig1 == 0 ) zSig1 = 1;
6328 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6329 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6330 mul64To128( zSig1, zSig1, &term2, &term3 );
6331 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6332 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6333 --zSig1;
6334 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6335 term3 |= 1;
6336 term2 |= doubleZSig0;
6337 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6338 }
6339 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6340 }
6341 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6342 zSig0 |= doubleZSig0;
a2f2d288
PM
6343 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6344 0, zExp, zSig0, zSig1, status);
158142c2
FB
6345}
6346
6347/*----------------------------------------------------------------------------
158142c2
FB
6348| Returns the result of converting the quadruple-precision floating-point
6349| value `a' to the 32-bit two's complement integer format. The conversion
6350| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6351| Arithmetic---which means in particular that the conversion is rounded
6352| according to the current rounding mode. If `a' is a NaN, the largest
6353| positive integer is returned. Otherwise, if the conversion overflows, the
6354| largest integer with the same sign as `a' is returned.
6355*----------------------------------------------------------------------------*/
6356
f4014512 6357int32_t float128_to_int32(float128 a, float_status *status)
158142c2 6358{
c120391c 6359 bool aSign;
f4014512 6360 int32_t aExp, shiftCount;
bb98fe42 6361 uint64_t aSig0, aSig1;
158142c2
FB
6362
6363 aSig1 = extractFloat128Frac1( a );
6364 aSig0 = extractFloat128Frac0( a );
6365 aExp = extractFloat128Exp( a );
6366 aSign = extractFloat128Sign( a );
6367 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
e9321124 6368 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6369 aSig0 |= ( aSig1 != 0 );
6370 shiftCount = 0x4028 - aExp;
6371 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 6372 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
6373
6374}
6375
6376/*----------------------------------------------------------------------------
6377| Returns the result of converting the quadruple-precision floating-point
6378| value `a' to the 32-bit two's complement integer format. The conversion
6379| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6380| Arithmetic, except that the conversion is always rounded toward zero. If
6381| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
6382| conversion overflows, the largest integer with the same sign as `a' is
6383| returned.
6384*----------------------------------------------------------------------------*/
6385
f4014512 6386int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2 6387{
c120391c 6388 bool aSign;
f4014512 6389 int32_t aExp, shiftCount;
bb98fe42 6390 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 6391 int32_t z;
158142c2
FB
6392
6393 aSig1 = extractFloat128Frac1( a );
6394 aSig0 = extractFloat128Frac0( a );
6395 aExp = extractFloat128Exp( a );
6396 aSign = extractFloat128Sign( a );
6397 aSig0 |= ( aSig1 != 0 );
6398 if ( 0x401E < aExp ) {
6399 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6400 goto invalid;
6401 }
6402 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
6403 if (aExp || aSig0) {
6404 status->float_exception_flags |= float_flag_inexact;
6405 }
158142c2
FB
6406 return 0;
6407 }
e9321124 6408 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6409 shiftCount = 0x402F - aExp;
6410 savedASig = aSig0;
6411 aSig0 >>= shiftCount;
6412 z = aSig0;
6413 if ( aSign ) z = - z;
6414 if ( ( z < 0 ) ^ aSign ) {
6415 invalid:
ff32e16e 6416 float_raise(float_flag_invalid, status);
2c217da0 6417 return aSign ? INT32_MIN : INT32_MAX;
158142c2
FB
6418 }
6419 if ( ( aSig0<<shiftCount ) != savedASig ) {
a2f2d288 6420 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6421 }
6422 return z;
6423
6424}
6425
6426/*----------------------------------------------------------------------------
6427| Returns the result of converting the quadruple-precision floating-point
6428| value `a' to the 64-bit two's complement integer format. The conversion
6429| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6430| Arithmetic---which means in particular that the conversion is rounded
6431| according to the current rounding mode. If `a' is a NaN, the largest
6432| positive integer is returned. Otherwise, if the conversion overflows, the
6433| largest integer with the same sign as `a' is returned.
6434*----------------------------------------------------------------------------*/
6435
f42c2224 6436int64_t float128_to_int64(float128 a, float_status *status)
158142c2 6437{
c120391c 6438 bool aSign;
f4014512 6439 int32_t aExp, shiftCount;
bb98fe42 6440 uint64_t aSig0, aSig1;
158142c2
FB
6441
6442 aSig1 = extractFloat128Frac1( a );
6443 aSig0 = extractFloat128Frac0( a );
6444 aExp = extractFloat128Exp( a );
6445 aSign = extractFloat128Sign( a );
e9321124 6446 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6447 shiftCount = 0x402F - aExp;
6448 if ( shiftCount <= 0 ) {
6449 if ( 0x403E < aExp ) {
ff32e16e 6450 float_raise(float_flag_invalid, status);
158142c2
FB
6451 if ( ! aSign
6452 || ( ( aExp == 0x7FFF )
e9321124 6453 && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
158142c2
FB
6454 )
6455 ) {
2c217da0 6456 return INT64_MAX;
158142c2 6457 }
2c217da0 6458 return INT64_MIN;
158142c2
FB
6459 }
6460 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6461 }
6462 else {
6463 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6464 }
ff32e16e 6465 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
6466
6467}
6468
6469/*----------------------------------------------------------------------------
6470| Returns the result of converting the quadruple-precision floating-point
6471| value `a' to the 64-bit two's complement integer format. The conversion
6472| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6473| Arithmetic, except that the conversion is always rounded toward zero.
6474| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
6475| the conversion overflows, the largest integer with the same sign as `a' is
6476| returned.
6477*----------------------------------------------------------------------------*/
6478
f42c2224 6479int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2 6480{
c120391c 6481 bool aSign;
f4014512 6482 int32_t aExp, shiftCount;
bb98fe42 6483 uint64_t aSig0, aSig1;
f42c2224 6484 int64_t z;
158142c2
FB
6485
6486 aSig1 = extractFloat128Frac1( a );
6487 aSig0 = extractFloat128Frac0( a );
6488 aExp = extractFloat128Exp( a );
6489 aSign = extractFloat128Sign( a );
e9321124 6490 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6491 shiftCount = aExp - 0x402F;
6492 if ( 0 < shiftCount ) {
6493 if ( 0x403E <= aExp ) {
e9321124
AB
6494 aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
6495 if ( ( a.high == UINT64_C(0xC03E000000000000) )
6496 && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
a2f2d288
PM
6497 if (aSig1) {
6498 status->float_exception_flags |= float_flag_inexact;
6499 }
158142c2
FB
6500 }
6501 else {
ff32e16e 6502 float_raise(float_flag_invalid, status);
158142c2 6503 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
2c217da0 6504 return INT64_MAX;
158142c2
FB
6505 }
6506 }
2c217da0 6507 return INT64_MIN;
158142c2
FB
6508 }
6509 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 6510 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
a2f2d288 6511 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6512 }
6513 }
6514 else {
6515 if ( aExp < 0x3FFF ) {
6516 if ( aExp | aSig0 | aSig1 ) {
a2f2d288 6517 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6518 }
6519 return 0;
6520 }
6521 z = aSig0>>( - shiftCount );
6522 if ( aSig1
bb98fe42 6523 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
a2f2d288 6524 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6525 }
6526 }
6527 if ( aSign ) z = - z;
6528 return z;
6529
6530}
6531
2e6d8568
BR
6532/*----------------------------------------------------------------------------
6533| Returns the result of converting the quadruple-precision floating-point value
6534| `a' to the 64-bit unsigned integer format. The conversion is
6535| performed according to the IEC/IEEE Standard for Binary Floating-Point
6536| Arithmetic---which means in particular that the conversion is rounded
6537| according to the current rounding mode. If `a' is a NaN, the largest
6538| positive integer is returned. If the conversion overflows, the
6539| largest unsigned integer is returned. If 'a' is negative, the value is
6540| rounded and zero is returned; negative values that do not round to zero
6541| will raise the inexact exception.
6542*----------------------------------------------------------------------------*/
6543
6544uint64_t float128_to_uint64(float128 a, float_status *status)
6545{
c120391c 6546 bool aSign;
2e6d8568
BR
6547 int aExp;
6548 int shiftCount;
6549 uint64_t aSig0, aSig1;
6550
6551 aSig0 = extractFloat128Frac0(a);
6552 aSig1 = extractFloat128Frac1(a);
6553 aExp = extractFloat128Exp(a);
6554 aSign = extractFloat128Sign(a);
6555 if (aSign && (aExp > 0x3FFE)) {
6556 float_raise(float_flag_invalid, status);
6557 if (float128_is_any_nan(a)) {
2c217da0 6558 return UINT64_MAX;
2e6d8568
BR
6559 } else {
6560 return 0;
6561 }
6562 }
6563 if (aExp) {
2c217da0 6564 aSig0 |= UINT64_C(0x0001000000000000);
2e6d8568
BR
6565 }
6566 shiftCount = 0x402F - aExp;
6567 if (shiftCount <= 0) {
6568 if (0x403E < aExp) {
6569 float_raise(float_flag_invalid, status);
2c217da0 6570 return UINT64_MAX;
2e6d8568
BR
6571 }
6572 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6573 } else {
6574 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6575 }
6576 return roundAndPackUint64(aSign, aSig0, aSig1, status);
6577}
6578
6579uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6580{
6581 uint64_t v;
6582 signed char current_rounding_mode = status->float_rounding_mode;
6583
6584 set_float_rounding_mode(float_round_to_zero, status);
6585 v = float128_to_uint64(a, status);
6586 set_float_rounding_mode(current_rounding_mode, status);
6587
6588 return v;
6589}
6590
158142c2
FB
6591/*----------------------------------------------------------------------------
6592| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
6593| value `a' to the 32-bit unsigned integer format. The conversion
6594| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6595| Arithmetic except that the conversion is always rounded toward zero.
6596| If `a' is a NaN, the largest positive integer is returned. Otherwise,
6597| if the conversion overflows, the largest unsigned integer is returned.
6598| If 'a' is negative, the value is rounded and zero is returned; negative
6599| values that do not round to zero will raise the inexact exception.
6600*----------------------------------------------------------------------------*/
6601
6602uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6603{
6604 uint64_t v;
6605 uint32_t res;
6606 int old_exc_flags = get_float_exception_flags(status);
6607
6608 v = float128_to_uint64_round_to_zero(a, status);
6609 if (v > 0xffffffff) {
6610 res = 0xffffffff;
6611 } else {
6612 return v;
6613 }
6614 set_float_exception_flags(old_exc_flags, status);
e45de992
DH
6615 float_raise(float_flag_invalid, status);
6616 return res;
6617}
6618
6619/*----------------------------------------------------------------------------
6620| Returns the result of converting the quadruple-precision floating-point value
6621| `a' to the 32-bit unsigned integer format. The conversion is
6622| performed according to the IEC/IEEE Standard for Binary Floating-Point
6623| Arithmetic---which means in particular that the conversion is rounded
6624| according to the current rounding mode. If `a' is a NaN, the largest
6625| positive integer is returned. If the conversion overflows, the
6626| largest unsigned integer is returned. If 'a' is negative, the value is
6627| rounded and zero is returned; negative values that do not round to zero
6628| will raise the inexact exception.
6629*----------------------------------------------------------------------------*/
6630
6631uint32_t float128_to_uint32(float128 a, float_status *status)
6632{
6633 uint64_t v;
6634 uint32_t res;
6635 int old_exc_flags = get_float_exception_flags(status);
6636
6637 v = float128_to_uint64(a, status);
6638 if (v > 0xffffffff) {
6639 res = 0xffffffff;
6640 } else {
6641 return v;
6642 }
6643 set_float_exception_flags(old_exc_flags, status);
fd425037
BR
6644 float_raise(float_flag_invalid, status);
6645 return res;
6646}
6647
6648/*----------------------------------------------------------------------------
6649| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
6650| value `a' to the single-precision floating-point format. The conversion
6651| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6652| Arithmetic.
6653*----------------------------------------------------------------------------*/
6654
e5a41ffa 6655float32 float128_to_float32(float128 a, float_status *status)
158142c2 6656{
c120391c 6657 bool aSign;
f4014512 6658 int32_t aExp;
bb98fe42
AF
6659 uint64_t aSig0, aSig1;
6660 uint32_t zSig;
158142c2
FB
6661
6662 aSig1 = extractFloat128Frac1( a );
6663 aSig0 = extractFloat128Frac0( a );
6664 aExp = extractFloat128Exp( a );
6665 aSign = extractFloat128Sign( a );
6666 if ( aExp == 0x7FFF ) {
6667 if ( aSig0 | aSig1 ) {
ff32e16e 6668 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
6669 }
6670 return packFloat32( aSign, 0xFF, 0 );
6671 }
6672 aSig0 |= ( aSig1 != 0 );
6673 shift64RightJamming( aSig0, 18, &aSig0 );
6674 zSig = aSig0;
6675 if ( aExp || zSig ) {
6676 zSig |= 0x40000000;
6677 aExp -= 0x3F81;
6678 }
ff32e16e 6679 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
6680
6681}
6682
6683/*----------------------------------------------------------------------------
6684| Returns the result of converting the quadruple-precision floating-point
6685| value `a' to the double-precision floating-point format. The conversion
6686| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6687| Arithmetic.
6688*----------------------------------------------------------------------------*/
6689
e5a41ffa 6690float64 float128_to_float64(float128 a, float_status *status)
158142c2 6691{
c120391c 6692 bool aSign;
f4014512 6693 int32_t aExp;
bb98fe42 6694 uint64_t aSig0, aSig1;
158142c2
FB
6695
6696 aSig1 = extractFloat128Frac1( a );
6697 aSig0 = extractFloat128Frac0( a );
6698 aExp = extractFloat128Exp( a );
6699 aSign = extractFloat128Sign( a );
6700 if ( aExp == 0x7FFF ) {
6701 if ( aSig0 | aSig1 ) {
ff32e16e 6702 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
6703 }
6704 return packFloat64( aSign, 0x7FF, 0 );
6705 }
6706 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6707 aSig0 |= ( aSig1 != 0 );
6708 if ( aExp || aSig0 ) {
e9321124 6709 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
6710 aExp -= 0x3C01;
6711 }
ff32e16e 6712 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
6713
6714}
6715
158142c2
FB
6716/*----------------------------------------------------------------------------
6717| Returns the result of converting the quadruple-precision floating-point
6718| value `a' to the extended double-precision floating-point format. The
6719| conversion is performed according to the IEC/IEEE Standard for Binary
6720| Floating-Point Arithmetic.
6721*----------------------------------------------------------------------------*/
6722
e5a41ffa 6723floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2 6724{
c120391c 6725 bool aSign;
f4014512 6726 int32_t aExp;
bb98fe42 6727 uint64_t aSig0, aSig1;
158142c2
FB
6728
6729 aSig1 = extractFloat128Frac1( a );
6730 aSig0 = extractFloat128Frac0( a );
6731 aExp = extractFloat128Exp( a );
6732 aSign = extractFloat128Sign( a );
6733 if ( aExp == 0x7FFF ) {
6734 if ( aSig0 | aSig1 ) {
7537c2b4
JM
6735 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6736 status);
6737 return floatx80_silence_nan(res, status);
158142c2 6738 }
0f605c88
LV
6739 return packFloatx80(aSign, floatx80_infinity_high,
6740 floatx80_infinity_low);
158142c2
FB
6741 }
6742 if ( aExp == 0 ) {
6743 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6744 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6745 }
6746 else {
e9321124 6747 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6748 }
6749 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6750 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6751
6752}
6753
158142c2
FB
6754/*----------------------------------------------------------------------------
6755| Rounds the quadruple-precision floating-point value `a' to an integer, and
6756| returns the result as a quadruple-precision floating-point value. The
6757| operation is performed according to the IEC/IEEE Standard for Binary
6758| Floating-Point Arithmetic.
6759*----------------------------------------------------------------------------*/
6760
e5a41ffa 6761float128 float128_round_to_int(float128 a, float_status *status)
158142c2 6762{
c120391c 6763 bool aSign;
f4014512 6764 int32_t aExp;
bb98fe42 6765 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
6766 float128 z;
6767
6768 aExp = extractFloat128Exp( a );
6769 if ( 0x402F <= aExp ) {
6770 if ( 0x406F <= aExp ) {
6771 if ( ( aExp == 0x7FFF )
6772 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6773 ) {
ff32e16e 6774 return propagateFloat128NaN(a, a, status);
158142c2
FB
6775 }
6776 return a;
6777 }
6778 lastBitMask = 1;
6779 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6780 roundBitsMask = lastBitMask - 1;
6781 z = a;
a2f2d288 6782 switch (status->float_rounding_mode) {
dc355b76 6783 case float_round_nearest_even:
158142c2
FB
6784 if ( lastBitMask ) {
6785 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6786 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6787 }
6788 else {
bb98fe42 6789 if ( (int64_t) z.low < 0 ) {
158142c2 6790 ++z.high;
bb98fe42 6791 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
6792 }
6793 }
dc355b76 6794 break;
f9288a76
PM
6795 case float_round_ties_away:
6796 if (lastBitMask) {
6797 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6798 } else {
6799 if ((int64_t) z.low < 0) {
6800 ++z.high;
6801 }
6802 }
6803 break;
dc355b76
PM
6804 case float_round_to_zero:
6805 break;
6806 case float_round_up:
6807 if (!extractFloat128Sign(z)) {
6808 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6809 }
6810 break;
6811 case float_round_down:
6812 if (extractFloat128Sign(z)) {
6813 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 6814 }
dc355b76 6815 break;
5d64abb3
RH
6816 case float_round_to_odd:
6817 /*
6818 * Note that if lastBitMask == 0, the last bit is the lsb
6819 * of high, and roundBitsMask == -1.
6820 */
6821 if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
6822 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6823 }
6824 break;
dc355b76
PM
6825 default:
6826 abort();
158142c2
FB
6827 }
6828 z.low &= ~ roundBitsMask;
6829 }
6830 else {
6831 if ( aExp < 0x3FFF ) {
bb98fe42 6832 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
a2f2d288 6833 status->float_exception_flags |= float_flag_inexact;
158142c2 6834 aSign = extractFloat128Sign( a );
a2f2d288 6835 switch (status->float_rounding_mode) {
5d64abb3 6836 case float_round_nearest_even:
158142c2
FB
6837 if ( ( aExp == 0x3FFE )
6838 && ( extractFloat128Frac0( a )
6839 | extractFloat128Frac1( a ) )
6840 ) {
6841 return packFloat128( aSign, 0x3FFF, 0, 0 );
6842 }
6843 break;
f9288a76
PM
6844 case float_round_ties_away:
6845 if (aExp == 0x3FFE) {
6846 return packFloat128(aSign, 0x3FFF, 0, 0);
6847 }
6848 break;
5d64abb3 6849 case float_round_down:
158142c2
FB
6850 return
6851 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6852 : packFloat128( 0, 0, 0, 0 );
5d64abb3 6853 case float_round_up:
158142c2
FB
6854 return
6855 aSign ? packFloat128( 1, 0, 0, 0 )
6856 : packFloat128( 0, 0x3FFF, 0, 0 );
5d64abb3
RH
6857
6858 case float_round_to_odd:
6859 return packFloat128(aSign, 0x3FFF, 0, 0);
3dede407
RH
6860
6861 case float_round_to_zero:
6862 break;
158142c2
FB
6863 }
6864 return packFloat128( aSign, 0, 0, 0 );
6865 }
6866 lastBitMask = 1;
6867 lastBitMask <<= 0x402F - aExp;
6868 roundBitsMask = lastBitMask - 1;
6869 z.low = 0;
6870 z.high = a.high;
a2f2d288 6871 switch (status->float_rounding_mode) {
dc355b76 6872 case float_round_nearest_even:
158142c2
FB
6873 z.high += lastBitMask>>1;
6874 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6875 z.high &= ~ lastBitMask;
6876 }
dc355b76 6877 break;
f9288a76
PM
6878 case float_round_ties_away:
6879 z.high += lastBitMask>>1;
6880 break;
dc355b76
PM
6881 case float_round_to_zero:
6882 break;
6883 case float_round_up:
6884 if (!extractFloat128Sign(z)) {
158142c2
FB
6885 z.high |= ( a.low != 0 );
6886 z.high += roundBitsMask;
6887 }
dc355b76
PM
6888 break;
6889 case float_round_down:
6890 if (extractFloat128Sign(z)) {
6891 z.high |= (a.low != 0);
6892 z.high += roundBitsMask;
6893 }
6894 break;
5d64abb3
RH
6895 case float_round_to_odd:
6896 if ((z.high & lastBitMask) == 0) {
6897 z.high |= (a.low != 0);
6898 z.high += roundBitsMask;
6899 }
6900 break;
dc355b76
PM
6901 default:
6902 abort();
158142c2
FB
6903 }
6904 z.high &= ~ roundBitsMask;
6905 }
6906 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
a2f2d288 6907 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6908 }
6909 return z;
6910
6911}
6912
6913/*----------------------------------------------------------------------------
6914| Returns the result of adding the absolute values of the quadruple-precision
6915| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
6916| before being returned. `zSign' is ignored if the result is a NaN.
6917| The addition is performed according to the IEC/IEEE Standard for Binary
6918| Floating-Point Arithmetic.
6919*----------------------------------------------------------------------------*/
6920
c120391c 6921static float128 addFloat128Sigs(float128 a, float128 b, bool zSign,
e5a41ffa 6922 float_status *status)
158142c2 6923{
f4014512 6924 int32_t aExp, bExp, zExp;
bb98fe42 6925 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 6926 int32_t expDiff;
158142c2
FB
6927
6928 aSig1 = extractFloat128Frac1( a );
6929 aSig0 = extractFloat128Frac0( a );
6930 aExp = extractFloat128Exp( a );
6931 bSig1 = extractFloat128Frac1( b );
6932 bSig0 = extractFloat128Frac0( b );
6933 bExp = extractFloat128Exp( b );
6934 expDiff = aExp - bExp;
6935 if ( 0 < expDiff ) {
6936 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6937 if (aSig0 | aSig1) {
6938 return propagateFloat128NaN(a, b, status);
6939 }
158142c2
FB
6940 return a;
6941 }
6942 if ( bExp == 0 ) {
6943 --expDiff;
6944 }
6945 else {
e9321124 6946 bSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6947 }
6948 shift128ExtraRightJamming(
6949 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6950 zExp = aExp;
6951 }
6952 else if ( expDiff < 0 ) {
6953 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6954 if (bSig0 | bSig1) {
6955 return propagateFloat128NaN(a, b, status);
6956 }
158142c2
FB
6957 return packFloat128( zSign, 0x7FFF, 0, 0 );
6958 }
6959 if ( aExp == 0 ) {
6960 ++expDiff;
6961 }
6962 else {
e9321124 6963 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6964 }
6965 shift128ExtraRightJamming(
6966 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6967 zExp = bExp;
6968 }
6969 else {
6970 if ( aExp == 0x7FFF ) {
6971 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6972 return propagateFloat128NaN(a, b, status);
158142c2
FB
6973 }
6974 return a;
6975 }
6976 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 6977 if ( aExp == 0 ) {
a2f2d288 6978 if (status->flush_to_zero) {
e6afc87f 6979 if (zSig0 | zSig1) {
ff32e16e 6980 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
6981 }
6982 return packFloat128(zSign, 0, 0, 0);
6983 }
fe76d976
PB
6984 return packFloat128( zSign, 0, zSig0, zSig1 );
6985 }
158142c2 6986 zSig2 = 0;
e9321124 6987 zSig0 |= UINT64_C(0x0002000000000000);
158142c2
FB
6988 zExp = aExp;
6989 goto shiftRight1;
6990 }
e9321124 6991 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6992 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6993 --zExp;
e9321124 6994 if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack;
158142c2
FB
6995 ++zExp;
6996 shiftRight1:
6997 shift128ExtraRightJamming(
6998 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6999 roundAndPack:
ff32e16e 7000 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7001
7002}
7003
7004/*----------------------------------------------------------------------------
7005| Returns the result of subtracting the absolute values of the quadruple-
7006| precision floating-point values `a' and `b'. If `zSign' is 1, the
7007| difference is negated before being returned. `zSign' is ignored if the
7008| result is a NaN. The subtraction is performed according to the IEC/IEEE
7009| Standard for Binary Floating-Point Arithmetic.
7010*----------------------------------------------------------------------------*/
7011
c120391c 7012static float128 subFloat128Sigs(float128 a, float128 b, bool zSign,
e5a41ffa 7013 float_status *status)
158142c2 7014{
f4014512 7015 int32_t aExp, bExp, zExp;
bb98fe42 7016 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 7017 int32_t expDiff;
158142c2
FB
7018
7019 aSig1 = extractFloat128Frac1( a );
7020 aSig0 = extractFloat128Frac0( a );
7021 aExp = extractFloat128Exp( a );
7022 bSig1 = extractFloat128Frac1( b );
7023 bSig0 = extractFloat128Frac0( b );
7024 bExp = extractFloat128Exp( b );
7025 expDiff = aExp - bExp;
7026 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
7027 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
7028 if ( 0 < expDiff ) goto aExpBigger;
7029 if ( expDiff < 0 ) goto bExpBigger;
7030 if ( aExp == 0x7FFF ) {
7031 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 7032 return propagateFloat128NaN(a, b, status);
158142c2 7033 }
ff32e16e 7034 float_raise(float_flag_invalid, status);
af39bc8c 7035 return float128_default_nan(status);
158142c2
FB
7036 }
7037 if ( aExp == 0 ) {
7038 aExp = 1;
7039 bExp = 1;
7040 }
7041 if ( bSig0 < aSig0 ) goto aBigger;
7042 if ( aSig0 < bSig0 ) goto bBigger;
7043 if ( bSig1 < aSig1 ) goto aBigger;
7044 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
7045 return packFloat128(status->float_rounding_mode == float_round_down,
7046 0, 0, 0);
158142c2
FB
7047 bExpBigger:
7048 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7049 if (bSig0 | bSig1) {
7050 return propagateFloat128NaN(a, b, status);
7051 }
158142c2
FB
7052 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
7053 }
7054 if ( aExp == 0 ) {
7055 ++expDiff;
7056 }
7057 else {
e9321124 7058 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7059 }
7060 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
e9321124 7061 bSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7062 bBigger:
7063 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
7064 zExp = bExp;
7065 zSign ^= 1;
7066 goto normalizeRoundAndPack;
7067 aExpBigger:
7068 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7069 if (aSig0 | aSig1) {
7070 return propagateFloat128NaN(a, b, status);
7071 }
158142c2
FB
7072 return a;
7073 }
7074 if ( bExp == 0 ) {
7075 --expDiff;
7076 }
7077 else {
e9321124 7078 bSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7079 }
7080 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
e9321124 7081 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7082 aBigger:
7083 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7084 zExp = aExp;
7085 normalizeRoundAndPack:
7086 --zExp;
ff32e16e
PM
7087 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
7088 status);
158142c2
FB
7089
7090}
7091
7092/*----------------------------------------------------------------------------
7093| Returns the result of adding the quadruple-precision floating-point values
7094| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
7095| for Binary Floating-Point Arithmetic.
7096*----------------------------------------------------------------------------*/
7097
e5a41ffa 7098float128 float128_add(float128 a, float128 b, float_status *status)
158142c2 7099{
c120391c 7100 bool aSign, bSign;
158142c2
FB
7101
7102 aSign = extractFloat128Sign( a );
7103 bSign = extractFloat128Sign( b );
7104 if ( aSign == bSign ) {
ff32e16e 7105 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7106 }
7107 else {
ff32e16e 7108 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7109 }
7110
7111}
7112
7113/*----------------------------------------------------------------------------
7114| Returns the result of subtracting the quadruple-precision floating-point
7115| values `a' and `b'. The operation is performed according to the IEC/IEEE
7116| Standard for Binary Floating-Point Arithmetic.
7117*----------------------------------------------------------------------------*/
7118
e5a41ffa 7119float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2 7120{
c120391c 7121 bool aSign, bSign;
158142c2
FB
7122
7123 aSign = extractFloat128Sign( a );
7124 bSign = extractFloat128Sign( b );
7125 if ( aSign == bSign ) {
ff32e16e 7126 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7127 }
7128 else {
ff32e16e 7129 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7130 }
7131
7132}
7133
7134/*----------------------------------------------------------------------------
7135| Returns the result of multiplying the quadruple-precision floating-point
7136| values `a' and `b'. The operation is performed according to the IEC/IEEE
7137| Standard for Binary Floating-Point Arithmetic.
7138*----------------------------------------------------------------------------*/
7139
e5a41ffa 7140float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2 7141{
c120391c 7142 bool aSign, bSign, zSign;
f4014512 7143 int32_t aExp, bExp, zExp;
bb98fe42 7144 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
7145
7146 aSig1 = extractFloat128Frac1( a );
7147 aSig0 = extractFloat128Frac0( a );
7148 aExp = extractFloat128Exp( a );
7149 aSign = extractFloat128Sign( a );
7150 bSig1 = extractFloat128Frac1( b );
7151 bSig0 = extractFloat128Frac0( b );
7152 bExp = extractFloat128Exp( b );
7153 bSign = extractFloat128Sign( b );
7154 zSign = aSign ^ bSign;
7155 if ( aExp == 0x7FFF ) {
7156 if ( ( aSig0 | aSig1 )
7157 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7158 return propagateFloat128NaN(a, b, status);
158142c2
FB
7159 }
7160 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
7161 return packFloat128( zSign, 0x7FFF, 0, 0 );
7162 }
7163 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7164 if (bSig0 | bSig1) {
7165 return propagateFloat128NaN(a, b, status);
7166 }
158142c2
FB
7167 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7168 invalid:
ff32e16e 7169 float_raise(float_flag_invalid, status);
af39bc8c 7170 return float128_default_nan(status);
158142c2
FB
7171 }
7172 return packFloat128( zSign, 0x7FFF, 0, 0 );
7173 }
7174 if ( aExp == 0 ) {
7175 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7176 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7177 }
7178 if ( bExp == 0 ) {
7179 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7180 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7181 }
7182 zExp = aExp + bExp - 0x4000;
e9321124 7183 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7184 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
7185 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
7186 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
7187 zSig2 |= ( zSig3 != 0 );
e9321124 7188 if (UINT64_C( 0x0002000000000000) <= zSig0 ) {
158142c2
FB
7189 shift128ExtraRightJamming(
7190 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7191 ++zExp;
7192 }
ff32e16e 7193 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7194
7195}
7196
7197/*----------------------------------------------------------------------------
7198| Returns the result of dividing the quadruple-precision floating-point value
7199| `a' by the corresponding value `b'. The operation is performed according to
7200| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7201*----------------------------------------------------------------------------*/
7202
e5a41ffa 7203float128 float128_div(float128 a, float128 b, float_status *status)
158142c2 7204{
c120391c 7205 bool aSign, bSign, zSign;
f4014512 7206 int32_t aExp, bExp, zExp;
bb98fe42
AF
7207 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
7208 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7209
7210 aSig1 = extractFloat128Frac1( a );
7211 aSig0 = extractFloat128Frac0( a );
7212 aExp = extractFloat128Exp( a );
7213 aSign = extractFloat128Sign( a );
7214 bSig1 = extractFloat128Frac1( b );
7215 bSig0 = extractFloat128Frac0( b );
7216 bExp = extractFloat128Exp( b );
7217 bSign = extractFloat128Sign( b );
7218 zSign = aSign ^ bSign;
7219 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7220 if (aSig0 | aSig1) {
7221 return propagateFloat128NaN(a, b, status);
7222 }
158142c2 7223 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7224 if (bSig0 | bSig1) {
7225 return propagateFloat128NaN(a, b, status);
7226 }
158142c2
FB
7227 goto invalid;
7228 }
7229 return packFloat128( zSign, 0x7FFF, 0, 0 );
7230 }
7231 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7232 if (bSig0 | bSig1) {
7233 return propagateFloat128NaN(a, b, status);
7234 }
158142c2
FB
7235 return packFloat128( zSign, 0, 0, 0 );
7236 }
7237 if ( bExp == 0 ) {
7238 if ( ( bSig0 | bSig1 ) == 0 ) {
7239 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7240 invalid:
ff32e16e 7241 float_raise(float_flag_invalid, status);
af39bc8c 7242 return float128_default_nan(status);
158142c2 7243 }
ff32e16e 7244 float_raise(float_flag_divbyzero, status);
158142c2
FB
7245 return packFloat128( zSign, 0x7FFF, 0, 0 );
7246 }
7247 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7248 }
7249 if ( aExp == 0 ) {
7250 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7251 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7252 }
7253 zExp = aExp - bExp + 0x3FFD;
7254 shortShift128Left(
e9321124 7255 aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 );
158142c2 7256 shortShift128Left(
e9321124 7257 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
7258 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
7259 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
7260 ++zExp;
7261 }
7262 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
7263 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
7264 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 7265 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7266 --zSig0;
7267 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
7268 }
7269 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
7270 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
7271 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
7272 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7273 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7274 --zSig1;
7275 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
7276 }
7277 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7278 }
7279 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 7280 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7281
7282}
7283
7284/*----------------------------------------------------------------------------
7285| Returns the remainder of the quadruple-precision floating-point value `a'
7286| with respect to the corresponding value `b'. The operation is performed
7287| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7288*----------------------------------------------------------------------------*/
7289
e5a41ffa 7290float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 7291{
c120391c 7292 bool aSign, zSign;
f4014512 7293 int32_t aExp, bExp, expDiff;
bb98fe42
AF
7294 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7295 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
7296 int64_t sigMean0;
158142c2
FB
7297
7298 aSig1 = extractFloat128Frac1( a );
7299 aSig0 = extractFloat128Frac0( a );
7300 aExp = extractFloat128Exp( a );
7301 aSign = extractFloat128Sign( a );
7302 bSig1 = extractFloat128Frac1( b );
7303 bSig0 = extractFloat128Frac0( b );
7304 bExp = extractFloat128Exp( b );
158142c2
FB
7305 if ( aExp == 0x7FFF ) {
7306 if ( ( aSig0 | aSig1 )
7307 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7308 return propagateFloat128NaN(a, b, status);
158142c2
FB
7309 }
7310 goto invalid;
7311 }
7312 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7313 if (bSig0 | bSig1) {
7314 return propagateFloat128NaN(a, b, status);
7315 }
158142c2
FB
7316 return a;
7317 }
7318 if ( bExp == 0 ) {
7319 if ( ( bSig0 | bSig1 ) == 0 ) {
7320 invalid:
ff32e16e 7321 float_raise(float_flag_invalid, status);
af39bc8c 7322 return float128_default_nan(status);
158142c2
FB
7323 }
7324 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7325 }
7326 if ( aExp == 0 ) {
7327 if ( ( aSig0 | aSig1 ) == 0 ) return a;
7328 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7329 }
7330 expDiff = aExp - bExp;
7331 if ( expDiff < -1 ) return a;
7332 shortShift128Left(
e9321124 7333 aSig0 | UINT64_C(0x0001000000000000),
158142c2
FB
7334 aSig1,
7335 15 - ( expDiff < 0 ),
7336 &aSig0,
7337 &aSig1
7338 );
7339 shortShift128Left(
e9321124 7340 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
7341 q = le128( bSig0, bSig1, aSig0, aSig1 );
7342 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7343 expDiff -= 64;
7344 while ( 0 < expDiff ) {
7345 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7346 q = ( 4 < q ) ? q - 4 : 0;
7347 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7348 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
7349 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
7350 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
7351 expDiff -= 61;
7352 }
7353 if ( -64 < expDiff ) {
7354 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7355 q = ( 4 < q ) ? q - 4 : 0;
7356 q >>= - expDiff;
7357 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7358 expDiff += 52;
7359 if ( expDiff < 0 ) {
7360 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7361 }
7362 else {
7363 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7364 }
7365 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7366 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7367 }
7368 else {
7369 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
7370 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7371 }
7372 do {
7373 alternateASig0 = aSig0;
7374 alternateASig1 = aSig1;
7375 ++q;
7376 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 7377 } while ( 0 <= (int64_t) aSig0 );
158142c2 7378 add128(
bb98fe42 7379 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
7380 if ( ( sigMean0 < 0 )
7381 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
7382 aSig0 = alternateASig0;
7383 aSig1 = alternateASig1;
7384 }
bb98fe42 7385 zSign = ( (int64_t) aSig0 < 0 );
158142c2 7386 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
7387 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
7388 status);
158142c2
FB
7389}
7390
7391/*----------------------------------------------------------------------------
7392| Returns the square root of the quadruple-precision floating-point value `a'.
7393| The operation is performed according to the IEC/IEEE Standard for Binary
7394| Floating-Point Arithmetic.
7395*----------------------------------------------------------------------------*/
7396
e5a41ffa 7397float128 float128_sqrt(float128 a, float_status *status)
158142c2 7398{
c120391c 7399 bool aSign;
f4014512 7400 int32_t aExp, zExp;
bb98fe42
AF
7401 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7402 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7403
7404 aSig1 = extractFloat128Frac1( a );
7405 aSig0 = extractFloat128Frac0( a );
7406 aExp = extractFloat128Exp( a );
7407 aSign = extractFloat128Sign( a );
7408 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7409 if (aSig0 | aSig1) {
7410 return propagateFloat128NaN(a, a, status);
7411 }
158142c2
FB
7412 if ( ! aSign ) return a;
7413 goto invalid;
7414 }
7415 if ( aSign ) {
7416 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
7417 invalid:
ff32e16e 7418 float_raise(float_flag_invalid, status);
af39bc8c 7419 return float128_default_nan(status);
158142c2
FB
7420 }
7421 if ( aExp == 0 ) {
7422 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
7423 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7424 }
7425 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
e9321124 7426 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7427 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
7428 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
7429 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
7430 doubleZSig0 = zSig0<<1;
7431 mul64To128( zSig0, zSig0, &term0, &term1 );
7432 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 7433 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7434 --zSig0;
7435 doubleZSig0 -= 2;
7436 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
7437 }
7438 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
7439 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7440 if ( zSig1 == 0 ) zSig1 = 1;
7441 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7442 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7443 mul64To128( zSig1, zSig1, &term2, &term3 );
7444 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7445 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7446 --zSig1;
7447 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7448 term3 |= 1;
7449 term2 |= doubleZSig0;
7450 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7451 }
7452 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7453 }
7454 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 7455 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7456
7457}
7458
71bfd65c
RH
7459static inline FloatRelation
7460floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
7461 float_status *status)
f6714d36 7462{
c120391c 7463 bool aSign, bSign;
f6714d36 7464
d1eb8f2a
AD
7465 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7466 float_raise(float_flag_invalid, status);
7467 return float_relation_unordered;
7468 }
f6714d36
AJ
7469 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7470 ( extractFloatx80Frac( a )<<1 ) ) ||
7471 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7472 ( extractFloatx80Frac( b )<<1 ) )) {
7473 if (!is_quiet ||
af39bc8c
AM
7474 floatx80_is_signaling_nan(a, status) ||
7475 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7476 float_raise(float_flag_invalid, status);
f6714d36
AJ
7477 }
7478 return float_relation_unordered;
7479 }
7480 aSign = extractFloatx80Sign( a );
7481 bSign = extractFloatx80Sign( b );
7482 if ( aSign != bSign ) {
7483
7484 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7485 ( ( a.low | b.low ) == 0 ) ) {
7486 /* zero case */
7487 return float_relation_equal;
7488 } else {
7489 return 1 - (2 * aSign);
7490 }
7491 } else {
be53fa78
JM
7492 /* Normalize pseudo-denormals before comparison. */
7493 if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
7494 ++a.high;
7495 }
7496 if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
7497 ++b.high;
7498 }
f6714d36
AJ
7499 if (a.low == b.low && a.high == b.high) {
7500 return float_relation_equal;
7501 } else {
7502 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7503 }
7504 }
7505}
7506
71bfd65c 7507FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7508{
ff32e16e 7509 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7510}
7511
71bfd65c
RH
7512FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
7513 float_status *status)
f6714d36 7514{
ff32e16e 7515 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7516}
7517
71bfd65c
RH
7518static inline FloatRelation
7519float128_compare_internal(float128 a, float128 b, bool is_quiet,
7520 float_status *status)
1f587329 7521{
c120391c 7522 bool aSign, bSign;
1f587329
BS
7523
7524 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7525 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7526 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7527 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7528 if (!is_quiet ||
af39bc8c
AM
7529 float128_is_signaling_nan(a, status) ||
7530 float128_is_signaling_nan(b, status)) {
ff32e16e 7531 float_raise(float_flag_invalid, status);
1f587329
BS
7532 }
7533 return float_relation_unordered;
7534 }
7535 aSign = extractFloat128Sign( a );
7536 bSign = extractFloat128Sign( b );
7537 if ( aSign != bSign ) {
7538 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7539 /* zero case */
7540 return float_relation_equal;
7541 } else {
7542 return 1 - (2 * aSign);
7543 }
7544 } else {
7545 if (a.low == b.low && a.high == b.high) {
7546 return float_relation_equal;
7547 } else {
7548 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7549 }
7550 }
7551}
7552
71bfd65c 7553FloatRelation float128_compare(float128 a, float128 b, float_status *status)
1f587329 7554{
ff32e16e 7555 return float128_compare_internal(a, b, 0, status);
1f587329
BS
7556}
7557
71bfd65c
RH
7558FloatRelation float128_compare_quiet(float128 a, float128 b,
7559 float_status *status)
1f587329 7560{
ff32e16e 7561 return float128_compare_internal(a, b, 1, status);
1f587329
BS
7562}
7563
e5a41ffa 7564floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb 7565{
c120391c 7566 bool aSign;
326b9e98 7567 int32_t aExp;
bb98fe42 7568 uint64_t aSig;
9ee6e8bb 7569
d1eb8f2a
AD
7570 if (floatx80_invalid_encoding(a)) {
7571 float_raise(float_flag_invalid, status);
7572 return floatx80_default_nan(status);
7573 }
9ee6e8bb
PB
7574 aSig = extractFloatx80Frac( a );
7575 aExp = extractFloatx80Exp( a );
7576 aSign = extractFloatx80Sign( a );
7577
326b9e98
AJ
7578 if ( aExp == 0x7FFF ) {
7579 if ( aSig<<1 ) {
ff32e16e 7580 return propagateFloatx80NaN(a, a, status);
326b9e98 7581 }
9ee6e8bb
PB
7582 return a;
7583 }
326b9e98 7584
3c85c37f
PM
7585 if (aExp == 0) {
7586 if (aSig == 0) {
7587 return a;
7588 }
7589 aExp++;
7590 }
69397542 7591
326b9e98
AJ
7592 if (n > 0x10000) {
7593 n = 0x10000;
7594 } else if (n < -0x10000) {
7595 n = -0x10000;
7596 }
7597
9ee6e8bb 7598 aExp += n;
a2f2d288
PM
7599 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7600 aSign, aExp, aSig, 0, status);
9ee6e8bb 7601}
9ee6e8bb 7602
e5a41ffa 7603float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb 7604{
c120391c 7605 bool aSign;
326b9e98 7606 int32_t aExp;
bb98fe42 7607 uint64_t aSig0, aSig1;
9ee6e8bb
PB
7608
7609 aSig1 = extractFloat128Frac1( a );
7610 aSig0 = extractFloat128Frac0( a );
7611 aExp = extractFloat128Exp( a );
7612 aSign = extractFloat128Sign( a );
7613 if ( aExp == 0x7FFF ) {
326b9e98 7614 if ( aSig0 | aSig1 ) {
ff32e16e 7615 return propagateFloat128NaN(a, a, status);
326b9e98 7616 }
9ee6e8bb
PB
7617 return a;
7618 }
3c85c37f 7619 if (aExp != 0) {
e9321124 7620 aSig0 |= UINT64_C(0x0001000000000000);
3c85c37f 7621 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 7622 return a;
3c85c37f
PM
7623 } else {
7624 aExp++;
7625 }
69397542 7626
326b9e98
AJ
7627 if (n > 0x10000) {
7628 n = 0x10000;
7629 } else if (n < -0x10000) {
7630 n = -0x10000;
7631 }
7632
69397542
PB
7633 aExp += n - 1;
7634 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 7635 , status);
9ee6e8bb
PB
7636
7637}
f6b3b108
EC
7638
7639static void __attribute__((constructor)) softfloat_init(void)
7640{
7641 union_float64 ua, ub, uc, ur;
7642
7643 if (QEMU_NO_HARDFLOAT) {
7644 return;
7645 }
7646 /*
7647 * Test that the host's FMA is not obviously broken. For example,
7648 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
7649 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
7650 */
7651 ua.s = 0x0020000000000001ULL;
7652 ub.s = 0x3ca0000000000000ULL;
7653 uc.s = 0x0020000000000000ULL;
7654 ur.h = fma(ua.h, ub.h, uc.h);
7655 if (ur.s != 0x0020000000000001ULL) {
7656 force_soft_fma = true;
7657 }
7658}