]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
softfloat: Name rounding mode enum
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 s->float_exception_flags |= float_flag_input_denormal; \
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
185/*
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
189 */
/*
 * Naming: QEMU_HARDFLOAT_<inputs>F<width>_USE_FP.  A value of 1 selects
 * fpclassify(), 0 selects the float32/64_is_zero_or_normal primitives.
 * The 2- and 3-input values are consumed by f{32,64}_is_zon{2,3}.
 */
190#if defined(__x86_64__)
/* x86_64 hosts: fpclassify() only for the float64 variants. */
191# define QEMU_HARDFLOAT_1F32_USE_FP 0
192# define QEMU_HARDFLOAT_1F64_USE_FP 1
193# define QEMU_HARDFLOAT_2F32_USE_FP 0
194# define QEMU_HARDFLOAT_2F64_USE_FP 1
195# define QEMU_HARDFLOAT_3F32_USE_FP 0
196# define QEMU_HARDFLOAT_3F64_USE_FP 1
197#else
/* Every other host: softfloat primitives for all variants. */
198# define QEMU_HARDFLOAT_1F32_USE_FP 0
199# define QEMU_HARDFLOAT_1F64_USE_FP 0
200# define QEMU_HARDFLOAT_2F32_USE_FP 0
201# define QEMU_HARDFLOAT_2F64_USE_FP 0
202# define QEMU_HARDFLOAT_3F32_USE_FP 0
203# define QEMU_HARDFLOAT_3F64_USE_FP 0
204#endif
205
206/*
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211 */
/*
 * 1 makes f32_is_inf()/f64_is_inf() call isinf() on the host value;
 * 0 makes them call float32/64_is_infinity() on the softfloat value.
 */
212#if defined(__x86_64__) || defined(__aarch64__)
213# define QEMU_HARDFLOAT_USE_ISINF 1
214#else
215# define QEMU_HARDFLOAT_USE_ISINF 0
216#endif
217
218/*
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
221 * already set.
222 */
/*
 * QEMU_NO_HARDFLOAT is tested by can_use_fpu(): when it is 1 that function
 * always returns false.  QEMU_SOFTFLOAT_ATTR additionally carries
 * __attribute__((noinline)) only in the configuration where hardfloat is
 * enabled.
 */
223#if defined(TARGET_PPC) || defined(__FAST_MATH__)
/* Emit a build-time warning when the cause is -ffast-math. */
224# if defined(__FAST_MATH__)
225# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
226 IEEE implementation
227# endif
/* Hardfloat disabled. */
228# define QEMU_NO_HARDFLOAT 1
229# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
230#else
/* Hardfloat enabled. */
231# define QEMU_NO_HARDFLOAT 0
232# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
233#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
/*
 * Overlays of a softfloat value and the host floating-point type of the
 * same width: 's' is the softfloat view, 'h' is the host view handed to
 * the hard_*_op2_fn callbacks.
 */
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
/* Two-operand predicates; used as the 'pre' and 'post' checks of *_gen2. */
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
/* Softfloat implementation of a two-operand operation (takes the status). */
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
/* Host-FPU implementation of a two-operand operation. */
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
a94b7839
EC
342static inline float32
343float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
b240c9c4 345 f32_check_fn pre, f32_check_fn post)
a94b7839
EC
346{
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
a94b7839
EC
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
363 s->float_exception_flags |= float_flag_overflow;
b240c9c4
RH
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
a94b7839
EC
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371}
372
373static inline float64
374float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
b240c9c4 376 f64_check_fn pre, f64_check_fn post)
a94b7839
EC
377{
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
a94b7839
EC
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
394 s->float_exception_flags |= float_flag_overflow;
b240c9c4
RH
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
a94b7839
EC
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402}
403
d97544c9
AB
404/*----------------------------------------------------------------------------
405| Returns the fraction bits of the single-precision floating-point value `a'.
406*----------------------------------------------------------------------------*/
407
408static inline uint32_t extractFloat32Frac(float32 a)
409{
410 return float32_val(a) & 0x007FFFFF;
411}
412
413/*----------------------------------------------------------------------------
414| Returns the exponent bits of the single-precision floating-point value `a'.
415*----------------------------------------------------------------------------*/
416
417static inline int extractFloat32Exp(float32 a)
418{
419 return (float32_val(a) >> 23) & 0xFF;
420}
421
422/*----------------------------------------------------------------------------
423| Returns the sign bit of the single-precision floating-point value `a'.
424*----------------------------------------------------------------------------*/
425
c120391c 426static inline bool extractFloat32Sign(float32 a)
d97544c9
AB
427{
428 return float32_val(a) >> 31;
429}
430
431/*----------------------------------------------------------------------------
432| Returns the fraction bits of the double-precision floating-point value `a'.
433*----------------------------------------------------------------------------*/
434
435static inline uint64_t extractFloat64Frac(float64 a)
436{
e9321124 437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
d97544c9
AB
438}
439
440/*----------------------------------------------------------------------------
441| Returns the exponent bits of the double-precision floating-point value `a'.
442*----------------------------------------------------------------------------*/
443
444static inline int extractFloat64Exp(float64 a)
445{
446 return (float64_val(a) >> 52) & 0x7FF;
447}
448
449/*----------------------------------------------------------------------------
450| Returns the sign bit of the double-precision floating-point value `a'.
451*----------------------------------------------------------------------------*/
452
c120391c 453static inline bool extractFloat64Sign(float64 a)
d97544c9
AB
454{
455 return float64_val(a) >> 63;
456}
457
a90119b5
AB
458/*
459 * Classify a floating point number. Everything from float_class_qnan
460 * upward is a NaN, so cls >= float_class_qnan matches any NaN.
461 */
462
/*
 * Enumerator order matters: is_nan() relies on every NaN class comparing
 * >= float_class_qnan.  The enum is declared packed.
 */
463typedef enum __attribute__ ((__packed__)) {
/* Value assigned by unpack_raw(), before canonicalization sets a class. */
464 float_class_unclassified,
465 float_class_zero,
466 float_class_normal,
467 float_class_inf,
468 float_class_qnan, /* all NaNs from here */
469 float_class_snan,
a90119b5
AB
470} FloatClass;
471
247d1f21
RH
472/* Simple helpers for checking if, or what kind of, NaN we have */
473static inline __attribute__((unused)) bool is_nan(FloatClass c)
474{
475 return unlikely(c >= float_class_qnan);
476}
477
478static inline __attribute__((unused)) bool is_snan(FloatClass c)
479{
480 return c == float_class_snan;
481}
482
483static inline __attribute__((unused)) bool is_qnan(FloatClass c)
484{
485 return c == float_class_qnan;
486}
487
a90119b5
AB
488/*
489 * Structure holding all of the decomposed parts of a float. The
490 * exponent is unbiased and the fraction is normalized. All
491 * calculations are done with a 64 bit fraction and then rounded as
492 * appropriate for the final format.
493 *
494 * Thanks to the packed FloatClass a decent compiler should be able to
495 * fit the whole structure into registers and avoid using the stack
496 * for parameter passing.
497 */
498
/*
 * Fields hold the raw bit-fields right after unpack_raw(); after
 * sf_canonicalize() they hold the canonical form (cls set, and for normal
 * numbers the exponent unbiased and the fraction aligned to
 * DECOMPOSED_BINARY_POINT).
 */
499typedef struct {
500 uint64_t frac;
501 int32_t exp;
502 FloatClass cls;
503 bool sign;
504} FloatParts;
505
/* Bit position (62) of the binary point within the 64-bit fraction. */
506#define DECOMPOSED_BINARY_POINT (64 - 2)
/* The implicit leading one of a normalized fraction: bit 62. */
507#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
/* Bit 63: set when an addition or rounding carries out of the implicit bit. */
508#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1)
509
510/* Structure holding all of the relevant parameters for a format.
511 * exp_size: the size of the exponent field
512 * exp_bias: the offset applied to the exponent field
513 * exp_max: the maximum normalised exponent
514 * frac_size: the size of the fraction field
515 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
516 * The following are computed based on the size of the fraction
517 * frac_lsb: least significant bit of fraction
ca3a3d5a 518 * frac_lsbm1: the bit below the least significant bit (for rounding)
a90119b5 519 * round_mask/roundeven_mask: masks used for rounding
ca3a3d5a
AB
520 * The following optional modifiers are available:
521 * arm_althp: handle ARM Alternative Half Precision
a90119b5
AB
522 */
/*
 * Per-format parameter block.  Every field except arm_althp is filled in
 * by the FLOAT_PARAMS(E, F) initializer macro.
 */
523typedef struct {
524 int exp_size;
525 int exp_bias;
526 int exp_max;
527 int frac_size;
528 int frac_shift;
529 uint64_t frac_lsb;
530 uint64_t frac_lsbm1;
531 uint64_t round_mask;
532 uint64_t roundeven_mask;
/* When true, exp == exp_max is not decoded as Inf/NaN (see sf_canonicalize). */
ca3a3d5a 533 bool arm_althp;
a90119b5
AB
534} FloatFmt;
535
/*
 * Expand to the designated initializers of a FloatFmt for a format with
 * E exponent bits and F fraction bits.
 *
 * Both arguments are parenthesized at every use so that an expression
 * argument (e.g. FLOAT_PARAMS(8, 20 + 3)) expands with the intended
 * precedence.  arm_althp is not set here; add it after the macro if needed.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = (E),                                           \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                         \
    .exp_max        = (1 << (E)) - 1,                                \
    .frac_size      = (F),                                           \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                 \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),       \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1), \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1, \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1
547
/* Half precision: 5 exponent bits, 10 fraction bits. */
548static const FloatFmt float16_params = {
549 FLOAT_PARAMS(5, 10)
550};
551
6fed16b2
AB
/* Same field sizes as float16_params, with the arm_althp modifier set. */
552static const FloatFmt float16_params_ahp = {
553 FLOAT_PARAMS(5, 10),
554 .arm_althp = true
555};
556
a90119b5
AB
/* Single precision: 8 exponent bits, 23 fraction bits. */
557static const FloatFmt float32_params = {
558 FLOAT_PARAMS(8, 23)
559};
560
/* Double precision: 11 exponent bits, 52 fraction bits. */
561static const FloatFmt float64_params = {
562 FLOAT_PARAMS(11, 52)
563};
564
6fff2167
AB
565/* Unpack a float to parts, but do not canonicalize. */
566static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
567{
568 const int sign_pos = fmt.frac_size + fmt.exp_size;
569
570 return (FloatParts) {
571 .cls = float_class_unclassified,
572 .sign = extract64(raw, sign_pos, 1),
573 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
574 .frac = extract64(raw, 0, fmt.frac_size),
575 };
576}
577
578static inline FloatParts float16_unpack_raw(float16 f)
579{
580 return unpack_raw(float16_params, f);
581}
582
583static inline FloatParts float32_unpack_raw(float32 f)
584{
585 return unpack_raw(float32_params, f);
586}
587
588static inline FloatParts float64_unpack_raw(float64 f)
589{
590 return unpack_raw(float64_params, f);
591}
592
593/* Pack a float from parts, but do not canonicalize. */
594static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
595{
596 const int sign_pos = fmt.frac_size + fmt.exp_size;
597 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
598 return deposit64(ret, sign_pos, 1, p.sign);
599}
600
601static inline float16 float16_pack_raw(FloatParts p)
602{
603 return make_float16(pack_raw(float16_params, p));
604}
605
606static inline float32 float32_pack_raw(FloatParts p)
607{
608 return make_float32(pack_raw(float32_params, p));
609}
610
611static inline float64 float64_pack_raw(FloatParts p)
612{
613 return make_float64(pack_raw(float64_params, p));
614}
615
0664335a
RH
616/*----------------------------------------------------------------------------
617| Functions and definitions to determine: (1) whether tininess for underflow
618| is detected before or after rounding by default, (2) what (if anything)
619| happens when exceptions are raised, (3) how signaling NaNs are distinguished
620| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
621| are propagated from function inputs to output. These details are target-
622| specific.
623*----------------------------------------------------------------------------*/
00f43279 624#include "softfloat-specialize.inc.c"
0664335a 625
6fff2167 626/* Canonicalize EXP and FRAC, setting CLS. */
f9943c7f
EC
627static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
628 float_status *status)
6fff2167 629{
ca3a3d5a 630 if (part.exp == parm->exp_max && !parm->arm_althp) {
6fff2167
AB
631 if (part.frac == 0) {
632 part.cls = float_class_inf;
633 } else {
94933df0 634 part.frac <<= parm->frac_shift;
298b468e
RH
635 part.cls = (parts_is_snan_frac(part.frac, status)
636 ? float_class_snan : float_class_qnan);
6fff2167
AB
637 }
638 } else if (part.exp == 0) {
639 if (likely(part.frac == 0)) {
640 part.cls = float_class_zero;
641 } else if (status->flush_inputs_to_zero) {
642 float_raise(float_flag_input_denormal, status);
643 part.cls = float_class_zero;
644 part.frac = 0;
645 } else {
646 int shift = clz64(part.frac) - 1;
647 part.cls = float_class_normal;
648 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
649 part.frac <<= shift;
650 }
651 } else {
652 part.cls = float_class_normal;
653 part.exp -= parm->exp_bias;
654 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
655 }
656 return part;
657}
658
659/* Round and uncanonicalize a floating-point number by parts. There
660 * are FRAC_SHIFT bits that may require rounding at the bottom of the
661 * fraction; these bits will be removed. The exponent will be biased
662 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
663 */
664
/*
 * Round the canonical value P to the precision described by PARM, using the
 * rounding mode and flush/tininess settings in S, and convert exponent and
 * fraction back to their field encodings.  Flags collected along the way
 * are raised once, through float_raise(), just before returning.
 */
665static FloatParts round_canonical(FloatParts p, float_status *s,
666 const FloatFmt *parm)
667{
5d64abb3 668 const uint64_t frac_lsb = parm->frac_lsb;
6fff2167
AB
669 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
670 const uint64_t round_mask = parm->round_mask;
671 const uint64_t roundeven_mask = parm->roundeven_mask;
672 const int exp_max = parm->exp_max;
673 const int frac_shift = parm->frac_shift;
674 uint64_t frac, inc;
675 int exp, flags = 0;
676 bool overflow_norm;
677
678 frac = p.frac;
679 exp = p.exp;
680
681 switch (p.cls) {
682 case float_class_normal:
/*
 * Per rounding mode, choose the increment added before truncation (inc)
 * and whether an overflow yields the largest finite value (overflow_norm
 * true) instead of infinity.
 */
683 switch (s->float_rounding_mode) {
684 case float_round_nearest_even:
685 overflow_norm = false;
686 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
687 break;
688 case float_round_ties_away:
689 overflow_norm = false;
690 inc = frac_lsbm1;
691 break;
692 case float_round_to_zero:
693 overflow_norm = true;
694 inc = 0;
695 break;
696 case float_round_up:
697 inc = p.sign ? 0 : round_mask;
698 overflow_norm = p.sign;
699 break;
700 case float_round_down:
701 inc = p.sign ? round_mask : 0;
702 overflow_norm = !p.sign;
703 break;
5d64abb3
RH
704 case float_round_to_odd:
705 overflow_norm = true;
706 inc = frac & frac_lsb ? 0 : round_mask;
707 break;
6fff2167
AB
708 default:
709 g_assert_not_reached();
710 }
711
/* Re-bias the exponent; a positive biased exponent takes the normal path. */
712 exp += parm->exp_bias;
713 if (likely(exp > 0)) {
714 if (frac & round_mask) {
715 flags |= float_flag_inexact;
716 frac += inc;
/* The increment carried into the overflow bit: shift back and bump exp. */
717 if (frac & DECOMPOSED_OVERFLOW_BIT) {
718 frac >>= 1;
719 exp++;
720 }
721 }
722 frac >>= frac_shift;
723
ca3a3d5a
AB
724 if (parm->arm_althp) {
725 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
726 if (unlikely(exp > exp_max)) {
727 /* Overflow. Return the maximum normal. */
/* Note: this assignment replaces any flags gathered so far. */
728 flags = float_flag_invalid;
729 exp = exp_max;
730 frac = -1;
731 }
732 } else if (unlikely(exp >= exp_max)) {
6fff2167
AB
733 flags |= float_flag_overflow | float_flag_inexact;
734 if (overflow_norm) {
735 exp = exp_max - 1;
736 frac = -1;
737 } else {
738 p.cls = float_class_inf;
739 goto do_inf;
740 }
741 }
/* Biased exponent <= 0 with flush_to_zero set: produce zero and flag it. */
742 } else if (s->flush_to_zero) {
743 flags |= float_flag_output_denormal;
744 p.cls = float_class_zero;
745 goto do_zero;
746 } else {
/*
 * Biased exponent <= 0 without flush_to_zero.  is_tiny is true when
 * tininess is detected before rounding, or exp < 0, or adding inc would
 * not carry into DECOMPOSED_OVERFLOW_BIT.
 */
a828b373 747 bool is_tiny = s->tininess_before_rounding
6fff2167
AB
748 || (exp < 0)
749 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
750
751 shift64RightJamming(frac, 1 - exp, &frac);
752 if (frac & round_mask) {
753 /* Need to recompute round-to-even. */
5d64abb3
RH
754 switch (s->float_rounding_mode) {
755 case float_round_nearest_even:
6fff2167
AB
756 inc = ((frac & roundeven_mask) != frac_lsbm1
757 ? frac_lsbm1 : 0);
5d64abb3
RH
758 break;
759 case float_round_to_odd:
760 inc = frac & frac_lsb ? 0 : round_mask;
761 break;
3dede407
RH
762 default:
763 break;
6fff2167
AB
764 }
765 flags |= float_flag_inexact;
766 frac += inc;
767 }
768
/* Exponent field is 1 if rounding set the implicit bit, else 0. */
769 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
770 frac >>= frac_shift;
771
/* Underflow is flagged only when the result is both tiny and inexact. */
772 if (is_tiny && (flags & float_flag_inexact)) {
773 flags |= float_flag_underflow;
774 }
775 if (exp == 0 && frac == 0) {
776 p.cls = float_class_zero;
777 }
778 }
779 break;
780
781 case float_class_zero:
782 do_zero:
783 exp = 0;
784 frac = 0;
785 break;
786
787 case float_class_inf:
788 do_inf:
/* Infinity is asserted never to occur for an arm_althp format. */
ca3a3d5a 789 assert(!parm->arm_althp);
6fff2167
AB
790 exp = exp_max;
791 frac = 0;
792 break;
793
794 case float_class_qnan:
795 case float_class_snan:
/* NaN is asserted never to occur for an arm_althp format. */
ca3a3d5a 796 assert(!parm->arm_althp);
6fff2167 797 exp = exp_max;
/* Undo the left shift applied to NaN fractions in sf_canonicalize(). */
94933df0 798 frac >>= parm->frac_shift;
6fff2167
AB
799 break;
800
801 default:
802 g_assert_not_reached();
803 }
804
805 float_raise(flags, s);
806 p.exp = exp;
807 p.frac = frac;
808 return p;
809}
810
6fed16b2
AB
811/* Explicit FloatFmt version */
812static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
813 const FloatFmt *params)
814{
f9943c7f 815 return sf_canonicalize(float16_unpack_raw(f), params, s);
6fed16b2
AB
816}
817
6fff2167
AB
818static FloatParts float16_unpack_canonical(float16 f, float_status *s)
819{
6fed16b2
AB
820 return float16a_unpack_canonical(f, s, &float16_params);
821}
822
823static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
824 const FloatFmt *params)
825{
826 return float16_pack_raw(round_canonical(p, s, params));
6fff2167
AB
827}
828
829static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
830{
6fed16b2 831 return float16a_round_pack_canonical(p, s, &float16_params);
6fff2167
AB
832}
833
834static FloatParts float32_unpack_canonical(float32 f, float_status *s)
835{
f9943c7f 836 return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
6fff2167
AB
837}
838
839static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
840{
0bcfbcbe 841 return float32_pack_raw(round_canonical(p, s, &float32_params));
6fff2167
AB
842}
843
844static FloatParts float64_unpack_canonical(float64 f, float_status *s)
845{
f9943c7f 846 return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
6fff2167
AB
847}
848
849static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
850{
0bcfbcbe 851 return float64_pack_raw(round_canonical(p, s, &float64_params));
6fff2167
AB
852}
853
dbe4d53a
AB
854static FloatParts return_nan(FloatParts a, float_status *s)
855{
856 switch (a.cls) {
857 case float_class_snan:
858 s->float_exception_flags |= float_flag_invalid;
0bcfbcbe 859 a = parts_silence_nan(a, s);
dbe4d53a
AB
860 /* fall through */
861 case float_class_qnan:
862 if (s->default_nan_mode) {
f7e598e2 863 return parts_default_nan(s);
dbe4d53a
AB
864 }
865 break;
866
867 default:
868 g_assert_not_reached();
869 }
870 return a;
871}
872
6fff2167
AB
873static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
874{
875 if (is_snan(a.cls) || is_snan(b.cls)) {
876 s->float_exception_flags |= float_flag_invalid;
877 }
878
879 if (s->default_nan_mode) {
f7e598e2 880 return parts_default_nan(s);
6fff2167 881 } else {
4f251cfd 882 if (pickNaN(a.cls, b.cls,
6fff2167
AB
883 a.frac > b.frac ||
884 (a.frac == b.frac && a.sign < b.sign))) {
885 a = b;
886 }
0bcfbcbe
RH
887 if (is_snan(a.cls)) {
888 return parts_silence_nan(a, s);
889 }
6fff2167
AB
890 }
891 return a;
892}
893
d446830a
AB
894static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
895 bool inf_zero, float_status *s)
896{
1839189b
PM
897 int which;
898
d446830a
AB
899 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
900 s->float_exception_flags |= float_flag_invalid;
901 }
902
3bd2dec1 903 which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
1839189b 904
d446830a 905 if (s->default_nan_mode) {
1839189b
PM
906 /* Note that this check is after pickNaNMulAdd so that function
907 * has an opportunity to set the Invalid flag.
908 */
f7e598e2 909 which = 3;
1839189b 910 }
d446830a 911
1839189b
PM
912 switch (which) {
913 case 0:
914 break;
915 case 1:
916 a = b;
917 break;
918 case 2:
919 a = c;
920 break;
921 case 3:
f7e598e2 922 return parts_default_nan(s);
1839189b
PM
923 default:
924 g_assert_not_reached();
d446830a 925 }
1839189b 926
0bcfbcbe
RH
927 if (is_snan(a.cls)) {
928 return parts_silence_nan(a, s);
929 }
d446830a
AB
930 return a;
931}
932
6fff2167
AB
933/*
934 * Returns the result of adding or subtracting the values of the
935 * floating-point values `a' and `b'. The operation is performed
936 * according to the IEC/IEEE Standard for Binary Floating-Point
937 * Arithmetic.
938 */
939
940static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
941 float_status *s)
942{
943 bool a_sign = a.sign;
944 bool b_sign = b.sign ^ subtract;
945
946 if (a_sign != b_sign) {
947 /* Subtraction */
948
949 if (a.cls == float_class_normal && b.cls == float_class_normal) {
950 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
951 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
952 a.frac = a.frac - b.frac;
953 } else {
954 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
955 a.frac = b.frac - a.frac;
956 a.exp = b.exp;
957 a_sign ^= 1;
958 }
959
960 if (a.frac == 0) {
961 a.cls = float_class_zero;
962 a.sign = s->float_rounding_mode == float_round_down;
963 } else {
964 int shift = clz64(a.frac) - 1;
965 a.frac = a.frac << shift;
966 a.exp = a.exp - shift;
967 a.sign = a_sign;
968 }
969 return a;
970 }
971 if (is_nan(a.cls) || is_nan(b.cls)) {
972 return pick_nan(a, b, s);
973 }
974 if (a.cls == float_class_inf) {
975 if (b.cls == float_class_inf) {
976 float_raise(float_flag_invalid, s);
f7e598e2 977 return parts_default_nan(s);
6fff2167
AB
978 }
979 return a;
980 }
981 if (a.cls == float_class_zero && b.cls == float_class_zero) {
982 a.sign = s->float_rounding_mode == float_round_down;
983 return a;
984 }
985 if (a.cls == float_class_zero || b.cls == float_class_inf) {
986 b.sign = a_sign ^ 1;
987 return b;
988 }
989 if (b.cls == float_class_zero) {
990 return a;
991 }
992 } else {
993 /* Addition */
994 if (a.cls == float_class_normal && b.cls == float_class_normal) {
995 if (a.exp > b.exp) {
996 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
997 } else if (a.exp < b.exp) {
998 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
999 a.exp = b.exp;
1000 }
1001 a.frac += b.frac;
1002 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
64d450a0 1003 shift64RightJamming(a.frac, 1, &a.frac);
6fff2167
AB
1004 a.exp += 1;
1005 }
1006 return a;
1007 }
1008 if (is_nan(a.cls) || is_nan(b.cls)) {
1009 return pick_nan(a, b, s);
1010 }
1011 if (a.cls == float_class_inf || b.cls == float_class_zero) {
1012 return a;
1013 }
1014 if (b.cls == float_class_inf || a.cls == float_class_zero) {
1015 b.sign = b_sign;
1016 return b;
1017 }
1018 }
1019 g_assert_not_reached();
1020}
1021
1022/*
1023 * Returns the result of adding or subtracting the floating-point
1024 * values `a' and `b'. The operation is performed according to the
1025 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1026 */
1027
97ff87c0 1028float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
6fff2167
AB
1029{
1030 FloatParts pa = float16_unpack_canonical(a, status);
1031 FloatParts pb = float16_unpack_canonical(b, status);
1032 FloatParts pr = addsub_floats(pa, pb, false, status);
1033
1034 return float16_round_pack_canonical(pr, status);
1035}
1036
1b615d48
EC
1037float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
1038{
1039 FloatParts pa = float16_unpack_canonical(a, status);
1040 FloatParts pb = float16_unpack_canonical(b, status);
1041 FloatParts pr = addsub_floats(pa, pb, true, status);
1042
1043 return float16_round_pack_canonical(pr, status);
1044}
1045
1046static float32 QEMU_SOFTFLOAT_ATTR
1047soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
6fff2167
AB
1048{
1049 FloatParts pa = float32_unpack_canonical(a, status);
1050 FloatParts pb = float32_unpack_canonical(b, status);
1b615d48 1051 FloatParts pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1052
1053 return float32_round_pack_canonical(pr, status);
1054}
1055
1b615d48
EC
1056static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
1057{
1058 return soft_f32_addsub(a, b, false, status);
1059}
1060
1061static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1062{
1063 return soft_f32_addsub(a, b, true, status);
1064}
1065
1066static float64 QEMU_SOFTFLOAT_ATTR
1067soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
6fff2167
AB
1068{
1069 FloatParts pa = float64_unpack_canonical(a, status);
1070 FloatParts pb = float64_unpack_canonical(b, status);
1b615d48 1071 FloatParts pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1072
1073 return float64_round_pack_canonical(pr, status);
1074}
1075
1b615d48 1076static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1077{
1b615d48
EC
1078 return soft_f64_addsub(a, b, false, status);
1079}
6fff2167 1080
1b615d48
EC
1081static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1082{
1083 return soft_f64_addsub(a, b, true, status);
6fff2167
AB
1084}
1085
/* Single-precision addition performed with the host's `float' arithmetic. */
static float hard_f32_add(float a, float b)
{
    const float sum = a + b;

    return sum;
}
6fff2167 1090
1b615d48
EC
/* Single-precision subtraction performed with the host's `float' arithmetic. */
static float hard_f32_sub(float a, float b)
{
    const float diff = a - b;

    return diff;
}
1095
/* Double-precision addition performed with the host's `double' arithmetic. */
static double hard_f64_add(double a, double b)
{
    const double sum = a + b;

    return sum;
}
6fff2167 1100
1b615d48
EC
/* Double-precision subtraction performed with the host's `double' arithmetic. */
static double hard_f64_sub(double a, double b)
{
    const double diff = a - b;

    return diff;
}
1105
b240c9c4 1106static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1b615d48
EC
1107{
1108 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1109 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1110 }
1111 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1112}
1113
b240c9c4 1114static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1b615d48
EC
1115{
1116 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1117 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1118 } else {
1119 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1120 }
1121}
1122
1123static float32 float32_addsub(float32 a, float32 b, float_status *s,
1124 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1125{
1126 return float32_gen2(a, b, s, hard, soft,
b240c9c4 1127 f32_is_zon2, f32_addsubmul_post);
1b615d48
EC
1128}
1129
1130static float64 float64_addsub(float64 a, float64 b, float_status *s,
1131 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1132{
1133 return float64_gen2(a, b, s, hard, soft,
b240c9c4 1134 f64_is_zon2, f64_addsubmul_post);
1b615d48
EC
1135}
1136
1137float32 QEMU_FLATTEN
1138float32_add(float32 a, float32 b, float_status *s)
1139{
1140 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1141}
1142
1143float32 QEMU_FLATTEN
1144float32_sub(float32 a, float32 b, float_status *s)
1145{
1146 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1147}
1148
1149float64 QEMU_FLATTEN
1150float64_add(float64 a, float64 b, float_status *s)
1151{
1152 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1153}
1154
1155float64 QEMU_FLATTEN
1156float64_sub(float64 a, float64 b, float_status *s)
1157{
1158 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1159}
1160
74d707e2
AB
1161/*
1162 * Returns the result of multiplying the floating-point values `a' and
1163 * `b'. The operation is performed according to the IEC/IEEE Standard
1164 * for Binary Floating-Point Arithmetic.
1165 */
1166
1167static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
1168{
1169 bool sign = a.sign ^ b.sign;
1170
1171 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1172 uint64_t hi, lo;
1173 int exp = a.exp + b.exp;
1174
1175 mul64To128(a.frac, b.frac, &hi, &lo);
1176 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1177 if (lo & DECOMPOSED_OVERFLOW_BIT) {
1178 shift64RightJamming(lo, 1, &lo);
1179 exp += 1;
1180 }
1181
1182 /* Re-use a */
1183 a.exp = exp;
1184 a.sign = sign;
1185 a.frac = lo;
1186 return a;
1187 }
1188 /* handle all the NaN cases */
1189 if (is_nan(a.cls) || is_nan(b.cls)) {
1190 return pick_nan(a, b, s);
1191 }
1192 /* Inf * Zero == NaN */
1193 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
1194 (a.cls == float_class_zero && b.cls == float_class_inf)) {
1195 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1196 return parts_default_nan(s);
74d707e2
AB
1197 }
1198 /* Multiply by 0 or Inf */
1199 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1200 a.sign = sign;
1201 return a;
1202 }
1203 if (b.cls == float_class_inf || b.cls == float_class_zero) {
1204 b.sign = sign;
1205 return b;
1206 }
1207 g_assert_not_reached();
1208}
1209
97ff87c0 1210float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2
AB
1211{
1212 FloatParts pa = float16_unpack_canonical(a, status);
1213 FloatParts pb = float16_unpack_canonical(b, status);
1214 FloatParts pr = mul_floats(pa, pb, status);
1215
1216 return float16_round_pack_canonical(pr, status);
1217}
1218
2dfabc86
EC
1219static float32 QEMU_SOFTFLOAT_ATTR
1220soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2
AB
1221{
1222 FloatParts pa = float32_unpack_canonical(a, status);
1223 FloatParts pb = float32_unpack_canonical(b, status);
1224 FloatParts pr = mul_floats(pa, pb, status);
1225
1226 return float32_round_pack_canonical(pr, status);
1227}
1228
2dfabc86
EC
1229static float64 QEMU_SOFTFLOAT_ATTR
1230soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2
AB
1231{
1232 FloatParts pa = float64_unpack_canonical(a, status);
1233 FloatParts pb = float64_unpack_canonical(b, status);
1234 FloatParts pr = mul_floats(pa, pb, status);
1235
1236 return float64_round_pack_canonical(pr, status);
1237}
1238
2dfabc86
EC
/* Single-precision multiplication performed with the host's `float' arithmetic. */
static float hard_f32_mul(float a, float b)
{
    const float product = a * b;

    return product;
}
1243
/* Double-precision multiplication performed with the host's `double' arithmetic. */
static double hard_f64_mul(double a, double b)
{
    const double product = a * b;

    return product;
}
1248
2dfabc86
EC
1249float32 QEMU_FLATTEN
1250float32_mul(float32 a, float32 b, float_status *s)
1251{
1252 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
b240c9c4 1253 f32_is_zon2, f32_addsubmul_post);
2dfabc86
EC
1254}
1255
1256float64 QEMU_FLATTEN
1257float64_mul(float64 a, float64 b, float_status *s)
1258{
1259 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
b240c9c4 1260 f64_is_zon2, f64_addsubmul_post);
2dfabc86
EC
1261}
1262
d446830a
AB
/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b' then adding 'c', with no intermediate rounding step after the
 * multiplication. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 * The flags argument allows the caller to select negation of the
 * addend, the intermediate product, or the final result. (The
 * difference between this and having the caller do a separate
 * negation is that negating externally will flip the sign bit on
 * NaNs.)
 *
 * Flags handled here: float_muladd_negate_c, float_muladd_negate_product,
 * float_muladd_negate_result and float_muladd_halve_result (the latter
 * decrements the result exponent by one). The returned value is not
 * rounded; the callers round and pack it.
 */

static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
                                int flags, float_status *s)
{
    /* True when {a, b} is exactly one infinity and one zero, in any order. */
    bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
                    ((1 << float_class_inf) | (1 << float_class_zero));
    bool p_sign;
    bool sign_flip = flags & float_muladd_negate_result;
    FloatClass p_class;
    uint64_t hi, lo;
    int p_exp;

    /* It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
        return pick_nan_muladd(a, b, c, inf_zero, s);
    }

    /* No NaN inputs: Inf * 0 is simply invalid. */
    if (inf_zero) {
        s->float_exception_flags |= float_flag_invalid;
        return parts_default_nan(s);
    }

    if (flags & float_muladd_negate_c) {
        c.sign ^= 1;
    }

    p_sign = a.sign ^ b.sign;

    if (flags & float_muladd_negate_product) {
        p_sign ^= 1;
    }

    /* Classify the product a * b without computing it. */
    if (a.cls == float_class_inf || b.cls == float_class_inf) {
        p_class = float_class_inf;
    } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
        p_class = float_class_zero;
    } else {
        p_class = float_class_normal;
    }

    if (c.cls == float_class_inf) {
        if (p_class == float_class_inf && p_sign != c.sign) {
            /* Inf + (-Inf) */
            s->float_exception_flags |= float_flag_invalid;
            return parts_default_nan(s);
        } else {
            /* The infinite addend determines the result. */
            a.cls = float_class_inf;
            a.sign = c.sign ^ sign_flip;
            return a;
        }
    }

    if (p_class == float_class_inf) {
        /* Infinite product with a finite addend. */
        a.cls = float_class_inf;
        a.sign = p_sign ^ sign_flip;
        return a;
    }

    if (p_class == float_class_zero) {
        /* Zero product: the result is the addend (or a suitably signed zero). */
        if (c.cls == float_class_zero) {
            if (p_sign != c.sign) {
                /* (+0) + (-0): negative only when rounding down. */
                p_sign = s->float_rounding_mode == float_round_down;
            }
            c.sign = p_sign;
        } else if (flags & float_muladd_halve_result) {
            c.exp -= 1;
        }
        c.sign ^= sign_flip;
        return c;
    }

    /* a & b should be normals now... */
    assert(a.cls == float_class_normal &&
           b.cls == float_class_normal);

    p_exp = a.exp + b.exp;

    /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
     * result.
     */
    mul64To128(a.frac, b.frac, &hi, &lo);
    /* binary point now at bit 124 */

    /* check for overflow */
    if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
        shift128RightJamming(hi, lo, 1, &hi, &lo);
        p_exp += 1;
    }

    /* + add/sub */
    if (c.cls == float_class_zero) {
        /* move binary point back to 62 */
        shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
    } else {
        int exp_diff = p_exp - c.exp;
        if (p_sign == c.sign) {
            /* Addition */
            if (exp_diff <= 0) {
                /*
                 * The addend has the larger (or equal) exponent: shift the
                 * product down to c's scale and binary point in one step.
                 */
                shift128RightJamming(hi, lo,
                                     DECOMPOSED_BINARY_POINT - exp_diff,
                                     &hi, &lo);
                lo += c.frac;
                p_exp = c.exp;
            } else {
                uint64_t c_hi, c_lo;
                /* shift c to the same binary point as the product (124) */
                c_hi = c.frac >> 2;
                c_lo = 0;
                shift128RightJamming(c_hi, c_lo,
                                     exp_diff,
                                     &c_hi, &c_lo);
                add128(hi, lo, c_hi, c_lo, &hi, &lo);
                /* move binary point back to 62 */
                shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
            }

            if (lo & DECOMPOSED_OVERFLOW_BIT) {
                shift64RightJamming(lo, 1, &lo);
                p_exp += 1;
            }

        } else {
            /* Subtraction */
            uint64_t c_hi, c_lo;
            /* make C binary point match product at bit 124 */
            c_hi = c.frac >> 2;
            c_lo = 0;

            if (exp_diff <= 0) {
                shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
                if (exp_diff == 0
                    &&
                    (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
                    /* Same exponent and product >= addend: product - c. */
                    sub128(hi, lo, c_hi, c_lo, &hi, &lo);
                } else {
                    /* Addend is larger: c - product, result takes c's sign. */
                    sub128(c_hi, c_lo, hi, lo, &hi, &lo);
                    p_sign ^= 1;
                    p_exp = c.exp;
                }
            } else {
                /* Product has the larger exponent: align c, then product - c. */
                shift128RightJamming(c_hi, c_lo,
                                     exp_diff,
                                     &c_hi, &c_lo);
                sub128(hi, lo, c_hi, c_lo, &hi, &lo);
            }

            if (hi == 0 && lo == 0) {
                /* Exact cancellation: negative zero only when rounding down. */
                a.cls = float_class_zero;
                a.sign = s->float_rounding_mode == float_round_down;
                a.sign ^= sign_flip;
                return a;
            } else {
                int shift;
                /* Count leading zeros across the 128-bit difference. */
                if (hi != 0) {
                    shift = clz64(hi);
                } else {
                    shift = clz64(lo) + 64;
                }
                /* Normalizing to a binary point of 124 is the
                   correct adjust for the exponent.  However since we're
                   shifting, we might as well put the binary point back
                   at 62 where we really want it.  Therefore shift as
                   if we're leaving 1 bit at the top of the word, but
                   adjust the exponent as if we're leaving 3 bits.  */
                shift -= 1;
                if (shift >= 64) {
                    lo = lo << (shift - 64);
                } else {
                    hi = (hi << shift) | (lo >> (64 - shift));
                    /* Fold any bits shifted out of `lo' into the low bit. */
                    lo = hi | ((lo << shift) != 0);
                }
                p_exp -= shift - 2;
            }
        }
    }

    if (flags & float_muladd_halve_result) {
        p_exp -= 1;
    }

    /* finally prepare our result */
    a.cls = float_class_normal;
    a.sign = p_sign ^ sign_flip;
    a.exp = p_exp;
    a.frac = lo;

    return a;
}
1465
97ff87c0 1466float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
d446830a
AB
1467 int flags, float_status *status)
1468{
1469 FloatParts pa = float16_unpack_canonical(a, status);
1470 FloatParts pb = float16_unpack_canonical(b, status);
1471 FloatParts pc = float16_unpack_canonical(c, status);
1472 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1473
1474 return float16_round_pack_canonical(pr, status);
1475}
1476
ccf770ba
EC
1477static float32 QEMU_SOFTFLOAT_ATTR
1478soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1479 float_status *status)
d446830a
AB
1480{
1481 FloatParts pa = float32_unpack_canonical(a, status);
1482 FloatParts pb = float32_unpack_canonical(b, status);
1483 FloatParts pc = float32_unpack_canonical(c, status);
1484 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1485
1486 return float32_round_pack_canonical(pr, status);
1487}
1488
ccf770ba
EC
1489static float64 QEMU_SOFTFLOAT_ATTR
1490soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1491 float_status *status)
d446830a
AB
1492{
1493 FloatParts pa = float64_unpack_canonical(a, status);
1494 FloatParts pb = float64_unpack_canonical(b, status);
1495 FloatParts pc = float64_unpack_canonical(c, status);
1496 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1497
1498 return float64_round_pack_canonical(pr, status);
1499}
1500
f6b3b108
EC
/*
 * When true, float32_muladd() and float64_muladd() skip the host fma
 * fast path and always use the soft implementation.
 * NOTE(review): the code that sets this is not in this part of the file;
 * presumably it is decided once at startup -- confirm.
 */
static bool force_soft_fma;
1502
ccf770ba
EC
/*
 * Single-precision fused multiply-add: xa * xb + xc, modified by `flags'.
 *
 * Tries the host fmaf() first and falls back to soft_f32_muladd() when
 * the fast path cannot be used: can_use_fpu() rejects the status, the
 * halve-result flag is set, an operand fails f32_is_zon3(),
 * force_soft_fma is set, or the host result is at or below FLT_MIN in
 * magnitude.
 */
float32 QEMU_FLATTEN
float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
{
    union_float32 ua, ub, uc, ur;

    ua.s = xa;
    ub.s = xb;
    uc.s = xc;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }
    if (unlikely(flags & float_muladd_halve_result)) {
        goto soft;
    }

    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
        goto soft;
    }

    if (unlikely(force_soft_fma)) {
        goto soft;
    }

    /*
     * When (a || b) == 0, there's no need to check for under/over flow,
     * since we know the addend is (normal || 0) and the product is 0.
     */
    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
        union_float32 up;
        bool prod_sign;

        /* Build the correctly signed zero product, then add c on the host. */
        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
        prod_sign ^= !!(flags & float_muladd_negate_product);
        up.s = float32_set_sign(float32_zero, prod_sign);

        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }
        ur.h = up.h + uc.h;
    } else {
        /* Saved so the soft fallback sees the operands before negation. */
        union_float32 ua_orig = ua;
        union_float32 uc_orig = uc;

        if (flags & float_muladd_negate_product) {
            ua.h = -ua.h;
        }
        if (flags & float_muladd_negate_c) {
            uc.h = -uc.h;
        }

        ur.h = fmaf(ua.h, ub.h, uc.h);

        if (unlikely(f32_is_inf(ur))) {
            s->float_exception_flags |= float_flag_overflow;
        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
            /*
             * Tiny result: redo it in softfloat. The soft path applies
             * `flags' itself, so undo the negations made above.
             */
            ua = ua_orig;
            uc = uc_orig;
            goto soft;
        }
    }
    if (flags & float_muladd_negate_result) {
        return float32_chs(ur.s);
    }
    return ur.s;

 soft:
    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
}
1573
1574float64 QEMU_FLATTEN
1575float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1576{
1577 union_float64 ua, ub, uc, ur;
1578
1579 ua.s = xa;
1580 ub.s = xb;
1581 uc.s = xc;
1582
1583 if (unlikely(!can_use_fpu(s))) {
1584 goto soft;
1585 }
1586 if (unlikely(flags & float_muladd_halve_result)) {
1587 goto soft;
1588 }
1589
1590 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1591 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1592 goto soft;
1593 }
f6b3b108
EC
1594
1595 if (unlikely(force_soft_fma)) {
1596 goto soft;
1597 }
1598
ccf770ba
EC
1599 /*
1600 * When (a || b) == 0, there's no need to check for under/over flow,
1601 * since we know the addend is (normal || 0) and the product is 0.
1602 */
1603 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1604 union_float64 up;
1605 bool prod_sign;
1606
1607 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1608 prod_sign ^= !!(flags & float_muladd_negate_product);
1609 up.s = float64_set_sign(float64_zero, prod_sign);
1610
1611 if (flags & float_muladd_negate_c) {
1612 uc.h = -uc.h;
1613 }
1614 ur.h = up.h + uc.h;
1615 } else {
896f51fb
KC
1616 union_float64 ua_orig = ua;
1617 union_float64 uc_orig = uc;
1618
ccf770ba
EC
1619 if (flags & float_muladd_negate_product) {
1620 ua.h = -ua.h;
1621 }
1622 if (flags & float_muladd_negate_c) {
1623 uc.h = -uc.h;
1624 }
1625
1626 ur.h = fma(ua.h, ub.h, uc.h);
1627
1628 if (unlikely(f64_is_inf(ur))) {
1629 s->float_exception_flags |= float_flag_overflow;
1630 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
1631 ua = ua_orig;
1632 uc = uc_orig;
ccf770ba
EC
1633 goto soft;
1634 }
1635 }
1636 if (flags & float_muladd_negate_result) {
1637 return float64_chs(ur.s);
1638 }
1639 return ur.s;
1640
1641 soft:
1642 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1643}
1644
cf07323d
AB
1645/*
1646 * Returns the result of dividing the floating-point value `a' by the
1647 * corresponding value `b'. The operation is performed according to
1648 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1649 */
1650
1651static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1652{
1653 bool sign = a.sign ^ b.sign;
1654
1655 if (a.cls == float_class_normal && b.cls == float_class_normal) {
5dfbc9e4 1656 uint64_t n0, n1, q, r;
cf07323d 1657 int exp = a.exp - b.exp;
5dfbc9e4
RH
1658
1659 /*
1660 * We want a 2*N / N-bit division to produce exactly an N-bit
1661 * result, so that we do not lose any precision and so that we
1662 * do not have to renormalize afterward. If A.frac < B.frac,
1663 * then division would produce an (N-1)-bit result; shift A left
1664 * by one to produce the an N-bit result, and decrement the
1665 * exponent to match.
1666 *
1667 * The udiv_qrnnd algorithm that we're using requires normalization,
1668 * i.e. the msb of the denominator must be set. Since we know that
1669 * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
1670 * by one (more), and the remainder must be shifted right by one.
1671 */
cf07323d
AB
1672 if (a.frac < b.frac) {
1673 exp -= 1;
5dfbc9e4 1674 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
cf07323d 1675 } else {
5dfbc9e4 1676 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
cf07323d 1677 }
5dfbc9e4
RH
1678 q = udiv_qrnnd(&r, n1, n0, b.frac << 1);
1679
1680 /*
1681 * Set lsb if there is a remainder, to set inexact.
1682 * As mentioned above, to find the actual value of the remainder we
1683 * would need to shift right, but (1) we are only concerned about
1684 * non-zero-ness, and (2) the remainder will always be even because
1685 * both inputs to the division primitive are even.
1686 */
1687 a.frac = q | (r != 0);
cf07323d
AB
1688 a.sign = sign;
1689 a.exp = exp;
1690 return a;
1691 }
1692 /* handle all the NaN cases */
1693 if (is_nan(a.cls) || is_nan(b.cls)) {
1694 return pick_nan(a, b, s);
1695 }
1696 /* 0/0 or Inf/Inf */
1697 if (a.cls == b.cls
1698 &&
1699 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1700 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1701 return parts_default_nan(s);
cf07323d 1702 }
9cb4e398
AB
1703 /* Inf / x or 0 / x */
1704 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1705 a.sign = sign;
1706 return a;
1707 }
cf07323d
AB
1708 /* Div 0 => Inf */
1709 if (b.cls == float_class_zero) {
1710 s->float_exception_flags |= float_flag_divbyzero;
1711 a.cls = float_class_inf;
1712 a.sign = sign;
1713 return a;
1714 }
cf07323d
AB
1715 /* Div by Inf */
1716 if (b.cls == float_class_inf) {
1717 a.cls = float_class_zero;
1718 a.sign = sign;
1719 return a;
1720 }
1721 g_assert_not_reached();
1722}
1723
1724float16 float16_div(float16 a, float16 b, float_status *status)
1725{
1726 FloatParts pa = float16_unpack_canonical(a, status);
1727 FloatParts pb = float16_unpack_canonical(b, status);
1728 FloatParts pr = div_floats(pa, pb, status);
1729
1730 return float16_round_pack_canonical(pr, status);
1731}
1732
4a629561
EC
1733static float32 QEMU_SOFTFLOAT_ATTR
1734soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d
AB
1735{
1736 FloatParts pa = float32_unpack_canonical(a, status);
1737 FloatParts pb = float32_unpack_canonical(b, status);
1738 FloatParts pr = div_floats(pa, pb, status);
1739
1740 return float32_round_pack_canonical(pr, status);
1741}
1742
4a629561
EC
1743static float64 QEMU_SOFTFLOAT_ATTR
1744soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d
AB
1745{
1746 FloatParts pa = float64_unpack_canonical(a, status);
1747 FloatParts pb = float64_unpack_canonical(b, status);
1748 FloatParts pr = div_floats(pa, pb, status);
1749
1750 return float64_round_pack_canonical(pr, status);
1751}
1752
4a629561
EC
/* Single-precision division performed with the host's `float' arithmetic. */
static float hard_f32_div(float a, float b)
{
    const float quotient = a / b;

    return quotient;
}
1757
/* Double-precision division performed with the host's `double' arithmetic. */
static double hard_f64_div(double a, double b)
{
    const double quotient = a / b;

    return quotient;
}
1762
1763static bool f32_div_pre(union_float32 a, union_float32 b)
1764{
1765 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1766 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1767 fpclassify(b.h) == FP_NORMAL;
1768 }
1769 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1770}
1771
1772static bool f64_div_pre(union_float64 a, union_float64 b)
1773{
1774 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1775 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1776 fpclassify(b.h) == FP_NORMAL;
1777 }
1778 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1779}
1780
1781static bool f32_div_post(union_float32 a, union_float32 b)
1782{
1783 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1784 return fpclassify(a.h) != FP_ZERO;
1785 }
1786 return !float32_is_zero(a.s);
1787}
1788
1789static bool f64_div_post(union_float64 a, union_float64 b)
1790{
1791 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1792 return fpclassify(a.h) != FP_ZERO;
1793 }
1794 return !float64_is_zero(a.s);
1795}
1796
1797float32 QEMU_FLATTEN
1798float32_div(float32 a, float32 b, float_status *s)
1799{
1800 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
b240c9c4 1801 f32_div_pre, f32_div_post);
4a629561
EC
1802}
1803
1804float64 QEMU_FLATTEN
1805float64_div(float64 a, float64 b, float_status *s)
1806{
1807 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
b240c9c4 1808 f64_div_pre, f64_div_post);
4a629561
EC
1809}
1810
6fed16b2
AB
1811/*
1812 * Float to Float conversions
1813 *
1814 * Returns the result of converting one float format to another. The
1815 * conversion is performed according to the IEC/IEEE Standard for
1816 * Binary Floating-Point Arithmetic.
1817 *
1818 * The float_to_float helper only needs to take care of raising
1819 * invalid exceptions and handling the conversion on NaNs.
1820 */
1821
1822static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1823 float_status *s)
1824{
1825 if (dstf->arm_althp) {
1826 switch (a.cls) {
1827 case float_class_qnan:
1828 case float_class_snan:
1829 /* There is no NaN in the destination format. Raise Invalid
1830 * and return a zero with the sign of the input NaN.
1831 */
1832 s->float_exception_flags |= float_flag_invalid;
1833 a.cls = float_class_zero;
1834 a.frac = 0;
1835 a.exp = 0;
1836 break;
1837
1838 case float_class_inf:
1839 /* There is no Inf in the destination format. Raise Invalid
1840 * and return the maximum normal with the correct sign.
1841 */
1842 s->float_exception_flags |= float_flag_invalid;
1843 a.cls = float_class_normal;
1844 a.exp = dstf->exp_max;
1845 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1846 break;
1847
1848 default:
1849 break;
1850 }
1851 } else if (is_nan(a.cls)) {
1852 if (is_snan(a.cls)) {
1853 s->float_exception_flags |= float_flag_invalid;
1854 a = parts_silence_nan(a, s);
1855 }
1856 if (s->default_nan_mode) {
1857 return parts_default_nan(s);
1858 }
1859 }
1860 return a;
1861}
1862
1863float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1864{
1865 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1866 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1867 FloatParts pr = float_to_float(p, &float32_params, s);
1868 return float32_round_pack_canonical(pr, s);
1869}
1870
1871float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1872{
1873 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1874 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1875 FloatParts pr = float_to_float(p, &float64_params, s);
1876 return float64_round_pack_canonical(pr, s);
1877}
1878
1879float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1880{
1881 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1882 FloatParts p = float32_unpack_canonical(a, s);
1883 FloatParts pr = float_to_float(p, fmt16, s);
1884 return float16a_round_pack_canonical(pr, s, fmt16);
1885}
1886
21381dcf
MK
1887static float64 QEMU_SOFTFLOAT_ATTR
1888soft_float32_to_float64(float32 a, float_status *s)
6fed16b2
AB
1889{
1890 FloatParts p = float32_unpack_canonical(a, s);
1891 FloatParts pr = float_to_float(p, &float64_params, s);
1892 return float64_round_pack_canonical(pr, s);
1893}
1894
21381dcf
MK
1895float64 float32_to_float64(float32 a, float_status *s)
1896{
1897 if (likely(float32_is_normal(a))) {
1898 /* Widening conversion can never produce inexact results. */
1899 union_float32 uf;
1900 union_float64 ud;
1901 uf.s = a;
1902 ud.h = uf.h;
1903 return ud.s;
1904 } else if (float32_is_zero(a)) {
1905 return float64_set_sign(float64_zero, float32_is_neg(a));
1906 } else {
1907 return soft_float32_to_float64(a, s);
1908 }
1909}
1910
6fed16b2
AB
1911float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1912{
1913 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1914 FloatParts p = float64_unpack_canonical(a, s);
1915 FloatParts pr = float_to_float(p, fmt16, s);
1916 return float16a_round_pack_canonical(pr, s, fmt16);
1917}
1918
1919float32 float64_to_float32(float64 a, float_status *s)
1920{
1921 FloatParts p = float64_unpack_canonical(a, s);
1922 FloatParts pr = float_to_float(p, &float32_params, s);
1923 return float32_round_pack_canonical(pr, s);
1924}
1925
dbe4d53a
AB
/*
 * Rounds the floating-point value `a' to an integer, and returns the
 * result as a floating-point value. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 *
 * `rmode' is the rounding mode to apply. `scale' is added to the
 * exponent before rounding (i.e. the value is multiplied by 2**scale),
 * after being clamped to [-0x10000, 0x10000]. The inexact flag is
 * raised in `s' whenever fraction bits are discarded.
 */

static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode,
                               int scale, float_status *s)
{
    switch (a.cls) {
    case float_class_qnan:
    case float_class_snan:
        return return_nan(a, s);

    case float_class_zero:
    case float_class_inf:
        /* already "integral" */
        break;

    case float_class_normal:
        scale = MIN(MAX(scale, -0x10000), 0x10000);
        a.exp += scale;

        if (a.exp >= DECOMPOSED_BINARY_POINT) {
            /* already integral */
            break;
        }
        if (a.exp < 0) {
            /* Set when the magnitude rounds up to 1 rather than down to 0. */
            bool one;
            /* all fractional */
            s->float_exception_flags |= float_flag_inexact;
            switch (rmode) {
            case float_round_nearest_even:
                /* Only strictly above one half; exactly one half ties to 0. */
                one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
                break;
            case float_round_ties_away:
                /* One half and above. */
                one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
                break;
            case float_round_to_zero:
                one = false;
                break;
            case float_round_up:
                one = !a.sign;
                break;
            case float_round_down:
                one = a.sign;
                break;
            case float_round_to_odd:
                one = true;
                break;
            default:
                g_assert_not_reached();
            }

            if (one) {
                a.frac = DECOMPOSED_IMPLICIT_BIT;
                a.exp = 0;
            } else {
                a.cls = float_class_zero;
            }
        } else {
            /* Bit of `frac' that has weight 1 (the integer lsb). */
            uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
            /* Bit with weight one half. */
            uint64_t frac_lsbm1 = frac_lsb >> 1;
            /* The integer lsb plus every fraction bit below it. */
            uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
            /* The fraction bits only. */
            uint64_t rnd_mask = rnd_even_mask >> 1;
            uint64_t inc;

            switch (rmode) {
            case float_round_nearest_even:
                /*
                 * Add one half, except on an exact tie with an even
                 * integer part, which must stay where it is.
                 */
                inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
                break;
            case float_round_ties_away:
                inc = frac_lsbm1;
                break;
            case float_round_to_zero:
                inc = 0;
                break;
            case float_round_up:
                inc = a.sign ? 0 : rnd_mask;
                break;
            case float_round_down:
                inc = a.sign ? rnd_mask : 0;
                break;
            case float_round_to_odd:
                /* Already odd: truncate. Even: bump to the next integer. */
                inc = a.frac & frac_lsb ? 0 : rnd_mask;
                break;
            default:
                g_assert_not_reached();
            }

            /* Nothing to do (and no inexact) unless fraction bits are set. */
            if (a.frac & rnd_mask) {
                s->float_exception_flags |= float_flag_inexact;
                a.frac += inc;
                a.frac &= ~rnd_mask;
                if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
                    /* The increment carried out of the top: renormalize. */
                    a.frac >>= 1;
                    a.exp++;
                }
            }
        }
        break;
    default:
        g_assert_not_reached();
    }
    return a;
}
2033
2034float16 float16_round_to_int(float16 a, float_status *s)
2035{
2036 FloatParts pa = float16_unpack_canonical(a, s);
2f6c74be 2037 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2038 return float16_round_pack_canonical(pr, s);
2039}
2040
2041float32 float32_round_to_int(float32 a, float_status *s)
2042{
2043 FloatParts pa = float32_unpack_canonical(a, s);
2f6c74be 2044 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2045 return float32_round_pack_canonical(pr, s);
2046}
2047
2048float64 float64_round_to_int(float64 a, float_status *s)
2049{
2050 FloatParts pa = float64_unpack_canonical(a, s);
2f6c74be 2051 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2052 return float64_round_pack_canonical(pr, s);
2053}
2054
ab52f973
AB
2055/*
2056 * Returns the result of converting the floating-point value `a' to
2057 * the two's complement integer format. The conversion is performed
2058 * according to the IEC/IEEE Standard for Binary Floating-Point
2059 * Arithmetic---which means in particular that the conversion is
2060 * rounded according to the current rounding mode. If `a' is a NaN,
2061 * the largest positive integer is returned. Otherwise, if the
2062 * conversion overflows, the largest integer with the same sign as `a'
2063 * is returned.
2064*/
2065
3dede407
RH
2066static int64_t round_to_int_and_pack(FloatParts in, FloatRoundMode rmode,
2067 int scale, int64_t min, int64_t max,
ab52f973
AB
2068 float_status *s)
2069{
2070 uint64_t r;
2071 int orig_flags = get_float_exception_flags(s);
2f6c74be 2072 FloatParts p = round_to_int(in, rmode, scale, s);
ab52f973
AB
2073
2074 switch (p.cls) {
2075 case float_class_snan:
2076 case float_class_qnan:
801bc563 2077 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2078 return max;
2079 case float_class_inf:
801bc563 2080 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2081 return p.sign ? min : max;
2082 case float_class_zero:
2083 return 0;
2084 case float_class_normal:
2085 if (p.exp < DECOMPOSED_BINARY_POINT) {
2086 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2087 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2088 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2089 } else {
2090 r = UINT64_MAX;
2091 }
2092 if (p.sign) {
33358375 2093 if (r <= -(uint64_t) min) {
ab52f973
AB
2094 return -r;
2095 } else {
2096 s->float_exception_flags = orig_flags | float_flag_invalid;
2097 return min;
2098 }
2099 } else {
33358375 2100 if (r <= max) {
ab52f973
AB
2101 return r;
2102 } else {
2103 s->float_exception_flags = orig_flags | float_flag_invalid;
2104 return max;
2105 }
2106 }
2107 default:
2108 g_assert_not_reached();
2109 }
2110}
2111
3dede407 2112int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2113 float_status *s)
2114{
2115 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2116 rmode, scale, INT16_MIN, INT16_MAX, s);
2117}
2118
3dede407 2119int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2120 float_status *s)
2121{
2122 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2123 rmode, scale, INT32_MIN, INT32_MAX, s);
2124}
2125
3dede407 2126int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2127 float_status *s)
2128{
2129 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2130 rmode, scale, INT64_MIN, INT64_MAX, s);
2131}
2132
3dede407 2133int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2134 float_status *s)
2135{
2136 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2137 rmode, scale, INT16_MIN, INT16_MAX, s);
2138}
2139
3dede407 2140int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2141 float_status *s)
2142{
2143 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2144 rmode, scale, INT32_MIN, INT32_MAX, s);
2145}
2146
3dede407 2147int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2148 float_status *s)
2149{
2150 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2151 rmode, scale, INT64_MIN, INT64_MAX, s);
2152}
2153
3dede407 2154int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2155 float_status *s)
2156{
2157 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2158 rmode, scale, INT16_MIN, INT16_MAX, s);
2159}
2160
3dede407 2161int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2162 float_status *s)
2163{
2164 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2165 rmode, scale, INT32_MIN, INT32_MAX, s);
2166}
2167
3dede407 2168int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2169 float_status *s)
2170{
2171 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2172 rmode, scale, INT64_MIN, INT64_MAX, s);
2173}
2174
2175int16_t float16_to_int16(float16 a, float_status *s)
2176{
2177 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2178}
2179
2180int32_t float16_to_int32(float16 a, float_status *s)
2181{
2182 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2183}
2184
2185int64_t float16_to_int64(float16 a, float_status *s)
2186{
2187 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2188}
2189
2190int16_t float32_to_int16(float32 a, float_status *s)
2191{
2192 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2193}
2194
2195int32_t float32_to_int32(float32 a, float_status *s)
2196{
2197 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2198}
2199
2200int64_t float32_to_int64(float32 a, float_status *s)
2201{
2202 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2203}
2204
2205int16_t float64_to_int16(float64 a, float_status *s)
2206{
2207 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2208}
2209
2210int32_t float64_to_int32(float64 a, float_status *s)
2211{
2212 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2213}
2214
2215int64_t float64_to_int64(float64 a, float_status *s)
2216{
2217 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2218}
2219
2220int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2221{
2222 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2223}
2224
2225int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2226{
2227 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2228}
2229
2230int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2231{
2232 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2233}
2234
2f6c74be
RH
2235int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2236{
2237 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2238}
ab52f973 2239
2f6c74be
RH
2240int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2241{
2242 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2243}
2244
2245int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2246{
2247 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2248}
2249
2250int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2251{
2252 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2253}
ab52f973 2254
2f6c74be
RH
2255int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2256{
2257 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2258}
ab52f973 2259
2f6c74be
RH
2260int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2261{
2262 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2263}
ab52f973
AB
2264
2265/*
2266 * Returns the result of converting the floating-point value `a' to
2267 * the unsigned integer format. The conversion is performed according
2268 * to the IEC/IEEE Standard for Binary Floating-Point
2269 * Arithmetic---which means in particular that the conversion is
2270 * rounded according to the current rounding mode. If `a' is a NaN,
2271 * the largest unsigned integer is returned. Otherwise, if the
2272 * conversion overflows, the largest unsigned integer is returned. If
2273 * the 'a' is negative, the result is rounded and zero is returned;
2274 * values that do not round to zero will raise the inexact exception
2275 * flag.
2276 */
2277
3dede407
RH
2278static uint64_t round_to_uint_and_pack(FloatParts in, FloatRoundMode rmode,
2279 int scale, uint64_t max,
2280 float_status *s)
ab52f973
AB
2281{
2282 int orig_flags = get_float_exception_flags(s);
2f6c74be
RH
2283 FloatParts p = round_to_int(in, rmode, scale, s);
2284 uint64_t r;
ab52f973
AB
2285
2286 switch (p.cls) {
2287 case float_class_snan:
2288 case float_class_qnan:
2289 s->float_exception_flags = orig_flags | float_flag_invalid;
2290 return max;
2291 case float_class_inf:
801bc563 2292 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2293 return p.sign ? 0 : max;
2294 case float_class_zero:
2295 return 0;
2296 case float_class_normal:
ab52f973
AB
2297 if (p.sign) {
2298 s->float_exception_flags = orig_flags | float_flag_invalid;
2299 return 0;
2300 }
2301
2302 if (p.exp < DECOMPOSED_BINARY_POINT) {
2303 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2304 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2305 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2306 } else {
2307 s->float_exception_flags = orig_flags | float_flag_invalid;
2308 return max;
2309 }
2310
2311 /* For uint64 this will never trip, but if p.exp is too large
2312 * to shift a decomposed fraction we shall have exited via the
2313 * 3rd leg above.
2314 */
2315 if (r > max) {
2316 s->float_exception_flags = orig_flags | float_flag_invalid;
2317 return max;
ab52f973 2318 }
2f6c74be 2319 return r;
ab52f973
AB
2320 default:
2321 g_assert_not_reached();
2322 }
2323}
2324
3dede407 2325uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2326 float_status *s)
2327{
2328 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2329 rmode, scale, UINT16_MAX, s);
2330}
2331
3dede407 2332uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2333 float_status *s)
2334{
2335 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2336 rmode, scale, UINT32_MAX, s);
2337}
2338
3dede407 2339uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2340 float_status *s)
2341{
2342 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2343 rmode, scale, UINT64_MAX, s);
2344}
2345
3dede407 2346uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2347 float_status *s)
2348{
2349 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2350 rmode, scale, UINT16_MAX, s);
2351}
2352
3dede407 2353uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2354 float_status *s)
2355{
2356 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2357 rmode, scale, UINT32_MAX, s);
2358}
2359
3dede407 2360uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2361 float_status *s)
2362{
2363 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2364 rmode, scale, UINT64_MAX, s);
2365}
2366
3dede407 2367uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2368 float_status *s)
2369{
2370 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2371 rmode, scale, UINT16_MAX, s);
2372}
2373
3dede407 2374uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2375 float_status *s)
2376{
2377 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2378 rmode, scale, UINT32_MAX, s);
2379}
2380
3dede407 2381uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2382 float_status *s)
2383{
2384 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2385 rmode, scale, UINT64_MAX, s);
2386}
2387
2388uint16_t float16_to_uint16(float16 a, float_status *s)
2389{
2390 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2391}
2392
2393uint32_t float16_to_uint32(float16 a, float_status *s)
2394{
2395 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2396}
2397
2398uint64_t float16_to_uint64(float16 a, float_status *s)
2399{
2400 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2401}
2402
2403uint16_t float32_to_uint16(float32 a, float_status *s)
2404{
2405 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2406}
2407
2408uint32_t float32_to_uint32(float32 a, float_status *s)
2409{
2410 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2411}
2412
2413uint64_t float32_to_uint64(float32 a, float_status *s)
2414{
2415 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2416}
2417
2418uint16_t float64_to_uint16(float64 a, float_status *s)
2419{
2420 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2421}
2422
2423uint32_t float64_to_uint32(float64 a, float_status *s)
2424{
2425 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2426}
2427
2428uint64_t float64_to_uint64(float64 a, float_status *s)
2429{
2430 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2431}
2432
2433uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2434{
2435 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2436}
2437
2438uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2439{
2440 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2441}
2442
2443uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2444{
2445 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2446}
2447
2448uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2449{
2450 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2451}
2452
2453uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2454{
2455 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2456}
2457
2458uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2459{
2460 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2461}
2462
2463uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2464{
2465 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2466}
2467
2468uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2469{
2470 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2471}
2472
2473uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2474{
2475 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2476}
ab52f973 2477
c02e1fb8
AB
2478/*
2479 * Integer to float conversions
2480 *
2481 * Returns the result of converting the two's complement integer `a'
2482 * to the floating-point format. The conversion is performed according
2483 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2484 */
2485
2abdfe24 2486static FloatParts int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 2487{
2abdfe24
RH
2488 FloatParts r = { .sign = false };
2489
c02e1fb8
AB
2490 if (a == 0) {
2491 r.cls = float_class_zero;
c02e1fb8 2492 } else {
2abdfe24
RH
2493 uint64_t f = a;
2494 int shift;
2495
2496 r.cls = float_class_normal;
c02e1fb8 2497 if (a < 0) {
2abdfe24 2498 f = -f;
c02e1fb8 2499 r.sign = true;
c02e1fb8 2500 }
2abdfe24
RH
2501 shift = clz64(f) - 1;
2502 scale = MIN(MAX(scale, -0x10000), 0x10000);
2503
2504 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2505 r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
c02e1fb8
AB
2506 }
2507
2508 return r;
2509}
2510
2abdfe24 2511float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2512{
2abdfe24 2513 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2514 return float16_round_pack_canonical(pa, status);
2515}
2516
2abdfe24
RH
2517float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2518{
2519 return int64_to_float16_scalbn(a, scale, status);
2520}
2521
2522float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2523{
2524 return int64_to_float16_scalbn(a, scale, status);
2525}
2526
2527float16 int64_to_float16(int64_t a, float_status *status)
2528{
2529 return int64_to_float16_scalbn(a, 0, status);
2530}
2531
c02e1fb8
AB
2532float16 int32_to_float16(int32_t a, float_status *status)
2533{
2abdfe24 2534 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2535}
2536
2537float16 int16_to_float16(int16_t a, float_status *status)
2538{
2abdfe24 2539 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2540}
2541
2abdfe24 2542float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2543{
2abdfe24 2544 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2545 return float32_round_pack_canonical(pa, status);
2546}
2547
2abdfe24
RH
2548float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
2549{
2550 return int64_to_float32_scalbn(a, scale, status);
2551}
2552
2553float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
2554{
2555 return int64_to_float32_scalbn(a, scale, status);
2556}
2557
2558float32 int64_to_float32(int64_t a, float_status *status)
2559{
2560 return int64_to_float32_scalbn(a, 0, status);
2561}
2562
c02e1fb8
AB
2563float32 int32_to_float32(int32_t a, float_status *status)
2564{
2abdfe24 2565 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2566}
2567
2568float32 int16_to_float32(int16_t a, float_status *status)
2569{
2abdfe24 2570 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2571}
2572
2abdfe24 2573float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2574{
2abdfe24 2575 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2576 return float64_round_pack_canonical(pa, status);
2577}
2578
2abdfe24
RH
2579float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
2580{
2581 return int64_to_float64_scalbn(a, scale, status);
2582}
2583
2584float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
2585{
2586 return int64_to_float64_scalbn(a, scale, status);
2587}
2588
2589float64 int64_to_float64(int64_t a, float_status *status)
2590{
2591 return int64_to_float64_scalbn(a, 0, status);
2592}
2593
c02e1fb8
AB
2594float64 int32_to_float64(int32_t a, float_status *status)
2595{
2abdfe24 2596 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2597}
2598
2599float64 int16_to_float64(int16_t a, float_status *status)
2600{
2abdfe24 2601 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2602}
2603
2604
2605/*
2606 * Unsigned Integer to float conversions
2607 *
2608 * Returns the result of converting the unsigned integer `a' to the
2609 * floating-point format. The conversion is performed according to the
2610 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2611 */
2612
2abdfe24 2613static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 2614{
2abdfe24 2615 FloatParts r = { .sign = false };
c02e1fb8
AB
2616
2617 if (a == 0) {
2618 r.cls = float_class_zero;
2619 } else {
2abdfe24 2620 scale = MIN(MAX(scale, -0x10000), 0x10000);
c02e1fb8 2621 r.cls = float_class_normal;
2abdfe24
RH
2622 if ((int64_t)a < 0) {
2623 r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
2624 shift64RightJamming(a, 1, &a);
c02e1fb8
AB
2625 r.frac = a;
2626 } else {
2abdfe24
RH
2627 int shift = clz64(a) - 1;
2628 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2629 r.frac = a << shift;
c02e1fb8
AB
2630 }
2631 }
2632
2633 return r;
2634}
2635
2abdfe24 2636float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2637{
2abdfe24 2638 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2639 return float16_round_pack_canonical(pa, status);
2640}
2641
2abdfe24
RH
2642float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
2643{
2644 return uint64_to_float16_scalbn(a, scale, status);
2645}
2646
2647float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
2648{
2649 return uint64_to_float16_scalbn(a, scale, status);
2650}
2651
2652float16 uint64_to_float16(uint64_t a, float_status *status)
2653{
2654 return uint64_to_float16_scalbn(a, 0, status);
2655}
2656
c02e1fb8
AB
2657float16 uint32_to_float16(uint32_t a, float_status *status)
2658{
2abdfe24 2659 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2660}
2661
2662float16 uint16_to_float16(uint16_t a, float_status *status)
2663{
2abdfe24 2664 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2665}
2666
2abdfe24 2667float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2668{
2abdfe24 2669 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2670 return float32_round_pack_canonical(pa, status);
2671}
2672
2abdfe24
RH
2673float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
2674{
2675 return uint64_to_float32_scalbn(a, scale, status);
2676}
2677
2678float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
2679{
2680 return uint64_to_float32_scalbn(a, scale, status);
2681}
2682
2683float32 uint64_to_float32(uint64_t a, float_status *status)
2684{
2685 return uint64_to_float32_scalbn(a, 0, status);
2686}
2687
c02e1fb8
AB
2688float32 uint32_to_float32(uint32_t a, float_status *status)
2689{
2abdfe24 2690 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2691}
2692
2693float32 uint16_to_float32(uint16_t a, float_status *status)
2694{
2abdfe24 2695 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2696}
2697
2abdfe24 2698float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2699{
2abdfe24 2700 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2701 return float64_round_pack_canonical(pa, status);
2702}
2703
2abdfe24
RH
2704float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
2705{
2706 return uint64_to_float64_scalbn(a, scale, status);
2707}
2708
2709float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
2710{
2711 return uint64_to_float64_scalbn(a, scale, status);
2712}
2713
2714float64 uint64_to_float64(uint64_t a, float_status *status)
2715{
2716 return uint64_to_float64_scalbn(a, 0, status);
2717}
2718
c02e1fb8
AB
2719float64 uint32_to_float64(uint32_t a, float_status *status)
2720{
2abdfe24 2721 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2722}
2723
2724float64 uint16_to_float64(uint16_t a, float_status *status)
2725{
2abdfe24 2726 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2727}
2728
89360067
AB
/* Float Min/Max */
/* min() and max() functions. These can't be implemented as
 * 'compare and pick one input' because that would mishandle
 * NaNs and +0 vs -0.
 *
 * minnum() and maxnum() functions. These are similar to the min()
 * and max() functions but if one of the arguments is a QNaN and
 * the other is numerical then the numerical argument is returned.
 * SNaNs will get quietened before being returned.
 * minnum() and maxnum() correspond to the IEEE 754-2008 minNum()
 * and maxNum() operations. min() and max() are the typical min/max
 * semantics provided by many CPUs which predate that specification.
 *
 * minnummag() and maxnummag() functions correspond to minNumMag()
 * and maxNumMag() from IEEE 754-2008.
 */
2745static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
2746 bool ieee, bool ismag, float_status *s)
2747{
2748 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
2749 if (ieee) {
2750 /* Takes two floating-point values `a' and `b', one of
2751 * which is a NaN, and returns the appropriate NaN
2752 * result. If either `a' or `b' is a signaling NaN,
2753 * the invalid exception is raised.
2754 */
2755 if (is_snan(a.cls) || is_snan(b.cls)) {
2756 return pick_nan(a, b, s);
2757 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
2758 return b;
2759 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
2760 return a;
2761 }
2762 }
2763 return pick_nan(a, b, s);
2764 } else {
2765 int a_exp, b_exp;
89360067
AB
2766
2767 switch (a.cls) {
2768 case float_class_normal:
2769 a_exp = a.exp;
2770 break;
2771 case float_class_inf:
2772 a_exp = INT_MAX;
2773 break;
2774 case float_class_zero:
2775 a_exp = INT_MIN;
2776 break;
2777 default:
2778 g_assert_not_reached();
2779 break;
2780 }
2781 switch (b.cls) {
2782 case float_class_normal:
2783 b_exp = b.exp;
2784 break;
2785 case float_class_inf:
2786 b_exp = INT_MAX;
2787 break;
2788 case float_class_zero:
2789 b_exp = INT_MIN;
2790 break;
2791 default:
2792 g_assert_not_reached();
2793 break;
2794 }
2795
6245327a
EC
2796 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
2797 bool a_less = a_exp < b_exp;
2798 if (a_exp == b_exp) {
2799 a_less = a.frac < b.frac;
2800 }
2801 return a_less ^ ismin ? b : a;
89360067
AB
2802 }
2803
6245327a 2804 if (a.sign == b.sign) {
89360067
AB
2805 bool a_less = a_exp < b_exp;
2806 if (a_exp == b_exp) {
2807 a_less = a.frac < b.frac;
2808 }
6245327a 2809 return a.sign ^ a_less ^ ismin ? b : a;
89360067 2810 } else {
6245327a 2811 return a.sign ^ ismin ? b : a;
89360067
AB
2812 }
2813 }
2814}
2815
2816#define MINMAX(sz, name, ismin, isiee, ismag) \
2817float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
2818 float_status *s) \
2819{ \
2820 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2821 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2822 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
2823 \
2824 return float ## sz ## _round_pack_canonical(pr, s); \
2825}
2826
2827MINMAX(16, min, true, false, false)
2828MINMAX(16, minnum, true, true, false)
2829MINMAX(16, minnummag, true, true, true)
2830MINMAX(16, max, false, false, false)
2831MINMAX(16, maxnum, false, true, false)
2832MINMAX(16, maxnummag, false, true, true)
2833
2834MINMAX(32, min, true, false, false)
2835MINMAX(32, minnum, true, true, false)
2836MINMAX(32, minnummag, true, true, true)
2837MINMAX(32, max, false, false, false)
2838MINMAX(32, maxnum, false, true, false)
2839MINMAX(32, maxnummag, false, true, true)
2840
2841MINMAX(64, min, true, false, false)
2842MINMAX(64, minnum, true, true, false)
2843MINMAX(64, minnummag, true, true, true)
2844MINMAX(64, max, false, false, false)
2845MINMAX(64, maxnum, false, true, false)
2846MINMAX(64, maxnummag, false, true, true)
2847
2848#undef MINMAX
2849
0c4c9092
AB
2850/* Floating point compare */
2851static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
2852 float_status *s)
2853{
2854 if (is_nan(a.cls) || is_nan(b.cls)) {
2855 if (!is_quiet ||
2856 a.cls == float_class_snan ||
2857 b.cls == float_class_snan) {
2858 s->float_exception_flags |= float_flag_invalid;
2859 }
2860 return float_relation_unordered;
2861 }
2862
2863 if (a.cls == float_class_zero) {
2864 if (b.cls == float_class_zero) {
2865 return float_relation_equal;
2866 }
2867 return b.sign ? float_relation_greater : float_relation_less;
2868 } else if (b.cls == float_class_zero) {
2869 return a.sign ? float_relation_less : float_relation_greater;
2870 }
2871
2872 /* The only really important thing about infinity is its sign. If
2873 * both are infinities the sign marks the smallest of the two.
2874 */
2875 if (a.cls == float_class_inf) {
2876 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
2877 return float_relation_equal;
2878 }
2879 return a.sign ? float_relation_less : float_relation_greater;
2880 } else if (b.cls == float_class_inf) {
2881 return b.sign ? float_relation_greater : float_relation_less;
2882 }
2883
2884 if (a.sign != b.sign) {
2885 return a.sign ? float_relation_less : float_relation_greater;
2886 }
2887
2888 if (a.exp == b.exp) {
2889 if (a.frac == b.frac) {
2890 return float_relation_equal;
2891 }
2892 if (a.sign) {
2893 return a.frac > b.frac ?
2894 float_relation_less : float_relation_greater;
2895 } else {
2896 return a.frac > b.frac ?
2897 float_relation_greater : float_relation_less;
2898 }
2899 } else {
2900 if (a.sign) {
2901 return a.exp > b.exp ? float_relation_less : float_relation_greater;
2902 } else {
2903 return a.exp > b.exp ? float_relation_greater : float_relation_less;
2904 }
2905 }
2906}
2907
d9fe9db9
EC
2908#define COMPARE(name, attr, sz) \
2909static int attr \
2910name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
0c4c9092
AB
2911{ \
2912 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2913 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
d9fe9db9 2914 return compare_floats(pa, pb, is_quiet, s); \
0c4c9092
AB
2915}
2916
d9fe9db9
EC
2917COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
2918COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
2919COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
0c4c9092
AB
2920
2921#undef COMPARE
2922
d9fe9db9
EC
2923int float16_compare(float16 a, float16 b, float_status *s)
2924{
2925 return soft_f16_compare(a, b, false, s);
2926}
2927
2928int float16_compare_quiet(float16 a, float16 b, float_status *s)
2929{
2930 return soft_f16_compare(a, b, true, s);
2931}
2932
2933static int QEMU_FLATTEN
2934f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
2935{
2936 union_float32 ua, ub;
2937
2938 ua.s = xa;
2939 ub.s = xb;
2940
2941 if (QEMU_NO_HARDFLOAT) {
2942 goto soft;
2943 }
2944
2945 float32_input_flush2(&ua.s, &ub.s, s);
2946 if (isgreaterequal(ua.h, ub.h)) {
2947 if (isgreater(ua.h, ub.h)) {
2948 return float_relation_greater;
2949 }
2950 return float_relation_equal;
2951 }
2952 if (likely(isless(ua.h, ub.h))) {
2953 return float_relation_less;
2954 }
2955 /* The only condition remaining is unordered.
2956 * Fall through to set flags.
2957 */
2958 soft:
2959 return soft_f32_compare(ua.s, ub.s, is_quiet, s);
2960}
2961
2962int float32_compare(float32 a, float32 b, float_status *s)
2963{
2964 return f32_compare(a, b, false, s);
2965}
2966
2967int float32_compare_quiet(float32 a, float32 b, float_status *s)
2968{
2969 return f32_compare(a, b, true, s);
2970}
2971
2972static int QEMU_FLATTEN
2973f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
2974{
2975 union_float64 ua, ub;
2976
2977 ua.s = xa;
2978 ub.s = xb;
2979
2980 if (QEMU_NO_HARDFLOAT) {
2981 goto soft;
2982 }
2983
2984 float64_input_flush2(&ua.s, &ub.s, s);
2985 if (isgreaterequal(ua.h, ub.h)) {
2986 if (isgreater(ua.h, ub.h)) {
2987 return float_relation_greater;
2988 }
2989 return float_relation_equal;
2990 }
2991 if (likely(isless(ua.h, ub.h))) {
2992 return float_relation_less;
2993 }
2994 /* The only condition remaining is unordered.
2995 * Fall through to set flags.
2996 */
2997 soft:
2998 return soft_f64_compare(ua.s, ub.s, is_quiet, s);
2999}
3000
3001int float64_compare(float64 a, float64 b, float_status *s)
3002{
3003 return f64_compare(a, b, false, s);
3004}
3005
3006int float64_compare_quiet(float64 a, float64 b, float_status *s)
3007{
3008 return f64_compare(a, b, true, s);
3009}
3010
0bfc9f19
AB
3011/* Multiply A by 2 raised to the power N. */
3012static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
3013{
3014 if (unlikely(is_nan(a.cls))) {
3015 return return_nan(a, s);
3016 }
3017 if (a.cls == float_class_normal) {
ce8d4082
RH
3018 /* The largest float type (even though not supported by FloatParts)
3019 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3020 * still allows rounding to infinity, without allowing overflow
3021 * within the int32_t that backs FloatParts.exp.
3022 */
3023 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
3024 a.exp += n;
3025 }
3026 return a;
3027}
3028
3029float16 float16_scalbn(float16 a, int n, float_status *status)
3030{
3031 FloatParts pa = float16_unpack_canonical(a, status);
3032 FloatParts pr = scalbn_decomposed(pa, n, status);
3033 return float16_round_pack_canonical(pr, status);
3034}
3035
3036float32 float32_scalbn(float32 a, int n, float_status *status)
3037{
3038 FloatParts pa = float32_unpack_canonical(a, status);
3039 FloatParts pr = scalbn_decomposed(pa, n, status);
3040 return float32_round_pack_canonical(pr, status);
3041}
3042
3043float64 float64_scalbn(float64 a, int n, float_status *status)
3044{
3045 FloatParts pa = float64_unpack_canonical(a, status);
3046 FloatParts pr = scalbn_decomposed(pa, n, status);
3047 return float64_round_pack_canonical(pr, status);
3048}
3049
c13bb2da
AB
3050/*
3051 * Square Root
3052 *
3053 * The old softfloat code did an approximation step before zeroing in
3054 * on the final result. However for simpleness we just compute the
3055 * square root by iterating down from the implicit bit to enough extra
3056 * bits to ensure we get a correctly rounded result.
3057 *
3058 * This does mean however the calculation is slower than before,
3059 * especially for 64 bit floats.
3060 */
3061
3062static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
3063{
3064 uint64_t a_frac, r_frac, s_frac;
3065 int bit, last_bit;
3066
3067 if (is_nan(a.cls)) {
3068 return return_nan(a, s);
3069 }
3070 if (a.cls == float_class_zero) {
3071 return a; /* sqrt(+-0) = +-0 */
3072 }
3073 if (a.sign) {
3074 s->float_exception_flags |= float_flag_invalid;
f7e598e2 3075 return parts_default_nan(s);
c13bb2da
AB
3076 }
3077 if (a.cls == float_class_inf) {
3078 return a; /* sqrt(+inf) = +inf */
3079 }
3080
3081 assert(a.cls == float_class_normal);
3082
3083 /* We need two overflow bits at the top. Adding room for that is a
3084 * right shift. If the exponent is odd, we can discard the low bit
3085 * by multiplying the fraction by 2; that's a left shift. Combine
3086 * those and we shift right if the exponent is even.
3087 */
3088 a_frac = a.frac;
3089 if (!(a.exp & 1)) {
3090 a_frac >>= 1;
3091 }
3092 a.exp >>= 1;
3093
3094 /* Bit-by-bit computation of sqrt. */
3095 r_frac = 0;
3096 s_frac = 0;
3097
3098 /* Iterate from implicit bit down to the 3 extra bits to compute a
3099 * properly rounded result. Remember we've inserted one more bit
3100 * at the top, so these positions are one less.
3101 */
3102 bit = DECOMPOSED_BINARY_POINT - 1;
3103 last_bit = MAX(p->frac_shift - 4, 0);
3104 do {
3105 uint64_t q = 1ULL << bit;
3106 uint64_t t_frac = s_frac + q;
3107 if (t_frac <= a_frac) {
3108 s_frac = t_frac + q;
3109 a_frac -= t_frac;
3110 r_frac += q;
3111 }
3112 a_frac <<= 1;
3113 } while (--bit >= last_bit);
3114
3115 /* Undo the right shift done above. If there is any remaining
3116 * fraction, the result is inexact. Set the sticky bit.
3117 */
3118 a.frac = (r_frac << 1) + (a_frac != 0);
3119
3120 return a;
3121}
3122
97ff87c0 3123float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da
AB
3124{
3125 FloatParts pa = float16_unpack_canonical(a, status);
3126 FloatParts pr = sqrt_float(pa, status, &float16_params);
3127 return float16_round_pack_canonical(pr, status);
3128}
3129
f131bae8
EC
3130static float32 QEMU_SOFTFLOAT_ATTR
3131soft_f32_sqrt(float32 a, float_status *status)
c13bb2da
AB
3132{
3133 FloatParts pa = float32_unpack_canonical(a, status);
3134 FloatParts pr = sqrt_float(pa, status, &float32_params);
3135 return float32_round_pack_canonical(pr, status);
3136}
3137
f131bae8
EC
3138static float64 QEMU_SOFTFLOAT_ATTR
3139soft_f64_sqrt(float64 a, float_status *status)
c13bb2da
AB
3140{
3141 FloatParts pa = float64_unpack_canonical(a, status);
3142 FloatParts pr = sqrt_float(pa, status, &float64_params);
3143 return float64_round_pack_canonical(pr, status);
3144}
3145
f131bae8
EC
3146float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3147{
3148 union_float32 ua, ur;
3149
3150 ua.s = xa;
3151 if (unlikely(!can_use_fpu(s))) {
3152 goto soft;
3153 }
3154
3155 float32_input_flush1(&ua.s, s);
3156 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3157 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3158 fpclassify(ua.h) == FP_ZERO) ||
3159 signbit(ua.h))) {
3160 goto soft;
3161 }
3162 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3163 float32_is_neg(ua.s))) {
3164 goto soft;
3165 }
3166 ur.h = sqrtf(ua.h);
3167 return ur.s;
3168
3169 soft:
3170 return soft_f32_sqrt(ua.s, s);
3171}
3172
3173float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3174{
3175 union_float64 ua, ur;
3176
3177 ua.s = xa;
3178 if (unlikely(!can_use_fpu(s))) {
3179 goto soft;
3180 }
3181
3182 float64_input_flush1(&ua.s, s);
3183 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3184 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3185 fpclassify(ua.h) == FP_ZERO) ||
3186 signbit(ua.h))) {
3187 goto soft;
3188 }
3189 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3190 float64_is_neg(ua.s))) {
3191 goto soft;
3192 }
3193 ur.h = sqrt(ua.h);
3194 return ur.s;
3195
3196 soft:
3197 return soft_f64_sqrt(ua.s, s);
3198}
3199
0218a16e
RH
3200/*----------------------------------------------------------------------------
3201| The pattern for a default generated NaN.
3202*----------------------------------------------------------------------------*/
3203
3204float16 float16_default_nan(float_status *status)
3205{
3206 FloatParts p = parts_default_nan(status);
3207 p.frac >>= float16_params.frac_shift;
3208 return float16_pack_raw(p);
3209}
3210
3211float32 float32_default_nan(float_status *status)
3212{
3213 FloatParts p = parts_default_nan(status);
3214 p.frac >>= float32_params.frac_shift;
3215 return float32_pack_raw(p);
3216}
3217
3218float64 float64_default_nan(float_status *status)
3219{
3220 FloatParts p = parts_default_nan(status);
3221 p.frac >>= float64_params.frac_shift;
3222 return float64_pack_raw(p);
3223}
3224
3225float128 float128_default_nan(float_status *status)
3226{
3227 FloatParts p = parts_default_nan(status);
3228 float128 r;
3229
3230 /* Extrapolate from the choices made by parts_default_nan to fill
3231 * in the quad-floating format. If the low bit is set, assume we
3232 * want to set all non-snan bits.
3233 */
3234 r.low = -(p.frac & 1);
3235 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
e9321124 3236 r.high |= UINT64_C(0x7FFF000000000000);
0218a16e
RH
3237 r.high |= (uint64_t)p.sign << 63;
3238
3239 return r;
3240}
c13bb2da 3241
158142c2 3242/*----------------------------------------------------------------------------
377ed926
RH
3243| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3244*----------------------------------------------------------------------------*/
3245
3246float16 float16_silence_nan(float16 a, float_status *status)
3247{
3248 FloatParts p = float16_unpack_raw(a);
3249 p.frac <<= float16_params.frac_shift;
3250 p = parts_silence_nan(p, status);
3251 p.frac >>= float16_params.frac_shift;
3252 return float16_pack_raw(p);
3253}
3254
3255float32 float32_silence_nan(float32 a, float_status *status)
3256{
3257 FloatParts p = float32_unpack_raw(a);
3258 p.frac <<= float32_params.frac_shift;
3259 p = parts_silence_nan(p, status);
3260 p.frac >>= float32_params.frac_shift;
3261 return float32_pack_raw(p);
3262}
3263
3264float64 float64_silence_nan(float64 a, float_status *status)
3265{
3266 FloatParts p = float64_unpack_raw(a);
3267 p.frac <<= float64_params.frac_shift;
3268 p = parts_silence_nan(p, status);
3269 p.frac >>= float64_params.frac_shift;
3270 return float64_pack_raw(p);
3271}
3272
e6b405fe
AB
3273
3274/*----------------------------------------------------------------------------
3275| If `a' is denormal and we are in flush-to-zero mode then set the
3276| input-denormal exception and return zero. Otherwise just return the value.
3277*----------------------------------------------------------------------------*/
3278
3279static bool parts_squash_denormal(FloatParts p, float_status *status)
3280{
3281 if (p.exp == 0 && p.frac != 0) {
3282 float_raise(float_flag_input_denormal, status);
3283 return true;
3284 }
3285
3286 return false;
3287}
3288
3289float16 float16_squash_input_denormal(float16 a, float_status *status)
3290{
3291 if (status->flush_inputs_to_zero) {
3292 FloatParts p = float16_unpack_raw(a);
3293 if (parts_squash_denormal(p, status)) {
3294 return float16_set_sign(float16_zero, p.sign);
3295 }
3296 }
3297 return a;
3298}
3299
3300float32 float32_squash_input_denormal(float32 a, float_status *status)
3301{
3302 if (status->flush_inputs_to_zero) {
3303 FloatParts p = float32_unpack_raw(a);
3304 if (parts_squash_denormal(p, status)) {
3305 return float32_set_sign(float32_zero, p.sign);
3306 }
3307 }
3308 return a;
3309}
3310
3311float64 float64_squash_input_denormal(float64 a, float_status *status)
3312{
3313 if (status->flush_inputs_to_zero) {
3314 FloatParts p = float64_unpack_raw(a);
3315 if (parts_squash_denormal(p, status)) {
3316 return float64_set_sign(float64_zero, p.sign);
3317 }
3318 }
3319 return a;
3320}
3321
377ed926 3322/*----------------------------------------------------------------------------
158142c2
FB
3323| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3324| and 7, and returns the properly rounded 32-bit integer corresponding to the
3325| input. If `zSign' is 1, the input is negated before being converted to an
3326| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3327| is simply rounded to an integer, with the inexact exception raised if the
3328| input cannot be represented exactly as an integer. However, if the fixed-
3329| point input is too large, the invalid exception is raised and the largest
3330| positive or negative integer is returned.
3331*----------------------------------------------------------------------------*/
3332
c120391c
RH
3333static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
3334 float_status *status)
158142c2 3335{
8f506c70 3336 int8_t roundingMode;
c120391c 3337 bool roundNearestEven;
8f506c70 3338 int8_t roundIncrement, roundBits;
760e1416 3339 int32_t z;
158142c2 3340
a2f2d288 3341 roundingMode = status->float_rounding_mode;
158142c2 3342 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3343 switch (roundingMode) {
3344 case float_round_nearest_even:
f9288a76 3345 case float_round_ties_away:
dc355b76
PM
3346 roundIncrement = 0x40;
3347 break;
3348 case float_round_to_zero:
3349 roundIncrement = 0;
3350 break;
3351 case float_round_up:
3352 roundIncrement = zSign ? 0 : 0x7f;
3353 break;
3354 case float_round_down:
3355 roundIncrement = zSign ? 0x7f : 0;
3356 break;
5d64abb3
RH
3357 case float_round_to_odd:
3358 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
3359 break;
dc355b76
PM
3360 default:
3361 abort();
158142c2
FB
3362 }
3363 roundBits = absZ & 0x7F;
3364 absZ = ( absZ + roundIncrement )>>7;
3365 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
3366 z = absZ;
3367 if ( zSign ) z = - z;
3368 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 3369 float_raise(float_flag_invalid, status);
2c217da0 3370 return zSign ? INT32_MIN : INT32_MAX;
158142c2 3371 }
a2f2d288
PM
3372 if (roundBits) {
3373 status->float_exception_flags |= float_flag_inexact;
3374 }
158142c2
FB
3375 return z;
3376
3377}
3378
3379/*----------------------------------------------------------------------------
3380| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3381| `absZ1', with binary point between bits 63 and 64 (between the input words),
3382| and returns the properly rounded 64-bit integer corresponding to the input.
3383| If `zSign' is 1, the input is negated before being converted to an integer.
3384| Ordinarily, the fixed-point input is simply rounded to an integer, with
3385| the inexact exception raised if the input cannot be represented exactly as
3386| an integer. However, if the fixed-point input is too large, the invalid
3387| exception is raised and the largest positive or negative integer is
3388| returned.
3389*----------------------------------------------------------------------------*/
3390
c120391c 3391static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 3392 float_status *status)
158142c2 3393{
8f506c70 3394 int8_t roundingMode;
c120391c 3395 bool roundNearestEven, increment;
760e1416 3396 int64_t z;
158142c2 3397
a2f2d288 3398 roundingMode = status->float_rounding_mode;
158142c2 3399 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3400 switch (roundingMode) {
3401 case float_round_nearest_even:
f9288a76 3402 case float_round_ties_away:
dc355b76
PM
3403 increment = ((int64_t) absZ1 < 0);
3404 break;
3405 case float_round_to_zero:
3406 increment = 0;
3407 break;
3408 case float_round_up:
3409 increment = !zSign && absZ1;
3410 break;
3411 case float_round_down:
3412 increment = zSign && absZ1;
3413 break;
5d64abb3
RH
3414 case float_round_to_odd:
3415 increment = !(absZ0 & 1) && absZ1;
3416 break;
dc355b76
PM
3417 default:
3418 abort();
158142c2
FB
3419 }
3420 if ( increment ) {
3421 ++absZ0;
3422 if ( absZ0 == 0 ) goto overflow;
bb98fe42 3423 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
3424 }
3425 z = absZ0;
3426 if ( zSign ) z = - z;
3427 if ( z && ( ( z < 0 ) ^ zSign ) ) {
3428 overflow:
ff32e16e 3429 float_raise(float_flag_invalid, status);
2c217da0 3430 return zSign ? INT64_MIN : INT64_MAX;
158142c2 3431 }
a2f2d288
PM
3432 if (absZ1) {
3433 status->float_exception_flags |= float_flag_inexact;
3434 }
158142c2
FB
3435 return z;
3436
3437}
3438
fb3ea83a
TM
3439/*----------------------------------------------------------------------------
3440| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3441| `absZ1', with binary point between bits 63 and 64 (between the input words),
3442| and returns the properly rounded 64-bit unsigned integer corresponding to the
3443| input. Ordinarily, the fixed-point input is simply rounded to an integer,
3444| with the inexact exception raised if the input cannot be represented exactly
3445| as an integer. However, if the fixed-point input is too large, the invalid
3446| exception is raised and the largest unsigned integer is returned.
3447*----------------------------------------------------------------------------*/
3448
c120391c 3449static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
e5a41ffa 3450 uint64_t absZ1, float_status *status)
fb3ea83a 3451{
8f506c70 3452 int8_t roundingMode;
c120391c 3453 bool roundNearestEven, increment;
fb3ea83a 3454
a2f2d288 3455 roundingMode = status->float_rounding_mode;
fb3ea83a 3456 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
3457 switch (roundingMode) {
3458 case float_round_nearest_even:
f9288a76 3459 case float_round_ties_away:
dc355b76
PM
3460 increment = ((int64_t)absZ1 < 0);
3461 break;
3462 case float_round_to_zero:
3463 increment = 0;
3464 break;
3465 case float_round_up:
3466 increment = !zSign && absZ1;
3467 break;
3468 case float_round_down:
3469 increment = zSign && absZ1;
3470 break;
5d64abb3
RH
3471 case float_round_to_odd:
3472 increment = !(absZ0 & 1) && absZ1;
3473 break;
dc355b76
PM
3474 default:
3475 abort();
fb3ea83a
TM
3476 }
3477 if (increment) {
3478 ++absZ0;
3479 if (absZ0 == 0) {
ff32e16e 3480 float_raise(float_flag_invalid, status);
2c217da0 3481 return UINT64_MAX;
fb3ea83a
TM
3482 }
3483 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
3484 }
3485
3486 if (zSign && absZ0) {
ff32e16e 3487 float_raise(float_flag_invalid, status);
fb3ea83a
TM
3488 return 0;
3489 }
3490
3491 if (absZ1) {
a2f2d288 3492 status->float_exception_flags |= float_flag_inexact;
fb3ea83a
TM
3493 }
3494 return absZ0;
3495}
3496
158142c2
FB
3497/*----------------------------------------------------------------------------
3498| Normalizes the subnormal single-precision floating-point value represented
3499| by the denormalized significand `aSig'. The normalized exponent and
3500| significand are stored at the locations pointed to by `zExpPtr' and
3501| `zSigPtr', respectively.
3502*----------------------------------------------------------------------------*/
3503
static void
normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* Left shift implied by the leading-zero count, less the constant 8. */
    const int8_t lshift = clz32(aSig) - 8;

    *zSigPtr = aSig << lshift;
    /* The exponent drops by the shift applied to the significand. */
    *zExpPtr = 1 - lshift;
}
3514
158142c2
FB
3515/*----------------------------------------------------------------------------
3516| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3517| and significand `zSig', and returns the proper single-precision floating-
3518| point value corresponding to the abstract input. Ordinarily, the abstract
3519| value is simply rounded and packed into the single-precision format, with
3520| the inexact exception raised if the abstract input cannot be represented
3521| exactly. However, if the abstract value is too large, the overflow and
3522| inexact exceptions are raised and an infinity or maximal finite value is
3523| returned. If the abstract value is too small, the input value is rounded to
3524| a subnormal number, and the underflow and inexact exceptions are raised if
3525| the abstract input cannot be represented exactly as a subnormal single-
3526| precision floating-point number.
3527| The input significand `zSig' has its binary point between bits 30
3528| and 29, which is 7 bits to the left of the usual location. This shifted
3529| significand must be normalized or smaller. If `zSig' is not normalized,
3530| `zExp' must be 0; in that case, the result returned is a subnormal number,
3531| and it must not require rounding. In the usual case that `zSig' is
3532| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3533| The handling of underflow and overflow follows the IEC/IEEE Standard for
3534| Binary Floating-Point Arithmetic.
3535*----------------------------------------------------------------------------*/
3536
c120391c 3537static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 3538 float_status *status)
158142c2 3539{
8f506c70 3540 int8_t roundingMode;
c120391c 3541 bool roundNearestEven;
8f506c70 3542 int8_t roundIncrement, roundBits;
c120391c 3543 bool isTiny;
158142c2 3544
a2f2d288 3545 roundingMode = status->float_rounding_mode;
158142c2 3546 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3547 switch (roundingMode) {
3548 case float_round_nearest_even:
f9288a76 3549 case float_round_ties_away:
dc355b76
PM
3550 roundIncrement = 0x40;
3551 break;
3552 case float_round_to_zero:
3553 roundIncrement = 0;
3554 break;
3555 case float_round_up:
3556 roundIncrement = zSign ? 0 : 0x7f;
3557 break;
3558 case float_round_down:
3559 roundIncrement = zSign ? 0x7f : 0;
3560 break;
5d64abb3
RH
3561 case float_round_to_odd:
3562 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
3563 break;
dc355b76
PM
3564 default:
3565 abort();
3566 break;
158142c2
FB
3567 }
3568 roundBits = zSig & 0x7F;
bb98fe42 3569 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
3570 if ( ( 0xFD < zExp )
3571 || ( ( zExp == 0xFD )
bb98fe42 3572 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 3573 ) {
5d64abb3
RH
3574 bool overflow_to_inf = roundingMode != float_round_to_odd &&
3575 roundIncrement != 0;
ff32e16e 3576 float_raise(float_flag_overflow | float_flag_inexact, status);
5d64abb3 3577 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
158142c2
FB
3578 }
3579 if ( zExp < 0 ) {
a2f2d288 3580 if (status->flush_to_zero) {
ff32e16e 3581 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3582 return packFloat32(zSign, 0, 0);
3583 }
a828b373
RH
3584 isTiny = status->tininess_before_rounding
3585 || (zExp < -1)
3586 || (zSig + roundIncrement < 0x80000000);
158142c2
FB
3587 shift32RightJamming( zSig, - zExp, &zSig );
3588 zExp = 0;
3589 roundBits = zSig & 0x7F;
ff32e16e
PM
3590 if (isTiny && roundBits) {
3591 float_raise(float_flag_underflow, status);
3592 }
5d64abb3
RH
3593 if (roundingMode == float_round_to_odd) {
3594 /*
3595 * For round-to-odd case, the roundIncrement depends on
3596 * zSig which just changed.
3597 */
3598 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
3599 }
158142c2
FB
3600 }
3601 }
a2f2d288
PM
3602 if (roundBits) {
3603 status->float_exception_flags |= float_flag_inexact;
3604 }
158142c2
FB
3605 zSig = ( zSig + roundIncrement )>>7;
3606 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
3607 if ( zSig == 0 ) zExp = 0;
3608 return packFloat32( zSign, zExp, zSig );
3609
3610}
3611
3612/*----------------------------------------------------------------------------
3613| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3614| and significand `zSig', and returns the proper single-precision floating-
3615| point value corresponding to the abstract input. This routine is just like
3616| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
3617| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
3618| floating-point exponent.
3619*----------------------------------------------------------------------------*/
3620
3621static float32
c120391c 3622 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 3623 float_status *status)
158142c2 3624{
8f506c70 3625 int8_t shiftCount;
158142c2 3626
0019d5c3 3627 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
3628 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
3629 status);
158142c2
FB
3630
3631}
3632
158142c2
FB
3633/*----------------------------------------------------------------------------
3634| Normalizes the subnormal double-precision floating-point value represented
3635| by the denormalized significand `aSig'. The normalized exponent and
3636| significand are stored at the locations pointed to by `zExpPtr' and
3637| `zSigPtr', respectively.
3638*----------------------------------------------------------------------------*/
3639
static void
normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* Left shift implied by the leading-zero count, less the constant 11. */
    const int8_t lshift = clz64(aSig) - 11;

    *zSigPtr = aSig << lshift;
    /* The exponent drops by the shift applied to the significand. */
    *zExpPtr = 1 - lshift;
}
3650
3651/*----------------------------------------------------------------------------
3652| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
3653| double-precision floating-point value, returning the result. After being
3654| shifted into the proper positions, the three fields are simply added
3655| together to form the result. This means that any integer portion of `zSig'
3656| will be added into the exponent. Since a properly normalized significand
3657| will have an integer portion equal to 1, the `zExp' input should be 1 less
3658| than the desired result exponent whenever `zSig' is a complete, normalized
3659| significand.
3660*----------------------------------------------------------------------------*/
3661
c120391c 3662static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
158142c2
FB
3663{
3664
f090c9d4 3665 return make_float64(
bb98fe42 3666 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
3667
3668}
3669
3670/*----------------------------------------------------------------------------
3671| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3672| and significand `zSig', and returns the proper double-precision floating-
3673| point value corresponding to the abstract input. Ordinarily, the abstract
3674| value is simply rounded and packed into the double-precision format, with
3675| the inexact exception raised if the abstract input cannot be represented
3676| exactly. However, if the abstract value is too large, the overflow and
3677| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
3678| returned. If the abstract value is too small, the input value is rounded to
3679| a subnormal number, and the underflow and inexact exceptions are raised if
3680| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
3681| precision floating-point number.
3682| The input significand `zSig' has its binary point between bits 62
3683| and 61, which is 10 bits to the left of the usual location. This shifted
3684| significand must be normalized or smaller. If `zSig' is not normalized,
3685| `zExp' must be 0; in that case, the result returned is a subnormal number,
3686| and it must not require rounding. In the usual case that `zSig' is
3687| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3688| The handling of underflow and overflow follows the IEC/IEEE Standard for
3689| Binary Floating-Point Arithmetic.
3690*----------------------------------------------------------------------------*/
3691
c120391c 3692static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 3693 float_status *status)
158142c2 3694{
8f506c70 3695 int8_t roundingMode;
c120391c 3696 bool roundNearestEven;
0c48262d 3697 int roundIncrement, roundBits;
c120391c 3698 bool isTiny;
158142c2 3699
a2f2d288 3700 roundingMode = status->float_rounding_mode;
158142c2 3701 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3702 switch (roundingMode) {
3703 case float_round_nearest_even:
f9288a76 3704 case float_round_ties_away:
dc355b76
PM
3705 roundIncrement = 0x200;
3706 break;
3707 case float_round_to_zero:
3708 roundIncrement = 0;
3709 break;
3710 case float_round_up:
3711 roundIncrement = zSign ? 0 : 0x3ff;
3712 break;
3713 case float_round_down:
3714 roundIncrement = zSign ? 0x3ff : 0;
3715 break;
9ee6f678
BR
3716 case float_round_to_odd:
3717 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
3718 break;
dc355b76
PM
3719 default:
3720 abort();
158142c2
FB
3721 }
3722 roundBits = zSig & 0x3FF;
bb98fe42 3723 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
3724 if ( ( 0x7FD < zExp )
3725 || ( ( zExp == 0x7FD )
bb98fe42 3726 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 3727 ) {
9ee6f678
BR
3728 bool overflow_to_inf = roundingMode != float_round_to_odd &&
3729 roundIncrement != 0;
ff32e16e 3730 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 3731 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
3732 }
3733 if ( zExp < 0 ) {
a2f2d288 3734 if (status->flush_to_zero) {
ff32e16e 3735 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3736 return packFloat64(zSign, 0, 0);
3737 }
a828b373
RH
3738 isTiny = status->tininess_before_rounding
3739 || (zExp < -1)
3740 || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
158142c2
FB
3741 shift64RightJamming( zSig, - zExp, &zSig );
3742 zExp = 0;
3743 roundBits = zSig & 0x3FF;
ff32e16e
PM
3744 if (isTiny && roundBits) {
3745 float_raise(float_flag_underflow, status);
3746 }
9ee6f678
BR
3747 if (roundingMode == float_round_to_odd) {
3748 /*
3749 * For round-to-odd case, the roundIncrement depends on
3750 * zSig which just changed.
3751 */
3752 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
3753 }
158142c2
FB
3754 }
3755 }
a2f2d288
PM
3756 if (roundBits) {
3757 status->float_exception_flags |= float_flag_inexact;
3758 }
158142c2
FB
3759 zSig = ( zSig + roundIncrement )>>10;
3760 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
3761 if ( zSig == 0 ) zExp = 0;
3762 return packFloat64( zSign, zExp, zSig );
3763
3764}
3765
3766/*----------------------------------------------------------------------------
3767| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3768| and significand `zSig', and returns the proper double-precision floating-
3769| point value corresponding to the abstract input. This routine is just like
3770| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
3771| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
3772| floating-point exponent.
3773*----------------------------------------------------------------------------*/
3774
3775static float64
c120391c 3776 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 3777 float_status *status)
158142c2 3778{
8f506c70 3779 int8_t shiftCount;
158142c2 3780
0019d5c3 3781 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
3782 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
3783 status);
158142c2
FB
3784
3785}
3786
158142c2
FB
3787/*----------------------------------------------------------------------------
3788| Normalizes the subnormal extended double-precision floating-point value
3789| represented by the denormalized significand `aSig'. The normalized exponent
3790| and significand are stored at the locations pointed to by `zExpPtr' and
3791| `zSigPtr', respectively.
3792*----------------------------------------------------------------------------*/
3793
88857aca
LV
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* Shift the significand left by its full leading-zero count. */
    const int8_t lshift = clz64(aSig);

    *zSigPtr = aSig << lshift;
    /* The exponent drops by the shift applied to the significand. */
    *zExpPtr = 1 - lshift;
}
3803
3804/*----------------------------------------------------------------------------
3805| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3806| and extended significand formed by the concatenation of `zSig0' and `zSig1',
3807| and returns the proper extended double-precision floating-point value
3808| corresponding to the abstract input. Ordinarily, the abstract value is
3809| rounded and packed into the extended double-precision format, with the
3810| inexact exception raised if the abstract input cannot be represented
3811| exactly. However, if the abstract value is too large, the overflow and
3812| inexact exceptions are raised and an infinity or maximal finite value is
3813| returned. If the abstract value is too small, the input value is rounded to
3814| a subnormal number, and the underflow and inexact exceptions are raised if
3815| the abstract input cannot be represented exactly as a subnormal extended
3816| double-precision floating-point number.
3817| If `roundingPrecision' is 32 or 64, the result is rounded to the same
3818| number of bits as single or double precision, respectively. Otherwise, the
3819| result is rounded to the full precision of the extended double-precision
3820| format.
3821| The input significand must be normalized or smaller. If the input
3822| significand is not normalized, `zExp' must be 0; in that case, the result
3823| returned is a subnormal number, and it must not require rounding. The
3824| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
3825| Floating-Point Arithmetic.
3826*----------------------------------------------------------------------------*/
3827
c120391c 3828floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
88857aca
LV
3829 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
3830 float_status *status)
158142c2 3831{
/*
 * Rounds the value (zSign, zExp, zSig0:zSig1) according to the rounding mode
 * held in `status' and packs it as a floatx80.  Inexact, underflow, overflow
 * and output-denormal conditions are reported through `status'.
 * A roundingPrecision of 64 or 32 takes the reduced-precision path; every
 * other value (80 included) jumps to the `precision80' path further down.
 */
8f506c70 3832 int8_t roundingMode;
c120391c 3833 bool roundNearestEven, increment, isTiny;
f42c2224 3834 int64_t roundIncrement, roundMask, roundBits;
158142c2 3835
a2f2d288 3836 roundingMode = status->float_rounding_mode;
158142c2
FB
3837 roundNearestEven = ( roundingMode == float_round_nearest_even );
3838 if ( roundingPrecision == 80 ) goto precision80;
3839 if ( roundingPrecision == 64 ) {
e9321124
AB
/* roundMask selects the low 11 bits of zSig0; roundIncrement is half of
 * the bit just above the mask. */
3840 roundIncrement = UINT64_C(0x0000000000000400);
3841 roundMask = UINT64_C(0x00000000000007FF);
158142c2
FB
3842 }
3843 else if ( roundingPrecision == 32 ) {
e9321124
AB
/* Same scheme with the low 40 bits of zSig0 masked. */
3844 roundIncrement = UINT64_C(0x0000008000000000);
3845 roundMask = UINT64_C(0x000000FFFFFFFFFF);
158142c2
FB
3846 }
3847 else {
3848 goto precision80;
3849 }
/* Reduced precision: zSig1 only matters as a sticky bit, fold it into bit 0. */
3850 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
/* Pick the amount added before truncation: the half-unit set above for the
 * two nearest modes, nothing for truncation, or the whole mask when rounding
 * away from zero in the direction of the sign. */
3851 switch (roundingMode) {
3852 case float_round_nearest_even:
f9288a76 3853 case float_round_ties_away:
dc355b76
PM
3854 break;
3855 case float_round_to_zero:
3856 roundIncrement = 0;
3857 break;
3858 case float_round_up:
3859 roundIncrement = zSign ? 0 : roundMask;
3860 break;
3861 case float_round_down:
3862 roundIncrement = zSign ? roundMask : 0;
3863 break;
3864 default:
3865 abort();
158142c2
FB
3866 }
3867 roundBits = zSig0 & roundMask;
/* The unsigned compare is true when zExp <= 0 (zExp - 1 wraps to a large
 * value) or when zExp >= 0x7FFE: the exponent needs special handling. */
bb98fe42 3868 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
/* Overflow: exponent too large, or at the maximum with the rounding
 * addition carrying out of the 64-bit significand. */
3869 if ( ( 0x7FFE < zExp )
3870 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
3871 ) {
3872 goto overflow;
3873 }
3874 if ( zExp <= 0 ) {
a2f2d288 3875 if (status->flush_to_zero) {
ff32e16e 3876 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3877 return packFloatx80(zSign, 0, 0);
3878 }
a828b373
RH
/* Tininess is decided before the denormalising shift below: always when
 * detection is before rounding or zExp < 0, otherwise only if adding the
 * increment does not wrap the 64-bit significand. */
3879 isTiny = status->tininess_before_rounding
3880 || (zExp < 0 )
3881 || (zSig0 <= zSig0 + roundIncrement);
158142c2
FB
3882 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
3883 zExp = 0;
3884 roundBits = zSig0 & roundMask;
ff32e16e
PM
3885 if (isTiny && roundBits) {
3886 float_raise(float_flag_underflow, status);
3887 }
a2f2d288
PM
3888 if (roundBits) {
3889 status->float_exception_flags |= float_flag_inexact;
3890 }
158142c2 3891 zSig0 += roundIncrement;
/* Bit 63 set after the addition: the exponent field becomes 1. */
bb98fe42 3892 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
/* roundMask + 1 is the lowest kept bit; on an exact tie in nearest-even
 * mode that bit is cleared together with the discarded bits. */
3893 roundIncrement = roundMask + 1;
3894 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3895 roundMask |= roundIncrement;
3896 }
3897 zSig0 &= ~ roundMask;
3898 return packFloatx80( zSign, zExp, zSig0 );
3899 }
3900 }
a2f2d288
PM
/* Ordinary reduced-precision case. */
3901 if (roundBits) {
3902 status->float_exception_flags |= float_flag_inexact;
3903 }
158142c2
FB
3904 zSig0 += roundIncrement;
/* The addition wrapped around: bump the exponent and restart the
 * significand at its top bit. */
3905 if ( zSig0 < roundIncrement ) {
3906 ++zExp;
e9321124 3907 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
3908 }
3909 roundIncrement = roundMask + 1;
3910 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3911 roundMask |= roundIncrement;
3912 }
3913 zSig0 &= ~ roundMask;
3914 if ( zSig0 == 0 ) zExp = 0;
3915 return packFloatx80( zSign, zExp, zSig0 );
/* Full-precision path: zSig0 is the result significand and zSig1 holds the
 * bits below it (top bit = half unit, any set bit = inexact). */
3916 precision80:
dc355b76
PM
3917 switch (roundingMode) {
3918 case float_round_nearest_even:
f9288a76 3919 case float_round_ties_away:
dc355b76
PM
3920 increment = ((int64_t)zSig1 < 0);
3921 break;
3922 case float_round_to_zero:
3923 increment = 0;
3924 break;
3925 case float_round_up:
3926 increment = !zSign && zSig1;
3927 break;
3928 case float_round_down:
3929 increment = zSign && zSig1;
3930 break;
3931 default:
3932 abort();
158142c2 3933 }
/* Same exponent range test as in the reduced-precision path. */
bb98fe42 3934 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
3935 if ( ( 0x7FFE < zExp )
3936 || ( ( zExp == 0x7FFE )
e9321124 3937 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
3938 && increment
3939 )
3940 ) {
/* With roundMask zeroed here, ~roundMask below is an all-ones significand.
 * The reduced-precision path jumps straight to `overflow' and keeps its own
 * roundMask. */
3941 roundMask = 0;
3942 overflow:
ff32e16e 3943 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
/* Modes that round toward zero for this sign return exponent 0x7FFE with
 * significand ~roundMask; all others return infinity. */
3944 if ( ( roundingMode == float_round_to_zero )
3945 || ( zSign && ( roundingMode == float_round_up ) )
3946 || ( ! zSign && ( roundingMode == float_round_down ) )
3947 ) {
3948 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
3949 }
0f605c88
LV
3950 return packFloatx80(zSign,
3951 floatx80_infinity_high,
3952 floatx80_infinity_low);
158142c2
FB
3953 }
3954 if ( zExp <= 0 ) {
a828b373
RH
/* NOTE(review): unlike the reduced-precision path, status->flush_to_zero is
 * not consulted here - confirm that this is intended. */
3955 isTiny = status->tininess_before_rounding
3956 || (zExp < 0)
3957 || !increment
3958 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
3959 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
3960 zExp = 0;
ff32e16e
PM
3961 if (isTiny && zSig1) {
3962 float_raise(float_flag_underflow, status);
3963 }
a2f2d288
PM
3964 if (zSig1) {
3965 status->float_exception_flags |= float_flag_inexact;
3966 }
dc355b76
PM
/* zSig1 changed in the shift above, so the rounding decision is redone. */
3967 switch (roundingMode) {
3968 case float_round_nearest_even:
f9288a76 3969 case float_round_ties_away:
dc355b76
PM
3970 increment = ((int64_t)zSig1 < 0);
3971 break;
3972 case float_round_to_zero:
3973 increment = 0;
3974 break;
3975 case float_round_up:
3976 increment = !zSign && zSig1;
3977 break;
3978 case float_round_down:
3979 increment = zSign && zSig1;
3980 break;
3981 default:
3982 abort();
158142c2
FB
3983 }
3984 if ( increment ) {
3985 ++zSig0;
/* When zSig1 << 1 is zero in nearest-even mode (an exact tie on this path),
 * clear bit 0 so the result is even. */
3986 zSig0 &=
bb98fe42
AF
3987 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
3988 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
3989 }
3990 return packFloatx80( zSign, zExp, zSig0 );
3991 }
3992 }
a2f2d288
PM
/* Ordinary full-precision case. */
3993 if (zSig1) {
3994 status->float_exception_flags |= float_flag_inexact;
3995 }
158142c2
FB
3996 if ( increment ) {
3997 ++zSig0;
/* Significand wrapped to zero: bump the exponent and restart at the top bit. */
3998 if ( zSig0 == 0 ) {
3999 ++zExp;
e9321124 4000 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4001 }
4002 else {
bb98fe42 4003 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
4004 }
4005 }
4006 else {
4007 if ( zSig0 == 0 ) zExp = 0;
4008 }
4009 return packFloatx80( zSign, zExp, zSig0 );
4010
4011}
4012
4013/*----------------------------------------------------------------------------
4014| Takes an abstract floating-point value having sign `zSign', exponent
4015| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4016| and returns the proper extended double-precision floating-point value
4017| corresponding to the abstract input. This routine is just like
4018| `roundAndPackFloatx80' except that the input significand does not have to be
4019| normalized.
4020*----------------------------------------------------------------------------*/
4021
88857aca 4022floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
c120391c 4023 bool zSign, int32_t zExp,
88857aca
LV
4024 uint64_t zSig0, uint64_t zSig1,
4025 float_status *status)
158142c2 4026{
8f506c70 4027 int8_t shiftCount;
158142c2
FB
4028
4029 if ( zSig0 == 0 ) {
4030 zSig0 = zSig1;
4031 zSig1 = 0;
4032 zExp -= 64;
4033 }
0019d5c3 4034 shiftCount = clz64(zSig0);
158142c2
FB
4035 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4036 zExp -= shiftCount;
ff32e16e
PM
4037 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4038 zSig0, zSig1, status);
158142c2
FB
4039
4040}
4041
158142c2
FB
4042/*----------------------------------------------------------------------------
4043| Returns the least-significant 64 fraction bits of the quadruple-precision
4044| floating-point value `a'.
4045*----------------------------------------------------------------------------*/
4046
a49db98d 4047static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4048{
4049
4050 return a.low;
4051
4052}
4053
4054/*----------------------------------------------------------------------------
4055| Returns the most-significant 48 fraction bits of the quadruple-precision
4056| floating-point value `a'.
4057*----------------------------------------------------------------------------*/
4058
a49db98d 4059static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4060{
4061
e9321124 4062 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
158142c2
FB
4063
4064}
4065
4066/*----------------------------------------------------------------------------
4067| Returns the exponent bits of the quadruple-precision floating-point value
4068| `a'.
4069*----------------------------------------------------------------------------*/
4070
f4014512 4071static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4072{
4073
4074 return ( a.high>>48 ) & 0x7FFF;
4075
4076}
4077
4078/*----------------------------------------------------------------------------
4079| Returns the sign bit of the quadruple-precision floating-point value `a'.
4080*----------------------------------------------------------------------------*/
4081
c120391c 4082static inline bool extractFloat128Sign(float128 a)
158142c2 4083{
c120391c 4084 return a.high >> 63;
158142c2
FB
4085}
4086
4087/*----------------------------------------------------------------------------
4088| Normalizes the subnormal quadruple-precision floating-point value
4089| represented by the denormalized significand formed by the concatenation of
4090| `aSig0' and `aSig1'. The normalized exponent is stored at the location
4091| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4092| significand are stored at the location pointed to by `zSig0Ptr', and the
4093| least significant 64 bits of the normalized significand are stored at the
4094| location pointed to by `zSig1Ptr'.
4095*----------------------------------------------------------------------------*/
4096
static void normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                                       int32_t *zExpPtr,
                                       uint64_t *zSig0Ptr,
                                       uint64_t *zSig1Ptr)
{
    int8_t shift;

    /* Common case first: the high word already holds the leading bit. */
    if (aSig0 != 0) {
        /* Leave 15 zero bits above the leading 1 of the high word. */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /* Only the low word is populated. */
    shift = clz64(aSig1) - 15;
    if (shift >= 0) {
        /* Everything fits into the high word. */
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        /* Split the low word between the two output words. */
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
4127
4128/*----------------------------------------------------------------------------
4129| Packs the sign `zSign', the exponent `zExp', and the significand formed
4130| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4131| floating-point value, returning the result. After being shifted into the
4132| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4133| added together to form the most significant 64 bits of the result. This
4134| means that any integer portion of `zSig0' will be added into the exponent.
4135| Since a properly normalized significand will have an integer portion equal
4136| to 1, the `zExp' input should be 1 less than the desired result exponent
4137| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4138| significand.
4139*----------------------------------------------------------------------------*/
4140
a49db98d 4141static inline float128
c120391c 4142packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
158142c2
FB
4143{
4144 float128 z;
4145
4146 z.low = zSig1;
c120391c 4147 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
158142c2 4148 return z;
158142c2
FB
4149}
4150
4151/*----------------------------------------------------------------------------
4152| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4153| and extended significand formed by the concatenation of `zSig0', `zSig1',
4154| and `zSig2', and returns the proper quadruple-precision floating-point value
4155| corresponding to the abstract input. Ordinarily, the abstract value is
4156| simply rounded and packed into the quadruple-precision format, with the
4157| inexact exception raised if the abstract input cannot be represented
4158| exactly. However, if the abstract value is too large, the overflow and
4159| inexact exceptions are raised and an infinity or maximal finite value is
4160| returned. If the abstract value is too small, the input value is rounded to
4161| a subnormal number, and the underflow and inexact exceptions are raised if
4162| the abstract input cannot be represented exactly as a subnormal quadruple-
4163| precision floating-point number.
4164| The input significand must be normalized or smaller. If the input
4165| significand is not normalized, `zExp' must be 0; in that case, the result
4166| returned is a subnormal number, and it must not require rounding. In the
4167| usual case that the input significand is normalized, `zExp' must be 1 less
4168| than the ``true'' floating-point exponent. The handling of underflow and
4169| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4170*----------------------------------------------------------------------------*/
4171
c120391c 4172static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4173 uint64_t zSig0, uint64_t zSig1,
4174 uint64_t zSig2, float_status *status)
158142c2 4175{
/*
 * Rounds the value (zSign, zExp, zSig0:zSig1:zSig2) according to the rounding
 * mode in `status' and packs it as a float128.  zSig2 holds the bits below
 * the result significand: its top bit is the half unit and any set bit makes
 * the result inexact.  Exceptions are reported through `status'.
 */
8f506c70 4176 int8_t roundingMode;
c120391c 4177 bool roundNearestEven, increment, isTiny;
158142c2 4178
a2f2d288 4179 roundingMode = status->float_rounding_mode;
158142c2 4180 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
/* Decide whether the significand is incremented by one unit. */
4181 switch (roundingMode) {
4182 case float_round_nearest_even:
f9288a76 4183 case float_round_ties_away:
dc355b76
PM
4184 increment = ((int64_t)zSig2 < 0);
4185 break;
4186 case float_round_to_zero:
4187 increment = 0;
4188 break;
4189 case float_round_up:
4190 increment = !zSign && zSig2;
4191 break;
4192 case float_round_down:
4193 increment = zSign && zSig2;
4194 break;
9ee6f678
BR
/* Round to odd: increment only an inexact value whose low bit is clear. */
4195 case float_round_to_odd:
4196 increment = !(zSig1 & 0x1) && zSig2;
4197 break;
dc355b76
PM
4198 default:
4199 abort();
158142c2 4200 }
/* The unsigned compare is true for zExp >= 0x7FFD and for negative zExp. */
bb98fe42 4201 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
/* Overflow: exponent too large, or at the limit with an all-ones significand
 * that is about to be incremented. */
4202 if ( ( 0x7FFD < zExp )
4203 || ( ( zExp == 0x7FFD )
4204 && eq128(
e9321124
AB
4205 UINT64_C(0x0001FFFFFFFFFFFF),
4206 UINT64_C(0xFFFFFFFFFFFFFFFF),
158142c2
FB
4207 zSig0,
4208 zSig1
4209 )
4210 && increment
4211 )
4212 ) {
ff32e16e 4213 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
/* Modes that round toward zero for this sign, and round-to-odd, return
 * exponent 0x7FFE with an all-ones fraction; all others return infinity. */
4214 if ( ( roundingMode == float_round_to_zero )
4215 || ( zSign && ( roundingMode == float_round_up ) )
4216 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4217 || (roundingMode == float_round_to_odd)
158142c2
FB
4218 ) {
4219 return
4220 packFloat128(
4221 zSign,
4222 0x7FFE,
e9321124
AB
4223 UINT64_C(0x0000FFFFFFFFFFFF),
4224 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
4225 );
4226 }
4227 return packFloat128( zSign, 0x7FFF, 0, 0 );
4228 }
4229 if ( zExp < 0 ) {
a2f2d288 4230 if (status->flush_to_zero) {
ff32e16e 4231 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4232 return packFloat128(zSign, 0, 0, 0);
4233 }
a828b373
RH
/* Tininess is decided before the denormalising shift below: always when
 * detection is before rounding, zExp < -1 or no increment is pending,
 * otherwise only if the significand is below all-ones. */
4234 isTiny = status->tininess_before_rounding
4235 || (zExp < -1)
4236 || !increment
4237 || lt128(zSig0, zSig1,
4238 UINT64_C(0x0001FFFFFFFFFFFF),
4239 UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4240 shift128ExtraRightJamming(
4241 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4242 zExp = 0;
ff32e16e
PM
4243 if (isTiny && zSig2) {
4244 float_raise(float_flag_underflow, status);
4245 }
dc355b76
PM
/* zSig1/zSig2 changed in the shift above, so the rounding decision is redone. */
4246 switch (roundingMode) {
4247 case float_round_nearest_even:
f9288a76 4248 case float_round_ties_away:
dc355b76
PM
4249 increment = ((int64_t)zSig2 < 0);
4250 break;
4251 case float_round_to_zero:
4252 increment = 0;
4253 break;
4254 case float_round_up:
4255 increment = !zSign && zSig2;
4256 break;
4257 case float_round_down:
4258 increment = zSign && zSig2;
4259 break;
9ee6f678
BR
4260 case float_round_to_odd:
4261 increment = !(zSig1 & 0x1) && zSig2;
4262 break;
dc355b76
PM
4263 default:
4264 abort();
158142c2
FB
4265 }
4266 }
4267 }
a2f2d288
PM
/* Common tail, shared by the normal and the denormalised case. */
4268 if (zSig2) {
4269 status->float_exception_flags |= float_flag_inexact;
4270 }
158142c2
FB
4271 if ( increment ) {
4272 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
/* zSig2 + zSig2 == 0 means no bit below the top bit of zSig2 is set; in
 * nearest-even mode bit 0 is then cleared so the result is even. */
4273 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
4274 }
4275 else {
4276 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4277 }
4278 return packFloat128( zSign, zExp, zSig0, zSig1 );
4279
4280}
4281
4282/*----------------------------------------------------------------------------
4283| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4284| and significand formed by the concatenation of `zSig0' and `zSig1', and
4285| returns the proper quadruple-precision floating-point value corresponding
4286| to the abstract input. This routine is just like `roundAndPackFloat128'
4287| except that the input significand has fewer bits and does not have to be
4288| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4289| point exponent.
4290*----------------------------------------------------------------------------*/
4291
c120391c 4292static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4293 uint64_t zSig0, uint64_t zSig1,
4294 float_status *status)
158142c2 4295{
8f506c70 4296 int8_t shiftCount;
bb98fe42 4297 uint64_t zSig2;
158142c2
FB
4298
4299 if ( zSig0 == 0 ) {
4300 zSig0 = zSig1;
4301 zSig1 = 0;
4302 zExp -= 64;
4303 }
0019d5c3 4304 shiftCount = clz64(zSig0) - 15;
158142c2
FB
4305 if ( 0 <= shiftCount ) {
4306 zSig2 = 0;
4307 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4308 }
4309 else {
4310 shift128ExtraRightJamming(
4311 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4312 }
4313 zExp -= shiftCount;
ff32e16e 4314 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
4315
4316}
4317
158142c2 4318
158142c2
FB
4319/*----------------------------------------------------------------------------
4320| Returns the result of converting the 32-bit two's complement integer `a'
4321| to the extended double-precision floating-point format. The conversion
4322| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4323| Arithmetic.
4324*----------------------------------------------------------------------------*/
4325
e5a41ffa 4326floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2 4327{
c120391c 4328 bool zSign;
3a87d009 4329 uint32_t absA;
8f506c70 4330 int8_t shiftCount;
bb98fe42 4331 uint64_t zSig;
158142c2
FB
4332
4333 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4334 zSign = ( a < 0 );
4335 absA = zSign ? - a : a;
0019d5c3 4336 shiftCount = clz32(absA) + 32;
158142c2
FB
4337 zSig = absA;
4338 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4339
4340}
4341
158142c2
FB
4342/*----------------------------------------------------------------------------
4343| Returns the result of converting the 32-bit two's complement integer `a' to
4344| the quadruple-precision floating-point format. The conversion is performed
4345| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4346*----------------------------------------------------------------------------*/
4347
e5a41ffa 4348float128 int32_to_float128(int32_t a, float_status *status)
158142c2 4349{
c120391c 4350 bool zSign;
3a87d009 4351 uint32_t absA;
8f506c70 4352 int8_t shiftCount;
bb98fe42 4353 uint64_t zSig0;
158142c2
FB
4354
4355 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4356 zSign = ( a < 0 );
4357 absA = zSign ? - a : a;
0019d5c3 4358 shiftCount = clz32(absA) + 17;
158142c2
FB
4359 zSig0 = absA;
4360 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
4361
4362}
4363
158142c2
FB
4364/*----------------------------------------------------------------------------
4365| Returns the result of converting the 64-bit two's complement integer `a'
4366| to the extended double-precision floating-point format. The conversion
4367| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4368| Arithmetic.
4369*----------------------------------------------------------------------------*/
4370
e5a41ffa 4371floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2 4372{
c120391c 4373 bool zSign;
182f42fd 4374 uint64_t absA;
8f506c70 4375 int8_t shiftCount;
158142c2
FB
4376
4377 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4378 zSign = ( a < 0 );
4379 absA = zSign ? - a : a;
0019d5c3 4380 shiftCount = clz64(absA);
158142c2
FB
4381 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4382
4383}
4384
158142c2
FB
4385/*----------------------------------------------------------------------------
4386| Returns the result of converting the 64-bit two's complement integer `a' to
4387| the quadruple-precision floating-point format. The conversion is performed
4388| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4389*----------------------------------------------------------------------------*/
4390
e5a41ffa 4391float128 int64_to_float128(int64_t a, float_status *status)
158142c2 4392{
c120391c 4393 bool zSign;
182f42fd 4394 uint64_t absA;
8f506c70 4395 int8_t shiftCount;
f4014512 4396 int32_t zExp;
bb98fe42 4397 uint64_t zSig0, zSig1;
158142c2
FB
4398
4399 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4400 zSign = ( a < 0 );
4401 absA = zSign ? - a : a;
0019d5c3 4402 shiftCount = clz64(absA) + 49;
158142c2
FB
4403 zExp = 0x406E - shiftCount;
4404 if ( 64 <= shiftCount ) {
4405 zSig1 = 0;
4406 zSig0 = absA;
4407 shiftCount -= 64;
4408 }
4409 else {
4410 zSig1 = absA;
4411 zSig0 = 0;
4412 }
4413 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4414 return packFloat128( zSign, zExp, zSig0, zSig1 );
4415
4416}
4417
6bb8e0f1
PM
4418/*----------------------------------------------------------------------------
4419| Returns the result of converting the 64-bit unsigned integer `a'
4420| to the quadruple-precision floating-point format. The conversion is performed
4421| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4422*----------------------------------------------------------------------------*/
4423
e5a41ffa 4424float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
4425{
4426 if (a == 0) {
4427 return float128_zero;
4428 }
6603d506 4429 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
4430}
4431
158142c2
FB
4432/*----------------------------------------------------------------------------
4433| Returns the result of converting the single-precision floating-point value
4434| `a' to the extended double-precision floating-point format. The conversion
4435| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4436| Arithmetic.
4437*----------------------------------------------------------------------------*/
4438
e5a41ffa 4439floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2 4440{
c120391c 4441 bool aSign;
0c48262d 4442 int aExp;
bb98fe42 4443 uint32_t aSig;
158142c2 4444
ff32e16e 4445 a = float32_squash_input_denormal(a, status);
158142c2
FB
4446 aSig = extractFloat32Frac( a );
4447 aExp = extractFloat32Exp( a );
4448 aSign = extractFloat32Sign( a );
4449 if ( aExp == 0xFF ) {
ff32e16e 4450 if (aSig) {
7537c2b4
JM
4451 floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
4452 status);
4453 return floatx80_silence_nan(res, status);
ff32e16e 4454 }
0f605c88
LV
4455 return packFloatx80(aSign,
4456 floatx80_infinity_high,
4457 floatx80_infinity_low);
158142c2
FB
4458 }
4459 if ( aExp == 0 ) {
4460 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4461 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4462 }
4463 aSig |= 0x00800000;
bb98fe42 4464 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
4465
4466}
4467
158142c2
FB
4468/*----------------------------------------------------------------------------
4469| Returns the result of converting the single-precision floating-point value
4470| `a' to the quadruple-precision floating-point format. The conversion is
4471| performed according to the IEC/IEEE Standard for Binary Floating-Point
4472| Arithmetic.
4473*----------------------------------------------------------------------------*/
4474
e5a41ffa 4475float128 float32_to_float128(float32 a, float_status *status)
158142c2 4476{
c120391c 4477 bool aSign;
0c48262d 4478 int aExp;
bb98fe42 4479 uint32_t aSig;
158142c2 4480
ff32e16e 4481 a = float32_squash_input_denormal(a, status);
158142c2
FB
4482 aSig = extractFloat32Frac( a );
4483 aExp = extractFloat32Exp( a );
4484 aSign = extractFloat32Sign( a );
4485 if ( aExp == 0xFF ) {
ff32e16e
PM
4486 if (aSig) {
4487 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
4488 }
158142c2
FB
4489 return packFloat128( aSign, 0x7FFF, 0, 0 );
4490 }
4491 if ( aExp == 0 ) {
4492 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4493 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4494 --aExp;
4495 }
bb98fe42 4496 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
4497
4498}
4499
158142c2
FB
4500/*----------------------------------------------------------------------------
4501| Returns the remainder of the single-precision floating-point value `a'
4502| with respect to the corresponding value `b'. The operation is performed
4503| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4504*----------------------------------------------------------------------------*/
4505
e5a41ffa 4506float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 4507{
/*
 * Computes the remainder of `a' with respect to `b'.  Exceptions are reported
 * through `status'; NaN operands are propagated, and an infinite `a' or a
 * zero `b' raises invalid and returns the default NaN.
 */
c120391c 4508 bool aSign, zSign;
0c48262d 4509 int aExp, bExp, expDiff;
bb98fe42
AF
4510 uint32_t aSig, bSig;
4511 uint32_t q;
4512 uint64_t aSig64, bSig64, q64;
4513 uint32_t alternateASig;
4514 int32_t sigMean;
ff32e16e
PM
4515 a = float32_squash_input_denormal(a, status);
4516 b = float32_squash_input_denormal(b, status);
158142c2
FB
4517
4518 aSig = extractFloat32Frac( a );
4519 aExp = extractFloat32Exp( a );
4520 aSign = extractFloat32Sign( a );
4521 bSig = extractFloat32Frac( b );
4522 bExp = extractFloat32Exp( b );
158142c2
FB
/* a is NaN or infinity. */
4523 if ( aExp == 0xFF ) {
4524 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 4525 return propagateFloat32NaN(a, b, status);
158142c2 4526 }
ff32e16e 4527 float_raise(float_flag_invalid, status);
af39bc8c 4528 return float32_default_nan(status);
158142c2
FB
4529 }
/* b is NaN or infinity; for an infinite b, a is returned unchanged. */
4530 if ( bExp == 0xFF ) {
ff32e16e
PM
4531 if (bSig) {
4532 return propagateFloat32NaN(a, b, status);
4533 }
158142c2
FB
4534 return a;
4535 }
/* b is zero (invalid) or subnormal (normalized before use). */
4536 if ( bExp == 0 ) {
4537 if ( bSig == 0 ) {
ff32e16e 4538 float_raise(float_flag_invalid, status);
af39bc8c 4539 return float32_default_nan(status);
158142c2
FB
4540 }
4541 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
4542 }
/* A zero a is returned unchanged; a subnormal a is normalized. */
4543 if ( aExp == 0 ) {
4544 if ( aSig == 0 ) return a;
4545 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4546 }
4547 expDiff = aExp - bExp;
/* Set bit 23 in both significands. */
4548 aSig |= 0x00800000;
4549 bSig |= 0x00800000;
/* Small exponent difference: a single 64-by-32-bit division is used. */
4550 if ( expDiff < 32 ) {
4551 aSig <<= 8;
4552 bSig <<= 8;
4553 if ( expDiff < 0 ) {
/* a's exponent is at least two below b's: a is returned unchanged. */
4554 if ( expDiff < -1 ) return a;
4555 aSig >>= 1;
4556 }
4557 q = ( bSig <= aSig );
4558 if ( q ) aSig -= bSig;
4559 if ( 0 < expDiff ) {
bb98fe42 4560 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
4561 q >>= 32 - expDiff;
4562 bSig >>= 2;
4563 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4564 }
4565 else {
4566 aSig >>= 2;
4567 bSig >>= 2;
4568 }
4569 }
/* Large exponent difference: the loop below lowers expDiff by 62 per pass,
 * each pass using a quotient estimate reduced by 2 (clamped at 0). */
4570 else {
4571 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
4572 aSig64 = ( (uint64_t) aSig )<<40;
4573 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
4574 expDiff -= 64;
4575 while ( 0 < expDiff ) {
4576 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
4577 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
4578 aSig64 = - ( ( bSig * q64 )<<38 );
4579 expDiff -= 62;
4580 }
4581 expDiff += 64;
4582 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
4583 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
4584 q = q64>>( 64 - expDiff );
4585 bSig <<= 6;
4586 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
4587 }
/* Keep subtracting bSig until the remainder, viewed as signed, goes negative;
 * alternateASig keeps the value from just before the last subtraction. */
4588 do {
4589 alternateASig = aSig;
4590 ++q;
4591 aSig -= bSig;
bb98fe42 4592 } while ( 0 <= (int32_t) aSig );
158142c2
FB
/* Choose between the two candidates by the sign of their sum; when the sum
 * is zero, the low bit of q decides. */
4593 sigMean = aSig + alternateASig;
4594 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4595 aSig = alternateASig;
4596 }
/* A negative remainder flips the result sign and is replaced by its magnitude. */
bb98fe42 4597 zSign = ( (int32_t) aSig < 0 );
158142c2 4598 if ( zSign ) aSig = - aSig;
ff32e16e 4599 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
4600}
4601
369be8f6 4602
158142c2 4603
8229c991
AJ
4604/*----------------------------------------------------------------------------
4605| Returns the binary exponential of the single-precision floating-point value
4606| `a'. The operation is performed according to the IEC/IEEE Standard for
4607| Binary Floating-Point Arithmetic.
4608|
4609| Uses the following identities:
4610|
4611| 1. -------------------------------------------------------------------------
4612|
4613|    2^x = e^(x*ln(2))
4614|
4615| 2. -------------------------------------------------------------------------
4616|
4617|    e^x = 1 + x/1! + x^2/2! + x^3/3! + x^4/4! + x^5/5!
4618|            + ... + x^n/n! + ...
4619|
4620*----------------------------------------------------------------------------*/
4621
4622static const float64 float32_exp2_coefficients[15] =
4623{
d5138cf4
PM
4624 const_float64( 0x3ff0000000000000ll ), /* 1 */
4625 const_float64( 0x3fe0000000000000ll ), /* 2 */
4626 const_float64( 0x3fc5555555555555ll ), /* 3 */
4627 const_float64( 0x3fa5555555555555ll ), /* 4 */
4628 const_float64( 0x3f81111111111111ll ), /* 5 */
4629 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
4630 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
4631 const_float64( 0x3efa01a01a01a01all ), /* 8 */
4632 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
4633 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
4634 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
4635 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
4636 const_float64( 0x3de6124613a86d09ll ), /* 13 */
4637 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
4638 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
4639};
4640
e5a41ffa 4641float32 float32_exp2(float32 a, float_status *status)
8229c991 4642{
c120391c 4643 bool aSign;
0c48262d 4644 int aExp;
bb98fe42 4645 uint32_t aSig;
8229c991
AJ
4646 float64 r, x, xn;
4647 int i;
ff32e16e 4648 a = float32_squash_input_denormal(a, status);
8229c991
AJ
4649
4650 aSig = extractFloat32Frac( a );
4651 aExp = extractFloat32Exp( a );
4652 aSign = extractFloat32Sign( a );
4653
4654 if ( aExp == 0xFF) {
ff32e16e
PM
4655 if (aSig) {
4656 return propagateFloat32NaN(a, float32_zero, status);
4657 }
8229c991
AJ
4658 return (aSign) ? float32_zero : a;
4659 }
4660 if (aExp == 0) {
4661 if (aSig == 0) return float32_one;
4662 }
4663
ff32e16e 4664 float_raise(float_flag_inexact, status);
8229c991
AJ
4665
4666 /* ******************************* */
4667 /* using float64 for approximation */
4668 /* ******************************* */
ff32e16e
PM
4669 x = float32_to_float64(a, status);
4670 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
4671
4672 xn = x;
4673 r = float64_one;
4674 for (i = 0 ; i < 15 ; i++) {
4675 float64 f;
4676
ff32e16e
PM
4677 f = float64_mul(xn, float32_exp2_coefficients[i], status);
4678 r = float64_add(r, f, status);
8229c991 4679
ff32e16e 4680 xn = float64_mul(xn, x, status);
8229c991
AJ
4681 }
4682
4683 return float64_to_float32(r, status);
4684}
4685
374dfc33
AJ
4686/*----------------------------------------------------------------------------
4687| Returns the binary log of the single-precision floating-point value `a'.
4688| The operation is performed according to the IEC/IEEE Standard for Binary
4689| Floating-Point Arithmetic.
4690*----------------------------------------------------------------------------*/
e5a41ffa 4691float32 float32_log2(float32 a, float_status *status)
374dfc33 4692{
c120391c 4693 bool aSign, zSign;
0c48262d 4694 int aExp;
bb98fe42 4695 uint32_t aSig, zSig, i;
374dfc33 4696
ff32e16e 4697 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
4698 aSig = extractFloat32Frac( a );
4699 aExp = extractFloat32Exp( a );
4700 aSign = extractFloat32Sign( a );
4701
4702 if ( aExp == 0 ) {
4703 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
4704 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4705 }
4706 if ( aSign ) {
ff32e16e 4707 float_raise(float_flag_invalid, status);
af39bc8c 4708 return float32_default_nan(status);
374dfc33
AJ
4709 }
4710 if ( aExp == 0xFF ) {
ff32e16e
PM
4711 if (aSig) {
4712 return propagateFloat32NaN(a, float32_zero, status);
4713 }
374dfc33
AJ
4714 return a;
4715 }
4716
4717 aExp -= 0x7F;
4718 aSig |= 0x00800000;
4719 zSign = aExp < 0;
4720 zSig = aExp << 23;
4721
4722 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 4723 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
4724 if ( aSig & 0x01000000 ) {
4725 aSig >>= 1;
4726 zSig |= i;
4727 }
4728 }
4729
4730 if ( zSign )
4731 zSig = -zSig;
4732
ff32e16e 4733 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
4734}
4735
158142c2
FB
4736/*----------------------------------------------------------------------------
4737| Returns 1 if the single-precision floating-point value `a' is equal to
b689362d
AJ
4738| the corresponding value `b', and 0 otherwise. The invalid exception is
4739| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
4740| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4741*----------------------------------------------------------------------------*/
4742
e5a41ffa 4743int float32_eq(float32 a, float32 b, float_status *status)
158142c2 4744{
b689362d 4745 uint32_t av, bv;
ff32e16e
PM
4746 a = float32_squash_input_denormal(a, status);
4747 b = float32_squash_input_denormal(b, status);
158142c2
FB
4748
4749 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4750 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4751 ) {
ff32e16e 4752 float_raise(float_flag_invalid, status);
158142c2
FB
4753 return 0;
4754 }
b689362d
AJ
4755 av = float32_val(a);
4756 bv = float32_val(b);
4757 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4758}
4759
4760/*----------------------------------------------------------------------------
4761| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
4762| or equal to the corresponding value `b', and 0 otherwise. The invalid
4763| exception is raised if either operand is a NaN. The comparison is performed
4764| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4765*----------------------------------------------------------------------------*/
4766
e5a41ffa 4767int float32_le(float32 a, float32 b, float_status *status)
158142c2 4768{
c120391c 4769 bool aSign, bSign;
bb98fe42 4770 uint32_t av, bv;
ff32e16e
PM
4771 a = float32_squash_input_denormal(a, status);
4772 b = float32_squash_input_denormal(b, status);
158142c2
FB
4773
4774 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4775 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4776 ) {
ff32e16e 4777 float_raise(float_flag_invalid, status);
158142c2
FB
4778 return 0;
4779 }
4780 aSign = extractFloat32Sign( a );
4781 bSign = extractFloat32Sign( b );
f090c9d4
PB
4782 av = float32_val(a);
4783 bv = float32_val(b);
bb98fe42 4784 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4785 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4786
4787}
4788
4789/*----------------------------------------------------------------------------
4790| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
4791| the corresponding value `b', and 0 otherwise. The invalid exception is
4792| raised if either operand is a NaN. The comparison is performed according
4793| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4794*----------------------------------------------------------------------------*/
4795
e5a41ffa 4796int float32_lt(float32 a, float32 b, float_status *status)
158142c2 4797{
c120391c 4798 bool aSign, bSign;
bb98fe42 4799 uint32_t av, bv;
ff32e16e
PM
4800 a = float32_squash_input_denormal(a, status);
4801 b = float32_squash_input_denormal(b, status);
158142c2
FB
4802
4803 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4804 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4805 ) {
ff32e16e 4806 float_raise(float_flag_invalid, status);
158142c2
FB
4807 return 0;
4808 }
4809 aSign = extractFloat32Sign( a );
4810 bSign = extractFloat32Sign( b );
f090c9d4
PB
4811 av = float32_val(a);
4812 bv = float32_val(b);
bb98fe42 4813 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4814 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4815
4816}
4817
67b7861d
AJ
4818/*----------------------------------------------------------------------------
4819| Returns 1 if the single-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4820| be compared, and 0 otherwise. The invalid exception is raised if either
4821| operand is a NaN. The comparison is performed according to the IEC/IEEE
4822| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4823*----------------------------------------------------------------------------*/
4824
e5a41ffa 4825int float32_unordered(float32 a, float32 b, float_status *status)
67b7861d 4826{
ff32e16e
PM
4827 a = float32_squash_input_denormal(a, status);
4828 b = float32_squash_input_denormal(b, status);
67b7861d
AJ
4829
4830 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4831 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4832 ) {
ff32e16e 4833 float_raise(float_flag_invalid, status);
67b7861d
AJ
4834 return 1;
4835 }
4836 return 0;
4837}
b689362d 4838
158142c2
FB
4839/*----------------------------------------------------------------------------
4840| Returns 1 if the single-precision floating-point value `a' is equal to
f5a64251
AJ
4841| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4842| exception. The comparison is performed according to the IEC/IEEE Standard
4843| for Binary Floating-Point Arithmetic.
158142c2
FB
4844*----------------------------------------------------------------------------*/
4845
e5a41ffa 4846int float32_eq_quiet(float32 a, float32 b, float_status *status)
158142c2 4847{
ff32e16e
PM
4848 a = float32_squash_input_denormal(a, status);
4849 b = float32_squash_input_denormal(b, status);
158142c2
FB
4850
4851 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4852 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4853 ) {
af39bc8c
AM
4854 if (float32_is_signaling_nan(a, status)
4855 || float32_is_signaling_nan(b, status)) {
ff32e16e 4856 float_raise(float_flag_invalid, status);
b689362d 4857 }
158142c2
FB
4858 return 0;
4859 }
b689362d
AJ
4860 return ( float32_val(a) == float32_val(b) ) ||
4861 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
158142c2
FB
4862}
4863
4864/*----------------------------------------------------------------------------
4865| Returns 1 if the single-precision floating-point value `a' is less than or
4866| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4867| cause an exception. Otherwise, the comparison is performed according to the
4868| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4869*----------------------------------------------------------------------------*/
4870
e5a41ffa 4871int float32_le_quiet(float32 a, float32 b, float_status *status)
158142c2 4872{
c120391c 4873 bool aSign, bSign;
bb98fe42 4874 uint32_t av, bv;
ff32e16e
PM
4875 a = float32_squash_input_denormal(a, status);
4876 b = float32_squash_input_denormal(b, status);
158142c2
FB
4877
4878 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4879 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4880 ) {
af39bc8c
AM
4881 if (float32_is_signaling_nan(a, status)
4882 || float32_is_signaling_nan(b, status)) {
ff32e16e 4883 float_raise(float_flag_invalid, status);
158142c2
FB
4884 }
4885 return 0;
4886 }
4887 aSign = extractFloat32Sign( a );
4888 bSign = extractFloat32Sign( b );
f090c9d4
PB
4889 av = float32_val(a);
4890 bv = float32_val(b);
bb98fe42 4891 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4892 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4893
4894}
4895
4896/*----------------------------------------------------------------------------
4897| Returns 1 if the single-precision floating-point value `a' is less than
4898| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4899| exception. Otherwise, the comparison is performed according to the IEC/IEEE
ab52f973 4900| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4901*----------------------------------------------------------------------------*/
4902
ab52f973 4903int float32_lt_quiet(float32 a, float32 b, float_status *status)
158142c2 4904{
c120391c 4905 bool aSign, bSign;
ab52f973
AB
4906 uint32_t av, bv;
4907 a = float32_squash_input_denormal(a, status);
4908 b = float32_squash_input_denormal(b, status);
158142c2 4909
ab52f973
AB
4910 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4911 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4912 ) {
4913 if (float32_is_signaling_nan(a, status)
4914 || float32_is_signaling_nan(b, status)) {
ff32e16e 4915 float_raise(float_flag_invalid, status);
158142c2 4916 }
ab52f973 4917 return 0;
158142c2 4918 }
ab52f973
AB
4919 aSign = extractFloat32Sign( a );
4920 bSign = extractFloat32Sign( b );
4921 av = float32_val(a);
4922 bv = float32_val(b);
4923 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
4924 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4925
4926}
4927
4928/*----------------------------------------------------------------------------
ab52f973
AB
4929| Returns 1 if the single-precision floating-point values `a' and `b' cannot
4930| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4931| comparison is performed according to the IEC/IEEE Standard for Binary
4932| Floating-Point Arithmetic.
158142c2
FB
4933*----------------------------------------------------------------------------*/
4934
ab52f973 4935int float32_unordered_quiet(float32 a, float32 b, float_status *status)
158142c2 4936{
ab52f973
AB
4937 a = float32_squash_input_denormal(a, status);
4938 b = float32_squash_input_denormal(b, status);
158142c2 4939
ab52f973
AB
4940 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4941 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4942 ) {
4943 if (float32_is_signaling_nan(a, status)
4944 || float32_is_signaling_nan(b, status)) {
4945 float_raise(float_flag_invalid, status);
158142c2 4946 }
ab52f973 4947 return 1;
158142c2 4948 }
ab52f973 4949 return 0;
158142c2
FB
4950}
4951
158142c2
FB
4952/*----------------------------------------------------------------------------
4953| Returns the result of converting the double-precision floating-point value
4954| `a' to the extended double-precision floating-point format. The conversion
4955| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4956| Arithmetic.
4957*----------------------------------------------------------------------------*/
4958
e5a41ffa 4959floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2 4960{
c120391c 4961 bool aSign;
0c48262d 4962 int aExp;
bb98fe42 4963 uint64_t aSig;
158142c2 4964
ff32e16e 4965 a = float64_squash_input_denormal(a, status);
158142c2
FB
4966 aSig = extractFloat64Frac( a );
4967 aExp = extractFloat64Exp( a );
4968 aSign = extractFloat64Sign( a );
4969 if ( aExp == 0x7FF ) {
ff32e16e 4970 if (aSig) {
7537c2b4
JM
4971 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
4972 status);
4973 return floatx80_silence_nan(res, status);
ff32e16e 4974 }
0f605c88
LV
4975 return packFloatx80(aSign,
4976 floatx80_infinity_high,
4977 floatx80_infinity_low);
158142c2
FB
4978 }
4979 if ( aExp == 0 ) {
4980 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4981 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4982 }
4983 return
4984 packFloatx80(
e9321124 4985 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
158142c2
FB
4986
4987}
4988
158142c2
FB
4989/*----------------------------------------------------------------------------
4990| Returns the result of converting the double-precision floating-point value
4991| `a' to the quadruple-precision floating-point format. The conversion is
4992| performed according to the IEC/IEEE Standard for Binary Floating-Point
4993| Arithmetic.
4994*----------------------------------------------------------------------------*/
4995
e5a41ffa 4996float128 float64_to_float128(float64 a, float_status *status)
158142c2 4997{
c120391c 4998 bool aSign;
0c48262d 4999 int aExp;
bb98fe42 5000 uint64_t aSig, zSig0, zSig1;
158142c2 5001
ff32e16e 5002 a = float64_squash_input_denormal(a, status);
158142c2
FB
5003 aSig = extractFloat64Frac( a );
5004 aExp = extractFloat64Exp( a );
5005 aSign = extractFloat64Sign( a );
5006 if ( aExp == 0x7FF ) {
ff32e16e
PM
5007 if (aSig) {
5008 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5009 }
158142c2
FB
5010 return packFloat128( aSign, 0x7FFF, 0, 0 );
5011 }
5012 if ( aExp == 0 ) {
5013 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5014 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5015 --aExp;
5016 }
5017 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
5018 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
5019
5020}
5021
158142c2
FB
5022
5023/*----------------------------------------------------------------------------
5024| Returns the remainder of the double-precision floating-point value `a'
5025| with respect to the corresponding value `b'. The operation is performed
5026| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5027*----------------------------------------------------------------------------*/
5028
e5a41ffa 5029float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5030{
c120391c 5031 bool aSign, zSign;
0c48262d 5032 int aExp, bExp, expDiff;
bb98fe42
AF
5033 uint64_t aSig, bSig;
5034 uint64_t q, alternateASig;
5035 int64_t sigMean;
158142c2 5036
ff32e16e
PM
5037 a = float64_squash_input_denormal(a, status);
5038 b = float64_squash_input_denormal(b, status);
158142c2
FB
5039 aSig = extractFloat64Frac( a );
5040 aExp = extractFloat64Exp( a );
5041 aSign = extractFloat64Sign( a );
5042 bSig = extractFloat64Frac( b );
5043 bExp = extractFloat64Exp( b );
158142c2
FB
5044 if ( aExp == 0x7FF ) {
5045 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5046 return propagateFloat64NaN(a, b, status);
158142c2 5047 }
ff32e16e 5048 float_raise(float_flag_invalid, status);
af39bc8c 5049 return float64_default_nan(status);
158142c2
FB
5050 }
5051 if ( bExp == 0x7FF ) {
ff32e16e
PM
5052 if (bSig) {
5053 return propagateFloat64NaN(a, b, status);
5054 }
158142c2
FB
5055 return a;
5056 }
5057 if ( bExp == 0 ) {
5058 if ( bSig == 0 ) {
ff32e16e 5059 float_raise(float_flag_invalid, status);
af39bc8c 5060 return float64_default_nan(status);
158142c2
FB
5061 }
5062 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5063 }
5064 if ( aExp == 0 ) {
5065 if ( aSig == 0 ) return a;
5066 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5067 }
5068 expDiff = aExp - bExp;
e9321124
AB
5069 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5070 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
158142c2
FB
5071 if ( expDiff < 0 ) {
5072 if ( expDiff < -1 ) return a;
5073 aSig >>= 1;
5074 }
5075 q = ( bSig <= aSig );
5076 if ( q ) aSig -= bSig;
5077 expDiff -= 64;
5078 while ( 0 < expDiff ) {
5079 q = estimateDiv128To64( aSig, 0, bSig );
5080 q = ( 2 < q ) ? q - 2 : 0;
5081 aSig = - ( ( bSig>>2 ) * q );
5082 expDiff -= 62;
5083 }
5084 expDiff += 64;
5085 if ( 0 < expDiff ) {
5086 q = estimateDiv128To64( aSig, 0, bSig );
5087 q = ( 2 < q ) ? q - 2 : 0;
5088 q >>= 64 - expDiff;
5089 bSig >>= 2;
5090 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5091 }
5092 else {
5093 aSig >>= 2;
5094 bSig >>= 2;
5095 }
5096 do {
5097 alternateASig = aSig;
5098 ++q;
5099 aSig -= bSig;
bb98fe42 5100 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5101 sigMean = aSig + alternateASig;
5102 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5103 aSig = alternateASig;
5104 }
bb98fe42 5105 zSign = ( (int64_t) aSig < 0 );
158142c2 5106 if ( zSign ) aSig = - aSig;
ff32e16e 5107 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5108
5109}
5110
374dfc33
AJ
5111/*----------------------------------------------------------------------------
5112| Returns the binary log of the double-precision floating-point value `a'.
5113| The operation is performed according to the IEC/IEEE Standard for Binary
5114| Floating-Point Arithmetic.
5115*----------------------------------------------------------------------------*/
e5a41ffa 5116float64 float64_log2(float64 a, float_status *status)
374dfc33 5117{
c120391c 5118 bool aSign, zSign;
0c48262d 5119 int aExp;
bb98fe42 5120 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5121 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5122
5123 aSig = extractFloat64Frac( a );
5124 aExp = extractFloat64Exp( a );
5125 aSign = extractFloat64Sign( a );
5126
5127 if ( aExp == 0 ) {
5128 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5129 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5130 }
5131 if ( aSign ) {
ff32e16e 5132 float_raise(float_flag_invalid, status);
af39bc8c 5133 return float64_default_nan(status);
374dfc33
AJ
5134 }
5135 if ( aExp == 0x7FF ) {
ff32e16e
PM
5136 if (aSig) {
5137 return propagateFloat64NaN(a, float64_zero, status);
5138 }
374dfc33
AJ
5139 return a;
5140 }
5141
5142 aExp -= 0x3FF;
e9321124 5143 aSig |= UINT64_C(0x0010000000000000);
374dfc33 5144 zSign = aExp < 0;
bb98fe42 5145 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5146 for (i = 1LL << 51; i > 0; i >>= 1) {
5147 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5148 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
e9321124 5149 if ( aSig & UINT64_C(0x0020000000000000) ) {
374dfc33
AJ
5150 aSig >>= 1;
5151 zSig |= i;
5152 }
5153 }
5154
5155 if ( zSign )
5156 zSig = -zSig;
ff32e16e 5157 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5158}
5159
158142c2
FB
5160/*----------------------------------------------------------------------------
5161| Returns 1 if the double-precision floating-point value `a' is equal to the
b689362d
AJ
5162| corresponding value `b', and 0 otherwise. The invalid exception is raised
5163| if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
5164| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5165*----------------------------------------------------------------------------*/
5166
e5a41ffa 5167int float64_eq(float64 a, float64 b, float_status *status)
158142c2 5168{
bb98fe42 5169 uint64_t av, bv;
ff32e16e
PM
5170 a = float64_squash_input_denormal(a, status);
5171 b = float64_squash_input_denormal(b, status);
158142c2
FB
5172
5173 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5174 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5175 ) {
ff32e16e 5176 float_raise(float_flag_invalid, status);
158142c2
FB
5177 return 0;
5178 }
f090c9d4 5179 av = float64_val(a);
a1b91bb4 5180 bv = float64_val(b);
bb98fe42 5181 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
5182
5183}
5184
5185/*----------------------------------------------------------------------------
5186| Returns 1 if the double-precision floating-point value `a' is less than or
f5a64251
AJ
5187| equal to the corresponding value `b', and 0 otherwise. The invalid
5188| exception is raised if either operand is a NaN. The comparison is performed
5189| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5190*----------------------------------------------------------------------------*/
5191
e5a41ffa 5192int float64_le(float64 a, float64 b, float_status *status)
158142c2 5193{
c120391c 5194 bool aSign, bSign;
bb98fe42 5195 uint64_t av, bv;
ff32e16e
PM
5196 a = float64_squash_input_denormal(a, status);
5197 b = float64_squash_input_denormal(b, status);
158142c2
FB
5198
5199 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5200 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5201 ) {
ff32e16e 5202 float_raise(float_flag_invalid, status);
158142c2
FB
5203 return 0;
5204 }
5205 aSign = extractFloat64Sign( a );
5206 bSign = extractFloat64Sign( b );
f090c9d4 5207 av = float64_val(a);
a1b91bb4 5208 bv = float64_val(b);
bb98fe42 5209 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 5210 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
5211
5212}
5213
5214/*----------------------------------------------------------------------------
5215| Returns 1 if the double-precision floating-point value `a' is less than
f5a64251
AJ
5216| the corresponding value `b', and 0 otherwise. The invalid exception is
5217| raised if either operand is a NaN. The comparison is performed according
5218| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5219*----------------------------------------------------------------------------*/
5220
e5a41ffa 5221int float64_lt(float64 a, float64 b, float_status *status)
158142c2 5222{
c120391c 5223 bool aSign, bSign;
bb98fe42 5224 uint64_t av, bv;
158142c2 5225
ff32e16e
PM
5226 a = float64_squash_input_denormal(a, status);
5227 b = float64_squash_input_denormal(b, status);
158142c2
FB
5228 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5229 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5230 ) {
ff32e16e 5231 float_raise(float_flag_invalid, status);
158142c2
FB
5232 return 0;
5233 }
5234 aSign = extractFloat64Sign( a );
5235 bSign = extractFloat64Sign( b );
f090c9d4 5236 av = float64_val(a);
a1b91bb4 5237 bv = float64_val(b);
bb98fe42 5238 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 5239 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
5240
5241}
5242
67b7861d
AJ
5243/*----------------------------------------------------------------------------
5244| Returns 1 if the double-precision floating-point values `a' and `b' cannot
f5a64251
AJ
5245| be compared, and 0 otherwise. The invalid exception is raised if either
5246| operand is a NaN. The comparison is performed according to the IEC/IEEE
5247| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
5248*----------------------------------------------------------------------------*/
5249
e5a41ffa 5250int float64_unordered(float64 a, float64 b, float_status *status)
67b7861d 5251{
ff32e16e
PM
5252 a = float64_squash_input_denormal(a, status);
5253 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
5254
5255 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5256 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5257 ) {
ff32e16e 5258 float_raise(float_flag_invalid, status);
67b7861d
AJ
5259 return 1;
5260 }
5261 return 0;
5262}
5263
158142c2
FB
5264/*----------------------------------------------------------------------------
5265| Returns 1 if the double-precision floating-point value `a' is equal to the
f5a64251
AJ
5266| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
5267| exception.The comparison is performed according to the IEC/IEEE Standard
5268| for Binary Floating-Point Arithmetic.
158142c2
FB
5269*----------------------------------------------------------------------------*/
5270
e5a41ffa 5271int float64_eq_quiet(float64 a, float64 b, float_status *status)
158142c2 5272{
bb98fe42 5273 uint64_t av, bv;
ff32e16e
PM
5274 a = float64_squash_input_denormal(a, status);
5275 b = float64_squash_input_denormal(b, status);
158142c2
FB
5276
5277 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5278 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5279 ) {
af39bc8c
AM
5280 if (float64_is_signaling_nan(a, status)
5281 || float64_is_signaling_nan(b, status)) {
ff32e16e 5282 float_raise(float_flag_invalid, status);
b689362d 5283 }
158142c2
FB
5284 return 0;
5285 }
f090c9d4 5286 av = float64_val(a);
a1b91bb4 5287 bv = float64_val(b);
bb98fe42 5288 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
5289
5290}
5291
5292/*----------------------------------------------------------------------------
5293| Returns 1 if the double-precision floating-point value `a' is less than or
5294| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5295| cause an exception. Otherwise, the comparison is performed according to the
5296| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5297*----------------------------------------------------------------------------*/
5298
e5a41ffa 5299int float64_le_quiet(float64 a, float64 b, float_status *status)
158142c2 5300{
c120391c 5301 bool aSign, bSign;
bb98fe42 5302 uint64_t av, bv;
ff32e16e
PM
5303 a = float64_squash_input_denormal(a, status);
5304 b = float64_squash_input_denormal(b, status);
158142c2
FB
5305
5306 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5307 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5308 ) {
af39bc8c
AM
5309 if (float64_is_signaling_nan(a, status)
5310 || float64_is_signaling_nan(b, status)) {
ff32e16e 5311 float_raise(float_flag_invalid, status);
158142c2
FB
5312 }
5313 return 0;
5314 }
5315 aSign = extractFloat64Sign( a );
5316 bSign = extractFloat64Sign( b );
f090c9d4 5317 av = float64_val(a);
a1b91bb4 5318 bv = float64_val(b);
bb98fe42 5319 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 5320 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
5321
5322}
5323
5324/*----------------------------------------------------------------------------
5325| Returns 1 if the double-precision floating-point value `a' is less than
5326| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
5327| exception. Otherwise, the comparison is performed according to the IEC/IEEE
5328| Standard for Binary Floating-Point Arithmetic.
5329*----------------------------------------------------------------------------*/
5330
e5a41ffa 5331int float64_lt_quiet(float64 a, float64 b, float_status *status)
158142c2 5332{
c120391c 5333 bool aSign, bSign;
bb98fe42 5334 uint64_t av, bv;
ff32e16e
PM
5335 a = float64_squash_input_denormal(a, status);
5336 b = float64_squash_input_denormal(b, status);
158142c2
FB
5337
5338 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5339 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5340 ) {
af39bc8c
AM
5341 if (float64_is_signaling_nan(a, status)
5342 || float64_is_signaling_nan(b, status)) {
ff32e16e 5343 float_raise(float_flag_invalid, status);
158142c2
FB
5344 }
5345 return 0;
5346 }
5347 aSign = extractFloat64Sign( a );
5348 bSign = extractFloat64Sign( b );
f090c9d4 5349 av = float64_val(a);
a1b91bb4 5350 bv = float64_val(b);
bb98fe42 5351 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 5352 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
5353
5354}
5355
67b7861d
AJ
5356/*----------------------------------------------------------------------------
5357| Returns 1 if the double-precision floating-point values `a' and `b' cannot
5358| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
5359| comparison is performed according to the IEC/IEEE Standard for Binary
5360| Floating-Point Arithmetic.
5361*----------------------------------------------------------------------------*/
5362
e5a41ffa 5363int float64_unordered_quiet(float64 a, float64 b, float_status *status)
67b7861d 5364{
ff32e16e
PM
5365 a = float64_squash_input_denormal(a, status);
5366 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
5367
5368 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5369 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5370 ) {
af39bc8c
AM
5371 if (float64_is_signaling_nan(a, status)
5372 || float64_is_signaling_nan(b, status)) {
ff32e16e 5373 float_raise(float_flag_invalid, status);
67b7861d
AJ
5374 }
5375 return 1;
5376 }
5377 return 0;
5378}
5379
158142c2
FB
5380/*----------------------------------------------------------------------------
5381| Returns the result of converting the extended double-precision floating-
5382| point value `a' to the 32-bit two's complement integer format. The
5383| conversion is performed according to the IEC/IEEE Standard for Binary
5384| Floating-Point Arithmetic---which means in particular that the conversion
5385| is rounded according to the current rounding mode. If `a' is a NaN, the
5386| largest positive integer is returned. Otherwise, if the conversion
5387| overflows, the largest integer with the same sign as `a' is returned.
5388*----------------------------------------------------------------------------*/
5389
f4014512 5390int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2 5391{
c120391c 5392 bool aSign;
f4014512 5393 int32_t aExp, shiftCount;
bb98fe42 5394 uint64_t aSig;
158142c2 5395
d1eb8f2a
AD
5396 if (floatx80_invalid_encoding(a)) {
5397 float_raise(float_flag_invalid, status);
5398 return 1 << 31;
5399 }
158142c2
FB
5400 aSig = extractFloatx80Frac( a );
5401 aExp = extractFloatx80Exp( a );
5402 aSign = extractFloatx80Sign( a );
bb98fe42 5403 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5404 shiftCount = 0x4037 - aExp;
5405 if ( shiftCount <= 0 ) shiftCount = 1;
5406 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5407 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5408
5409}
5410
5411/*----------------------------------------------------------------------------
5412| Returns the result of converting the extended double-precision floating-
5413| point value `a' to the 32-bit two's complement integer format. The
5414| conversion is performed according to the IEC/IEEE Standard for Binary
5415| Floating-Point Arithmetic, except that the conversion is always rounded
5416| toward zero. If `a' is a NaN, the largest positive integer is returned.
5417| Otherwise, if the conversion overflows, the largest integer with the same
5418| sign as `a' is returned.
5419*----------------------------------------------------------------------------*/
5420
f4014512 5421int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2 5422{
c120391c 5423 bool aSign;
f4014512 5424 int32_t aExp, shiftCount;
bb98fe42 5425 uint64_t aSig, savedASig;
b3a6a2e0 5426 int32_t z;
158142c2 5427
d1eb8f2a
AD
5428 if (floatx80_invalid_encoding(a)) {
5429 float_raise(float_flag_invalid, status);
5430 return 1 << 31;
5431 }
158142c2
FB
5432 aSig = extractFloatx80Frac( a );
5433 aExp = extractFloatx80Exp( a );
5434 aSign = extractFloatx80Sign( a );
5435 if ( 0x401E < aExp ) {
bb98fe42 5436 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5437 goto invalid;
5438 }
5439 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5440 if (aExp || aSig) {
5441 status->float_exception_flags |= float_flag_inexact;
5442 }
158142c2
FB
5443 return 0;
5444 }
5445 shiftCount = 0x403E - aExp;
5446 savedASig = aSig;
5447 aSig >>= shiftCount;
5448 z = aSig;
5449 if ( aSign ) z = - z;
5450 if ( ( z < 0 ) ^ aSign ) {
5451 invalid:
ff32e16e 5452 float_raise(float_flag_invalid, status);
bb98fe42 5453 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5454 }
5455 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 5456 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5457 }
5458 return z;
5459
5460}
5461
5462/*----------------------------------------------------------------------------
5463| Returns the result of converting the extended double-precision floating-
5464| point value `a' to the 64-bit two's complement integer format. The
5465| conversion is performed according to the IEC/IEEE Standard for Binary
5466| Floating-Point Arithmetic---which means in particular that the conversion
5467| is rounded according to the current rounding mode. If `a' is a NaN,
5468| the largest positive integer is returned. Otherwise, if the conversion
5469| overflows, the largest integer with the same sign as `a' is returned.
5470*----------------------------------------------------------------------------*/
5471
f42c2224 5472int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2 5473{
c120391c 5474 bool aSign;
f4014512 5475 int32_t aExp, shiftCount;
bb98fe42 5476 uint64_t aSig, aSigExtra;
158142c2 5477
d1eb8f2a
AD
5478 if (floatx80_invalid_encoding(a)) {
5479 float_raise(float_flag_invalid, status);
5480 return 1ULL << 63;
5481 }
158142c2
FB
5482 aSig = extractFloatx80Frac( a );
5483 aExp = extractFloatx80Exp( a );
5484 aSign = extractFloatx80Sign( a );
5485 shiftCount = 0x403E - aExp;
5486 if ( shiftCount <= 0 ) {
5487 if ( shiftCount ) {
ff32e16e 5488 float_raise(float_flag_invalid, status);
0f605c88 5489 if (!aSign || floatx80_is_any_nan(a)) {
2c217da0 5490 return INT64_MAX;
158142c2 5491 }
2c217da0 5492 return INT64_MIN;
158142c2
FB
5493 }
5494 aSigExtra = 0;
5495 }
5496 else {
5497 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5498 }
ff32e16e 5499 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5500
5501}
5502
5503/*----------------------------------------------------------------------------
5504| Returns the result of converting the extended double-precision floating-
5505| point value `a' to the 64-bit two's complement integer format. The
5506| conversion is performed according to the IEC/IEEE Standard for Binary
5507| Floating-Point Arithmetic, except that the conversion is always rounded
5508| toward zero. If `a' is a NaN, the largest positive integer is returned.
5509| Otherwise, if the conversion overflows, the largest integer with the same
5510| sign as `a' is returned.
5511*----------------------------------------------------------------------------*/
5512
f42c2224 5513int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2 5514{
c120391c 5515 bool aSign;
f4014512 5516 int32_t aExp, shiftCount;
bb98fe42 5517 uint64_t aSig;
f42c2224 5518 int64_t z;
158142c2 5519
d1eb8f2a
AD
5520 if (floatx80_invalid_encoding(a)) {
5521 float_raise(float_flag_invalid, status);
5522 return 1ULL << 63;
5523 }
158142c2
FB
5524 aSig = extractFloatx80Frac( a );
5525 aExp = extractFloatx80Exp( a );
5526 aSign = extractFloatx80Sign( a );
5527 shiftCount = aExp - 0x403E;
5528 if ( 0 <= shiftCount ) {
e9321124 5529 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
158142c2 5530 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5531 float_raise(float_flag_invalid, status);
158142c2 5532 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
2c217da0 5533 return INT64_MAX;
158142c2
FB
5534 }
5535 }
2c217da0 5536 return INT64_MIN;
158142c2
FB
5537 }
5538 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5539 if (aExp | aSig) {
5540 status->float_exception_flags |= float_flag_inexact;
5541 }
158142c2
FB
5542 return 0;
5543 }
5544 z = aSig>>( - shiftCount );
bb98fe42 5545 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 5546 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5547 }
5548 if ( aSign ) z = - z;
5549 return z;
5550
5551}
5552
5553/*----------------------------------------------------------------------------
5554| Returns the result of converting the extended double-precision floating-
5555| point value `a' to the single-precision floating-point format. The
5556| conversion is performed according to the IEC/IEEE Standard for Binary
5557| Floating-Point Arithmetic.
5558*----------------------------------------------------------------------------*/
5559
e5a41ffa 5560float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2 5561{
c120391c 5562 bool aSign;
f4014512 5563 int32_t aExp;
bb98fe42 5564 uint64_t aSig;
158142c2 5565
d1eb8f2a
AD
5566 if (floatx80_invalid_encoding(a)) {
5567 float_raise(float_flag_invalid, status);
5568 return float32_default_nan(status);
5569 }
158142c2
FB
5570 aSig = extractFloatx80Frac( a );
5571 aExp = extractFloatx80Exp( a );
5572 aSign = extractFloatx80Sign( a );
5573 if ( aExp == 0x7FFF ) {
bb98fe42 5574 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5575 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5576 status);
5577 return float32_silence_nan(res, status);
158142c2
FB
5578 }
5579 return packFloat32( aSign, 0xFF, 0 );
5580 }
5581 shift64RightJamming( aSig, 33, &aSig );
5582 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5583 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5584
5585}
5586
5587/*----------------------------------------------------------------------------
5588| Returns the result of converting the extended double-precision floating-
5589| point value `a' to the double-precision floating-point format. The
5590| conversion is performed according to the IEC/IEEE Standard for Binary
5591| Floating-Point Arithmetic.
5592*----------------------------------------------------------------------------*/
5593
e5a41ffa 5594float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2 5595{
c120391c 5596 bool aSign;
f4014512 5597 int32_t aExp;
bb98fe42 5598 uint64_t aSig, zSig;
158142c2 5599
d1eb8f2a
AD
5600 if (floatx80_invalid_encoding(a)) {
5601 float_raise(float_flag_invalid, status);
5602 return float64_default_nan(status);
5603 }
158142c2
FB
5604 aSig = extractFloatx80Frac( a );
5605 aExp = extractFloatx80Exp( a );
5606 aSign = extractFloatx80Sign( a );
5607 if ( aExp == 0x7FFF ) {
bb98fe42 5608 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5609 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5610 status);
5611 return float64_silence_nan(res, status);
158142c2
FB
5612 }
5613 return packFloat64( aSign, 0x7FF, 0 );
5614 }
5615 shift64RightJamming( aSig, 1, &zSig );
5616 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5617 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5618
5619}
5620
158142c2
FB
5621/*----------------------------------------------------------------------------
5622| Returns the result of converting the extended double-precision floating-
5623| point value `a' to the quadruple-precision floating-point format. The
5624| conversion is performed according to the IEC/IEEE Standard for Binary
5625| Floating-Point Arithmetic.
5626*----------------------------------------------------------------------------*/
5627
e5a41ffa 5628float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2 5629{
c120391c 5630 bool aSign;
0c48262d 5631 int aExp;
bb98fe42 5632 uint64_t aSig, zSig0, zSig1;
158142c2 5633
d1eb8f2a
AD
5634 if (floatx80_invalid_encoding(a)) {
5635 float_raise(float_flag_invalid, status);
5636 return float128_default_nan(status);
5637 }
158142c2
FB
5638 aSig = extractFloatx80Frac( a );
5639 aExp = extractFloatx80Exp( a );
5640 aSign = extractFloatx80Sign( a );
bb98fe42 5641 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5642 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5643 status);
5644 return float128_silence_nan(res, status);
158142c2
FB
5645 }
5646 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5647 return packFloat128( aSign, aExp, zSig0, zSig1 );
5648
5649}
5650
0f721292
LV
5651/*----------------------------------------------------------------------------
5652| Rounds the extended double-precision floating-point value `a'
5653| to the precision provided by floatx80_rounding_precision and returns the
5654| result as an extended double-precision floating-point value.
5655| The operation is performed according to the IEC/IEEE Standard for Binary
5656| Floating-Point Arithmetic.
5657*----------------------------------------------------------------------------*/
5658
5659floatx80 floatx80_round(floatx80 a, float_status *status)
5660{
5661 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5662 extractFloatx80Sign(a),
5663 extractFloatx80Exp(a),
5664 extractFloatx80Frac(a), 0, status);
5665}
5666
158142c2
FB
5667/*----------------------------------------------------------------------------
5668| Rounds the extended double-precision floating-point value `a' to an integer,
5669| and returns the result as an extended quadruple-precision floating-point
5670| value. The operation is performed according to the IEC/IEEE Standard for
5671| Binary Floating-Point Arithmetic.
5672*----------------------------------------------------------------------------*/
5673
e5a41ffa 5674floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2 5675{
c120391c 5676 bool aSign;
f4014512 5677 int32_t aExp;
bb98fe42 5678 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5679 floatx80 z;
5680
d1eb8f2a
AD
5681 if (floatx80_invalid_encoding(a)) {
5682 float_raise(float_flag_invalid, status);
5683 return floatx80_default_nan(status);
5684 }
158142c2
FB
5685 aExp = extractFloatx80Exp( a );
5686 if ( 0x403E <= aExp ) {
bb98fe42 5687 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5688 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5689 }
5690 return a;
5691 }
5692 if ( aExp < 0x3FFF ) {
5693 if ( ( aExp == 0 )
9ecaf5cc 5694 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
158142c2
FB
5695 return a;
5696 }
a2f2d288 5697 status->float_exception_flags |= float_flag_inexact;
158142c2 5698 aSign = extractFloatx80Sign( a );
a2f2d288 5699 switch (status->float_rounding_mode) {
158142c2 5700 case float_round_nearest_even:
bb98fe42 5701 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5702 ) {
5703 return
e9321124 5704 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5705 }
5706 break;
f9288a76
PM
5707 case float_round_ties_away:
5708 if (aExp == 0x3FFE) {
e9321124 5709 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
f9288a76
PM
5710 }
5711 break;
158142c2
FB
5712 case float_round_down:
5713 return
5714 aSign ?
e9321124 5715 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
158142c2
FB
5716 : packFloatx80( 0, 0, 0 );
5717 case float_round_up:
5718 return
5719 aSign ? packFloatx80( 1, 0, 0 )
e9321124 5720 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
3dede407
RH
5721
5722 case float_round_to_zero:
5723 break;
5724 default:
5725 g_assert_not_reached();
158142c2
FB
5726 }
5727 return packFloatx80( aSign, 0, 0 );
5728 }
5729 lastBitMask = 1;
5730 lastBitMask <<= 0x403E - aExp;
5731 roundBitsMask = lastBitMask - 1;
5732 z = a;
a2f2d288 5733 switch (status->float_rounding_mode) {
dc355b76 5734 case float_round_nearest_even:
158142c2 5735 z.low += lastBitMask>>1;
dc355b76
PM
5736 if ((z.low & roundBitsMask) == 0) {
5737 z.low &= ~lastBitMask;
5738 }
5739 break;
f9288a76
PM
5740 case float_round_ties_away:
5741 z.low += lastBitMask >> 1;
5742 break;
dc355b76
PM
5743 case float_round_to_zero:
5744 break;
5745 case float_round_up:
5746 if (!extractFloatx80Sign(z)) {
5747 z.low += roundBitsMask;
5748 }
5749 break;
5750 case float_round_down:
5751 if (extractFloatx80Sign(z)) {
158142c2
FB
5752 z.low += roundBitsMask;
5753 }
dc355b76
PM
5754 break;
5755 default:
5756 abort();
158142c2
FB
5757 }
5758 z.low &= ~ roundBitsMask;
5759 if ( z.low == 0 ) {
5760 ++z.high;
e9321124 5761 z.low = UINT64_C(0x8000000000000000);
158142c2 5762 }
a2f2d288
PM
5763 if (z.low != a.low) {
5764 status->float_exception_flags |= float_flag_inexact;
5765 }
158142c2
FB
5766 return z;
5767
5768}
5769
5770/*----------------------------------------------------------------------------
5771| Returns the result of adding the absolute values of the extended double-
5772| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5773| negated before being returned. `zSign' is ignored if the result is a NaN.
5774| The addition is performed according to the IEC/IEEE Standard for Binary
5775| Floating-Point Arithmetic.
5776*----------------------------------------------------------------------------*/
5777
c120391c 5778static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5779 float_status *status)
158142c2 5780{
f4014512 5781 int32_t aExp, bExp, zExp;
bb98fe42 5782 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5783 int32_t expDiff;
158142c2
FB
5784
5785 aSig = extractFloatx80Frac( a );
5786 aExp = extractFloatx80Exp( a );
5787 bSig = extractFloatx80Frac( b );
5788 bExp = extractFloatx80Exp( b );
5789 expDiff = aExp - bExp;
5790 if ( 0 < expDiff ) {
5791 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5792 if ((uint64_t)(aSig << 1)) {
5793 return propagateFloatx80NaN(a, b, status);
5794 }
158142c2
FB
5795 return a;
5796 }
5797 if ( bExp == 0 ) --expDiff;
5798 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5799 zExp = aExp;
5800 }
5801 else if ( expDiff < 0 ) {
5802 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5803 if ((uint64_t)(bSig << 1)) {
5804 return propagateFloatx80NaN(a, b, status);
5805 }
0f605c88
LV
5806 return packFloatx80(zSign,
5807 floatx80_infinity_high,
5808 floatx80_infinity_low);
158142c2
FB
5809 }
5810 if ( aExp == 0 ) ++expDiff;
5811 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5812 zExp = bExp;
5813 }
5814 else {
5815 if ( aExp == 0x7FFF ) {
bb98fe42 5816 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5817 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5818 }
5819 return a;
5820 }
5821 zSig1 = 0;
5822 zSig0 = aSig + bSig;
5823 if ( aExp == 0 ) {
41602807
JM
5824 if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
5825 /* At least one of the values is a pseudo-denormal,
5826 * and there is a carry out of the result. */
5827 zExp = 1;
5828 goto shiftRight1;
5829 }
2f311075
RH
5830 if (zSig0 == 0) {
5831 return packFloatx80(zSign, 0, 0);
5832 }
158142c2
FB
5833 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5834 goto roundAndPack;
5835 }
5836 zExp = aExp;
5837 goto shiftRight1;
5838 }
5839 zSig0 = aSig + bSig;
bb98fe42 5840 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5841 shiftRight1:
5842 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
e9321124 5843 zSig0 |= UINT64_C(0x8000000000000000);
158142c2
FB
5844 ++zExp;
5845 roundAndPack:
a2f2d288 5846 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5847 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5848}
5849
5850/*----------------------------------------------------------------------------
5851| Returns the result of subtracting the absolute values of the extended
5852| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5853| difference is negated before being returned. `zSign' is ignored if the
5854| result is a NaN. The subtraction is performed according to the IEC/IEEE
5855| Standard for Binary Floating-Point Arithmetic.
5856*----------------------------------------------------------------------------*/
5857
c120391c 5858static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5859 float_status *status)
158142c2 5860{
f4014512 5861 int32_t aExp, bExp, zExp;
bb98fe42 5862 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5863 int32_t expDiff;
158142c2
FB
5864
5865 aSig = extractFloatx80Frac( a );
5866 aExp = extractFloatx80Exp( a );
5867 bSig = extractFloatx80Frac( b );
5868 bExp = extractFloatx80Exp( b );
5869 expDiff = aExp - bExp;
5870 if ( 0 < expDiff ) goto aExpBigger;
5871 if ( expDiff < 0 ) goto bExpBigger;
5872 if ( aExp == 0x7FFF ) {
bb98fe42 5873 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5874 return propagateFloatx80NaN(a, b, status);
158142c2 5875 }
ff32e16e 5876 float_raise(float_flag_invalid, status);
af39bc8c 5877 return floatx80_default_nan(status);
158142c2
FB
5878 }
5879 if ( aExp == 0 ) {
5880 aExp = 1;
5881 bExp = 1;
5882 }
5883 zSig1 = 0;
5884 if ( bSig < aSig ) goto aBigger;
5885 if ( aSig < bSig ) goto bBigger;
a2f2d288 5886 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5887 bExpBigger:
5888 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5889 if ((uint64_t)(bSig << 1)) {
5890 return propagateFloatx80NaN(a, b, status);
5891 }
0f605c88
LV
5892 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5893 floatx80_infinity_low);
158142c2
FB
5894 }
5895 if ( aExp == 0 ) ++expDiff;
5896 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5897 bBigger:
5898 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5899 zExp = bExp;
5900 zSign ^= 1;
5901 goto normalizeRoundAndPack;
5902 aExpBigger:
5903 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5904 if ((uint64_t)(aSig << 1)) {
5905 return propagateFloatx80NaN(a, b, status);
5906 }
158142c2
FB
5907 return a;
5908 }
5909 if ( bExp == 0 ) --expDiff;
5910 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5911 aBigger:
5912 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5913 zExp = aExp;
5914 normalizeRoundAndPack:
a2f2d288 5915 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5916 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5917}
5918
5919/*----------------------------------------------------------------------------
5920| Returns the result of adding the extended double-precision floating-point
5921| values `a' and `b'. The operation is performed according to the IEC/IEEE
5922| Standard for Binary Floating-Point Arithmetic.
5923*----------------------------------------------------------------------------*/
5924
e5a41ffa 5925floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2 5926{
c120391c 5927 bool aSign, bSign;
158142c2 5928
d1eb8f2a
AD
5929 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5930 float_raise(float_flag_invalid, status);
5931 return floatx80_default_nan(status);
5932 }
158142c2
FB
5933 aSign = extractFloatx80Sign( a );
5934 bSign = extractFloatx80Sign( b );
5935 if ( aSign == bSign ) {
ff32e16e 5936 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5937 }
5938 else {
ff32e16e 5939 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5940 }
5941
5942}
5943
5944/*----------------------------------------------------------------------------
5945| Returns the result of subtracting the extended double-precision floating-
5946| point values `a' and `b'. The operation is performed according to the
5947| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5948*----------------------------------------------------------------------------*/
5949
e5a41ffa 5950floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2 5951{
c120391c 5952 bool aSign, bSign;
158142c2 5953
d1eb8f2a
AD
5954 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5955 float_raise(float_flag_invalid, status);
5956 return floatx80_default_nan(status);
5957 }
158142c2
FB
5958 aSign = extractFloatx80Sign( a );
5959 bSign = extractFloatx80Sign( b );
5960 if ( aSign == bSign ) {
ff32e16e 5961 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5962 }
5963 else {
ff32e16e 5964 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5965 }
5966
5967}
5968
5969/*----------------------------------------------------------------------------
5970| Returns the result of multiplying the extended double-precision floating-
5971| point values `a' and `b'. The operation is performed according to the
5972| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5973*----------------------------------------------------------------------------*/
5974
e5a41ffa 5975floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2 5976{
c120391c 5977 bool aSign, bSign, zSign;
f4014512 5978 int32_t aExp, bExp, zExp;
bb98fe42 5979 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 5980
d1eb8f2a
AD
5981 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5982 float_raise(float_flag_invalid, status);
5983 return floatx80_default_nan(status);
5984 }
158142c2
FB
5985 aSig = extractFloatx80Frac( a );
5986 aExp = extractFloatx80Exp( a );
5987 aSign = extractFloatx80Sign( a );
5988 bSig = extractFloatx80Frac( b );
5989 bExp = extractFloatx80Exp( b );
5990 bSign = extractFloatx80Sign( b );
5991 zSign = aSign ^ bSign;
5992 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5993 if ( (uint64_t) ( aSig<<1 )
5994 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5995 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5996 }
5997 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
5998 return packFloatx80(zSign, floatx80_infinity_high,
5999 floatx80_infinity_low);
158142c2
FB
6000 }
6001 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6002 if ((uint64_t)(bSig << 1)) {
6003 return propagateFloatx80NaN(a, b, status);
6004 }
158142c2
FB
6005 if ( ( aExp | aSig ) == 0 ) {
6006 invalid:
ff32e16e 6007 float_raise(float_flag_invalid, status);
af39bc8c 6008 return floatx80_default_nan(status);
158142c2 6009 }
0f605c88
LV
6010 return packFloatx80(zSign, floatx80_infinity_high,
6011 floatx80_infinity_low);
158142c2
FB
6012 }
6013 if ( aExp == 0 ) {
6014 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6015 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6016 }
6017 if ( bExp == 0 ) {
6018 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6019 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6020 }
6021 zExp = aExp + bExp - 0x3FFE;
6022 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 6023 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
6024 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6025 --zExp;
6026 }
a2f2d288 6027 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6028 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6029}
6030
6031/*----------------------------------------------------------------------------
6032| Returns the result of dividing the extended double-precision floating-point
6033| value `a' by the corresponding value `b'. The operation is performed
6034| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6035*----------------------------------------------------------------------------*/
6036
e5a41ffa 6037floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2 6038{
c120391c 6039 bool aSign, bSign, zSign;
f4014512 6040 int32_t aExp, bExp, zExp;
bb98fe42
AF
6041 uint64_t aSig, bSig, zSig0, zSig1;
6042 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6043
d1eb8f2a
AD
6044 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6045 float_raise(float_flag_invalid, status);
6046 return floatx80_default_nan(status);
6047 }
158142c2
FB
6048 aSig = extractFloatx80Frac( a );
6049 aExp = extractFloatx80Exp( a );
6050 aSign = extractFloatx80Sign( a );
6051 bSig = extractFloatx80Frac( b );
6052 bExp = extractFloatx80Exp( b );
6053 bSign = extractFloatx80Sign( b );
6054 zSign = aSign ^ bSign;
6055 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6056 if ((uint64_t)(aSig << 1)) {
6057 return propagateFloatx80NaN(a, b, status);
6058 }
158142c2 6059 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6060 if ((uint64_t)(bSig << 1)) {
6061 return propagateFloatx80NaN(a, b, status);
6062 }
158142c2
FB
6063 goto invalid;
6064 }
0f605c88
LV
6065 return packFloatx80(zSign, floatx80_infinity_high,
6066 floatx80_infinity_low);
158142c2
FB
6067 }
6068 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6069 if ((uint64_t)(bSig << 1)) {
6070 return propagateFloatx80NaN(a, b, status);
6071 }
158142c2
FB
6072 return packFloatx80( zSign, 0, 0 );
6073 }
6074 if ( bExp == 0 ) {
6075 if ( bSig == 0 ) {
6076 if ( ( aExp | aSig ) == 0 ) {
6077 invalid:
ff32e16e 6078 float_raise(float_flag_invalid, status);
af39bc8c 6079 return floatx80_default_nan(status);
158142c2 6080 }
ff32e16e 6081 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6082 return packFloatx80(zSign, floatx80_infinity_high,
6083 floatx80_infinity_low);
158142c2
FB
6084 }
6085 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6086 }
6087 if ( aExp == 0 ) {
6088 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6089 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6090 }
6091 zExp = aExp - bExp + 0x3FFE;
6092 rem1 = 0;
6093 if ( bSig <= aSig ) {
6094 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6095 ++zExp;
6096 }
6097 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6098 mul64To128( bSig, zSig0, &term0, &term1 );
6099 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 6100 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6101 --zSig0;
6102 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6103 }
6104 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 6105 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6106 mul64To128( bSig, zSig1, &term1, &term2 );
6107 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6108 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6109 --zSig1;
6110 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6111 }
6112 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6113 }
a2f2d288 6114 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6115 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6116}
6117
6118/*----------------------------------------------------------------------------
6119| Returns the remainder of the extended double-precision floating-point value
6120| `a' with respect to the corresponding value `b'. The operation is performed
6121| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6122*----------------------------------------------------------------------------*/
6123
e5a41ffa 6124floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
158142c2 6125{
c120391c 6126 bool aSign, zSign;
f4014512 6127 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6128 uint64_t aSig0, aSig1, bSig;
6129 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6130
d1eb8f2a
AD
6131 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6132 float_raise(float_flag_invalid, status);
6133 return floatx80_default_nan(status);
6134 }
158142c2
FB
6135 aSig0 = extractFloatx80Frac( a );
6136 aExp = extractFloatx80Exp( a );
6137 aSign = extractFloatx80Sign( a );
6138 bSig = extractFloatx80Frac( b );
6139 bExp = extractFloatx80Exp( b );
158142c2 6140 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6141 if ( (uint64_t) ( aSig0<<1 )
6142 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6143 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6144 }
6145 goto invalid;
6146 }
6147 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6148 if ((uint64_t)(bSig << 1)) {
6149 return propagateFloatx80NaN(a, b, status);
6150 }
158142c2
FB
6151 return a;
6152 }
6153 if ( bExp == 0 ) {
6154 if ( bSig == 0 ) {
6155 invalid:
ff32e16e 6156 float_raise(float_flag_invalid, status);
af39bc8c 6157 return floatx80_default_nan(status);
158142c2
FB
6158 }
6159 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6160 }
6161 if ( aExp == 0 ) {
bb98fe42 6162 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
158142c2
FB
6163 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6164 }
e9321124 6165 bSig |= UINT64_C(0x8000000000000000);
158142c2
FB
6166 zSign = aSign;
6167 expDiff = aExp - bExp;
6168 aSig1 = 0;
6169 if ( expDiff < 0 ) {
6170 if ( expDiff < -1 ) return a;
6171 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6172 expDiff = 0;
6173 }
6174 q = ( bSig <= aSig0 );
6175 if ( q ) aSig0 -= bSig;
6176 expDiff -= 64;
6177 while ( 0 < expDiff ) {
6178 q = estimateDiv128To64( aSig0, aSig1, bSig );
6179 q = ( 2 < q ) ? q - 2 : 0;
6180 mul64To128( bSig, q, &term0, &term1 );
6181 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6182 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6183 expDiff -= 62;
6184 }
6185 expDiff += 64;
6186 if ( 0 < expDiff ) {
6187 q = estimateDiv128To64( aSig0, aSig1, bSig );
6188 q = ( 2 < q ) ? q - 2 : 0;
6189 q >>= 64 - expDiff;
6190 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6191 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6192 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6193 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6194 ++q;
6195 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6196 }
6197 }
6198 else {
6199 term1 = 0;
6200 term0 = bSig;
6201 }
6202 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6203 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6204 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6205 && ( q & 1 ) )
6206 ) {
6207 aSig0 = alternateASig0;
6208 aSig1 = alternateASig1;
6209 zSign = ! zSign;
6210 }
6211 return
6212 normalizeRoundAndPackFloatx80(
ff32e16e 6213 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6214
6215}
6216
6217/*----------------------------------------------------------------------------
6218| Returns the square root of the extended double-precision floating-point
6219| value `a'. The operation is performed according to the IEC/IEEE Standard
6220| for Binary Floating-Point Arithmetic.
6221*----------------------------------------------------------------------------*/
6222
e5a41ffa 6223floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2 6224{
c120391c 6225 bool aSign;
f4014512 6226 int32_t aExp, zExp;
bb98fe42
AF
6227 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6228 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6229
d1eb8f2a
AD
6230 if (floatx80_invalid_encoding(a)) {
6231 float_raise(float_flag_invalid, status);
6232 return floatx80_default_nan(status);
6233 }
158142c2
FB
6234 aSig0 = extractFloatx80Frac( a );
6235 aExp = extractFloatx80Exp( a );
6236 aSign = extractFloatx80Sign( a );
6237 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6238 if ((uint64_t)(aSig0 << 1)) {
6239 return propagateFloatx80NaN(a, a, status);
6240 }
158142c2
FB
6241 if ( ! aSign ) return a;
6242 goto invalid;
6243 }
6244 if ( aSign ) {
6245 if ( ( aExp | aSig0 ) == 0 ) return a;
6246 invalid:
ff32e16e 6247 float_raise(float_flag_invalid, status);
af39bc8c 6248 return floatx80_default_nan(status);
158142c2
FB
6249 }
6250 if ( aExp == 0 ) {
6251 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6252 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6253 }
6254 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6255 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6256 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6257 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6258 doubleZSig0 = zSig0<<1;
6259 mul64To128( zSig0, zSig0, &term0, &term1 );
6260 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6261 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6262 --zSig0;
6263 doubleZSig0 -= 2;
6264 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6265 }
6266 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
e9321124 6267 if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
158142c2
FB
6268 if ( zSig1 == 0 ) zSig1 = 1;
6269 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6270 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6271 mul64To128( zSig1, zSig1, &term2, &term3 );
6272 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6273 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6274 --zSig1;
6275 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6276 term3 |= 1;
6277 term2 |= doubleZSig0;
6278 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6279 }
6280 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6281 }
6282 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6283 zSig0 |= doubleZSig0;
a2f2d288
PM
6284 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6285 0, zExp, zSig0, zSig1, status);
158142c2
FB
6286}
6287
6288/*----------------------------------------------------------------------------
b689362d
AJ
6289| Returns 1 if the extended double-precision floating-point value `a' is equal
6290| to the corresponding value `b', and 0 otherwise. The invalid exception is
6291| raised if either operand is a NaN. Otherwise, the comparison is performed
6292| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6293*----------------------------------------------------------------------------*/
6294
e5a41ffa 6295int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6296{
6297
d1eb8f2a
AD
6298 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6299 || (extractFloatx80Exp(a) == 0x7FFF
6300 && (uint64_t) (extractFloatx80Frac(a) << 1))
6301 || (extractFloatx80Exp(b) == 0x7FFF
6302 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6303 ) {
ff32e16e 6304 float_raise(float_flag_invalid, status);
158142c2
FB
6305 return 0;
6306 }
6307 return
6308 ( a.low == b.low )
6309 && ( ( a.high == b.high )
6310 || ( ( a.low == 0 )
bb98fe42 6311 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6312 );
6313
6314}
6315
6316/*----------------------------------------------------------------------------
6317| Returns 1 if the extended double-precision floating-point value `a' is
6318| less than or equal to the corresponding value `b', and 0 otherwise. The
f5a64251
AJ
6319| invalid exception is raised if either operand is a NaN. The comparison is
6320| performed according to the IEC/IEEE Standard for Binary Floating-Point
6321| Arithmetic.
158142c2
FB
6322*----------------------------------------------------------------------------*/
6323
e5a41ffa 6324int floatx80_le(floatx80 a, floatx80 b, float_status *status)
158142c2 6325{
c120391c 6326 bool aSign, bSign;
158142c2 6327
d1eb8f2a
AD
6328 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6329 || (extractFloatx80Exp(a) == 0x7FFF
6330 && (uint64_t) (extractFloatx80Frac(a) << 1))
6331 || (extractFloatx80Exp(b) == 0x7FFF
6332 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6333 ) {
ff32e16e 6334 float_raise(float_flag_invalid, status);
158142c2
FB
6335 return 0;
6336 }
6337 aSign = extractFloatx80Sign( a );
6338 bSign = extractFloatx80Sign( b );
6339 if ( aSign != bSign ) {
6340 return
6341 aSign
bb98fe42 6342 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6343 == 0 );
6344 }
6345 return
6346 aSign ? le128( b.high, b.low, a.high, a.low )
6347 : le128( a.high, a.low, b.high, b.low );
6348
6349}
6350
6351/*----------------------------------------------------------------------------
6352| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
6353| less than the corresponding value `b', and 0 otherwise. The invalid
6354| exception is raised if either operand is a NaN. The comparison is performed
6355| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6356*----------------------------------------------------------------------------*/
6357
e5a41ffa 6358int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
158142c2 6359{
c120391c 6360 bool aSign, bSign;
158142c2 6361
d1eb8f2a
AD
6362 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6363 || (extractFloatx80Exp(a) == 0x7FFF
6364 && (uint64_t) (extractFloatx80Frac(a) << 1))
6365 || (extractFloatx80Exp(b) == 0x7FFF
6366 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6367 ) {
ff32e16e 6368 float_raise(float_flag_invalid, status);
158142c2
FB
6369 return 0;
6370 }
6371 aSign = extractFloatx80Sign( a );
6372 bSign = extractFloatx80Sign( b );
6373 if ( aSign != bSign ) {
6374 return
6375 aSign
bb98fe42 6376 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6377 != 0 );
6378 }
6379 return
6380 aSign ? lt128( b.high, b.low, a.high, a.low )
6381 : lt128( a.high, a.low, b.high, b.low );
6382
6383}
6384
67b7861d
AJ
6385/*----------------------------------------------------------------------------
6386| Returns 1 if the extended double-precision floating-point values `a' and `b'
f5a64251
AJ
6387| cannot be compared, and 0 otherwise. The invalid exception is raised if
6388| either operand is a NaN. The comparison is performed according to the
6389| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
67b7861d 6390*----------------------------------------------------------------------------*/
e5a41ffa 6391int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
67b7861d 6392{
d1eb8f2a
AD
6393 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6394 || (extractFloatx80Exp(a) == 0x7FFF
6395 && (uint64_t) (extractFloatx80Frac(a) << 1))
6396 || (extractFloatx80Exp(b) == 0x7FFF
6397 && (uint64_t) (extractFloatx80Frac(b) << 1))
67b7861d 6398 ) {
ff32e16e 6399 float_raise(float_flag_invalid, status);
67b7861d
AJ
6400 return 1;
6401 }
6402 return 0;
6403}
6404
158142c2 6405/*----------------------------------------------------------------------------
b689362d 6406| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
6407| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6408| cause an exception. The comparison is performed according to the IEC/IEEE
6409| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6410*----------------------------------------------------------------------------*/
6411
e5a41ffa 6412int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6413{
6414
d1eb8f2a
AD
6415 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6416 float_raise(float_flag_invalid, status);
6417 return 0;
6418 }
158142c2 6419 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6420 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6421 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6422 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6423 ) {
af39bc8c
AM
6424 if (floatx80_is_signaling_nan(a, status)
6425 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6426 float_raise(float_flag_invalid, status);
b689362d 6427 }
158142c2
FB
6428 return 0;
6429 }
6430 return
6431 ( a.low == b.low )
6432 && ( ( a.high == b.high )
6433 || ( ( a.low == 0 )
bb98fe42 6434 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6435 );
6436
6437}
6438
6439/*----------------------------------------------------------------------------
6440| Returns 1 if the extended double-precision floating-point value `a' is less
6441| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
6442| do not cause an exception. Otherwise, the comparison is performed according
6443| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6444*----------------------------------------------------------------------------*/
6445
e5a41ffa 6446int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2 6447{
c120391c 6448 bool aSign, bSign;
158142c2 6449
d1eb8f2a
AD
6450 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6451 float_raise(float_flag_invalid, status);
6452 return 0;
6453 }
158142c2 6454 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6455 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6456 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6457 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6458 ) {
af39bc8c
AM
6459 if (floatx80_is_signaling_nan(a, status)
6460 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6461 float_raise(float_flag_invalid, status);
158142c2
FB
6462 }
6463 return 0;
6464 }
6465 aSign = extractFloatx80Sign( a );
6466 bSign = extractFloatx80Sign( b );
6467 if ( aSign != bSign ) {
6468 return
6469 aSign
bb98fe42 6470 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6471 == 0 );
6472 }
6473 return
6474 aSign ? le128( b.high, b.low, a.high, a.low )
6475 : le128( a.high, a.low, b.high, b.low );
6476
6477}
6478
6479/*----------------------------------------------------------------------------
6480| Returns 1 if the extended double-precision floating-point value `a' is less
6481| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
6482| an exception. Otherwise, the comparison is performed according to the
6483| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6484*----------------------------------------------------------------------------*/
6485
e5a41ffa 6486int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2 6487{
c120391c 6488 bool aSign, bSign;
158142c2 6489
d1eb8f2a
AD
6490 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6491 float_raise(float_flag_invalid, status);
6492 return 0;
6493 }
158142c2 6494 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6495 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6496 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6497 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6498 ) {
af39bc8c
AM
6499 if (floatx80_is_signaling_nan(a, status)
6500 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6501 float_raise(float_flag_invalid, status);
158142c2
FB
6502 }
6503 return 0;
6504 }
6505 aSign = extractFloatx80Sign( a );
6506 bSign = extractFloatx80Sign( b );
6507 if ( aSign != bSign ) {
6508 return
6509 aSign
bb98fe42 6510 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6511 != 0 );
6512 }
6513 return
6514 aSign ? lt128( b.high, b.low, a.high, a.low )
6515 : lt128( a.high, a.low, b.high, b.low );
6516
6517}
6518
67b7861d
AJ
6519/*----------------------------------------------------------------------------
6520| Returns 1 if the extended double-precision floating-point values `a' and `b'
6521| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
6522| The comparison is performed according to the IEC/IEEE Standard for Binary
6523| Floating-Point Arithmetic.
6524*----------------------------------------------------------------------------*/
e5a41ffa 6525int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
67b7861d 6526{
d1eb8f2a
AD
6527 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6528 float_raise(float_flag_invalid, status);
6529 return 1;
6530 }
67b7861d
AJ
6531 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
6532 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
6533 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
6534 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
6535 ) {
af39bc8c
AM
6536 if (floatx80_is_signaling_nan(a, status)
6537 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6538 float_raise(float_flag_invalid, status);
67b7861d
AJ
6539 }
6540 return 1;
6541 }
6542 return 0;
6543}
6544
158142c2
FB
6545/*----------------------------------------------------------------------------
6546| Returns the result of converting the quadruple-precision floating-point
6547| value `a' to the 32-bit two's complement integer format. The conversion
6548| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6549| Arithmetic---which means in particular that the conversion is rounded
6550| according to the current rounding mode. If `a' is a NaN, the largest
6551| positive integer is returned. Otherwise, if the conversion overflows, the
6552| largest integer with the same sign as `a' is returned.
6553*----------------------------------------------------------------------------*/
6554
f4014512 6555int32_t float128_to_int32(float128 a, float_status *status)
158142c2 6556{
c120391c 6557 bool aSign;
f4014512 6558 int32_t aExp, shiftCount;
bb98fe42 6559 uint64_t aSig0, aSig1;
158142c2
FB
6560
6561 aSig1 = extractFloat128Frac1( a );
6562 aSig0 = extractFloat128Frac0( a );
6563 aExp = extractFloat128Exp( a );
6564 aSign = extractFloat128Sign( a );
6565 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
e9321124 6566 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6567 aSig0 |= ( aSig1 != 0 );
6568 shiftCount = 0x4028 - aExp;
6569 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 6570 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
6571
6572}
6573
6574/*----------------------------------------------------------------------------
6575| Returns the result of converting the quadruple-precision floating-point
6576| value `a' to the 32-bit two's complement integer format. The conversion
6577| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6578| Arithmetic, except that the conversion is always rounded toward zero. If
6579| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
6580| conversion overflows, the largest integer with the same sign as `a' is
6581| returned.
6582*----------------------------------------------------------------------------*/
6583
f4014512 6584int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2 6585{
c120391c 6586 bool aSign;
f4014512 6587 int32_t aExp, shiftCount;
bb98fe42 6588 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 6589 int32_t z;
158142c2
FB
6590
6591 aSig1 = extractFloat128Frac1( a );
6592 aSig0 = extractFloat128Frac0( a );
6593 aExp = extractFloat128Exp( a );
6594 aSign = extractFloat128Sign( a );
6595 aSig0 |= ( aSig1 != 0 );
6596 if ( 0x401E < aExp ) {
6597 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6598 goto invalid;
6599 }
6600 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
6601 if (aExp || aSig0) {
6602 status->float_exception_flags |= float_flag_inexact;
6603 }
158142c2
FB
6604 return 0;
6605 }
e9321124 6606 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6607 shiftCount = 0x402F - aExp;
6608 savedASig = aSig0;
6609 aSig0 >>= shiftCount;
6610 z = aSig0;
6611 if ( aSign ) z = - z;
6612 if ( ( z < 0 ) ^ aSign ) {
6613 invalid:
ff32e16e 6614 float_raise(float_flag_invalid, status);
2c217da0 6615 return aSign ? INT32_MIN : INT32_MAX;
158142c2
FB
6616 }
6617 if ( ( aSig0<<shiftCount ) != savedASig ) {
a2f2d288 6618 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6619 }
6620 return z;
6621
6622}
6623
6624/*----------------------------------------------------------------------------
6625| Returns the result of converting the quadruple-precision floating-point
6626| value `a' to the 64-bit two's complement integer format. The conversion
6627| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6628| Arithmetic---which means in particular that the conversion is rounded
6629| according to the current rounding mode. If `a' is a NaN, the largest
6630| positive integer is returned. Otherwise, if the conversion overflows, the
6631| largest integer with the same sign as `a' is returned.
6632*----------------------------------------------------------------------------*/
6633
f42c2224 6634int64_t float128_to_int64(float128 a, float_status *status)
158142c2 6635{
c120391c 6636 bool aSign;
f4014512 6637 int32_t aExp, shiftCount;
bb98fe42 6638 uint64_t aSig0, aSig1;
158142c2
FB
6639
6640 aSig1 = extractFloat128Frac1( a );
6641 aSig0 = extractFloat128Frac0( a );
6642 aExp = extractFloat128Exp( a );
6643 aSign = extractFloat128Sign( a );
e9321124 6644 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6645 shiftCount = 0x402F - aExp;
6646 if ( shiftCount <= 0 ) {
6647 if ( 0x403E < aExp ) {
ff32e16e 6648 float_raise(float_flag_invalid, status);
158142c2
FB
6649 if ( ! aSign
6650 || ( ( aExp == 0x7FFF )
e9321124 6651 && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
158142c2
FB
6652 )
6653 ) {
2c217da0 6654 return INT64_MAX;
158142c2 6655 }
2c217da0 6656 return INT64_MIN;
158142c2
FB
6657 }
6658 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6659 }
6660 else {
6661 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6662 }
ff32e16e 6663 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
6664
6665}
6666
6667/*----------------------------------------------------------------------------
6668| Returns the result of converting the quadruple-precision floating-point
6669| value `a' to the 64-bit two's complement integer format. The conversion
6670| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6671| Arithmetic, except that the conversion is always rounded toward zero.
6672| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
6673| the conversion overflows, the largest integer with the same sign as `a' is
6674| returned.
6675*----------------------------------------------------------------------------*/
6676
f42c2224 6677int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2 6678{
c120391c 6679 bool aSign;
f4014512 6680 int32_t aExp, shiftCount;
bb98fe42 6681 uint64_t aSig0, aSig1;
f42c2224 6682 int64_t z;
158142c2
FB
6683
6684 aSig1 = extractFloat128Frac1( a );
6685 aSig0 = extractFloat128Frac0( a );
6686 aExp = extractFloat128Exp( a );
6687 aSign = extractFloat128Sign( a );
e9321124 6688 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6689 shiftCount = aExp - 0x402F;
6690 if ( 0 < shiftCount ) {
6691 if ( 0x403E <= aExp ) {
e9321124
AB
6692 aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
6693 if ( ( a.high == UINT64_C(0xC03E000000000000) )
6694 && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
a2f2d288
PM
6695 if (aSig1) {
6696 status->float_exception_flags |= float_flag_inexact;
6697 }
158142c2
FB
6698 }
6699 else {
ff32e16e 6700 float_raise(float_flag_invalid, status);
158142c2 6701 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
2c217da0 6702 return INT64_MAX;
158142c2
FB
6703 }
6704 }
2c217da0 6705 return INT64_MIN;
158142c2
FB
6706 }
6707 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 6708 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
a2f2d288 6709 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6710 }
6711 }
6712 else {
6713 if ( aExp < 0x3FFF ) {
6714 if ( aExp | aSig0 | aSig1 ) {
a2f2d288 6715 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6716 }
6717 return 0;
6718 }
6719 z = aSig0>>( - shiftCount );
6720 if ( aSig1
bb98fe42 6721 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
a2f2d288 6722 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6723 }
6724 }
6725 if ( aSign ) z = - z;
6726 return z;
6727
6728}
6729
2e6d8568
BR
6730/*----------------------------------------------------------------------------
6731| Returns the result of converting the quadruple-precision floating-point value
6732| `a' to the 64-bit unsigned integer format. The conversion is
6733| performed according to the IEC/IEEE Standard for Binary Floating-Point
6734| Arithmetic---which means in particular that the conversion is rounded
6735| according to the current rounding mode. If `a' is a NaN, the largest
6736| positive integer is returned. If the conversion overflows, the
6737| largest unsigned integer is returned. If 'a' is negative, the value is
6738| rounded and zero is returned; negative values that do not round to zero
6739| will raise the inexact exception.
6740*----------------------------------------------------------------------------*/
6741
6742uint64_t float128_to_uint64(float128 a, float_status *status)
6743{
c120391c 6744 bool aSign;
2e6d8568
BR
6745 int aExp;
6746 int shiftCount;
6747 uint64_t aSig0, aSig1;
6748
6749 aSig0 = extractFloat128Frac0(a);
6750 aSig1 = extractFloat128Frac1(a);
6751 aExp = extractFloat128Exp(a);
6752 aSign = extractFloat128Sign(a);
6753 if (aSign && (aExp > 0x3FFE)) {
6754 float_raise(float_flag_invalid, status);
6755 if (float128_is_any_nan(a)) {
2c217da0 6756 return UINT64_MAX;
2e6d8568
BR
6757 } else {
6758 return 0;
6759 }
6760 }
6761 if (aExp) {
2c217da0 6762 aSig0 |= UINT64_C(0x0001000000000000);
2e6d8568
BR
6763 }
6764 shiftCount = 0x402F - aExp;
6765 if (shiftCount <= 0) {
6766 if (0x403E < aExp) {
6767 float_raise(float_flag_invalid, status);
2c217da0 6768 return UINT64_MAX;
2e6d8568
BR
6769 }
6770 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6771 } else {
6772 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6773 }
6774 return roundAndPackUint64(aSign, aSig0, aSig1, status);
6775}
6776
6777uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6778{
6779 uint64_t v;
6780 signed char current_rounding_mode = status->float_rounding_mode;
6781
6782 set_float_rounding_mode(float_round_to_zero, status);
6783 v = float128_to_uint64(a, status);
6784 set_float_rounding_mode(current_rounding_mode, status);
6785
6786 return v;
6787}
6788
158142c2
FB
6789/*----------------------------------------------------------------------------
6790| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
6791| value `a' to the 32-bit unsigned integer format. The conversion
6792| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6793| Arithmetic except that the conversion is always rounded toward zero.
6794| If `a' is a NaN, the largest positive integer is returned. Otherwise,
6795| if the conversion overflows, the largest unsigned integer is returned.
6796| If 'a' is negative, the value is rounded and zero is returned; negative
6797| values that do not round to zero will raise the inexact exception.
6798*----------------------------------------------------------------------------*/
6799
6800uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6801{
6802 uint64_t v;
6803 uint32_t res;
6804 int old_exc_flags = get_float_exception_flags(status);
6805
6806 v = float128_to_uint64_round_to_zero(a, status);
6807 if (v > 0xffffffff) {
6808 res = 0xffffffff;
6809 } else {
6810 return v;
6811 }
6812 set_float_exception_flags(old_exc_flags, status);
e45de992
DH
6813 float_raise(float_flag_invalid, status);
6814 return res;
6815}
6816
6817/*----------------------------------------------------------------------------
6818| Returns the result of converting the quadruple-precision floating-point value
6819| `a' to the 32-bit unsigned integer format. The conversion is
6820| performed according to the IEC/IEEE Standard for Binary Floating-Point
6821| Arithmetic---which means in particular that the conversion is rounded
6822| according to the current rounding mode. If `a' is a NaN, the largest
6823| positive integer is returned. If the conversion overflows, the
6824| largest unsigned integer is returned. If 'a' is negative, the value is
6825| rounded and zero is returned; negative values that do not round to zero
6826| will raise the inexact exception.
6827*----------------------------------------------------------------------------*/
6828
6829uint32_t float128_to_uint32(float128 a, float_status *status)
6830{
6831 uint64_t v;
6832 uint32_t res;
6833 int old_exc_flags = get_float_exception_flags(status);
6834
6835 v = float128_to_uint64(a, status);
6836 if (v > 0xffffffff) {
6837 res = 0xffffffff;
6838 } else {
6839 return v;
6840 }
6841 set_float_exception_flags(old_exc_flags, status);
fd425037
BR
6842 float_raise(float_flag_invalid, status);
6843 return res;
6844}
6845
6846/*----------------------------------------------------------------------------
6847| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
6848| value `a' to the single-precision floating-point format. The conversion
6849| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6850| Arithmetic.
6851*----------------------------------------------------------------------------*/
6852
e5a41ffa 6853float32 float128_to_float32(float128 a, float_status *status)
158142c2 6854{
c120391c 6855 bool aSign;
f4014512 6856 int32_t aExp;
bb98fe42
AF
6857 uint64_t aSig0, aSig1;
6858 uint32_t zSig;
158142c2
FB
6859
6860 aSig1 = extractFloat128Frac1( a );
6861 aSig0 = extractFloat128Frac0( a );
6862 aExp = extractFloat128Exp( a );
6863 aSign = extractFloat128Sign( a );
6864 if ( aExp == 0x7FFF ) {
6865 if ( aSig0 | aSig1 ) {
ff32e16e 6866 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
6867 }
6868 return packFloat32( aSign, 0xFF, 0 );
6869 }
6870 aSig0 |= ( aSig1 != 0 );
6871 shift64RightJamming( aSig0, 18, &aSig0 );
6872 zSig = aSig0;
6873 if ( aExp || zSig ) {
6874 zSig |= 0x40000000;
6875 aExp -= 0x3F81;
6876 }
ff32e16e 6877 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
6878
6879}
6880
6881/*----------------------------------------------------------------------------
6882| Returns the result of converting the quadruple-precision floating-point
6883| value `a' to the double-precision floating-point format. The conversion
6884| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6885| Arithmetic.
6886*----------------------------------------------------------------------------*/
6887
e5a41ffa 6888float64 float128_to_float64(float128 a, float_status *status)
158142c2 6889{
c120391c 6890 bool aSign;
f4014512 6891 int32_t aExp;
bb98fe42 6892 uint64_t aSig0, aSig1;
158142c2
FB
6893
6894 aSig1 = extractFloat128Frac1( a );
6895 aSig0 = extractFloat128Frac0( a );
6896 aExp = extractFloat128Exp( a );
6897 aSign = extractFloat128Sign( a );
6898 if ( aExp == 0x7FFF ) {
6899 if ( aSig0 | aSig1 ) {
ff32e16e 6900 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
6901 }
6902 return packFloat64( aSign, 0x7FF, 0 );
6903 }
6904 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6905 aSig0 |= ( aSig1 != 0 );
6906 if ( aExp || aSig0 ) {
e9321124 6907 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
6908 aExp -= 0x3C01;
6909 }
ff32e16e 6910 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
6911
6912}
6913
158142c2
FB
6914/*----------------------------------------------------------------------------
6915| Returns the result of converting the quadruple-precision floating-point
6916| value `a' to the extended double-precision floating-point format. The
6917| conversion is performed according to the IEC/IEEE Standard for Binary
6918| Floating-Point Arithmetic.
6919*----------------------------------------------------------------------------*/
6920
e5a41ffa 6921floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2 6922{
c120391c 6923 bool aSign;
f4014512 6924 int32_t aExp;
bb98fe42 6925 uint64_t aSig0, aSig1;
158142c2
FB
6926
6927 aSig1 = extractFloat128Frac1( a );
6928 aSig0 = extractFloat128Frac0( a );
6929 aExp = extractFloat128Exp( a );
6930 aSign = extractFloat128Sign( a );
6931 if ( aExp == 0x7FFF ) {
6932 if ( aSig0 | aSig1 ) {
7537c2b4
JM
6933 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6934 status);
6935 return floatx80_silence_nan(res, status);
158142c2 6936 }
0f605c88
LV
6937 return packFloatx80(aSign, floatx80_infinity_high,
6938 floatx80_infinity_low);
158142c2
FB
6939 }
6940 if ( aExp == 0 ) {
6941 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6942 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6943 }
6944 else {
e9321124 6945 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6946 }
6947 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6948 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6949
6950}
6951
158142c2
FB
6952/*----------------------------------------------------------------------------
6953| Rounds the quadruple-precision floating-point value `a' to an integer, and
6954| returns the result as a quadruple-precision floating-point value. The
6955| operation is performed according to the IEC/IEEE Standard for Binary
6956| Floating-Point Arithmetic.
6957*----------------------------------------------------------------------------*/
6958
e5a41ffa 6959float128 float128_round_to_int(float128 a, float_status *status)
158142c2 6960{
c120391c 6961 bool aSign;
f4014512 6962 int32_t aExp;
bb98fe42 6963 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
6964 float128 z;
6965
6966 aExp = extractFloat128Exp( a );
6967 if ( 0x402F <= aExp ) {
6968 if ( 0x406F <= aExp ) {
6969 if ( ( aExp == 0x7FFF )
6970 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6971 ) {
ff32e16e 6972 return propagateFloat128NaN(a, a, status);
158142c2
FB
6973 }
6974 return a;
6975 }
6976 lastBitMask = 1;
6977 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6978 roundBitsMask = lastBitMask - 1;
6979 z = a;
a2f2d288 6980 switch (status->float_rounding_mode) {
dc355b76 6981 case float_round_nearest_even:
158142c2
FB
6982 if ( lastBitMask ) {
6983 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6984 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6985 }
6986 else {
bb98fe42 6987 if ( (int64_t) z.low < 0 ) {
158142c2 6988 ++z.high;
bb98fe42 6989 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
6990 }
6991 }
dc355b76 6992 break;
f9288a76
PM
6993 case float_round_ties_away:
6994 if (lastBitMask) {
6995 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6996 } else {
6997 if ((int64_t) z.low < 0) {
6998 ++z.high;
6999 }
7000 }
7001 break;
dc355b76
PM
7002 case float_round_to_zero:
7003 break;
7004 case float_round_up:
7005 if (!extractFloat128Sign(z)) {
7006 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7007 }
7008 break;
7009 case float_round_down:
7010 if (extractFloat128Sign(z)) {
7011 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 7012 }
dc355b76 7013 break;
5d64abb3
RH
7014 case float_round_to_odd:
7015 /*
7016 * Note that if lastBitMask == 0, the last bit is the lsb
7017 * of high, and roundBitsMask == -1.
7018 */
7019 if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
7020 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7021 }
7022 break;
dc355b76
PM
7023 default:
7024 abort();
158142c2
FB
7025 }
7026 z.low &= ~ roundBitsMask;
7027 }
7028 else {
7029 if ( aExp < 0x3FFF ) {
bb98fe42 7030 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
a2f2d288 7031 status->float_exception_flags |= float_flag_inexact;
158142c2 7032 aSign = extractFloat128Sign( a );
a2f2d288 7033 switch (status->float_rounding_mode) {
5d64abb3 7034 case float_round_nearest_even:
158142c2
FB
7035 if ( ( aExp == 0x3FFE )
7036 && ( extractFloat128Frac0( a )
7037 | extractFloat128Frac1( a ) )
7038 ) {
7039 return packFloat128( aSign, 0x3FFF, 0, 0 );
7040 }
7041 break;
f9288a76
PM
7042 case float_round_ties_away:
7043 if (aExp == 0x3FFE) {
7044 return packFloat128(aSign, 0x3FFF, 0, 0);
7045 }
7046 break;
5d64abb3 7047 case float_round_down:
158142c2
FB
7048 return
7049 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
7050 : packFloat128( 0, 0, 0, 0 );
5d64abb3 7051 case float_round_up:
158142c2
FB
7052 return
7053 aSign ? packFloat128( 1, 0, 0, 0 )
7054 : packFloat128( 0, 0x3FFF, 0, 0 );
5d64abb3
RH
7055
7056 case float_round_to_odd:
7057 return packFloat128(aSign, 0x3FFF, 0, 0);
3dede407
RH
7058
7059 case float_round_to_zero:
7060 break;
158142c2
FB
7061 }
7062 return packFloat128( aSign, 0, 0, 0 );
7063 }
7064 lastBitMask = 1;
7065 lastBitMask <<= 0x402F - aExp;
7066 roundBitsMask = lastBitMask - 1;
7067 z.low = 0;
7068 z.high = a.high;
a2f2d288 7069 switch (status->float_rounding_mode) {
dc355b76 7070 case float_round_nearest_even:
158142c2
FB
7071 z.high += lastBitMask>>1;
7072 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
7073 z.high &= ~ lastBitMask;
7074 }
dc355b76 7075 break;
f9288a76
PM
7076 case float_round_ties_away:
7077 z.high += lastBitMask>>1;
7078 break;
dc355b76
PM
7079 case float_round_to_zero:
7080 break;
7081 case float_round_up:
7082 if (!extractFloat128Sign(z)) {
158142c2
FB
7083 z.high |= ( a.low != 0 );
7084 z.high += roundBitsMask;
7085 }
dc355b76
PM
7086 break;
7087 case float_round_down:
7088 if (extractFloat128Sign(z)) {
7089 z.high |= (a.low != 0);
7090 z.high += roundBitsMask;
7091 }
7092 break;
5d64abb3
RH
7093 case float_round_to_odd:
7094 if ((z.high & lastBitMask) == 0) {
7095 z.high |= (a.low != 0);
7096 z.high += roundBitsMask;
7097 }
7098 break;
dc355b76
PM
7099 default:
7100 abort();
158142c2
FB
7101 }
7102 z.high &= ~ roundBitsMask;
7103 }
7104 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
a2f2d288 7105 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
7106 }
7107 return z;
7108
7109}
7110
7111/*----------------------------------------------------------------------------
7112| Returns the result of adding the absolute values of the quadruple-precision
7113| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
7114| before being returned. `zSign' is ignored if the result is a NaN.
7115| The addition is performed according to the IEC/IEEE Standard for Binary
7116| Floating-Point Arithmetic.
7117*----------------------------------------------------------------------------*/
7118
c120391c 7119static float128 addFloat128Sigs(float128 a, float128 b, bool zSign,
e5a41ffa 7120 float_status *status)
158142c2 7121{
f4014512 7122 int32_t aExp, bExp, zExp;
bb98fe42 7123 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 7124 int32_t expDiff;
158142c2
FB
7125
7126 aSig1 = extractFloat128Frac1( a );
7127 aSig0 = extractFloat128Frac0( a );
7128 aExp = extractFloat128Exp( a );
7129 bSig1 = extractFloat128Frac1( b );
7130 bSig0 = extractFloat128Frac0( b );
7131 bExp = extractFloat128Exp( b );
7132 expDiff = aExp - bExp;
7133 if ( 0 < expDiff ) {
7134 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7135 if (aSig0 | aSig1) {
7136 return propagateFloat128NaN(a, b, status);
7137 }
158142c2
FB
7138 return a;
7139 }
7140 if ( bExp == 0 ) {
7141 --expDiff;
7142 }
7143 else {
e9321124 7144 bSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7145 }
7146 shift128ExtraRightJamming(
7147 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
7148 zExp = aExp;
7149 }
7150 else if ( expDiff < 0 ) {
7151 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7152 if (bSig0 | bSig1) {
7153 return propagateFloat128NaN(a, b, status);
7154 }
158142c2
FB
7155 return packFloat128( zSign, 0x7FFF, 0, 0 );
7156 }
7157 if ( aExp == 0 ) {
7158 ++expDiff;
7159 }
7160 else {
e9321124 7161 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7162 }
7163 shift128ExtraRightJamming(
7164 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
7165 zExp = bExp;
7166 }
7167 else {
7168 if ( aExp == 0x7FFF ) {
7169 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 7170 return propagateFloat128NaN(a, b, status);
158142c2
FB
7171 }
7172 return a;
7173 }
7174 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 7175 if ( aExp == 0 ) {
a2f2d288 7176 if (status->flush_to_zero) {
e6afc87f 7177 if (zSig0 | zSig1) {
ff32e16e 7178 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
7179 }
7180 return packFloat128(zSign, 0, 0, 0);
7181 }
fe76d976
PB
7182 return packFloat128( zSign, 0, zSig0, zSig1 );
7183 }
158142c2 7184 zSig2 = 0;
e9321124 7185 zSig0 |= UINT64_C(0x0002000000000000);
158142c2
FB
7186 zExp = aExp;
7187 goto shiftRight1;
7188 }
e9321124 7189 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7190 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7191 --zExp;
e9321124 7192 if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack;
158142c2
FB
7193 ++zExp;
7194 shiftRight1:
7195 shift128ExtraRightJamming(
7196 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7197 roundAndPack:
ff32e16e 7198 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7199
7200}
7201
7202/*----------------------------------------------------------------------------
7203| Returns the result of subtracting the absolute values of the quadruple-
7204| precision floating-point values `a' and `b'. If `zSign' is 1, the
7205| difference is negated before being returned. `zSign' is ignored if the
7206| result is a NaN. The subtraction is performed according to the IEC/IEEE
7207| Standard for Binary Floating-Point Arithmetic.
7208*----------------------------------------------------------------------------*/
7209
c120391c 7210static float128 subFloat128Sigs(float128 a, float128 b, bool zSign,
e5a41ffa 7211 float_status *status)
158142c2 7212{
f4014512 7213 int32_t aExp, bExp, zExp;
bb98fe42 7214 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 7215 int32_t expDiff;
158142c2
FB
7216
7217 aSig1 = extractFloat128Frac1( a );
7218 aSig0 = extractFloat128Frac0( a );
7219 aExp = extractFloat128Exp( a );
7220 bSig1 = extractFloat128Frac1( b );
7221 bSig0 = extractFloat128Frac0( b );
7222 bExp = extractFloat128Exp( b );
7223 expDiff = aExp - bExp;
7224 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
7225 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
7226 if ( 0 < expDiff ) goto aExpBigger;
7227 if ( expDiff < 0 ) goto bExpBigger;
7228 if ( aExp == 0x7FFF ) {
7229 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 7230 return propagateFloat128NaN(a, b, status);
158142c2 7231 }
ff32e16e 7232 float_raise(float_flag_invalid, status);
af39bc8c 7233 return float128_default_nan(status);
158142c2
FB
7234 }
7235 if ( aExp == 0 ) {
7236 aExp = 1;
7237 bExp = 1;
7238 }
7239 if ( bSig0 < aSig0 ) goto aBigger;
7240 if ( aSig0 < bSig0 ) goto bBigger;
7241 if ( bSig1 < aSig1 ) goto aBigger;
7242 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
7243 return packFloat128(status->float_rounding_mode == float_round_down,
7244 0, 0, 0);
158142c2
FB
7245 bExpBigger:
7246 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7247 if (bSig0 | bSig1) {
7248 return propagateFloat128NaN(a, b, status);
7249 }
158142c2
FB
7250 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
7251 }
7252 if ( aExp == 0 ) {
7253 ++expDiff;
7254 }
7255 else {
e9321124 7256 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7257 }
7258 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
e9321124 7259 bSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7260 bBigger:
7261 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
7262 zExp = bExp;
7263 zSign ^= 1;
7264 goto normalizeRoundAndPack;
7265 aExpBigger:
7266 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7267 if (aSig0 | aSig1) {
7268 return propagateFloat128NaN(a, b, status);
7269 }
158142c2
FB
7270 return a;
7271 }
7272 if ( bExp == 0 ) {
7273 --expDiff;
7274 }
7275 else {
e9321124 7276 bSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7277 }
7278 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
e9321124 7279 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7280 aBigger:
7281 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7282 zExp = aExp;
7283 normalizeRoundAndPack:
7284 --zExp;
ff32e16e
PM
7285 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
7286 status);
158142c2
FB
7287
7288}
7289
7290/*----------------------------------------------------------------------------
7291| Returns the result of adding the quadruple-precision floating-point values
7292| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
7293| for Binary Floating-Point Arithmetic.
7294*----------------------------------------------------------------------------*/
7295
e5a41ffa 7296float128 float128_add(float128 a, float128 b, float_status *status)
158142c2 7297{
c120391c 7298 bool aSign, bSign;
158142c2
FB
7299
7300 aSign = extractFloat128Sign( a );
7301 bSign = extractFloat128Sign( b );
7302 if ( aSign == bSign ) {
ff32e16e 7303 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7304 }
7305 else {
ff32e16e 7306 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7307 }
7308
7309}
7310
7311/*----------------------------------------------------------------------------
7312| Returns the result of subtracting the quadruple-precision floating-point
7313| values `a' and `b'. The operation is performed according to the IEC/IEEE
7314| Standard for Binary Floating-Point Arithmetic.
7315*----------------------------------------------------------------------------*/
7316
e5a41ffa 7317float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2 7318{
c120391c 7319 bool aSign, bSign;
158142c2
FB
7320
7321 aSign = extractFloat128Sign( a );
7322 bSign = extractFloat128Sign( b );
7323 if ( aSign == bSign ) {
ff32e16e 7324 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7325 }
7326 else {
ff32e16e 7327 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7328 }
7329
7330}
7331
7332/*----------------------------------------------------------------------------
7333| Returns the result of multiplying the quadruple-precision floating-point
7334| values `a' and `b'. The operation is performed according to the IEC/IEEE
7335| Standard for Binary Floating-Point Arithmetic.
7336*----------------------------------------------------------------------------*/
7337
e5a41ffa 7338float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2 7339{
c120391c 7340 bool aSign, bSign, zSign;
f4014512 7341 int32_t aExp, bExp, zExp;
bb98fe42 7342 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
7343
7344 aSig1 = extractFloat128Frac1( a );
7345 aSig0 = extractFloat128Frac0( a );
7346 aExp = extractFloat128Exp( a );
7347 aSign = extractFloat128Sign( a );
7348 bSig1 = extractFloat128Frac1( b );
7349 bSig0 = extractFloat128Frac0( b );
7350 bExp = extractFloat128Exp( b );
7351 bSign = extractFloat128Sign( b );
7352 zSign = aSign ^ bSign;
7353 if ( aExp == 0x7FFF ) {
7354 if ( ( aSig0 | aSig1 )
7355 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7356 return propagateFloat128NaN(a, b, status);
158142c2
FB
7357 }
7358 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
7359 return packFloat128( zSign, 0x7FFF, 0, 0 );
7360 }
7361 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7362 if (bSig0 | bSig1) {
7363 return propagateFloat128NaN(a, b, status);
7364 }
158142c2
FB
7365 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7366 invalid:
ff32e16e 7367 float_raise(float_flag_invalid, status);
af39bc8c 7368 return float128_default_nan(status);
158142c2
FB
7369 }
7370 return packFloat128( zSign, 0x7FFF, 0, 0 );
7371 }
7372 if ( aExp == 0 ) {
7373 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7374 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7375 }
7376 if ( bExp == 0 ) {
7377 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7378 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7379 }
7380 zExp = aExp + bExp - 0x4000;
e9321124 7381 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7382 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
7383 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
7384 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
7385 zSig2 |= ( zSig3 != 0 );
e9321124 7386 if (UINT64_C( 0x0002000000000000) <= zSig0 ) {
158142c2
FB
7387 shift128ExtraRightJamming(
7388 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7389 ++zExp;
7390 }
ff32e16e 7391 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7392
7393}
7394
7395/*----------------------------------------------------------------------------
7396| Returns the result of dividing the quadruple-precision floating-point value
7397| `a' by the corresponding value `b'. The operation is performed according to
7398| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7399*----------------------------------------------------------------------------*/
7400
e5a41ffa 7401float128 float128_div(float128 a, float128 b, float_status *status)
158142c2 7402{
c120391c 7403 bool aSign, bSign, zSign;
f4014512 7404 int32_t aExp, bExp, zExp;
bb98fe42
AF
7405 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
7406 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7407
7408 aSig1 = extractFloat128Frac1( a );
7409 aSig0 = extractFloat128Frac0( a );
7410 aExp = extractFloat128Exp( a );
7411 aSign = extractFloat128Sign( a );
7412 bSig1 = extractFloat128Frac1( b );
7413 bSig0 = extractFloat128Frac0( b );
7414 bExp = extractFloat128Exp( b );
7415 bSign = extractFloat128Sign( b );
7416 zSign = aSign ^ bSign;
7417 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7418 if (aSig0 | aSig1) {
7419 return propagateFloat128NaN(a, b, status);
7420 }
158142c2 7421 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7422 if (bSig0 | bSig1) {
7423 return propagateFloat128NaN(a, b, status);
7424 }
158142c2
FB
7425 goto invalid;
7426 }
7427 return packFloat128( zSign, 0x7FFF, 0, 0 );
7428 }
7429 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7430 if (bSig0 | bSig1) {
7431 return propagateFloat128NaN(a, b, status);
7432 }
158142c2
FB
7433 return packFloat128( zSign, 0, 0, 0 );
7434 }
7435 if ( bExp == 0 ) {
7436 if ( ( bSig0 | bSig1 ) == 0 ) {
7437 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7438 invalid:
ff32e16e 7439 float_raise(float_flag_invalid, status);
af39bc8c 7440 return float128_default_nan(status);
158142c2 7441 }
ff32e16e 7442 float_raise(float_flag_divbyzero, status);
158142c2
FB
7443 return packFloat128( zSign, 0x7FFF, 0, 0 );
7444 }
7445 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7446 }
7447 if ( aExp == 0 ) {
7448 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7449 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7450 }
7451 zExp = aExp - bExp + 0x3FFD;
7452 shortShift128Left(
e9321124 7453 aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 );
158142c2 7454 shortShift128Left(
e9321124 7455 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
7456 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
7457 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
7458 ++zExp;
7459 }
7460 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
7461 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
7462 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 7463 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7464 --zSig0;
7465 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
7466 }
7467 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
7468 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
7469 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
7470 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7471 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7472 --zSig1;
7473 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
7474 }
7475 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7476 }
7477 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 7478 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7479
7480}
7481
7482/*----------------------------------------------------------------------------
7483| Returns the remainder of the quadruple-precision floating-point value `a'
7484| with respect to the corresponding value `b'. The operation is performed
7485| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7486*----------------------------------------------------------------------------*/
7487
e5a41ffa 7488float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 7489{
c120391c 7490 bool aSign, zSign;
f4014512 7491 int32_t aExp, bExp, expDiff;
bb98fe42
AF
7492 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7493 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
7494 int64_t sigMean0;
158142c2
FB
7495
7496 aSig1 = extractFloat128Frac1( a );
7497 aSig0 = extractFloat128Frac0( a );
7498 aExp = extractFloat128Exp( a );
7499 aSign = extractFloat128Sign( a );
7500 bSig1 = extractFloat128Frac1( b );
7501 bSig0 = extractFloat128Frac0( b );
7502 bExp = extractFloat128Exp( b );
158142c2
FB
7503 if ( aExp == 0x7FFF ) {
7504 if ( ( aSig0 | aSig1 )
7505 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7506 return propagateFloat128NaN(a, b, status);
158142c2
FB
7507 }
7508 goto invalid;
7509 }
7510 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7511 if (bSig0 | bSig1) {
7512 return propagateFloat128NaN(a, b, status);
7513 }
158142c2
FB
7514 return a;
7515 }
7516 if ( bExp == 0 ) {
7517 if ( ( bSig0 | bSig1 ) == 0 ) {
7518 invalid:
ff32e16e 7519 float_raise(float_flag_invalid, status);
af39bc8c 7520 return float128_default_nan(status);
158142c2
FB
7521 }
7522 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7523 }
7524 if ( aExp == 0 ) {
7525 if ( ( aSig0 | aSig1 ) == 0 ) return a;
7526 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7527 }
7528 expDiff = aExp - bExp;
7529 if ( expDiff < -1 ) return a;
7530 shortShift128Left(
e9321124 7531 aSig0 | UINT64_C(0x0001000000000000),
158142c2
FB
7532 aSig1,
7533 15 - ( expDiff < 0 ),
7534 &aSig0,
7535 &aSig1
7536 );
7537 shortShift128Left(
e9321124 7538 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
7539 q = le128( bSig0, bSig1, aSig0, aSig1 );
7540 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7541 expDiff -= 64;
7542 while ( 0 < expDiff ) {
7543 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7544 q = ( 4 < q ) ? q - 4 : 0;
7545 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7546 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
7547 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
7548 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
7549 expDiff -= 61;
7550 }
7551 if ( -64 < expDiff ) {
7552 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7553 q = ( 4 < q ) ? q - 4 : 0;
7554 q >>= - expDiff;
7555 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7556 expDiff += 52;
7557 if ( expDiff < 0 ) {
7558 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7559 }
7560 else {
7561 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7562 }
7563 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7564 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7565 }
7566 else {
7567 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
7568 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7569 }
7570 do {
7571 alternateASig0 = aSig0;
7572 alternateASig1 = aSig1;
7573 ++q;
7574 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 7575 } while ( 0 <= (int64_t) aSig0 );
158142c2 7576 add128(
bb98fe42 7577 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
7578 if ( ( sigMean0 < 0 )
7579 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
7580 aSig0 = alternateASig0;
7581 aSig1 = alternateASig1;
7582 }
bb98fe42 7583 zSign = ( (int64_t) aSig0 < 0 );
158142c2 7584 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
7585 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
7586 status);
158142c2
FB
7587}
7588
7589/*----------------------------------------------------------------------------
7590| Returns the square root of the quadruple-precision floating-point value `a'.
7591| The operation is performed according to the IEC/IEEE Standard for Binary
7592| Floating-Point Arithmetic.
7593*----------------------------------------------------------------------------*/
7594
e5a41ffa 7595float128 float128_sqrt(float128 a, float_status *status)
158142c2 7596{
c120391c 7597 bool aSign;
f4014512 7598 int32_t aExp, zExp;
bb98fe42
AF
7599 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7600 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7601
7602 aSig1 = extractFloat128Frac1( a );
7603 aSig0 = extractFloat128Frac0( a );
7604 aExp = extractFloat128Exp( a );
7605 aSign = extractFloat128Sign( a );
7606 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7607 if (aSig0 | aSig1) {
7608 return propagateFloat128NaN(a, a, status);
7609 }
158142c2
FB
7610 if ( ! aSign ) return a;
7611 goto invalid;
7612 }
7613 if ( aSign ) {
7614 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
7615 invalid:
ff32e16e 7616 float_raise(float_flag_invalid, status);
af39bc8c 7617 return float128_default_nan(status);
158142c2
FB
7618 }
7619 if ( aExp == 0 ) {
7620 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
7621 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7622 }
7623 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
e9321124 7624 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7625 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
7626 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
7627 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
7628 doubleZSig0 = zSig0<<1;
7629 mul64To128( zSig0, zSig0, &term0, &term1 );
7630 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 7631 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7632 --zSig0;
7633 doubleZSig0 -= 2;
7634 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
7635 }
7636 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
7637 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7638 if ( zSig1 == 0 ) zSig1 = 1;
7639 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7640 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7641 mul64To128( zSig1, zSig1, &term2, &term3 );
7642 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7643 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7644 --zSig1;
7645 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7646 term3 |= 1;
7647 term2 |= doubleZSig0;
7648 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7649 }
7650 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7651 }
7652 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 7653 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7654
7655}
7656
7657/*----------------------------------------------------------------------------
7658| Returns 1 if the quadruple-precision floating-point value `a' is equal to
b689362d
AJ
7659| the corresponding value `b', and 0 otherwise. The invalid exception is
7660| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
7661| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7662*----------------------------------------------------------------------------*/
7663
e5a41ffa 7664int float128_eq(float128 a, float128 b, float_status *status)
158142c2
FB
7665{
7666
7667 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7668 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7669 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7670 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7671 ) {
ff32e16e 7672 float_raise(float_flag_invalid, status);
158142c2
FB
7673 return 0;
7674 }
7675 return
7676 ( a.low == b.low )
7677 && ( ( a.high == b.high )
7678 || ( ( a.low == 0 )
bb98fe42 7679 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
7680 );
7681
7682}
7683
7684/*----------------------------------------------------------------------------
7685| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
7686| or equal to the corresponding value `b', and 0 otherwise. The invalid
7687| exception is raised if either operand is a NaN. The comparison is performed
7688| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
7689*----------------------------------------------------------------------------*/
7690
e5a41ffa 7691int float128_le(float128 a, float128 b, float_status *status)
158142c2 7692{
c120391c 7693 bool aSign, bSign;
158142c2
FB
7694
7695 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7696 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7697 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7698 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7699 ) {
ff32e16e 7700 float_raise(float_flag_invalid, status);
158142c2
FB
7701 return 0;
7702 }
7703 aSign = extractFloat128Sign( a );
7704 bSign = extractFloat128Sign( b );
7705 if ( aSign != bSign ) {
7706 return
7707 aSign
bb98fe42 7708 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7709 == 0 );
7710 }
7711 return
7712 aSign ? le128( b.high, b.low, a.high, a.low )
7713 : le128( a.high, a.low, b.high, b.low );
7714
7715}
7716
7717/*----------------------------------------------------------------------------
7718| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
7719| the corresponding value `b', and 0 otherwise. The invalid exception is
7720| raised if either operand is a NaN. The comparison is performed according
7721| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
7722*----------------------------------------------------------------------------*/
7723
e5a41ffa 7724int float128_lt(float128 a, float128 b, float_status *status)
158142c2 7725{
c120391c 7726 bool aSign, bSign;
158142c2
FB
7727
7728 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7729 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7730 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7731 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7732 ) {
ff32e16e 7733 float_raise(float_flag_invalid, status);
158142c2
FB
7734 return 0;
7735 }
7736 aSign = extractFloat128Sign( a );
7737 bSign = extractFloat128Sign( b );
7738 if ( aSign != bSign ) {
7739 return
7740 aSign
bb98fe42 7741 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7742 != 0 );
7743 }
7744 return
7745 aSign ? lt128( b.high, b.low, a.high, a.low )
7746 : lt128( a.high, a.low, b.high, b.low );
7747
7748}
7749
67b7861d
AJ
7750/*----------------------------------------------------------------------------
7751| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
f5a64251
AJ
7752| be compared, and 0 otherwise. The invalid exception is raised if either
7753| operand is a NaN. The comparison is performed according to the IEC/IEEE
7754| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
7755*----------------------------------------------------------------------------*/
7756
e5a41ffa 7757int float128_unordered(float128 a, float128 b, float_status *status)
67b7861d
AJ
7758{
7759 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7760 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7761 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7762 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7763 ) {
ff32e16e 7764 float_raise(float_flag_invalid, status);
67b7861d
AJ
7765 return 1;
7766 }
7767 return 0;
7768}
7769
158142c2
FB
7770/*----------------------------------------------------------------------------
7771| Returns 1 if the quadruple-precision floating-point value `a' is equal to
f5a64251
AJ
7772| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
7773| exception. The comparison is performed according to the IEC/IEEE Standard
7774| for Binary Floating-Point Arithmetic.
158142c2
FB
7775*----------------------------------------------------------------------------*/
7776
e5a41ffa 7777int float128_eq_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7778{
7779
7780 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7781 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7782 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7783 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7784 ) {
af39bc8c
AM
7785 if (float128_is_signaling_nan(a, status)
7786 || float128_is_signaling_nan(b, status)) {
ff32e16e 7787 float_raise(float_flag_invalid, status);
b689362d 7788 }
158142c2
FB
7789 return 0;
7790 }
7791 return
7792 ( a.low == b.low )
7793 && ( ( a.high == b.high )
7794 || ( ( a.low == 0 )
bb98fe42 7795 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
7796 );
7797
7798}
7799
7800/*----------------------------------------------------------------------------
7801| Returns 1 if the quadruple-precision floating-point value `a' is less than
7802| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
7803| cause an exception. Otherwise, the comparison is performed according to the
7804| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7805*----------------------------------------------------------------------------*/
7806
e5a41ffa 7807int float128_le_quiet(float128 a, float128 b, float_status *status)
158142c2 7808{
c120391c 7809 bool aSign, bSign;
158142c2
FB
7810
7811 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7812 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7813 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7814 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7815 ) {
af39bc8c
AM
7816 if (float128_is_signaling_nan(a, status)
7817 || float128_is_signaling_nan(b, status)) {
ff32e16e 7818 float_raise(float_flag_invalid, status);
158142c2
FB
7819 }
7820 return 0;
7821 }
7822 aSign = extractFloat128Sign( a );
7823 bSign = extractFloat128Sign( b );
7824 if ( aSign != bSign ) {
7825 return
7826 aSign
bb98fe42 7827 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7828 == 0 );
7829 }
7830 return
7831 aSign ? le128( b.high, b.low, a.high, a.low )
7832 : le128( a.high, a.low, b.high, b.low );
7833
7834}
7835
7836/*----------------------------------------------------------------------------
7837| Returns 1 if the quadruple-precision floating-point value `a' is less than
7838| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
7839| exception. Otherwise, the comparison is performed according to the IEC/IEEE
7840| Standard for Binary Floating-Point Arithmetic.
7841*----------------------------------------------------------------------------*/
7842
e5a41ffa 7843int float128_lt_quiet(float128 a, float128 b, float_status *status)
158142c2 7844{
c120391c 7845 bool aSign, bSign;
158142c2
FB
7846
7847 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7848 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7849 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7850 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7851 ) {
af39bc8c
AM
7852 if (float128_is_signaling_nan(a, status)
7853 || float128_is_signaling_nan(b, status)) {
ff32e16e 7854 float_raise(float_flag_invalid, status);
158142c2
FB
7855 }
7856 return 0;
7857 }
7858 aSign = extractFloat128Sign( a );
7859 bSign = extractFloat128Sign( b );
7860 if ( aSign != bSign ) {
7861 return
7862 aSign
bb98fe42 7863 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7864 != 0 );
7865 }
7866 return
7867 aSign ? lt128( b.high, b.low, a.high, a.low )
7868 : lt128( a.high, a.low, b.high, b.low );
7869
7870}
7871
67b7861d
AJ
7872/*----------------------------------------------------------------------------
7873| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
7874| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
7875| comparison is performed according to the IEC/IEEE Standard for Binary
7876| Floating-Point Arithmetic.
7877*----------------------------------------------------------------------------*/
7878
e5a41ffa 7879int float128_unordered_quiet(float128 a, float128 b, float_status *status)
67b7861d
AJ
7880{
7881 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7882 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7883 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7884 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7885 ) {
af39bc8c
AM
7886 if (float128_is_signaling_nan(a, status)
7887 || float128_is_signaling_nan(b, status)) {
ff32e16e 7888 float_raise(float_flag_invalid, status);
67b7861d
AJ
7889 }
7890 return 1;
7891 }
7892 return 0;
7893}
7894
e5a41ffa
PM
7895static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
7896 int is_quiet, float_status *status)
f6714d36 7897{
c120391c 7898 bool aSign, bSign;
f6714d36 7899
d1eb8f2a
AD
7900 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7901 float_raise(float_flag_invalid, status);
7902 return float_relation_unordered;
7903 }
f6714d36
AJ
7904 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7905 ( extractFloatx80Frac( a )<<1 ) ) ||
7906 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7907 ( extractFloatx80Frac( b )<<1 ) )) {
7908 if (!is_quiet ||
af39bc8c
AM
7909 floatx80_is_signaling_nan(a, status) ||
7910 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7911 float_raise(float_flag_invalid, status);
f6714d36
AJ
7912 }
7913 return float_relation_unordered;
7914 }
7915 aSign = extractFloatx80Sign( a );
7916 bSign = extractFloatx80Sign( b );
7917 if ( aSign != bSign ) {
7918
7919 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7920 ( ( a.low | b.low ) == 0 ) ) {
7921 /* zero case */
7922 return float_relation_equal;
7923 } else {
7924 return 1 - (2 * aSign);
7925 }
7926 } else {
be53fa78
JM
7927 /* Normalize pseudo-denormals before comparison. */
7928 if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
7929 ++a.high;
7930 }
7931 if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
7932 ++b.high;
7933 }
f6714d36
AJ
7934 if (a.low == b.low && a.high == b.high) {
7935 return float_relation_equal;
7936 } else {
7937 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7938 }
7939 }
7940}
7941
e5a41ffa 7942int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7943{
ff32e16e 7944 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7945}
7946
e5a41ffa 7947int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
f6714d36 7948{
ff32e16e 7949 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7950}
7951
e5a41ffa
PM
7952static inline int float128_compare_internal(float128 a, float128 b,
7953 int is_quiet, float_status *status)
1f587329 7954{
c120391c 7955 bool aSign, bSign;
1f587329
BS
7956
7957 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7958 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7959 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7960 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7961 if (!is_quiet ||
af39bc8c
AM
7962 float128_is_signaling_nan(a, status) ||
7963 float128_is_signaling_nan(b, status)) {
ff32e16e 7964 float_raise(float_flag_invalid, status);
1f587329
BS
7965 }
7966 return float_relation_unordered;
7967 }
7968 aSign = extractFloat128Sign( a );
7969 bSign = extractFloat128Sign( b );
7970 if ( aSign != bSign ) {
7971 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7972 /* zero case */
7973 return float_relation_equal;
7974 } else {
7975 return 1 - (2 * aSign);
7976 }
7977 } else {
7978 if (a.low == b.low && a.high == b.high) {
7979 return float_relation_equal;
7980 } else {
7981 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7982 }
7983 }
7984}
7985
e5a41ffa 7986int float128_compare(float128 a, float128 b, float_status *status)
1f587329 7987{
ff32e16e 7988 return float128_compare_internal(a, b, 0, status);
1f587329
BS
7989}
7990
e5a41ffa 7991int float128_compare_quiet(float128 a, float128 b, float_status *status)
1f587329 7992{
ff32e16e 7993 return float128_compare_internal(a, b, 1, status);
1f587329
BS
7994}
7995
e5a41ffa 7996floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb 7997{
c120391c 7998 bool aSign;
326b9e98 7999 int32_t aExp;
bb98fe42 8000 uint64_t aSig;
9ee6e8bb 8001
d1eb8f2a
AD
8002 if (floatx80_invalid_encoding(a)) {
8003 float_raise(float_flag_invalid, status);
8004 return floatx80_default_nan(status);
8005 }
9ee6e8bb
PB
8006 aSig = extractFloatx80Frac( a );
8007 aExp = extractFloatx80Exp( a );
8008 aSign = extractFloatx80Sign( a );
8009
326b9e98
AJ
8010 if ( aExp == 0x7FFF ) {
8011 if ( aSig<<1 ) {
ff32e16e 8012 return propagateFloatx80NaN(a, a, status);
326b9e98 8013 }
9ee6e8bb
PB
8014 return a;
8015 }
326b9e98 8016
3c85c37f
PM
8017 if (aExp == 0) {
8018 if (aSig == 0) {
8019 return a;
8020 }
8021 aExp++;
8022 }
69397542 8023
326b9e98
AJ
8024 if (n > 0x10000) {
8025 n = 0x10000;
8026 } else if (n < -0x10000) {
8027 n = -0x10000;
8028 }
8029
9ee6e8bb 8030 aExp += n;
a2f2d288
PM
8031 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
8032 aSign, aExp, aSig, 0, status);
9ee6e8bb 8033}
9ee6e8bb 8034
e5a41ffa 8035float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb 8036{
c120391c 8037 bool aSign;
326b9e98 8038 int32_t aExp;
bb98fe42 8039 uint64_t aSig0, aSig1;
9ee6e8bb
PB
8040
8041 aSig1 = extractFloat128Frac1( a );
8042 aSig0 = extractFloat128Frac0( a );
8043 aExp = extractFloat128Exp( a );
8044 aSign = extractFloat128Sign( a );
8045 if ( aExp == 0x7FFF ) {
326b9e98 8046 if ( aSig0 | aSig1 ) {
ff32e16e 8047 return propagateFloat128NaN(a, a, status);
326b9e98 8048 }
9ee6e8bb
PB
8049 return a;
8050 }
3c85c37f 8051 if (aExp != 0) {
e9321124 8052 aSig0 |= UINT64_C(0x0001000000000000);
3c85c37f 8053 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 8054 return a;
3c85c37f
PM
8055 } else {
8056 aExp++;
8057 }
69397542 8058
326b9e98
AJ
8059 if (n > 0x10000) {
8060 n = 0x10000;
8061 } else if (n < -0x10000) {
8062 n = -0x10000;
8063 }
8064
69397542
PB
8065 aExp += n - 1;
8066 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 8067 , status);
9ee6e8bb
PB
8068
8069}
f6b3b108
EC
8070
8071static void __attribute__((constructor)) softfloat_init(void)
8072{
8073 union_float64 ua, ub, uc, ur;
8074
8075 if (QEMU_NO_HARDFLOAT) {
8076 return;
8077 }
8078 /*
8079 * Test that the host's FMA is not obviously broken. For example,
8080 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
8081 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
8082 */
8083 ua.s = 0x0020000000000001ULL;
8084 ub.s = 0x3ca0000000000000ULL;
8085 uc.s = 0x0020000000000000ULL;
8086 ur.h = fma(ua.h, ub.h, uc.h);
8087 if (ur.s != 0x0020000000000001ULL) {
8088 force_soft_fma = true;
8089 }
8090}