]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
fpu: replace LIT64 usage with UINT64_C for specialize constants
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 s->float_exception_flags |= float_flag_input_denormal; \
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value: QEMU_HARDFLOAT_<n>F<size>_USE_FP is 1 to use
 * fpclassify() and 0 to use the softfloat predicates.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 *
 * QEMU_NO_HARDFLOAT is 1 when hardfloat is disabled (TARGET_PPC, or a
 * -ffast-math build) and 0 otherwise.  QEMU_SOFTFLOAT_ATTR additionally
 * carries noinline when hardfloat is enabled.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif

235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
342/* Note: @fast_test and @post can be NULL */
343static inline float32
344float32_gen2(float32 xa, float32 xb, float_status *s,
345 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
346 f32_check_fn pre, f32_check_fn post,
347 f32_check_fn fast_test, soft_f32_op2_fn fast_op)
348{
349 union_float32 ua, ub, ur;
350
351 ua.s = xa;
352 ub.s = xb;
353
354 if (unlikely(!can_use_fpu(s))) {
355 goto soft;
356 }
357
358 float32_input_flush2(&ua.s, &ub.s, s);
359 if (unlikely(!pre(ua, ub))) {
360 goto soft;
361 }
362 if (fast_test && fast_test(ua, ub)) {
363 return fast_op(ua.s, ub.s, s);
364 }
365
366 ur.h = hard(ua.h, ub.h);
367 if (unlikely(f32_is_inf(ur))) {
368 s->float_exception_flags |= float_flag_overflow;
369 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
370 if (post == NULL || post(ua, ub)) {
371 goto soft;
372 }
373 }
374 return ur.s;
375
376 soft:
377 return soft(ua.s, ub.s, s);
378}
379
380static inline float64
381float64_gen2(float64 xa, float64 xb, float_status *s,
382 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
383 f64_check_fn pre, f64_check_fn post,
384 f64_check_fn fast_test, soft_f64_op2_fn fast_op)
385{
386 union_float64 ua, ub, ur;
387
388 ua.s = xa;
389 ub.s = xb;
390
391 if (unlikely(!can_use_fpu(s))) {
392 goto soft;
393 }
394
395 float64_input_flush2(&ua.s, &ub.s, s);
396 if (unlikely(!pre(ua, ub))) {
397 goto soft;
398 }
399 if (fast_test && fast_test(ua, ub)) {
400 return fast_op(ua.s, ub.s, s);
401 }
402
403 ur.h = hard(ua.h, ub.h);
404 if (unlikely(f64_is_inf(ur))) {
405 s->float_exception_flags |= float_flag_overflow;
406 } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
407 if (post == NULL || post(ua, ub)) {
408 goto soft;
409 }
410 }
411 return ur.s;
412
413 soft:
414 return soft(ua.s, ub.s, s);
415}
416
bb4d4bb3
PM
417/*----------------------------------------------------------------------------
418| Returns the fraction bits of the half-precision floating-point value `a'.
419*----------------------------------------------------------------------------*/
420
a49db98d 421static inline uint32_t extractFloat16Frac(float16 a)
bb4d4bb3
PM
422{
423 return float16_val(a) & 0x3ff;
424}
425
426/*----------------------------------------------------------------------------
427| Returns the exponent bits of the half-precision floating-point value `a'.
428*----------------------------------------------------------------------------*/
429
0c48262d 430static inline int extractFloat16Exp(float16 a)
bb4d4bb3
PM
431{
432 return (float16_val(a) >> 10) & 0x1f;
433}
434
d97544c9
AB
435/*----------------------------------------------------------------------------
436| Returns the fraction bits of the single-precision floating-point value `a'.
437*----------------------------------------------------------------------------*/
438
439static inline uint32_t extractFloat32Frac(float32 a)
440{
441 return float32_val(a) & 0x007FFFFF;
442}
443
444/*----------------------------------------------------------------------------
445| Returns the exponent bits of the single-precision floating-point value `a'.
446*----------------------------------------------------------------------------*/
447
448static inline int extractFloat32Exp(float32 a)
449{
450 return (float32_val(a) >> 23) & 0xFF;
451}
452
453/*----------------------------------------------------------------------------
454| Returns the sign bit of the single-precision floating-point value `a'.
455*----------------------------------------------------------------------------*/
456
457static inline flag extractFloat32Sign(float32 a)
458{
459 return float32_val(a) >> 31;
460}
461
462/*----------------------------------------------------------------------------
463| Returns the fraction bits of the double-precision floating-point value `a'.
464*----------------------------------------------------------------------------*/
465
466static inline uint64_t extractFloat64Frac(float64 a)
467{
468 return float64_val(a) & LIT64(0x000FFFFFFFFFFFFF);
469}
470
471/*----------------------------------------------------------------------------
472| Returns the exponent bits of the double-precision floating-point value `a'.
473*----------------------------------------------------------------------------*/
474
475static inline int extractFloat64Exp(float64 a)
476{
477 return (float64_val(a) >> 52) & 0x7FF;
478}
479
480/*----------------------------------------------------------------------------
481| Returns the sign bit of the double-precision floating-point value `a'.
482*----------------------------------------------------------------------------*/
483
484static inline flag extractFloat64Sign(float64 a)
485{
486 return float64_val(a) >> 63;
487}
488
/*
 * Classify a floating point number. The enumerators are ordered so that
 * every NaN class sorts at or above float_class_qnan; therefore
 * cls >= float_class_qnan is true for any NaN and for nothing else.
 * The enum is packed so that it occupies as little storage as possible.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

247d1f21
RH
503/* Simple helpers for checking if, or what kind of, NaN we have */
504static inline __attribute__((unused)) bool is_nan(FloatClass c)
505{
506 return unlikely(c >= float_class_qnan);
507}
508
509static inline __attribute__((unused)) bool is_snan(FloatClass c)
510{
511 return c == float_class_snan;
512}
513
514static inline __attribute__((unused)) bool is_qnan(FloatClass c)
515{
516 return c == float_class_qnan;
517}
518
a90119b5
AB
519/*
520 * Structure holding all of the decomposed parts of a float. The
521 * exponent is unbiased and the fraction is normalized. All
522 * calculations are done with a 64 bit fraction and then rounded as
523 * appropriate for the final format.
524 *
525 * Thanks to the packed FloatClass a decent compiler should be able to
526 * fit the whole structure into registers and avoid using the stack
527 * for parameter passing.
528 */
529
530typedef struct {
531 uint64_t frac;
532 int32_t exp;
533 FloatClass cls;
534 bool sign;
535} FloatParts;
536
537#define DECOMPOSED_BINARY_POINT (64 - 2)
538#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
539#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1)
540
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/*
 * Expand fields based on the size of exponent (E) and fraction (F).
 * Expands to a list of designated initializers for a FloatFmt; the macro
 * arguments are parenthesized so that expressions may be passed safely.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = (E),                                           \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                         \
    .exp_max        = (1 << (E)) - 1,                                \
    .frac_size      = (F),                                           \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                 \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),       \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1), \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1, \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1

579static const FloatFmt float16_params = {
580 FLOAT_PARAMS(5, 10)
581};
582
6fed16b2
AB
583static const FloatFmt float16_params_ahp = {
584 FLOAT_PARAMS(5, 10),
585 .arm_althp = true
586};
587
a90119b5
AB
588static const FloatFmt float32_params = {
589 FLOAT_PARAMS(8, 23)
590};
591
592static const FloatFmt float64_params = {
593 FLOAT_PARAMS(11, 52)
594};
595
6fff2167
AB
596/* Unpack a float to parts, but do not canonicalize. */
597static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
598{
599 const int sign_pos = fmt.frac_size + fmt.exp_size;
600
601 return (FloatParts) {
602 .cls = float_class_unclassified,
603 .sign = extract64(raw, sign_pos, 1),
604 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
605 .frac = extract64(raw, 0, fmt.frac_size),
606 };
607}
608
609static inline FloatParts float16_unpack_raw(float16 f)
610{
611 return unpack_raw(float16_params, f);
612}
613
614static inline FloatParts float32_unpack_raw(float32 f)
615{
616 return unpack_raw(float32_params, f);
617}
618
619static inline FloatParts float64_unpack_raw(float64 f)
620{
621 return unpack_raw(float64_params, f);
622}
623
624/* Pack a float from parts, but do not canonicalize. */
625static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
626{
627 const int sign_pos = fmt.frac_size + fmt.exp_size;
628 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
629 return deposit64(ret, sign_pos, 1, p.sign);
630}
631
632static inline float16 float16_pack_raw(FloatParts p)
633{
634 return make_float16(pack_raw(float16_params, p));
635}
636
637static inline float32 float32_pack_raw(FloatParts p)
638{
639 return make_float32(pack_raw(float32_params, p));
640}
641
642static inline float64 float64_pack_raw(FloatParts p)
643{
644 return make_float64(pack_raw(float64_params, p));
645}
646
0664335a
RH
647/*----------------------------------------------------------------------------
648| Functions and definitions to determine: (1) whether tininess for underflow
649| is detected before or after rounding by default, (2) what (if anything)
650| happens when exceptions are raised, (3) how signaling NaNs are distinguished
651| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
652| are propagated from function inputs to output. These details are target-
653| specific.
654*----------------------------------------------------------------------------*/
655#include "softfloat-specialize.h"
656
6fff2167 657/* Canonicalize EXP and FRAC, setting CLS. */
f9943c7f
EC
658static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
659 float_status *status)
6fff2167 660{
ca3a3d5a 661 if (part.exp == parm->exp_max && !parm->arm_althp) {
6fff2167
AB
662 if (part.frac == 0) {
663 part.cls = float_class_inf;
664 } else {
94933df0 665 part.frac <<= parm->frac_shift;
298b468e
RH
666 part.cls = (parts_is_snan_frac(part.frac, status)
667 ? float_class_snan : float_class_qnan);
6fff2167
AB
668 }
669 } else if (part.exp == 0) {
670 if (likely(part.frac == 0)) {
671 part.cls = float_class_zero;
672 } else if (status->flush_inputs_to_zero) {
673 float_raise(float_flag_input_denormal, status);
674 part.cls = float_class_zero;
675 part.frac = 0;
676 } else {
677 int shift = clz64(part.frac) - 1;
678 part.cls = float_class_normal;
679 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
680 part.frac <<= shift;
681 }
682 } else {
683 part.cls = float_class_normal;
684 part.exp -= parm->exp_bias;
685 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
686 }
687 return part;
688}
689
690/* Round and uncanonicalize a floating-point number by parts. There
691 * are FRAC_SHIFT bits that may require rounding at the bottom of the
692 * fraction; these bits will be removed. The exponent will be biased
693 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
694 */
695
696static FloatParts round_canonical(FloatParts p, float_status *s,
697 const FloatFmt *parm)
698{
5d64abb3 699 const uint64_t frac_lsb = parm->frac_lsb;
6fff2167
AB
700 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
701 const uint64_t round_mask = parm->round_mask;
702 const uint64_t roundeven_mask = parm->roundeven_mask;
703 const int exp_max = parm->exp_max;
704 const int frac_shift = parm->frac_shift;
705 uint64_t frac, inc;
706 int exp, flags = 0;
707 bool overflow_norm;
708
709 frac = p.frac;
710 exp = p.exp;
711
712 switch (p.cls) {
713 case float_class_normal:
714 switch (s->float_rounding_mode) {
715 case float_round_nearest_even:
716 overflow_norm = false;
717 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
718 break;
719 case float_round_ties_away:
720 overflow_norm = false;
721 inc = frac_lsbm1;
722 break;
723 case float_round_to_zero:
724 overflow_norm = true;
725 inc = 0;
726 break;
727 case float_round_up:
728 inc = p.sign ? 0 : round_mask;
729 overflow_norm = p.sign;
730 break;
731 case float_round_down:
732 inc = p.sign ? round_mask : 0;
733 overflow_norm = !p.sign;
734 break;
5d64abb3
RH
735 case float_round_to_odd:
736 overflow_norm = true;
737 inc = frac & frac_lsb ? 0 : round_mask;
738 break;
6fff2167
AB
739 default:
740 g_assert_not_reached();
741 }
742
743 exp += parm->exp_bias;
744 if (likely(exp > 0)) {
745 if (frac & round_mask) {
746 flags |= float_flag_inexact;
747 frac += inc;
748 if (frac & DECOMPOSED_OVERFLOW_BIT) {
749 frac >>= 1;
750 exp++;
751 }
752 }
753 frac >>= frac_shift;
754
ca3a3d5a
AB
755 if (parm->arm_althp) {
756 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
757 if (unlikely(exp > exp_max)) {
758 /* Overflow. Return the maximum normal. */
759 flags = float_flag_invalid;
760 exp = exp_max;
761 frac = -1;
762 }
763 } else if (unlikely(exp >= exp_max)) {
6fff2167
AB
764 flags |= float_flag_overflow | float_flag_inexact;
765 if (overflow_norm) {
766 exp = exp_max - 1;
767 frac = -1;
768 } else {
769 p.cls = float_class_inf;
770 goto do_inf;
771 }
772 }
773 } else if (s->flush_to_zero) {
774 flags |= float_flag_output_denormal;
775 p.cls = float_class_zero;
776 goto do_zero;
777 } else {
778 bool is_tiny = (s->float_detect_tininess
779 == float_tininess_before_rounding)
780 || (exp < 0)
781 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
782
783 shift64RightJamming(frac, 1 - exp, &frac);
784 if (frac & round_mask) {
785 /* Need to recompute round-to-even. */
5d64abb3
RH
786 switch (s->float_rounding_mode) {
787 case float_round_nearest_even:
6fff2167
AB
788 inc = ((frac & roundeven_mask) != frac_lsbm1
789 ? frac_lsbm1 : 0);
5d64abb3
RH
790 break;
791 case float_round_to_odd:
792 inc = frac & frac_lsb ? 0 : round_mask;
793 break;
6fff2167
AB
794 }
795 flags |= float_flag_inexact;
796 frac += inc;
797 }
798
799 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
800 frac >>= frac_shift;
801
802 if (is_tiny && (flags & float_flag_inexact)) {
803 flags |= float_flag_underflow;
804 }
805 if (exp == 0 && frac == 0) {
806 p.cls = float_class_zero;
807 }
808 }
809 break;
810
811 case float_class_zero:
812 do_zero:
813 exp = 0;
814 frac = 0;
815 break;
816
817 case float_class_inf:
818 do_inf:
ca3a3d5a 819 assert(!parm->arm_althp);
6fff2167
AB
820 exp = exp_max;
821 frac = 0;
822 break;
823
824 case float_class_qnan:
825 case float_class_snan:
ca3a3d5a 826 assert(!parm->arm_althp);
6fff2167 827 exp = exp_max;
94933df0 828 frac >>= parm->frac_shift;
6fff2167
AB
829 break;
830
831 default:
832 g_assert_not_reached();
833 }
834
835 float_raise(flags, s);
836 p.exp = exp;
837 p.frac = frac;
838 return p;
839}
840
6fed16b2
AB
841/* Explicit FloatFmt version */
842static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
843 const FloatFmt *params)
844{
f9943c7f 845 return sf_canonicalize(float16_unpack_raw(f), params, s);
6fed16b2
AB
846}
847
6fff2167
AB
848static FloatParts float16_unpack_canonical(float16 f, float_status *s)
849{
6fed16b2
AB
850 return float16a_unpack_canonical(f, s, &float16_params);
851}
852
853static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
854 const FloatFmt *params)
855{
856 return float16_pack_raw(round_canonical(p, s, params));
6fff2167
AB
857}
858
859static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
860{
6fed16b2 861 return float16a_round_pack_canonical(p, s, &float16_params);
6fff2167
AB
862}
863
864static FloatParts float32_unpack_canonical(float32 f, float_status *s)
865{
f9943c7f 866 return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
6fff2167
AB
867}
868
869static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
870{
0bcfbcbe 871 return float32_pack_raw(round_canonical(p, s, &float32_params));
6fff2167
AB
872}
873
874static FloatParts float64_unpack_canonical(float64 f, float_status *s)
875{
f9943c7f 876 return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
6fff2167
AB
877}
878
879static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
880{
0bcfbcbe 881 return float64_pack_raw(round_canonical(p, s, &float64_params));
6fff2167
AB
882}
883
dbe4d53a
AB
884static FloatParts return_nan(FloatParts a, float_status *s)
885{
886 switch (a.cls) {
887 case float_class_snan:
888 s->float_exception_flags |= float_flag_invalid;
0bcfbcbe 889 a = parts_silence_nan(a, s);
dbe4d53a
AB
890 /* fall through */
891 case float_class_qnan:
892 if (s->default_nan_mode) {
f7e598e2 893 return parts_default_nan(s);
dbe4d53a
AB
894 }
895 break;
896
897 default:
898 g_assert_not_reached();
899 }
900 return a;
901}
902
6fff2167
AB
903static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
904{
905 if (is_snan(a.cls) || is_snan(b.cls)) {
906 s->float_exception_flags |= float_flag_invalid;
907 }
908
909 if (s->default_nan_mode) {
f7e598e2 910 return parts_default_nan(s);
6fff2167 911 } else {
4f251cfd 912 if (pickNaN(a.cls, b.cls,
6fff2167
AB
913 a.frac > b.frac ||
914 (a.frac == b.frac && a.sign < b.sign))) {
915 a = b;
916 }
0bcfbcbe
RH
917 if (is_snan(a.cls)) {
918 return parts_silence_nan(a, s);
919 }
6fff2167
AB
920 }
921 return a;
922}
923
d446830a
AB
924static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
925 bool inf_zero, float_status *s)
926{
1839189b
PM
927 int which;
928
d446830a
AB
929 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
930 s->float_exception_flags |= float_flag_invalid;
931 }
932
3bd2dec1 933 which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
1839189b 934
d446830a 935 if (s->default_nan_mode) {
1839189b
PM
936 /* Note that this check is after pickNaNMulAdd so that function
937 * has an opportunity to set the Invalid flag.
938 */
f7e598e2 939 which = 3;
1839189b 940 }
d446830a 941
1839189b
PM
942 switch (which) {
943 case 0:
944 break;
945 case 1:
946 a = b;
947 break;
948 case 2:
949 a = c;
950 break;
951 case 3:
f7e598e2 952 return parts_default_nan(s);
1839189b
PM
953 default:
954 g_assert_not_reached();
d446830a 955 }
1839189b 956
0bcfbcbe
RH
957 if (is_snan(a.cls)) {
958 return parts_silence_nan(a, s);
959 }
d446830a
AB
960 return a;
961}
962
6fff2167
AB
963/*
964 * Returns the result of adding or subtracting the values of the
965 * floating-point values `a' and `b'. The operation is performed
966 * according to the IEC/IEEE Standard for Binary Floating-Point
967 * Arithmetic.
968 */
969
970static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
971 float_status *s)
972{
973 bool a_sign = a.sign;
974 bool b_sign = b.sign ^ subtract;
975
976 if (a_sign != b_sign) {
977 /* Subtraction */
978
979 if (a.cls == float_class_normal && b.cls == float_class_normal) {
980 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
981 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
982 a.frac = a.frac - b.frac;
983 } else {
984 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
985 a.frac = b.frac - a.frac;
986 a.exp = b.exp;
987 a_sign ^= 1;
988 }
989
990 if (a.frac == 0) {
991 a.cls = float_class_zero;
992 a.sign = s->float_rounding_mode == float_round_down;
993 } else {
994 int shift = clz64(a.frac) - 1;
995 a.frac = a.frac << shift;
996 a.exp = a.exp - shift;
997 a.sign = a_sign;
998 }
999 return a;
1000 }
1001 if (is_nan(a.cls) || is_nan(b.cls)) {
1002 return pick_nan(a, b, s);
1003 }
1004 if (a.cls == float_class_inf) {
1005 if (b.cls == float_class_inf) {
1006 float_raise(float_flag_invalid, s);
f7e598e2 1007 return parts_default_nan(s);
6fff2167
AB
1008 }
1009 return a;
1010 }
1011 if (a.cls == float_class_zero && b.cls == float_class_zero) {
1012 a.sign = s->float_rounding_mode == float_round_down;
1013 return a;
1014 }
1015 if (a.cls == float_class_zero || b.cls == float_class_inf) {
1016 b.sign = a_sign ^ 1;
1017 return b;
1018 }
1019 if (b.cls == float_class_zero) {
1020 return a;
1021 }
1022 } else {
1023 /* Addition */
1024 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1025 if (a.exp > b.exp) {
1026 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
1027 } else if (a.exp < b.exp) {
1028 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
1029 a.exp = b.exp;
1030 }
1031 a.frac += b.frac;
1032 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
64d450a0 1033 shift64RightJamming(a.frac, 1, &a.frac);
6fff2167
AB
1034 a.exp += 1;
1035 }
1036 return a;
1037 }
1038 if (is_nan(a.cls) || is_nan(b.cls)) {
1039 return pick_nan(a, b, s);
1040 }
1041 if (a.cls == float_class_inf || b.cls == float_class_zero) {
1042 return a;
1043 }
1044 if (b.cls == float_class_inf || a.cls == float_class_zero) {
1045 b.sign = b_sign;
1046 return b;
1047 }
1048 }
1049 g_assert_not_reached();
1050}
1051
1052/*
1053 * Returns the result of adding or subtracting the floating-point
1054 * values `a' and `b'. The operation is performed according to the
1055 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1056 */
1057
97ff87c0 1058float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
6fff2167
AB
1059{
1060 FloatParts pa = float16_unpack_canonical(a, status);
1061 FloatParts pb = float16_unpack_canonical(b, status);
1062 FloatParts pr = addsub_floats(pa, pb, false, status);
1063
1064 return float16_round_pack_canonical(pr, status);
1065}
1066
1b615d48
EC
1067float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
1068{
1069 FloatParts pa = float16_unpack_canonical(a, status);
1070 FloatParts pb = float16_unpack_canonical(b, status);
1071 FloatParts pr = addsub_floats(pa, pb, true, status);
1072
1073 return float16_round_pack_canonical(pr, status);
1074}
1075
1076static float32 QEMU_SOFTFLOAT_ATTR
1077soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
6fff2167
AB
1078{
1079 FloatParts pa = float32_unpack_canonical(a, status);
1080 FloatParts pb = float32_unpack_canonical(b, status);
1b615d48 1081 FloatParts pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1082
1083 return float32_round_pack_canonical(pr, status);
1084}
1085
1b615d48
EC
1086static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
1087{
1088 return soft_f32_addsub(a, b, false, status);
1089}
1090
1091static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1092{
1093 return soft_f32_addsub(a, b, true, status);
1094}
1095
1096static float64 QEMU_SOFTFLOAT_ATTR
1097soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
6fff2167
AB
1098{
1099 FloatParts pa = float64_unpack_canonical(a, status);
1100 FloatParts pb = float64_unpack_canonical(b, status);
1b615d48 1101 FloatParts pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1102
1103 return float64_round_pack_canonical(pr, status);
1104}
1105
1b615d48 1106static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1107{
1b615d48
EC
1108 return soft_f64_addsub(a, b, false, status);
1109}
6fff2167 1110
1b615d48
EC
1111static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1112{
1113 return soft_f64_addsub(a, b, true, status);
6fff2167
AB
1114}
1115
/* Host single-precision addition. */
static float hard_f32_add(float a, float b)
{
    const float sum = a + b;

    return sum;
}
6fff2167 1120
1b615d48
EC
/* Host single-precision subtraction. */
static float hard_f32_sub(float a, float b)
{
    const float diff = a - b;

    return diff;
}
1125
/* Host double-precision addition. */
static double hard_f64_add(double a, double b)
{
    const double sum = a + b;

    return sum;
}
6fff2167 1130
1b615d48
EC
/* Host double-precision subtraction. */
static double hard_f64_sub(double a, double b)
{
    const double diff = a - b;

    return diff;
}
1135
1136static bool f32_addsub_post(union_float32 a, union_float32 b)
1137{
1138 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1139 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1140 }
1141 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1142}
1143
1144static bool f64_addsub_post(union_float64 a, union_float64 b)
1145{
1146 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1147 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1148 } else {
1149 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1150 }
1151}
1152
1153static float32 float32_addsub(float32 a, float32 b, float_status *s,
1154 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1155{
1156 return float32_gen2(a, b, s, hard, soft,
1157 f32_is_zon2, f32_addsub_post, NULL, NULL);
1158}
1159
1160static float64 float64_addsub(float64 a, float64 b, float_status *s,
1161 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1162{
1163 return float64_gen2(a, b, s, hard, soft,
1164 f64_is_zon2, f64_addsub_post, NULL, NULL);
1165}
1166
1167float32 QEMU_FLATTEN
1168float32_add(float32 a, float32 b, float_status *s)
1169{
1170 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1171}
1172
1173float32 QEMU_FLATTEN
1174float32_sub(float32 a, float32 b, float_status *s)
1175{
1176 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1177}
1178
1179float64 QEMU_FLATTEN
1180float64_add(float64 a, float64 b, float_status *s)
1181{
1182 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1183}
1184
1185float64 QEMU_FLATTEN
1186float64_sub(float64 a, float64 b, float_status *s)
1187{
1188 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1189}
1190
74d707e2
AB
1191/*
1192 * Returns the result of multiplying the floating-point values `a' and
1193 * `b'. The operation is performed according to the IEC/IEEE Standard
1194 * for Binary Floating-Point Arithmetic.
1195 */
1196
1197static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
1198{
1199 bool sign = a.sign ^ b.sign;
1200
1201 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1202 uint64_t hi, lo;
1203 int exp = a.exp + b.exp;
1204
1205 mul64To128(a.frac, b.frac, &hi, &lo);
1206 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1207 if (lo & DECOMPOSED_OVERFLOW_BIT) {
1208 shift64RightJamming(lo, 1, &lo);
1209 exp += 1;
1210 }
1211
1212 /* Re-use a */
1213 a.exp = exp;
1214 a.sign = sign;
1215 a.frac = lo;
1216 return a;
1217 }
1218 /* handle all the NaN cases */
1219 if (is_nan(a.cls) || is_nan(b.cls)) {
1220 return pick_nan(a, b, s);
1221 }
1222 /* Inf * Zero == NaN */
1223 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
1224 (a.cls == float_class_zero && b.cls == float_class_inf)) {
1225 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1226 return parts_default_nan(s);
74d707e2
AB
1227 }
1228 /* Multiply by 0 or Inf */
1229 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1230 a.sign = sign;
1231 return a;
1232 }
1233 if (b.cls == float_class_inf || b.cls == float_class_zero) {
1234 b.sign = sign;
1235 return b;
1236 }
1237 g_assert_not_reached();
1238}
1239
97ff87c0 1240float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2
AB
1241{
1242 FloatParts pa = float16_unpack_canonical(a, status);
1243 FloatParts pb = float16_unpack_canonical(b, status);
1244 FloatParts pr = mul_floats(pa, pb, status);
1245
1246 return float16_round_pack_canonical(pr, status);
1247}
1248
2dfabc86
EC
1249static float32 QEMU_SOFTFLOAT_ATTR
1250soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2
AB
1251{
1252 FloatParts pa = float32_unpack_canonical(a, status);
1253 FloatParts pb = float32_unpack_canonical(b, status);
1254 FloatParts pr = mul_floats(pa, pb, status);
1255
1256 return float32_round_pack_canonical(pr, status);
1257}
1258
2dfabc86
EC
1259static float64 QEMU_SOFTFLOAT_ATTR
1260soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2
AB
1261{
1262 FloatParts pa = float64_unpack_canonical(a, status);
1263 FloatParts pb = float64_unpack_canonical(b, status);
1264 FloatParts pr = mul_floats(pa, pb, status);
1265
1266 return float64_round_pack_canonical(pr, status);
1267}
1268
2dfabc86
EC
/* Host single-precision multiplication. */
static float hard_f32_mul(float a, float b)
{
    const float product = a * b;

    return product;
}
1273
/* Host double-precision multiplication. */
static double hard_f64_mul(double a, double b)
{
    const double product = a * b;

    return product;
}
1278
1279static bool f32_mul_fast_test(union_float32 a, union_float32 b)
1280{
1281 return float32_is_zero(a.s) || float32_is_zero(b.s);
1282}
1283
1284static bool f64_mul_fast_test(union_float64 a, union_float64 b)
1285{
1286 return float64_is_zero(a.s) || float64_is_zero(b.s);
1287}
1288
1289static float32 f32_mul_fast_op(float32 a, float32 b, float_status *s)
1290{
1291 bool signbit = float32_is_neg(a) ^ float32_is_neg(b);
1292
1293 return float32_set_sign(float32_zero, signbit);
1294}
1295
1296static float64 f64_mul_fast_op(float64 a, float64 b, float_status *s)
1297{
1298 bool signbit = float64_is_neg(a) ^ float64_is_neg(b);
1299
1300 return float64_set_sign(float64_zero, signbit);
1301}
1302
1303float32 QEMU_FLATTEN
1304float32_mul(float32 a, float32 b, float_status *s)
1305{
1306 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
1307 f32_is_zon2, NULL, f32_mul_fast_test, f32_mul_fast_op);
1308}
1309
1310float64 QEMU_FLATTEN
1311float64_mul(float64 a, float64 b, float_status *s)
1312{
1313 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
1314 f64_is_zon2, NULL, f64_mul_fast_test, f64_mul_fast_op);
1315}
1316
d446830a
AB
1317/*
1318 * Returns the result of multiplying the floating-point values `a' and
1319 * `b' then adding 'c', with no intermediate rounding step after the
1320 * multiplication. The operation is performed according to the
1321 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
1322 * The flags argument allows the caller to select negation of the
1323 * addend, the intermediate product, or the final result. (The
1324 * difference between this and having the caller do a separate
1325 * negation is that negating externally will flip the sign bit on
1326 * NaNs.)
1327 */
1328
1329static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
1330 int flags, float_status *s)
1331{
1332 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
1333 ((1 << float_class_inf) | (1 << float_class_zero));
1334 bool p_sign;
1335 bool sign_flip = flags & float_muladd_negate_result;
1336 FloatClass p_class;
1337 uint64_t hi, lo;
1338 int p_exp;
1339
1340 /* It is implementation-defined whether the cases of (0,inf,qnan)
1341 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
1342 * they return if they do), so we have to hand this information
1343 * off to the target-specific pick-a-NaN routine.
1344 */
1345 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
1346 return pick_nan_muladd(a, b, c, inf_zero, s);
1347 }
1348
1349 if (inf_zero) {
1350 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1351 return parts_default_nan(s);
d446830a
AB
1352 }
1353
1354 if (flags & float_muladd_negate_c) {
1355 c.sign ^= 1;
1356 }
1357
1358 p_sign = a.sign ^ b.sign;
1359
1360 if (flags & float_muladd_negate_product) {
1361 p_sign ^= 1;
1362 }
1363
1364 if (a.cls == float_class_inf || b.cls == float_class_inf) {
1365 p_class = float_class_inf;
1366 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
1367 p_class = float_class_zero;
1368 } else {
1369 p_class = float_class_normal;
1370 }
1371
1372 if (c.cls == float_class_inf) {
1373 if (p_class == float_class_inf && p_sign != c.sign) {
1374 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1375 return parts_default_nan(s);
d446830a
AB
1376 } else {
1377 a.cls = float_class_inf;
1378 a.sign = c.sign ^ sign_flip;
f7e598e2 1379 return a;
d446830a 1380 }
d446830a
AB
1381 }
1382
1383 if (p_class == float_class_inf) {
1384 a.cls = float_class_inf;
1385 a.sign = p_sign ^ sign_flip;
1386 return a;
1387 }
1388
1389 if (p_class == float_class_zero) {
1390 if (c.cls == float_class_zero) {
1391 if (p_sign != c.sign) {
1392 p_sign = s->float_rounding_mode == float_round_down;
1393 }
1394 c.sign = p_sign;
1395 } else if (flags & float_muladd_halve_result) {
1396 c.exp -= 1;
1397 }
1398 c.sign ^= sign_flip;
1399 return c;
1400 }
1401
1402 /* a & b should be normals now... */
1403 assert(a.cls == float_class_normal &&
1404 b.cls == float_class_normal);
1405
1406 p_exp = a.exp + b.exp;
1407
1408 /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
1409 * result.
1410 */
1411 mul64To128(a.frac, b.frac, &hi, &lo);
1412 /* binary point now at bit 124 */
1413
1414 /* check for overflow */
1415 if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
1416 shift128RightJamming(hi, lo, 1, &hi, &lo);
1417 p_exp += 1;
1418 }
1419
1420 /* + add/sub */
1421 if (c.cls == float_class_zero) {
1422 /* move binary point back to 62 */
1423 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1424 } else {
1425 int exp_diff = p_exp - c.exp;
1426 if (p_sign == c.sign) {
1427 /* Addition */
1428 if (exp_diff <= 0) {
1429 shift128RightJamming(hi, lo,
1430 DECOMPOSED_BINARY_POINT - exp_diff,
1431 &hi, &lo);
1432 lo += c.frac;
1433 p_exp = c.exp;
1434 } else {
1435 uint64_t c_hi, c_lo;
1436 /* shift c to the same binary point as the product (124) */
1437 c_hi = c.frac >> 2;
1438 c_lo = 0;
1439 shift128RightJamming(c_hi, c_lo,
1440 exp_diff,
1441 &c_hi, &c_lo);
1442 add128(hi, lo, c_hi, c_lo, &hi, &lo);
1443 /* move binary point back to 62 */
1444 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1445 }
1446
1447 if (lo & DECOMPOSED_OVERFLOW_BIT) {
1448 shift64RightJamming(lo, 1, &lo);
1449 p_exp += 1;
1450 }
1451
1452 } else {
1453 /* Subtraction */
1454 uint64_t c_hi, c_lo;
1455 /* make C binary point match product at bit 124 */
1456 c_hi = c.frac >> 2;
1457 c_lo = 0;
1458
1459 if (exp_diff <= 0) {
1460 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1461 if (exp_diff == 0
1462 &&
1463 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1464 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1465 } else {
1466 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1467 p_sign ^= 1;
1468 p_exp = c.exp;
1469 }
1470 } else {
1471 shift128RightJamming(c_hi, c_lo,
1472 exp_diff,
1473 &c_hi, &c_lo);
1474 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1475 }
1476
1477 if (hi == 0 && lo == 0) {
1478 a.cls = float_class_zero;
1479 a.sign = s->float_rounding_mode == float_round_down;
1480 a.sign ^= sign_flip;
1481 return a;
1482 } else {
1483 int shift;
1484 if (hi != 0) {
1485 shift = clz64(hi);
1486 } else {
1487 shift = clz64(lo) + 64;
1488 }
1489 /* Normalizing to a binary point of 124 is the
1490 correct adjust for the exponent. However since we're
1491 shifting, we might as well put the binary point back
1492 at 62 where we really want it. Therefore shift as
1493 if we're leaving 1 bit at the top of the word, but
1494 adjust the exponent as if we're leaving 3 bits. */
1495 shift -= 1;
1496 if (shift >= 64) {
1497 lo = lo << (shift - 64);
1498 } else {
1499 hi = (hi << shift) | (lo >> (64 - shift));
1500 lo = hi | ((lo << shift) != 0);
1501 }
1502 p_exp -= shift - 2;
1503 }
1504 }
1505 }
1506
1507 if (flags & float_muladd_halve_result) {
1508 p_exp -= 1;
1509 }
1510
1511 /* finally prepare our result */
1512 a.cls = float_class_normal;
1513 a.sign = p_sign ^ sign_flip;
1514 a.exp = p_exp;
1515 a.frac = lo;
1516
1517 return a;
1518}
1519
97ff87c0 1520float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
d446830a
AB
1521 int flags, float_status *status)
1522{
1523 FloatParts pa = float16_unpack_canonical(a, status);
1524 FloatParts pb = float16_unpack_canonical(b, status);
1525 FloatParts pc = float16_unpack_canonical(c, status);
1526 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1527
1528 return float16_round_pack_canonical(pr, status);
1529}
1530
ccf770ba
EC
1531static float32 QEMU_SOFTFLOAT_ATTR
1532soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1533 float_status *status)
d446830a
AB
1534{
1535 FloatParts pa = float32_unpack_canonical(a, status);
1536 FloatParts pb = float32_unpack_canonical(b, status);
1537 FloatParts pc = float32_unpack_canonical(c, status);
1538 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1539
1540 return float32_round_pack_canonical(pr, status);
1541}
1542
ccf770ba
EC
1543static float64 QEMU_SOFTFLOAT_ATTR
1544soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1545 float_status *status)
d446830a
AB
1546{
1547 FloatParts pa = float64_unpack_canonical(a, status);
1548 FloatParts pb = float64_unpack_canonical(b, status);
1549 FloatParts pc = float64_unpack_canonical(c, status);
1550 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1551
1552 return float64_round_pack_canonical(pr, status);
1553}
1554
f6b3b108
EC
/*
 * When true, float32_muladd() and float64_muladd() skip the host FMA and
 * take the softfloat path.  Where this is set is not visible in this part
 * of the file.
 */
static bool force_soft_fma;
1556
ccf770ba
EC
1557float32 QEMU_FLATTEN
1558float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1559{
1560 union_float32 ua, ub, uc, ur;
1561
1562 ua.s = xa;
1563 ub.s = xb;
1564 uc.s = xc;
1565
1566 if (unlikely(!can_use_fpu(s))) {
1567 goto soft;
1568 }
1569 if (unlikely(flags & float_muladd_halve_result)) {
1570 goto soft;
1571 }
1572
1573 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1574 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1575 goto soft;
1576 }
f6b3b108
EC
1577
1578 if (unlikely(force_soft_fma)) {
1579 goto soft;
1580 }
1581
ccf770ba
EC
1582 /*
1583 * When (a || b) == 0, there's no need to check for under/over flow,
1584 * since we know the addend is (normal || 0) and the product is 0.
1585 */
1586 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1587 union_float32 up;
1588 bool prod_sign;
1589
1590 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1591 prod_sign ^= !!(flags & float_muladd_negate_product);
1592 up.s = float32_set_sign(float32_zero, prod_sign);
1593
1594 if (flags & float_muladd_negate_c) {
1595 uc.h = -uc.h;
1596 }
1597 ur.h = up.h + uc.h;
1598 } else {
896f51fb
KC
1599 union_float32 ua_orig = ua;
1600 union_float32 uc_orig = uc;
1601
ccf770ba
EC
1602 if (flags & float_muladd_negate_product) {
1603 ua.h = -ua.h;
1604 }
1605 if (flags & float_muladd_negate_c) {
1606 uc.h = -uc.h;
1607 }
1608
1609 ur.h = fmaf(ua.h, ub.h, uc.h);
1610
1611 if (unlikely(f32_is_inf(ur))) {
1612 s->float_exception_flags |= float_flag_overflow;
1613 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
1614 ua = ua_orig;
1615 uc = uc_orig;
ccf770ba
EC
1616 goto soft;
1617 }
1618 }
1619 if (flags & float_muladd_negate_result) {
1620 return float32_chs(ur.s);
1621 }
1622 return ur.s;
1623
1624 soft:
1625 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1626}
1627
1628float64 QEMU_FLATTEN
1629float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1630{
1631 union_float64 ua, ub, uc, ur;
1632
1633 ua.s = xa;
1634 ub.s = xb;
1635 uc.s = xc;
1636
1637 if (unlikely(!can_use_fpu(s))) {
1638 goto soft;
1639 }
1640 if (unlikely(flags & float_muladd_halve_result)) {
1641 goto soft;
1642 }
1643
1644 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1645 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1646 goto soft;
1647 }
f6b3b108
EC
1648
1649 if (unlikely(force_soft_fma)) {
1650 goto soft;
1651 }
1652
ccf770ba
EC
1653 /*
1654 * When (a || b) == 0, there's no need to check for under/over flow,
1655 * since we know the addend is (normal || 0) and the product is 0.
1656 */
1657 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1658 union_float64 up;
1659 bool prod_sign;
1660
1661 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1662 prod_sign ^= !!(flags & float_muladd_negate_product);
1663 up.s = float64_set_sign(float64_zero, prod_sign);
1664
1665 if (flags & float_muladd_negate_c) {
1666 uc.h = -uc.h;
1667 }
1668 ur.h = up.h + uc.h;
1669 } else {
896f51fb
KC
1670 union_float64 ua_orig = ua;
1671 union_float64 uc_orig = uc;
1672
ccf770ba
EC
1673 if (flags & float_muladd_negate_product) {
1674 ua.h = -ua.h;
1675 }
1676 if (flags & float_muladd_negate_c) {
1677 uc.h = -uc.h;
1678 }
1679
1680 ur.h = fma(ua.h, ub.h, uc.h);
1681
1682 if (unlikely(f64_is_inf(ur))) {
1683 s->float_exception_flags |= float_flag_overflow;
1684 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
1685 ua = ua_orig;
1686 uc = uc_orig;
ccf770ba
EC
1687 goto soft;
1688 }
1689 }
1690 if (flags & float_muladd_negate_result) {
1691 return float64_chs(ur.s);
1692 }
1693 return ur.s;
1694
1695 soft:
1696 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1697}
1698
cf07323d
AB
1699/*
1700 * Returns the result of dividing the floating-point value `a' by the
1701 * corresponding value `b'. The operation is performed according to
1702 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1703 */
1704
1705static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1706{
1707 bool sign = a.sign ^ b.sign;
1708
1709 if (a.cls == float_class_normal && b.cls == float_class_normal) {
5dfbc9e4 1710 uint64_t n0, n1, q, r;
cf07323d 1711 int exp = a.exp - b.exp;
5dfbc9e4
RH
1712
1713 /*
1714 * We want a 2*N / N-bit division to produce exactly an N-bit
1715 * result, so that we do not lose any precision and so that we
1716 * do not have to renormalize afterward. If A.frac < B.frac,
1717 * then division would produce an (N-1)-bit result; shift A left
1718 * by one to produce the an N-bit result, and decrement the
1719 * exponent to match.
1720 *
1721 * The udiv_qrnnd algorithm that we're using requires normalization,
1722 * i.e. the msb of the denominator must be set. Since we know that
1723 * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
1724 * by one (more), and the remainder must be shifted right by one.
1725 */
cf07323d
AB
1726 if (a.frac < b.frac) {
1727 exp -= 1;
5dfbc9e4 1728 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
cf07323d 1729 } else {
5dfbc9e4 1730 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
cf07323d 1731 }
5dfbc9e4
RH
1732 q = udiv_qrnnd(&r, n1, n0, b.frac << 1);
1733
1734 /*
1735 * Set lsb if there is a remainder, to set inexact.
1736 * As mentioned above, to find the actual value of the remainder we
1737 * would need to shift right, but (1) we are only concerned about
1738 * non-zero-ness, and (2) the remainder will always be even because
1739 * both inputs to the division primitive are even.
1740 */
1741 a.frac = q | (r != 0);
cf07323d
AB
1742 a.sign = sign;
1743 a.exp = exp;
1744 return a;
1745 }
1746 /* handle all the NaN cases */
1747 if (is_nan(a.cls) || is_nan(b.cls)) {
1748 return pick_nan(a, b, s);
1749 }
1750 /* 0/0 or Inf/Inf */
1751 if (a.cls == b.cls
1752 &&
1753 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1754 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1755 return parts_default_nan(s);
cf07323d 1756 }
9cb4e398
AB
1757 /* Inf / x or 0 / x */
1758 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1759 a.sign = sign;
1760 return a;
1761 }
cf07323d
AB
1762 /* Div 0 => Inf */
1763 if (b.cls == float_class_zero) {
1764 s->float_exception_flags |= float_flag_divbyzero;
1765 a.cls = float_class_inf;
1766 a.sign = sign;
1767 return a;
1768 }
cf07323d
AB
1769 /* Div by Inf */
1770 if (b.cls == float_class_inf) {
1771 a.cls = float_class_zero;
1772 a.sign = sign;
1773 return a;
1774 }
1775 g_assert_not_reached();
1776}
1777
1778float16 float16_div(float16 a, float16 b, float_status *status)
1779{
1780 FloatParts pa = float16_unpack_canonical(a, status);
1781 FloatParts pb = float16_unpack_canonical(b, status);
1782 FloatParts pr = div_floats(pa, pb, status);
1783
1784 return float16_round_pack_canonical(pr, status);
1785}
1786
4a629561
EC
1787static float32 QEMU_SOFTFLOAT_ATTR
1788soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d
AB
1789{
1790 FloatParts pa = float32_unpack_canonical(a, status);
1791 FloatParts pb = float32_unpack_canonical(b, status);
1792 FloatParts pr = div_floats(pa, pb, status);
1793
1794 return float32_round_pack_canonical(pr, status);
1795}
1796
4a629561
EC
1797static float64 QEMU_SOFTFLOAT_ATTR
1798soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d
AB
1799{
1800 FloatParts pa = float64_unpack_canonical(a, status);
1801 FloatParts pb = float64_unpack_canonical(b, status);
1802 FloatParts pr = div_floats(pa, pb, status);
1803
1804 return float64_round_pack_canonical(pr, status);
1805}
1806
4a629561
EC
/* Host single-precision division. */
static float hard_f32_div(float a, float b)
{
    const float quotient = a / b;

    return quotient;
}
1811
/* Host double-precision division. */
static double hard_f64_div(double a, double b)
{
    const double quotient = a / b;

    return quotient;
}
1816
1817static bool f32_div_pre(union_float32 a, union_float32 b)
1818{
1819 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1820 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1821 fpclassify(b.h) == FP_NORMAL;
1822 }
1823 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1824}
1825
1826static bool f64_div_pre(union_float64 a, union_float64 b)
1827{
1828 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1829 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1830 fpclassify(b.h) == FP_NORMAL;
1831 }
1832 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1833}
1834
1835static bool f32_div_post(union_float32 a, union_float32 b)
1836{
1837 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1838 return fpclassify(a.h) != FP_ZERO;
1839 }
1840 return !float32_is_zero(a.s);
1841}
1842
1843static bool f64_div_post(union_float64 a, union_float64 b)
1844{
1845 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1846 return fpclassify(a.h) != FP_ZERO;
1847 }
1848 return !float64_is_zero(a.s);
1849}
1850
1851float32 QEMU_FLATTEN
1852float32_div(float32 a, float32 b, float_status *s)
1853{
1854 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
1855 f32_div_pre, f32_div_post, NULL, NULL);
1856}
1857
1858float64 QEMU_FLATTEN
1859float64_div(float64 a, float64 b, float_status *s)
1860{
1861 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
1862 f64_div_pre, f64_div_post, NULL, NULL);
1863}
1864
6fed16b2
AB
1865/*
1866 * Float to Float conversions
1867 *
1868 * Returns the result of converting one float format to another. The
1869 * conversion is performed according to the IEC/IEEE Standard for
1870 * Binary Floating-Point Arithmetic.
1871 *
1872 * The float_to_float helper only needs to take care of raising
1873 * invalid exceptions and handling the conversion on NaNs.
1874 */
1875
1876static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1877 float_status *s)
1878{
1879 if (dstf->arm_althp) {
1880 switch (a.cls) {
1881 case float_class_qnan:
1882 case float_class_snan:
1883 /* There is no NaN in the destination format. Raise Invalid
1884 * and return a zero with the sign of the input NaN.
1885 */
1886 s->float_exception_flags |= float_flag_invalid;
1887 a.cls = float_class_zero;
1888 a.frac = 0;
1889 a.exp = 0;
1890 break;
1891
1892 case float_class_inf:
1893 /* There is no Inf in the destination format. Raise Invalid
1894 * and return the maximum normal with the correct sign.
1895 */
1896 s->float_exception_flags |= float_flag_invalid;
1897 a.cls = float_class_normal;
1898 a.exp = dstf->exp_max;
1899 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1900 break;
1901
1902 default:
1903 break;
1904 }
1905 } else if (is_nan(a.cls)) {
1906 if (is_snan(a.cls)) {
1907 s->float_exception_flags |= float_flag_invalid;
1908 a = parts_silence_nan(a, s);
1909 }
1910 if (s->default_nan_mode) {
1911 return parts_default_nan(s);
1912 }
1913 }
1914 return a;
1915}
1916
1917float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1918{
1919 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1920 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1921 FloatParts pr = float_to_float(p, &float32_params, s);
1922 return float32_round_pack_canonical(pr, s);
1923}
1924
1925float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1926{
1927 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1928 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1929 FloatParts pr = float_to_float(p, &float64_params, s);
1930 return float64_round_pack_canonical(pr, s);
1931}
1932
1933float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1934{
1935 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1936 FloatParts p = float32_unpack_canonical(a, s);
1937 FloatParts pr = float_to_float(p, fmt16, s);
1938 return float16a_round_pack_canonical(pr, s, fmt16);
1939}
1940
1941float64 float32_to_float64(float32 a, float_status *s)
1942{
1943 FloatParts p = float32_unpack_canonical(a, s);
1944 FloatParts pr = float_to_float(p, &float64_params, s);
1945 return float64_round_pack_canonical(pr, s);
1946}
1947
1948float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1949{
1950 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1951 FloatParts p = float64_unpack_canonical(a, s);
1952 FloatParts pr = float_to_float(p, fmt16, s);
1953 return float16a_round_pack_canonical(pr, s, fmt16);
1954}
1955
1956float32 float64_to_float32(float64 a, float_status *s)
1957{
1958 FloatParts p = float64_unpack_canonical(a, s);
1959 FloatParts pr = float_to_float(p, &float32_params, s);
1960 return float32_round_pack_canonical(pr, s);
1961}
1962
dbe4d53a
AB
1963/*
1964 * Rounds the floating-point value `a' to an integer, and returns the
1965 * result as a floating-point value. The operation is performed
1966 * according to the IEC/IEEE Standard for Binary Floating-Point
1967 * Arithmetic.
1968 */
1969
2f6c74be
RH
1970static FloatParts round_to_int(FloatParts a, int rmode,
1971 int scale, float_status *s)
dbe4d53a 1972{
2f6c74be
RH
1973 switch (a.cls) {
1974 case float_class_qnan:
1975 case float_class_snan:
dbe4d53a 1976 return return_nan(a, s);
dbe4d53a 1977
dbe4d53a
AB
1978 case float_class_zero:
1979 case float_class_inf:
dbe4d53a
AB
1980 /* already "integral" */
1981 break;
2f6c74be 1982
dbe4d53a 1983 case float_class_normal:
2f6c74be
RH
1984 scale = MIN(MAX(scale, -0x10000), 0x10000);
1985 a.exp += scale;
1986
dbe4d53a
AB
1987 if (a.exp >= DECOMPOSED_BINARY_POINT) {
1988 /* already integral */
1989 break;
1990 }
1991 if (a.exp < 0) {
1992 bool one;
1993 /* all fractional */
1994 s->float_exception_flags |= float_flag_inexact;
2f6c74be 1995 switch (rmode) {
dbe4d53a
AB
1996 case float_round_nearest_even:
1997 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
1998 break;
1999 case float_round_ties_away:
2000 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
2001 break;
2002 case float_round_to_zero:
2003 one = false;
2004 break;
2005 case float_round_up:
2006 one = !a.sign;
2007 break;
2008 case float_round_down:
2009 one = a.sign;
2010 break;
5d64abb3
RH
2011 case float_round_to_odd:
2012 one = true;
2013 break;
dbe4d53a
AB
2014 default:
2015 g_assert_not_reached();
2016 }
2017
2018 if (one) {
2019 a.frac = DECOMPOSED_IMPLICIT_BIT;
2020 a.exp = 0;
2021 } else {
2022 a.cls = float_class_zero;
2023 }
2024 } else {
2025 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
2026 uint64_t frac_lsbm1 = frac_lsb >> 1;
2027 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
2028 uint64_t rnd_mask = rnd_even_mask >> 1;
2029 uint64_t inc;
2030
2f6c74be 2031 switch (rmode) {
dbe4d53a
AB
2032 case float_round_nearest_even:
2033 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
2034 break;
2035 case float_round_ties_away:
2036 inc = frac_lsbm1;
2037 break;
2038 case float_round_to_zero:
2039 inc = 0;
2040 break;
2041 case float_round_up:
2042 inc = a.sign ? 0 : rnd_mask;
2043 break;
2044 case float_round_down:
2045 inc = a.sign ? rnd_mask : 0;
2046 break;
5d64abb3
RH
2047 case float_round_to_odd:
2048 inc = a.frac & frac_lsb ? 0 : rnd_mask;
2049 break;
dbe4d53a
AB
2050 default:
2051 g_assert_not_reached();
2052 }
2053
2054 if (a.frac & rnd_mask) {
2055 s->float_exception_flags |= float_flag_inexact;
2056 a.frac += inc;
2057 a.frac &= ~rnd_mask;
2058 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
2059 a.frac >>= 1;
2060 a.exp++;
2061 }
2062 }
2063 }
2064 break;
2065 default:
2066 g_assert_not_reached();
2067 }
2068 return a;
2069}
2070
2071float16 float16_round_to_int(float16 a, float_status *s)
2072{
2073 FloatParts pa = float16_unpack_canonical(a, s);
2f6c74be 2074 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2075 return float16_round_pack_canonical(pr, s);
2076}
2077
2078float32 float32_round_to_int(float32 a, float_status *s)
2079{
2080 FloatParts pa = float32_unpack_canonical(a, s);
2f6c74be 2081 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2082 return float32_round_pack_canonical(pr, s);
2083}
2084
2085float64 float64_round_to_int(float64 a, float_status *s)
2086{
2087 FloatParts pa = float64_unpack_canonical(a, s);
2f6c74be 2088 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2089 return float64_round_pack_canonical(pr, s);
2090}
2091
ab52f973
AB
2092/*
2093 * Returns the result of converting the floating-point value `a' to
2094 * the two's complement integer format. The conversion is performed
2095 * according to the IEC/IEEE Standard for Binary Floating-Point
2096 * Arithmetic---which means in particular that the conversion is
2097 * rounded according to the current rounding mode. If `a' is a NaN,
2098 * the largest positive integer is returned. Otherwise, if the
2099 * conversion overflows, the largest integer with the same sign as `a'
2100 * is returned.
2101*/
2102
2f6c74be 2103static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale,
ab52f973
AB
2104 int64_t min, int64_t max,
2105 float_status *s)
2106{
2107 uint64_t r;
2108 int orig_flags = get_float_exception_flags(s);
2f6c74be 2109 FloatParts p = round_to_int(in, rmode, scale, s);
ab52f973
AB
2110
2111 switch (p.cls) {
2112 case float_class_snan:
2113 case float_class_qnan:
801bc563 2114 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2115 return max;
2116 case float_class_inf:
801bc563 2117 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2118 return p.sign ? min : max;
2119 case float_class_zero:
2120 return 0;
2121 case float_class_normal:
2122 if (p.exp < DECOMPOSED_BINARY_POINT) {
2123 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2124 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2125 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2126 } else {
2127 r = UINT64_MAX;
2128 }
2129 if (p.sign) {
33358375 2130 if (r <= -(uint64_t) min) {
ab52f973
AB
2131 return -r;
2132 } else {
2133 s->float_exception_flags = orig_flags | float_flag_invalid;
2134 return min;
2135 }
2136 } else {
33358375 2137 if (r <= max) {
ab52f973
AB
2138 return r;
2139 } else {
2140 s->float_exception_flags = orig_flags | float_flag_invalid;
2141 return max;
2142 }
2143 }
2144 default:
2145 g_assert_not_reached();
2146 }
2147}
2148
2f6c74be
RH
2149int16_t float16_to_int16_scalbn(float16 a, int rmode, int scale,
2150 float_status *s)
2151{
2152 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2153 rmode, scale, INT16_MIN, INT16_MAX, s);
2154}
2155
2156int32_t float16_to_int32_scalbn(float16 a, int rmode, int scale,
2157 float_status *s)
2158{
2159 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2160 rmode, scale, INT32_MIN, INT32_MAX, s);
2161}
2162
2163int64_t float16_to_int64_scalbn(float16 a, int rmode, int scale,
2164 float_status *s)
2165{
2166 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2167 rmode, scale, INT64_MIN, INT64_MAX, s);
2168}
2169
2170int16_t float32_to_int16_scalbn(float32 a, int rmode, int scale,
2171 float_status *s)
2172{
2173 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2174 rmode, scale, INT16_MIN, INT16_MAX, s);
2175}
2176
2177int32_t float32_to_int32_scalbn(float32 a, int rmode, int scale,
2178 float_status *s)
2179{
2180 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2181 rmode, scale, INT32_MIN, INT32_MAX, s);
2182}
2183
2184int64_t float32_to_int64_scalbn(float32 a, int rmode, int scale,
2185 float_status *s)
2186{
2187 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2188 rmode, scale, INT64_MIN, INT64_MAX, s);
2189}
2190
2191int16_t float64_to_int16_scalbn(float64 a, int rmode, int scale,
2192 float_status *s)
2193{
2194 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2195 rmode, scale, INT16_MIN, INT16_MAX, s);
2196}
2197
2198int32_t float64_to_int32_scalbn(float64 a, int rmode, int scale,
2199 float_status *s)
2200{
2201 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2202 rmode, scale, INT32_MIN, INT32_MAX, s);
2203}
2204
2205int64_t float64_to_int64_scalbn(float64 a, int rmode, int scale,
2206 float_status *s)
2207{
2208 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2209 rmode, scale, INT64_MIN, INT64_MAX, s);
2210}
2211
2212int16_t float16_to_int16(float16 a, float_status *s)
2213{
2214 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2215}
2216
2217int32_t float16_to_int32(float16 a, float_status *s)
2218{
2219 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2220}
2221
2222int64_t float16_to_int64(float16 a, float_status *s)
2223{
2224 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2225}
2226
2227int16_t float32_to_int16(float32 a, float_status *s)
2228{
2229 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2230}
2231
2232int32_t float32_to_int32(float32 a, float_status *s)
2233{
2234 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2235}
2236
2237int64_t float32_to_int64(float32 a, float_status *s)
2238{
2239 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2240}
2241
2242int16_t float64_to_int16(float64 a, float_status *s)
2243{
2244 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2245}
2246
2247int32_t float64_to_int32(float64 a, float_status *s)
2248{
2249 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2250}
2251
2252int64_t float64_to_int64(float64 a, float_status *s)
2253{
2254 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2255}
2256
2257int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2258{
2259 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2260}
2261
2262int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2263{
2264 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2265}
2266
2267int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2268{
2269 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2270}
2271
2f6c74be
RH
2272int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2273{
2274 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2275}
ab52f973 2276
2f6c74be
RH
2277int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2278{
2279 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2280}
2281
2282int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2283{
2284 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2285}
2286
2287int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2288{
2289 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2290}
ab52f973 2291
2f6c74be
RH
2292int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2293{
2294 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2295}
ab52f973 2296
2f6c74be
RH
2297int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2298{
2299 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2300}
ab52f973
AB
2301
2302/*
2303 * Returns the result of converting the floating-point value `a' to
2304 * the unsigned integer format. The conversion is performed according
2305 * to the IEC/IEEE Standard for Binary Floating-Point
2306 * Arithmetic---which means in particular that the conversion is
2307 * rounded according to the current rounding mode. If `a' is a NaN,
2308 * the largest unsigned integer is returned. Otherwise, if the
2309 * conversion overflows, the largest unsigned integer is returned. If
2310 * the 'a' is negative, the result is rounded and zero is returned;
2311 * values that do not round to zero will raise the inexact exception
2312 * flag.
2313 */
2314
2f6c74be
RH
2315static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale,
2316 uint64_t max, float_status *s)
ab52f973
AB
2317{
2318 int orig_flags = get_float_exception_flags(s);
2f6c74be
RH
2319 FloatParts p = round_to_int(in, rmode, scale, s);
2320 uint64_t r;
ab52f973
AB
2321
2322 switch (p.cls) {
2323 case float_class_snan:
2324 case float_class_qnan:
2325 s->float_exception_flags = orig_flags | float_flag_invalid;
2326 return max;
2327 case float_class_inf:
801bc563 2328 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2329 return p.sign ? 0 : max;
2330 case float_class_zero:
2331 return 0;
2332 case float_class_normal:
ab52f973
AB
2333 if (p.sign) {
2334 s->float_exception_flags = orig_flags | float_flag_invalid;
2335 return 0;
2336 }
2337
2338 if (p.exp < DECOMPOSED_BINARY_POINT) {
2339 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2340 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2341 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2342 } else {
2343 s->float_exception_flags = orig_flags | float_flag_invalid;
2344 return max;
2345 }
2346
2347 /* For uint64 this will never trip, but if p.exp is too large
2348 * to shift a decomposed fraction we shall have exited via the
2349 * 3rd leg above.
2350 */
2351 if (r > max) {
2352 s->float_exception_flags = orig_flags | float_flag_invalid;
2353 return max;
ab52f973 2354 }
2f6c74be 2355 return r;
ab52f973
AB
2356 default:
2357 g_assert_not_reached();
2358 }
2359}
2360
2f6c74be
RH
2361uint16_t float16_to_uint16_scalbn(float16 a, int rmode, int scale,
2362 float_status *s)
2363{
2364 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2365 rmode, scale, UINT16_MAX, s);
2366}
2367
2368uint32_t float16_to_uint32_scalbn(float16 a, int rmode, int scale,
2369 float_status *s)
2370{
2371 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2372 rmode, scale, UINT32_MAX, s);
2373}
2374
2375uint64_t float16_to_uint64_scalbn(float16 a, int rmode, int scale,
2376 float_status *s)
2377{
2378 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2379 rmode, scale, UINT64_MAX, s);
2380}
2381
2382uint16_t float32_to_uint16_scalbn(float32 a, int rmode, int scale,
2383 float_status *s)
2384{
2385 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2386 rmode, scale, UINT16_MAX, s);
2387}
2388
2389uint32_t float32_to_uint32_scalbn(float32 a, int rmode, int scale,
2390 float_status *s)
2391{
2392 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2393 rmode, scale, UINT32_MAX, s);
2394}
2395
2396uint64_t float32_to_uint64_scalbn(float32 a, int rmode, int scale,
2397 float_status *s)
2398{
2399 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2400 rmode, scale, UINT64_MAX, s);
2401}
2402
2403uint16_t float64_to_uint16_scalbn(float64 a, int rmode, int scale,
2404 float_status *s)
2405{
2406 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2407 rmode, scale, UINT16_MAX, s);
2408}
2409
2410uint32_t float64_to_uint32_scalbn(float64 a, int rmode, int scale,
2411 float_status *s)
2412{
2413 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2414 rmode, scale, UINT32_MAX, s);
2415}
2416
2417uint64_t float64_to_uint64_scalbn(float64 a, int rmode, int scale,
2418 float_status *s)
2419{
2420 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2421 rmode, scale, UINT64_MAX, s);
2422}
2423
2424uint16_t float16_to_uint16(float16 a, float_status *s)
2425{
2426 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2427}
2428
2429uint32_t float16_to_uint32(float16 a, float_status *s)
2430{
2431 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2432}
2433
2434uint64_t float16_to_uint64(float16 a, float_status *s)
2435{
2436 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2437}
2438
2439uint16_t float32_to_uint16(float32 a, float_status *s)
2440{
2441 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2442}
2443
2444uint32_t float32_to_uint32(float32 a, float_status *s)
2445{
2446 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2447}
2448
2449uint64_t float32_to_uint64(float32 a, float_status *s)
2450{
2451 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2452}
2453
2454uint16_t float64_to_uint16(float64 a, float_status *s)
2455{
2456 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2457}
2458
2459uint32_t float64_to_uint32(float64 a, float_status *s)
2460{
2461 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2462}
2463
2464uint64_t float64_to_uint64(float64 a, float_status *s)
2465{
2466 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2467}
2468
2469uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2470{
2471 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2472}
2473
2474uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2475{
2476 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2477}
2478
2479uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2480{
2481 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2482}
2483
2484uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2485{
2486 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2487}
2488
2489uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2490{
2491 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2492}
2493
2494uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2495{
2496 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2497}
2498
2499uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2500{
2501 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2502}
2503
2504uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2505{
2506 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2507}
2508
2509uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2510{
2511 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2512}
ab52f973 2513
c02e1fb8
AB
2514/*
2515 * Integer to float conversions
2516 *
2517 * Returns the result of converting the two's complement integer `a'
2518 * to the floating-point format. The conversion is performed according
2519 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2520 */
2521
2abdfe24 2522static FloatParts int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 2523{
2abdfe24
RH
2524 FloatParts r = { .sign = false };
2525
c02e1fb8
AB
2526 if (a == 0) {
2527 r.cls = float_class_zero;
c02e1fb8 2528 } else {
2abdfe24
RH
2529 uint64_t f = a;
2530 int shift;
2531
2532 r.cls = float_class_normal;
c02e1fb8 2533 if (a < 0) {
2abdfe24 2534 f = -f;
c02e1fb8 2535 r.sign = true;
c02e1fb8 2536 }
2abdfe24
RH
2537 shift = clz64(f) - 1;
2538 scale = MIN(MAX(scale, -0x10000), 0x10000);
2539
2540 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2541 r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
c02e1fb8
AB
2542 }
2543
2544 return r;
2545}
2546
2abdfe24 2547float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2548{
2abdfe24 2549 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2550 return float16_round_pack_canonical(pa, status);
2551}
2552
2abdfe24
RH
2553float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2554{
2555 return int64_to_float16_scalbn(a, scale, status);
2556}
2557
2558float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2559{
2560 return int64_to_float16_scalbn(a, scale, status);
2561}
2562
2563float16 int64_to_float16(int64_t a, float_status *status)
2564{
2565 return int64_to_float16_scalbn(a, 0, status);
2566}
2567
c02e1fb8
AB
2568float16 int32_to_float16(int32_t a, float_status *status)
2569{
2abdfe24 2570 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2571}
2572
2573float16 int16_to_float16(int16_t a, float_status *status)
2574{
2abdfe24 2575 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2576}
2577
2abdfe24 2578float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2579{
2abdfe24 2580 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2581 return float32_round_pack_canonical(pa, status);
2582}
2583
2abdfe24
RH
2584float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
2585{
2586 return int64_to_float32_scalbn(a, scale, status);
2587}
2588
2589float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
2590{
2591 return int64_to_float32_scalbn(a, scale, status);
2592}
2593
2594float32 int64_to_float32(int64_t a, float_status *status)
2595{
2596 return int64_to_float32_scalbn(a, 0, status);
2597}
2598
c02e1fb8
AB
2599float32 int32_to_float32(int32_t a, float_status *status)
2600{
2abdfe24 2601 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2602}
2603
2604float32 int16_to_float32(int16_t a, float_status *status)
2605{
2abdfe24 2606 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2607}
2608
2abdfe24 2609float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2610{
2abdfe24 2611 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2612 return float64_round_pack_canonical(pa, status);
2613}
2614
2abdfe24
RH
2615float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
2616{
2617 return int64_to_float64_scalbn(a, scale, status);
2618}
2619
2620float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
2621{
2622 return int64_to_float64_scalbn(a, scale, status);
2623}
2624
2625float64 int64_to_float64(int64_t a, float_status *status)
2626{
2627 return int64_to_float64_scalbn(a, 0, status);
2628}
2629
c02e1fb8
AB
2630float64 int32_to_float64(int32_t a, float_status *status)
2631{
2abdfe24 2632 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2633}
2634
2635float64 int16_to_float64(int16_t a, float_status *status)
2636{
2abdfe24 2637 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2638}
2639
2640
2641/*
2642 * Unsigned Integer to float conversions
2643 *
2644 * Returns the result of converting the unsigned integer `a' to the
2645 * floating-point format. The conversion is performed according to the
2646 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2647 */
2648
2abdfe24 2649static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 2650{
2abdfe24 2651 FloatParts r = { .sign = false };
c02e1fb8
AB
2652
2653 if (a == 0) {
2654 r.cls = float_class_zero;
2655 } else {
2abdfe24 2656 scale = MIN(MAX(scale, -0x10000), 0x10000);
c02e1fb8 2657 r.cls = float_class_normal;
2abdfe24
RH
2658 if ((int64_t)a < 0) {
2659 r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
2660 shift64RightJamming(a, 1, &a);
c02e1fb8
AB
2661 r.frac = a;
2662 } else {
2abdfe24
RH
2663 int shift = clz64(a) - 1;
2664 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2665 r.frac = a << shift;
c02e1fb8
AB
2666 }
2667 }
2668
2669 return r;
2670}
2671
2abdfe24 2672float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2673{
2abdfe24 2674 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2675 return float16_round_pack_canonical(pa, status);
2676}
2677
2abdfe24
RH
2678float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
2679{
2680 return uint64_to_float16_scalbn(a, scale, status);
2681}
2682
2683float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
2684{
2685 return uint64_to_float16_scalbn(a, scale, status);
2686}
2687
2688float16 uint64_to_float16(uint64_t a, float_status *status)
2689{
2690 return uint64_to_float16_scalbn(a, 0, status);
2691}
2692
c02e1fb8
AB
2693float16 uint32_to_float16(uint32_t a, float_status *status)
2694{
2abdfe24 2695 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2696}
2697
2698float16 uint16_to_float16(uint16_t a, float_status *status)
2699{
2abdfe24 2700 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2701}
2702
2abdfe24 2703float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2704{
2abdfe24 2705 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2706 return float32_round_pack_canonical(pa, status);
2707}
2708
2abdfe24
RH
2709float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
2710{
2711 return uint64_to_float32_scalbn(a, scale, status);
2712}
2713
2714float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
2715{
2716 return uint64_to_float32_scalbn(a, scale, status);
2717}
2718
2719float32 uint64_to_float32(uint64_t a, float_status *status)
2720{
2721 return uint64_to_float32_scalbn(a, 0, status);
2722}
2723
c02e1fb8
AB
2724float32 uint32_to_float32(uint32_t a, float_status *status)
2725{
2abdfe24 2726 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2727}
2728
2729float32 uint16_to_float32(uint16_t a, float_status *status)
2730{
2abdfe24 2731 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2732}
2733
2abdfe24 2734float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2735{
2abdfe24 2736 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2737 return float64_round_pack_canonical(pa, status);
2738}
2739
2abdfe24
RH
2740float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
2741{
2742 return uint64_to_float64_scalbn(a, scale, status);
2743}
2744
2745float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
2746{
2747 return uint64_to_float64_scalbn(a, scale, status);
2748}
2749
2750float64 uint64_to_float64(uint64_t a, float_status *status)
2751{
2752 return uint64_to_float64_scalbn(a, 0, status);
2753}
2754
c02e1fb8
AB
2755float64 uint32_to_float64(uint32_t a, float_status *status)
2756{
2abdfe24 2757 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2758}
2759
2760float64 uint16_to_float64(uint16_t a, float_status *status)
2761{
2abdfe24 2762 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2763}
2764
89360067
AB
2765/* Float Min/Max */
2766/* min() and max() functions. These can't be implemented as
2767 * 'compare and pick one input' because that would mishandle
2768 * NaNs and +0 vs -0.
2769 *
2770 * minnum() and maxnum() functions. These are similar to the min()
2771 * and max() functions but if one of the arguments is a QNaN and
2772 * the other is numerical then the numerical argument is returned.
2773 * SNaNs will get quietened before being returned.
2774 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
2775 * and maxNum() operations. min() and max() are the typical min/max
2776 * semantics provided by many CPUs which predate that specification.
2777 *
2778 * minnummag() and maxnummag() functions correspond to minNumMag()
2779 * and minNumMag() from the IEEE-754 2008.
2780 */
2781static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
2782 bool ieee, bool ismag, float_status *s)
2783{
2784 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
2785 if (ieee) {
2786 /* Takes two floating-point values `a' and `b', one of
2787 * which is a NaN, and returns the appropriate NaN
2788 * result. If either `a' or `b' is a signaling NaN,
2789 * the invalid exception is raised.
2790 */
2791 if (is_snan(a.cls) || is_snan(b.cls)) {
2792 return pick_nan(a, b, s);
2793 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
2794 return b;
2795 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
2796 return a;
2797 }
2798 }
2799 return pick_nan(a, b, s);
2800 } else {
2801 int a_exp, b_exp;
89360067
AB
2802
2803 switch (a.cls) {
2804 case float_class_normal:
2805 a_exp = a.exp;
2806 break;
2807 case float_class_inf:
2808 a_exp = INT_MAX;
2809 break;
2810 case float_class_zero:
2811 a_exp = INT_MIN;
2812 break;
2813 default:
2814 g_assert_not_reached();
2815 break;
2816 }
2817 switch (b.cls) {
2818 case float_class_normal:
2819 b_exp = b.exp;
2820 break;
2821 case float_class_inf:
2822 b_exp = INT_MAX;
2823 break;
2824 case float_class_zero:
2825 b_exp = INT_MIN;
2826 break;
2827 default:
2828 g_assert_not_reached();
2829 break;
2830 }
2831
6245327a
EC
2832 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
2833 bool a_less = a_exp < b_exp;
2834 if (a_exp == b_exp) {
2835 a_less = a.frac < b.frac;
2836 }
2837 return a_less ^ ismin ? b : a;
89360067
AB
2838 }
2839
6245327a 2840 if (a.sign == b.sign) {
89360067
AB
2841 bool a_less = a_exp < b_exp;
2842 if (a_exp == b_exp) {
2843 a_less = a.frac < b.frac;
2844 }
6245327a 2845 return a.sign ^ a_less ^ ismin ? b : a;
89360067 2846 } else {
6245327a 2847 return a.sign ^ ismin ? b : a;
89360067
AB
2848 }
2849 }
2850}
2851
2852#define MINMAX(sz, name, ismin, isiee, ismag) \
2853float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
2854 float_status *s) \
2855{ \
2856 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2857 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2858 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
2859 \
2860 return float ## sz ## _round_pack_canonical(pr, s); \
2861}
2862
2863MINMAX(16, min, true, false, false)
2864MINMAX(16, minnum, true, true, false)
2865MINMAX(16, minnummag, true, true, true)
2866MINMAX(16, max, false, false, false)
2867MINMAX(16, maxnum, false, true, false)
2868MINMAX(16, maxnummag, false, true, true)
2869
2870MINMAX(32, min, true, false, false)
2871MINMAX(32, minnum, true, true, false)
2872MINMAX(32, minnummag, true, true, true)
2873MINMAX(32, max, false, false, false)
2874MINMAX(32, maxnum, false, true, false)
2875MINMAX(32, maxnummag, false, true, true)
2876
2877MINMAX(64, min, true, false, false)
2878MINMAX(64, minnum, true, true, false)
2879MINMAX(64, minnummag, true, true, true)
2880MINMAX(64, max, false, false, false)
2881MINMAX(64, maxnum, false, true, false)
2882MINMAX(64, maxnummag, false, true, true)
2883
2884#undef MINMAX
2885
0c4c9092
AB
2886/* Floating point compare */
2887static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
2888 float_status *s)
2889{
2890 if (is_nan(a.cls) || is_nan(b.cls)) {
2891 if (!is_quiet ||
2892 a.cls == float_class_snan ||
2893 b.cls == float_class_snan) {
2894 s->float_exception_flags |= float_flag_invalid;
2895 }
2896 return float_relation_unordered;
2897 }
2898
2899 if (a.cls == float_class_zero) {
2900 if (b.cls == float_class_zero) {
2901 return float_relation_equal;
2902 }
2903 return b.sign ? float_relation_greater : float_relation_less;
2904 } else if (b.cls == float_class_zero) {
2905 return a.sign ? float_relation_less : float_relation_greater;
2906 }
2907
2908 /* The only really important thing about infinity is its sign. If
2909 * both are infinities the sign marks the smallest of the two.
2910 */
2911 if (a.cls == float_class_inf) {
2912 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
2913 return float_relation_equal;
2914 }
2915 return a.sign ? float_relation_less : float_relation_greater;
2916 } else if (b.cls == float_class_inf) {
2917 return b.sign ? float_relation_greater : float_relation_less;
2918 }
2919
2920 if (a.sign != b.sign) {
2921 return a.sign ? float_relation_less : float_relation_greater;
2922 }
2923
2924 if (a.exp == b.exp) {
2925 if (a.frac == b.frac) {
2926 return float_relation_equal;
2927 }
2928 if (a.sign) {
2929 return a.frac > b.frac ?
2930 float_relation_less : float_relation_greater;
2931 } else {
2932 return a.frac > b.frac ?
2933 float_relation_greater : float_relation_less;
2934 }
2935 } else {
2936 if (a.sign) {
2937 return a.exp > b.exp ? float_relation_less : float_relation_greater;
2938 } else {
2939 return a.exp > b.exp ? float_relation_greater : float_relation_less;
2940 }
2941 }
2942}
2943
d9fe9db9
EC
2944#define COMPARE(name, attr, sz) \
2945static int attr \
2946name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
0c4c9092
AB
2947{ \
2948 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2949 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
d9fe9db9 2950 return compare_floats(pa, pb, is_quiet, s); \
0c4c9092
AB
2951}
2952
d9fe9db9
EC
2953COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
2954COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
2955COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
0c4c9092
AB
2956
2957#undef COMPARE
2958
d9fe9db9
EC
2959int float16_compare(float16 a, float16 b, float_status *s)
2960{
2961 return soft_f16_compare(a, b, false, s);
2962}
2963
2964int float16_compare_quiet(float16 a, float16 b, float_status *s)
2965{
2966 return soft_f16_compare(a, b, true, s);
2967}
2968
2969static int QEMU_FLATTEN
2970f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
2971{
2972 union_float32 ua, ub;
2973
2974 ua.s = xa;
2975 ub.s = xb;
2976
2977 if (QEMU_NO_HARDFLOAT) {
2978 goto soft;
2979 }
2980
2981 float32_input_flush2(&ua.s, &ub.s, s);
2982 if (isgreaterequal(ua.h, ub.h)) {
2983 if (isgreater(ua.h, ub.h)) {
2984 return float_relation_greater;
2985 }
2986 return float_relation_equal;
2987 }
2988 if (likely(isless(ua.h, ub.h))) {
2989 return float_relation_less;
2990 }
2991 /* The only condition remaining is unordered.
2992 * Fall through to set flags.
2993 */
2994 soft:
2995 return soft_f32_compare(ua.s, ub.s, is_quiet, s);
2996}
2997
2998int float32_compare(float32 a, float32 b, float_status *s)
2999{
3000 return f32_compare(a, b, false, s);
3001}
3002
3003int float32_compare_quiet(float32 a, float32 b, float_status *s)
3004{
3005 return f32_compare(a, b, true, s);
3006}
3007
3008static int QEMU_FLATTEN
3009f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3010{
3011 union_float64 ua, ub;
3012
3013 ua.s = xa;
3014 ub.s = xb;
3015
3016 if (QEMU_NO_HARDFLOAT) {
3017 goto soft;
3018 }
3019
3020 float64_input_flush2(&ua.s, &ub.s, s);
3021 if (isgreaterequal(ua.h, ub.h)) {
3022 if (isgreater(ua.h, ub.h)) {
3023 return float_relation_greater;
3024 }
3025 return float_relation_equal;
3026 }
3027 if (likely(isless(ua.h, ub.h))) {
3028 return float_relation_less;
3029 }
3030 /* The only condition remaining is unordered.
3031 * Fall through to set flags.
3032 */
3033 soft:
3034 return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3035}
3036
3037int float64_compare(float64 a, float64 b, float_status *s)
3038{
3039 return f64_compare(a, b, false, s);
3040}
3041
3042int float64_compare_quiet(float64 a, float64 b, float_status *s)
3043{
3044 return f64_compare(a, b, true, s);
3045}
3046
0bfc9f19
AB
3047/* Multiply A by 2 raised to the power N. */
3048static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
3049{
3050 if (unlikely(is_nan(a.cls))) {
3051 return return_nan(a, s);
3052 }
3053 if (a.cls == float_class_normal) {
ce8d4082
RH
3054 /* The largest float type (even though not supported by FloatParts)
3055 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3056 * still allows rounding to infinity, without allowing overflow
3057 * within the int32_t that backs FloatParts.exp.
3058 */
3059 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
3060 a.exp += n;
3061 }
3062 return a;
3063}
3064
3065float16 float16_scalbn(float16 a, int n, float_status *status)
3066{
3067 FloatParts pa = float16_unpack_canonical(a, status);
3068 FloatParts pr = scalbn_decomposed(pa, n, status);
3069 return float16_round_pack_canonical(pr, status);
3070}
3071
3072float32 float32_scalbn(float32 a, int n, float_status *status)
3073{
3074 FloatParts pa = float32_unpack_canonical(a, status);
3075 FloatParts pr = scalbn_decomposed(pa, n, status);
3076 return float32_round_pack_canonical(pr, status);
3077}
3078
3079float64 float64_scalbn(float64 a, int n, float_status *status)
3080{
3081 FloatParts pa = float64_unpack_canonical(a, status);
3082 FloatParts pr = scalbn_decomposed(pa, n, status);
3083 return float64_round_pack_canonical(pr, status);
3084}
3085
c13bb2da
AB
3086/*
3087 * Square Root
3088 *
3089 * The old softfloat code did an approximation step before zeroing in
3090 * on the final result. However, for simplicity we just compute the
3091 * square root by iterating down from the implicit bit to enough extra
3092 * bits to ensure we get a correctly rounded result.
3093 *
3094 * This does mean however the calculation is slower than before,
3095 * especially for 64 bit floats.
3096 */
3097
3098static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
3099{
3100 uint64_t a_frac, r_frac, s_frac;
3101 int bit, last_bit;
3102
3103 if (is_nan(a.cls)) {
3104 return return_nan(a, s);
3105 }
3106 if (a.cls == float_class_zero) {
3107 return a; /* sqrt(+-0) = +-0 */
3108 }
3109 if (a.sign) {
3110 s->float_exception_flags |= float_flag_invalid;
f7e598e2 3111 return parts_default_nan(s);
c13bb2da
AB
3112 }
3113 if (a.cls == float_class_inf) {
3114 return a; /* sqrt(+inf) = +inf */
3115 }
3116
3117 assert(a.cls == float_class_normal);
3118
3119 /* We need two overflow bits at the top. Adding room for that is a
3120 * right shift. If the exponent is odd, we can discard the low bit
3121 * by multiplying the fraction by 2; that's a left shift. Combine
3122 * those and we shift right if the exponent is even.
3123 */
3124 a_frac = a.frac;
3125 if (!(a.exp & 1)) {
3126 a_frac >>= 1;
3127 }
3128 a.exp >>= 1;
3129
3130 /* Bit-by-bit computation of sqrt. */
3131 r_frac = 0;
3132 s_frac = 0;
3133
3134 /* Iterate from implicit bit down to the 3 extra bits to compute a
3135 * properly rounded result. Remember we've inserted one more bit
3136 * at the top, so these positions are one less.
3137 */
3138 bit = DECOMPOSED_BINARY_POINT - 1;
3139 last_bit = MAX(p->frac_shift - 4, 0);
3140 do {
3141 uint64_t q = 1ULL << bit;
3142 uint64_t t_frac = s_frac + q;
3143 if (t_frac <= a_frac) {
3144 s_frac = t_frac + q;
3145 a_frac -= t_frac;
3146 r_frac += q;
3147 }
3148 a_frac <<= 1;
3149 } while (--bit >= last_bit);
3150
3151 /* Undo the right shift done above. If there is any remaining
3152 * fraction, the result is inexact. Set the sticky bit.
3153 */
3154 a.frac = (r_frac << 1) + (a_frac != 0);
3155
3156 return a;
3157}
3158
97ff87c0 3159float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da
AB
3160{
3161 FloatParts pa = float16_unpack_canonical(a, status);
3162 FloatParts pr = sqrt_float(pa, status, &float16_params);
3163 return float16_round_pack_canonical(pr, status);
3164}
3165
f131bae8
EC
3166static float32 QEMU_SOFTFLOAT_ATTR
3167soft_f32_sqrt(float32 a, float_status *status)
c13bb2da
AB
3168{
3169 FloatParts pa = float32_unpack_canonical(a, status);
3170 FloatParts pr = sqrt_float(pa, status, &float32_params);
3171 return float32_round_pack_canonical(pr, status);
3172}
3173
f131bae8
EC
3174static float64 QEMU_SOFTFLOAT_ATTR
3175soft_f64_sqrt(float64 a, float_status *status)
c13bb2da
AB
3176{
3177 FloatParts pa = float64_unpack_canonical(a, status);
3178 FloatParts pr = sqrt_float(pa, status, &float64_params);
3179 return float64_round_pack_canonical(pr, status);
3180}
3181
f131bae8
EC
3182float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3183{
3184 union_float32 ua, ur;
3185
3186 ua.s = xa;
3187 if (unlikely(!can_use_fpu(s))) {
3188 goto soft;
3189 }
3190
3191 float32_input_flush1(&ua.s, s);
3192 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3193 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3194 fpclassify(ua.h) == FP_ZERO) ||
3195 signbit(ua.h))) {
3196 goto soft;
3197 }
3198 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3199 float32_is_neg(ua.s))) {
3200 goto soft;
3201 }
3202 ur.h = sqrtf(ua.h);
3203 return ur.s;
3204
3205 soft:
3206 return soft_f32_sqrt(ua.s, s);
3207}
3208
3209float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3210{
3211 union_float64 ua, ur;
3212
3213 ua.s = xa;
3214 if (unlikely(!can_use_fpu(s))) {
3215 goto soft;
3216 }
3217
3218 float64_input_flush1(&ua.s, s);
3219 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3220 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3221 fpclassify(ua.h) == FP_ZERO) ||
3222 signbit(ua.h))) {
3223 goto soft;
3224 }
3225 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3226 float64_is_neg(ua.s))) {
3227 goto soft;
3228 }
3229 ur.h = sqrt(ua.h);
3230 return ur.s;
3231
3232 soft:
3233 return soft_f64_sqrt(ua.s, s);
3234}
3235
0218a16e
RH
3236/*----------------------------------------------------------------------------
3237| The pattern for a default generated NaN.
3238*----------------------------------------------------------------------------*/
3239
3240float16 float16_default_nan(float_status *status)
3241{
3242 FloatParts p = parts_default_nan(status);
3243 p.frac >>= float16_params.frac_shift;
3244 return float16_pack_raw(p);
3245}
3246
3247float32 float32_default_nan(float_status *status)
3248{
3249 FloatParts p = parts_default_nan(status);
3250 p.frac >>= float32_params.frac_shift;
3251 return float32_pack_raw(p);
3252}
3253
3254float64 float64_default_nan(float_status *status)
3255{
3256 FloatParts p = parts_default_nan(status);
3257 p.frac >>= float64_params.frac_shift;
3258 return float64_pack_raw(p);
3259}
3260
3261float128 float128_default_nan(float_status *status)
3262{
3263 FloatParts p = parts_default_nan(status);
3264 float128 r;
3265
3266 /* Extrapolate from the choices made by parts_default_nan to fill
3267 * in the quad-floating format. If the low bit is set, assume we
3268 * want to set all non-snan bits.
3269 */
3270 r.low = -(p.frac & 1);
3271 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
3272 r.high |= LIT64(0x7FFF000000000000);
3273 r.high |= (uint64_t)p.sign << 63;
3274
3275 return r;
3276}
c13bb2da 3277
158142c2 3278/*----------------------------------------------------------------------------
377ed926
RH
3279| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3280*----------------------------------------------------------------------------*/
3281
3282float16 float16_silence_nan(float16 a, float_status *status)
3283{
3284 FloatParts p = float16_unpack_raw(a);
3285 p.frac <<= float16_params.frac_shift;
3286 p = parts_silence_nan(p, status);
3287 p.frac >>= float16_params.frac_shift;
3288 return float16_pack_raw(p);
3289}
3290
3291float32 float32_silence_nan(float32 a, float_status *status)
3292{
3293 FloatParts p = float32_unpack_raw(a);
3294 p.frac <<= float32_params.frac_shift;
3295 p = parts_silence_nan(p, status);
3296 p.frac >>= float32_params.frac_shift;
3297 return float32_pack_raw(p);
3298}
3299
3300float64 float64_silence_nan(float64 a, float_status *status)
3301{
3302 FloatParts p = float64_unpack_raw(a);
3303 p.frac <<= float64_params.frac_shift;
3304 p = parts_silence_nan(p, status);
3305 p.frac >>= float64_params.frac_shift;
3306 return float64_pack_raw(p);
3307}
3308
3309/*----------------------------------------------------------------------------
158142c2
FB
3310| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3311| and 7, and returns the properly rounded 32-bit integer corresponding to the
3312| input. If `zSign' is 1, the input is negated before being converted to an
3313| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3314| is simply rounded to an integer, with the inexact exception raised if the
3315| input cannot be represented exactly as an integer. However, if the fixed-
3316| point input is too large, the invalid exception is raised and the largest
3317| positive or negative integer is returned.
3318*----------------------------------------------------------------------------*/
3319
f4014512 3320static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
158142c2 3321{
8f506c70 3322 int8_t roundingMode;
158142c2 3323 flag roundNearestEven;
8f506c70 3324 int8_t roundIncrement, roundBits;
760e1416 3325 int32_t z;
158142c2 3326
a2f2d288 3327 roundingMode = status->float_rounding_mode;
158142c2 3328 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3329 switch (roundingMode) {
3330 case float_round_nearest_even:
f9288a76 3331 case float_round_ties_away:
dc355b76
PM
3332 roundIncrement = 0x40;
3333 break;
3334 case float_round_to_zero:
3335 roundIncrement = 0;
3336 break;
3337 case float_round_up:
3338 roundIncrement = zSign ? 0 : 0x7f;
3339 break;
3340 case float_round_down:
3341 roundIncrement = zSign ? 0x7f : 0;
3342 break;
5d64abb3
RH
3343 case float_round_to_odd:
3344 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
3345 break;
dc355b76
PM
3346 default:
3347 abort();
158142c2
FB
3348 }
3349 roundBits = absZ & 0x7F;
3350 absZ = ( absZ + roundIncrement )>>7;
3351 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
3352 z = absZ;
3353 if ( zSign ) z = - z;
3354 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 3355 float_raise(float_flag_invalid, status);
bb98fe42 3356 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2 3357 }
a2f2d288
PM
3358 if (roundBits) {
3359 status->float_exception_flags |= float_flag_inexact;
3360 }
158142c2
FB
3361 return z;
3362
3363}
3364
3365/*----------------------------------------------------------------------------
3366| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3367| `absZ1', with binary point between bits 63 and 64 (between the input words),
3368| and returns the properly rounded 64-bit integer corresponding to the input.
3369| If `zSign' is 1, the input is negated before being converted to an integer.
3370| Ordinarily, the fixed-point input is simply rounded to an integer, with
3371| the inexact exception raised if the input cannot be represented exactly as
3372| an integer. However, if the fixed-point input is too large, the invalid
3373| exception is raised and the largest positive or negative integer is
3374| returned.
3375*----------------------------------------------------------------------------*/
3376
f42c2224 3377static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 3378 float_status *status)
158142c2 3379{
8f506c70 3380 int8_t roundingMode;
158142c2 3381 flag roundNearestEven, increment;
760e1416 3382 int64_t z;
158142c2 3383
a2f2d288 3384 roundingMode = status->float_rounding_mode;
158142c2 3385 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3386 switch (roundingMode) {
3387 case float_round_nearest_even:
f9288a76 3388 case float_round_ties_away:
dc355b76
PM
3389 increment = ((int64_t) absZ1 < 0);
3390 break;
3391 case float_round_to_zero:
3392 increment = 0;
3393 break;
3394 case float_round_up:
3395 increment = !zSign && absZ1;
3396 break;
3397 case float_round_down:
3398 increment = zSign && absZ1;
3399 break;
5d64abb3
RH
3400 case float_round_to_odd:
3401 increment = !(absZ0 & 1) && absZ1;
3402 break;
dc355b76
PM
3403 default:
3404 abort();
158142c2
FB
3405 }
3406 if ( increment ) {
3407 ++absZ0;
3408 if ( absZ0 == 0 ) goto overflow;
bb98fe42 3409 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
3410 }
3411 z = absZ0;
3412 if ( zSign ) z = - z;
3413 if ( z && ( ( z < 0 ) ^ zSign ) ) {
3414 overflow:
ff32e16e 3415 float_raise(float_flag_invalid, status);
158142c2 3416 return
bb98fe42 3417 zSign ? (int64_t) LIT64( 0x8000000000000000 )
158142c2
FB
3418 : LIT64( 0x7FFFFFFFFFFFFFFF );
3419 }
a2f2d288
PM
3420 if (absZ1) {
3421 status->float_exception_flags |= float_flag_inexact;
3422 }
158142c2
FB
3423 return z;
3424
3425}
3426
fb3ea83a
TM
3427/*----------------------------------------------------------------------------
3428| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3429| `absZ1', with binary point between bits 63 and 64 (between the input words),
3430| and returns the properly rounded 64-bit unsigned integer corresponding to the
3431| input. Ordinarily, the fixed-point input is simply rounded to an integer,
3432| with the inexact exception raised if the input cannot be represented exactly
3433| as an integer. However, if the fixed-point input is too large, the invalid
3434| exception is raised and the largest unsigned integer is returned.
3435*----------------------------------------------------------------------------*/
3436
f42c2224 3437static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
e5a41ffa 3438 uint64_t absZ1, float_status *status)
fb3ea83a 3439{
8f506c70 3440 int8_t roundingMode;
fb3ea83a
TM
3441 flag roundNearestEven, increment;
3442
a2f2d288 3443 roundingMode = status->float_rounding_mode;
fb3ea83a 3444 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
3445 switch (roundingMode) {
3446 case float_round_nearest_even:
f9288a76 3447 case float_round_ties_away:
dc355b76
PM
3448 increment = ((int64_t)absZ1 < 0);
3449 break;
3450 case float_round_to_zero:
3451 increment = 0;
3452 break;
3453 case float_round_up:
3454 increment = !zSign && absZ1;
3455 break;
3456 case float_round_down:
3457 increment = zSign && absZ1;
3458 break;
5d64abb3
RH
3459 case float_round_to_odd:
3460 increment = !(absZ0 & 1) && absZ1;
3461 break;
dc355b76
PM
3462 default:
3463 abort();
fb3ea83a
TM
3464 }
3465 if (increment) {
3466 ++absZ0;
3467 if (absZ0 == 0) {
ff32e16e 3468 float_raise(float_flag_invalid, status);
fb3ea83a
TM
3469 return LIT64(0xFFFFFFFFFFFFFFFF);
3470 }
3471 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
3472 }
3473
3474 if (zSign && absZ0) {
ff32e16e 3475 float_raise(float_flag_invalid, status);
fb3ea83a
TM
3476 return 0;
3477 }
3478
3479 if (absZ1) {
a2f2d288 3480 status->float_exception_flags |= float_flag_inexact;
fb3ea83a
TM
3481 }
3482 return absZ0;
3483}
3484
37d18660
PM
3485/*----------------------------------------------------------------------------
3486| If `a' is denormal and we are in flush-to-zero mode then set the
3487| input-denormal exception and return zero. Otherwise just return the value.
3488*----------------------------------------------------------------------------*/
e5a41ffa 3489float32 float32_squash_input_denormal(float32 a, float_status *status)
37d18660 3490{
a2f2d288 3491 if (status->flush_inputs_to_zero) {
37d18660 3492 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
ff32e16e 3493 float_raise(float_flag_input_denormal, status);
37d18660
PM
3494 return make_float32(float32_val(a) & 0x80000000);
3495 }
3496 }
3497 return a;
3498}
3499
158142c2
FB
3500/*----------------------------------------------------------------------------
3501| Normalizes the subnormal single-precision floating-point value represented
3502| by the denormalized significand `aSig'. The normalized exponent and
3503| significand are stored at the locations pointed to by `zExpPtr' and
3504| `zSigPtr', respectively.
3505*----------------------------------------------------------------------------*/
3506
static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* Left shift needed so that 8 leading zero bits remain above aSig. */
    const int8_t shift = clz32(aSig) - 8;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
3517
158142c2
FB
3518/*----------------------------------------------------------------------------
3519| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3520| and significand `zSig', and returns the proper single-precision floating-
3521| point value corresponding to the abstract input. Ordinarily, the abstract
3522| value is simply rounded and packed into the single-precision format, with
3523| the inexact exception raised if the abstract input cannot be represented
3524| exactly. However, if the abstract value is too large, the overflow and
3525| inexact exceptions are raised and an infinity or maximal finite value is
3526| returned. If the abstract value is too small, the input value is rounded to
3527| a subnormal number, and the underflow and inexact exceptions are raised if
3528| the abstract input cannot be represented exactly as a subnormal single-
3529| precision floating-point number.
3530| The input significand `zSig' has its binary point between bits 30
3531| and 29, which is 7 bits to the left of the usual location. This shifted
3532| significand must be normalized or smaller. If `zSig' is not normalized,
3533| `zExp' must be 0; in that case, the result returned is a subnormal number,
3534| and it must not require rounding. In the usual case that `zSig' is
3535| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3536| The handling of underflow and overflow follows the IEC/IEEE Standard for
3537| Binary Floating-Point Arithmetic.
3538*----------------------------------------------------------------------------*/
3539
0c48262d 3540static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 3541 float_status *status)
158142c2 3542{
8f506c70 3543 int8_t roundingMode;
158142c2 3544 flag roundNearestEven;
8f506c70 3545 int8_t roundIncrement, roundBits;
158142c2
FB
3546 flag isTiny;
3547
a2f2d288 3548 roundingMode = status->float_rounding_mode;
158142c2 3549 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3550 switch (roundingMode) {
3551 case float_round_nearest_even:
f9288a76 3552 case float_round_ties_away:
dc355b76
PM
3553 roundIncrement = 0x40;
3554 break;
3555 case float_round_to_zero:
3556 roundIncrement = 0;
3557 break;
3558 case float_round_up:
3559 roundIncrement = zSign ? 0 : 0x7f;
3560 break;
3561 case float_round_down:
3562 roundIncrement = zSign ? 0x7f : 0;
3563 break;
5d64abb3
RH
3564 case float_round_to_odd:
3565 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
3566 break;
dc355b76
PM
3567 default:
3568 abort();
3569 break;
158142c2
FB
3570 }
3571 roundBits = zSig & 0x7F;
bb98fe42 3572 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
3573 if ( ( 0xFD < zExp )
3574 || ( ( zExp == 0xFD )
bb98fe42 3575 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 3576 ) {
5d64abb3
RH
3577 bool overflow_to_inf = roundingMode != float_round_to_odd &&
3578 roundIncrement != 0;
ff32e16e 3579 float_raise(float_flag_overflow | float_flag_inexact, status);
5d64abb3 3580 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
158142c2
FB
3581 }
3582 if ( zExp < 0 ) {
a2f2d288 3583 if (status->flush_to_zero) {
ff32e16e 3584 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3585 return packFloat32(zSign, 0, 0);
3586 }
158142c2 3587 isTiny =
a2f2d288
PM
3588 (status->float_detect_tininess
3589 == float_tininess_before_rounding)
158142c2
FB
3590 || ( zExp < -1 )
3591 || ( zSig + roundIncrement < 0x80000000 );
3592 shift32RightJamming( zSig, - zExp, &zSig );
3593 zExp = 0;
3594 roundBits = zSig & 0x7F;
ff32e16e
PM
3595 if (isTiny && roundBits) {
3596 float_raise(float_flag_underflow, status);
3597 }
5d64abb3
RH
3598 if (roundingMode == float_round_to_odd) {
3599 /*
3600 * For round-to-odd case, the roundIncrement depends on
3601 * zSig which just changed.
3602 */
3603 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
3604 }
158142c2
FB
3605 }
3606 }
a2f2d288
PM
3607 if (roundBits) {
3608 status->float_exception_flags |= float_flag_inexact;
3609 }
158142c2
FB
3610 zSig = ( zSig + roundIncrement )>>7;
3611 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
3612 if ( zSig == 0 ) zExp = 0;
3613 return packFloat32( zSign, zExp, zSig );
3614
3615}
3616
3617/*----------------------------------------------------------------------------
3618| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3619| and significand `zSig', and returns the proper single-precision floating-
3620| point value corresponding to the abstract input. This routine is just like
3621| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
3622| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
3623| floating-point exponent.
3624*----------------------------------------------------------------------------*/
3625
3626static float32
0c48262d 3627 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 3628 float_status *status)
158142c2 3629{
8f506c70 3630 int8_t shiftCount;
158142c2 3631
0019d5c3 3632 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
3633 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
3634 status);
158142c2
FB
3635
3636}
3637
37d18660
PM
3638/*----------------------------------------------------------------------------
3639| If `a' is denormal and we are in flush-to-zero mode then set the
3640| input-denormal exception and return zero. Otherwise just return the value.
3641*----------------------------------------------------------------------------*/
e5a41ffa 3642float64 float64_squash_input_denormal(float64 a, float_status *status)
37d18660 3643{
a2f2d288 3644 if (status->flush_inputs_to_zero) {
37d18660 3645 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
ff32e16e 3646 float_raise(float_flag_input_denormal, status);
37d18660
PM
3647 return make_float64(float64_val(a) & (1ULL << 63));
3648 }
3649 }
3650 return a;
3651}
3652
158142c2
FB
3653/*----------------------------------------------------------------------------
3654| Normalizes the subnormal double-precision floating-point value represented
3655| by the denormalized significand `aSig'. The normalized exponent and
3656| significand are stored at the locations pointed to by `zExpPtr' and
3657| `zSigPtr', respectively.
3658*----------------------------------------------------------------------------*/
3659
static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* Left shift needed so that 11 leading zero bits remain above aSig. */
    const int8_t shift = clz64(aSig) - 11;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
3670
3671/*----------------------------------------------------------------------------
3672| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
3673| double-precision floating-point value, returning the result. After being
3674| shifted into the proper positions, the three fields are simply added
3675| together to form the result. This means that any integer portion of `zSig'
3676| will be added into the exponent. Since a properly normalized significand
3677| will have an integer portion equal to 1, the `zExp' input should be 1 less
3678| than the desired result exponent whenever `zSig' is a complete, normalized
3679| significand.
3680*----------------------------------------------------------------------------*/
3681
0c48262d 3682static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
158142c2
FB
3683{
3684
f090c9d4 3685 return make_float64(
bb98fe42 3686 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
3687
3688}
3689
3690/*----------------------------------------------------------------------------
3691| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3692| and significand `zSig', and returns the proper double-precision floating-
3693| point value corresponding to the abstract input. Ordinarily, the abstract
3694| value is simply rounded and packed into the double-precision format, with
3695| the inexact exception raised if the abstract input cannot be represented
3696| exactly. However, if the abstract value is too large, the overflow and
3697| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
3698| returned. If the abstract value is too small, the input value is rounded to
3699| a subnormal number, and the underflow and inexact exceptions are raised if
3700| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
3701| precision floating-point number.
3702| The input significand `zSig' has its binary point between bits 62
3703| and 61, which is 10 bits to the left of the usual location. This shifted
3704| significand must be normalized or smaller. If `zSig' is not normalized,
3705| `zExp' must be 0; in that case, the result returned is a subnormal number,
3706| and it must not require rounding. In the usual case that `zSig' is
3707| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3708| The handling of underflow and overflow follows the IEC/IEEE Standard for
3709| Binary Floating-Point Arithmetic.
3710*----------------------------------------------------------------------------*/
3711
0c48262d 3712static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 3713 float_status *status)
158142c2 3714{
8f506c70 3715 int8_t roundingMode;
158142c2 3716 flag roundNearestEven;
0c48262d 3717 int roundIncrement, roundBits;
158142c2
FB
3718 flag isTiny;
3719
a2f2d288 3720 roundingMode = status->float_rounding_mode;
158142c2 3721 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3722 switch (roundingMode) {
3723 case float_round_nearest_even:
f9288a76 3724 case float_round_ties_away:
dc355b76
PM
3725 roundIncrement = 0x200;
3726 break;
3727 case float_round_to_zero:
3728 roundIncrement = 0;
3729 break;
3730 case float_round_up:
3731 roundIncrement = zSign ? 0 : 0x3ff;
3732 break;
3733 case float_round_down:
3734 roundIncrement = zSign ? 0x3ff : 0;
3735 break;
9ee6f678
BR
3736 case float_round_to_odd:
3737 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
3738 break;
dc355b76
PM
3739 default:
3740 abort();
158142c2
FB
3741 }
3742 roundBits = zSig & 0x3FF;
bb98fe42 3743 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
3744 if ( ( 0x7FD < zExp )
3745 || ( ( zExp == 0x7FD )
bb98fe42 3746 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 3747 ) {
9ee6f678
BR
3748 bool overflow_to_inf = roundingMode != float_round_to_odd &&
3749 roundIncrement != 0;
ff32e16e 3750 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 3751 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
3752 }
3753 if ( zExp < 0 ) {
a2f2d288 3754 if (status->flush_to_zero) {
ff32e16e 3755 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3756 return packFloat64(zSign, 0, 0);
3757 }
158142c2 3758 isTiny =
a2f2d288
PM
3759 (status->float_detect_tininess
3760 == float_tininess_before_rounding)
158142c2
FB
3761 || ( zExp < -1 )
3762 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
3763 shift64RightJamming( zSig, - zExp, &zSig );
3764 zExp = 0;
3765 roundBits = zSig & 0x3FF;
ff32e16e
PM
3766 if (isTiny && roundBits) {
3767 float_raise(float_flag_underflow, status);
3768 }
9ee6f678
BR
3769 if (roundingMode == float_round_to_odd) {
3770 /*
3771 * For round-to-odd case, the roundIncrement depends on
3772 * zSig which just changed.
3773 */
3774 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
3775 }
158142c2
FB
3776 }
3777 }
a2f2d288
PM
3778 if (roundBits) {
3779 status->float_exception_flags |= float_flag_inexact;
3780 }
158142c2
FB
3781 zSig = ( zSig + roundIncrement )>>10;
3782 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
3783 if ( zSig == 0 ) zExp = 0;
3784 return packFloat64( zSign, zExp, zSig );
3785
3786}
3787
3788/*----------------------------------------------------------------------------
3789| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3790| and significand `zSig', and returns the proper double-precision floating-
3791| point value corresponding to the abstract input. This routine is just like
3792| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
3793| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
3794| floating-point exponent.
3795*----------------------------------------------------------------------------*/
3796
3797static float64
0c48262d 3798 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 3799 float_status *status)
158142c2 3800{
8f506c70 3801 int8_t shiftCount;
158142c2 3802
0019d5c3 3803 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
3804 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
3805 status);
158142c2
FB
3806
3807}
3808
158142c2
FB
3809/*----------------------------------------------------------------------------
3810| Normalizes the subnormal extended double-precision floating-point value
3811| represented by the denormalized significand `aSig'. The normalized exponent
3812| and significand are stored at the locations pointed to by `zExpPtr' and
3813| `zSigPtr', respectively.
3814*----------------------------------------------------------------------------*/
3815
88857aca
LV
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* Left shift that moves the highest set bit of aSig up to bit 63. */
    const int8_t shift = clz64(aSig);

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
3825
3826/*----------------------------------------------------------------------------
3827| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3828| and extended significand formed by the concatenation of `zSig0' and `zSig1',
3829| and returns the proper extended double-precision floating-point value
3830| corresponding to the abstract input. Ordinarily, the abstract value is
3831| rounded and packed into the extended double-precision format, with the
3832| inexact exception raised if the abstract input cannot be represented
3833| exactly. However, if the abstract value is too large, the overflow and
3834| inexact exceptions are raised and an infinity or maximal finite value is
3835| returned. If the abstract value is too small, the input value is rounded to
3836| a subnormal number, and the underflow and inexact exceptions are raised if
3837| the abstract input cannot be represented exactly as a subnormal extended
3838| double-precision floating-point number.
3839| If `roundingPrecision' is 32 or 64, the result is rounded to the same
3840| number of bits as single or double precision, respectively. Otherwise, the
3841| result is rounded to the full precision of the extended double-precision
3842| format.
3843| The input significand must be normalized or smaller. If the input
3844| significand is not normalized, `zExp' must be 0; in that case, the result
3845| returned is a subnormal number, and it must not require rounding. The
3846| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
3847| Floating-Point Arithmetic.
3848*----------------------------------------------------------------------------*/
3849
88857aca
LV
3850floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
3851 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
3852 float_status *status)
158142c2 3853{
8f506c70 3854 int8_t roundingMode;
158142c2 3855 flag roundNearestEven, increment, isTiny;
f42c2224 3856 int64_t roundIncrement, roundMask, roundBits;
158142c2 3857
a2f2d288 3858 roundingMode = status->float_rounding_mode;
158142c2
FB
3859 roundNearestEven = ( roundingMode == float_round_nearest_even );
3860 if ( roundingPrecision == 80 ) goto precision80;
3861 if ( roundingPrecision == 64 ) {
3862 roundIncrement = LIT64( 0x0000000000000400 );
3863 roundMask = LIT64( 0x00000000000007FF );
3864 }
3865 else if ( roundingPrecision == 32 ) {
3866 roundIncrement = LIT64( 0x0000008000000000 );
3867 roundMask = LIT64( 0x000000FFFFFFFFFF );
3868 }
3869 else {
3870 goto precision80;
3871 }
3872 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
3873 switch (roundingMode) {
3874 case float_round_nearest_even:
f9288a76 3875 case float_round_ties_away:
dc355b76
PM
3876 break;
3877 case float_round_to_zero:
3878 roundIncrement = 0;
3879 break;
3880 case float_round_up:
3881 roundIncrement = zSign ? 0 : roundMask;
3882 break;
3883 case float_round_down:
3884 roundIncrement = zSign ? roundMask : 0;
3885 break;
3886 default:
3887 abort();
158142c2
FB
3888 }
3889 roundBits = zSig0 & roundMask;
bb98fe42 3890 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
3891 if ( ( 0x7FFE < zExp )
3892 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
3893 ) {
3894 goto overflow;
3895 }
3896 if ( zExp <= 0 ) {
a2f2d288 3897 if (status->flush_to_zero) {
ff32e16e 3898 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3899 return packFloatx80(zSign, 0, 0);
3900 }
158142c2 3901 isTiny =
a2f2d288
PM
3902 (status->float_detect_tininess
3903 == float_tininess_before_rounding)
158142c2
FB
3904 || ( zExp < 0 )
3905 || ( zSig0 <= zSig0 + roundIncrement );
3906 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
3907 zExp = 0;
3908 roundBits = zSig0 & roundMask;
ff32e16e
PM
3909 if (isTiny && roundBits) {
3910 float_raise(float_flag_underflow, status);
3911 }
a2f2d288
PM
3912 if (roundBits) {
3913 status->float_exception_flags |= float_flag_inexact;
3914 }
158142c2 3915 zSig0 += roundIncrement;
bb98fe42 3916 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
3917 roundIncrement = roundMask + 1;
3918 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3919 roundMask |= roundIncrement;
3920 }
3921 zSig0 &= ~ roundMask;
3922 return packFloatx80( zSign, zExp, zSig0 );
3923 }
3924 }
a2f2d288
PM
3925 if (roundBits) {
3926 status->float_exception_flags |= float_flag_inexact;
3927 }
158142c2
FB
3928 zSig0 += roundIncrement;
3929 if ( zSig0 < roundIncrement ) {
3930 ++zExp;
3931 zSig0 = LIT64( 0x8000000000000000 );
3932 }
3933 roundIncrement = roundMask + 1;
3934 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3935 roundMask |= roundIncrement;
3936 }
3937 zSig0 &= ~ roundMask;
3938 if ( zSig0 == 0 ) zExp = 0;
3939 return packFloatx80( zSign, zExp, zSig0 );
3940 precision80:
dc355b76
PM
3941 switch (roundingMode) {
3942 case float_round_nearest_even:
f9288a76 3943 case float_round_ties_away:
dc355b76
PM
3944 increment = ((int64_t)zSig1 < 0);
3945 break;
3946 case float_round_to_zero:
3947 increment = 0;
3948 break;
3949 case float_round_up:
3950 increment = !zSign && zSig1;
3951 break;
3952 case float_round_down:
3953 increment = zSign && zSig1;
3954 break;
3955 default:
3956 abort();
158142c2 3957 }
bb98fe42 3958 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
3959 if ( ( 0x7FFE < zExp )
3960 || ( ( zExp == 0x7FFE )
3961 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
3962 && increment
3963 )
3964 ) {
3965 roundMask = 0;
3966 overflow:
ff32e16e 3967 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
3968 if ( ( roundingMode == float_round_to_zero )
3969 || ( zSign && ( roundingMode == float_round_up ) )
3970 || ( ! zSign && ( roundingMode == float_round_down ) )
3971 ) {
3972 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
3973 }
0f605c88
LV
3974 return packFloatx80(zSign,
3975 floatx80_infinity_high,
3976 floatx80_infinity_low);
158142c2
FB
3977 }
3978 if ( zExp <= 0 ) {
3979 isTiny =
a2f2d288
PM
3980 (status->float_detect_tininess
3981 == float_tininess_before_rounding)
158142c2
FB
3982 || ( zExp < 0 )
3983 || ! increment
3984 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
3985 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
3986 zExp = 0;
ff32e16e
PM
3987 if (isTiny && zSig1) {
3988 float_raise(float_flag_underflow, status);
3989 }
a2f2d288
PM
3990 if (zSig1) {
3991 status->float_exception_flags |= float_flag_inexact;
3992 }
dc355b76
PM
3993 switch (roundingMode) {
3994 case float_round_nearest_even:
f9288a76 3995 case float_round_ties_away:
dc355b76
PM
3996 increment = ((int64_t)zSig1 < 0);
3997 break;
3998 case float_round_to_zero:
3999 increment = 0;
4000 break;
4001 case float_round_up:
4002 increment = !zSign && zSig1;
4003 break;
4004 case float_round_down:
4005 increment = zSign && zSig1;
4006 break;
4007 default:
4008 abort();
158142c2
FB
4009 }
4010 if ( increment ) {
4011 ++zSig0;
4012 zSig0 &=
bb98fe42
AF
4013 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
4014 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4015 }
4016 return packFloatx80( zSign, zExp, zSig0 );
4017 }
4018 }
a2f2d288
PM
4019 if (zSig1) {
4020 status->float_exception_flags |= float_flag_inexact;
4021 }
158142c2
FB
4022 if ( increment ) {
4023 ++zSig0;
4024 if ( zSig0 == 0 ) {
4025 ++zExp;
4026 zSig0 = LIT64( 0x8000000000000000 );
4027 }
4028 else {
bb98fe42 4029 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
4030 }
4031 }
4032 else {
4033 if ( zSig0 == 0 ) zExp = 0;
4034 }
4035 return packFloatx80( zSign, zExp, zSig0 );
4036
4037}
4038
4039/*----------------------------------------------------------------------------
4040| Takes an abstract floating-point value having sign `zSign', exponent
4041| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4042| and returns the proper extended double-precision floating-point value
4043| corresponding to the abstract input. This routine is just like
4044| `roundAndPackFloatx80' except that the input significand does not have to be
4045| normalized.
4046*----------------------------------------------------------------------------*/
4047
88857aca
LV
4048floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4049 flag zSign, int32_t zExp,
4050 uint64_t zSig0, uint64_t zSig1,
4051 float_status *status)
158142c2 4052{
8f506c70 4053 int8_t shiftCount;
158142c2
FB
4054
4055 if ( zSig0 == 0 ) {
4056 zSig0 = zSig1;
4057 zSig1 = 0;
4058 zExp -= 64;
4059 }
0019d5c3 4060 shiftCount = clz64(zSig0);
158142c2
FB
4061 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4062 zExp -= shiftCount;
ff32e16e
PM
4063 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4064 zSig0, zSig1, status);
158142c2
FB
4065
4066}
4067
158142c2
FB
4068/*----------------------------------------------------------------------------
4069| Returns the least-significant 64 fraction bits of the quadruple-precision
4070| floating-point value `a'.
4071*----------------------------------------------------------------------------*/
4072
a49db98d 4073static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4074{
4075
4076 return a.low;
4077
4078}
4079
4080/*----------------------------------------------------------------------------
4081| Returns the most-significant 48 fraction bits of the quadruple-precision
4082| floating-point value `a'.
4083*----------------------------------------------------------------------------*/
4084
a49db98d 4085static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4086{
4087
4088 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
4089
4090}
4091
4092/*----------------------------------------------------------------------------
4093| Returns the exponent bits of the quadruple-precision floating-point value
4094| `a'.
4095*----------------------------------------------------------------------------*/
4096
f4014512 4097static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4098{
4099
4100 return ( a.high>>48 ) & 0x7FFF;
4101
4102}
4103
4104/*----------------------------------------------------------------------------
4105| Returns the sign bit of the quadruple-precision floating-point value `a'.
4106*----------------------------------------------------------------------------*/
4107
a49db98d 4108static inline flag extractFloat128Sign( float128 a )
158142c2
FB
4109{
4110
4111 return a.high>>63;
4112
4113}
4114
4115/*----------------------------------------------------------------------------
4116| Normalizes the subnormal quadruple-precision floating-point value
4117| represented by the denormalized significand formed by the concatenation of
4118| `aSig0' and `aSig1'. The normalized exponent is stored at the location
4119| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
4120| significand are stored at the location pointed to by `zSig0Ptr', and the
4121| least significant 64 bits of the normalized significand are stored at the
4122| location pointed to by `zSig1Ptr'.
4123*----------------------------------------------------------------------------*/
4124
static void normalizeFloat128Subnormal(uint64_t aSig0,
                                       uint64_t aSig1,
                                       int32_t *zExpPtr,
                                       uint64_t *zSig0Ptr,
                                       uint64_t *zSig1Ptr)
{
    int8_t shift;

    if (aSig0 != 0) {
        /* High word is populated: a short 128-bit left shift suffices. */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /*
     * Only the low word holds bits.  Relative to the target position in the
     * high word, the leading bit needs a left shift (shift >= 0) or a right
     * shift that spills the remainder back into the low word (shift < 0).
     */
    shift = clz64(aSig1) - 15;
    if (shift >= 0) {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
4155
4156/*----------------------------------------------------------------------------
4157| Packs the sign `zSign', the exponent `zExp', and the significand formed
4158| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4159| floating-point value, returning the result. After being shifted into the
4160| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4161| added together to form the most significant 64 bits of the result. This
4162| means that any integer portion of `zSig0' will be added into the exponent.
4163| Since a properly normalized significand will have an integer portion equal
4164| to 1, the `zExp' input should be 1 less than the desired result exponent
4165| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4166| significand.
4167*----------------------------------------------------------------------------*/
4168
a49db98d 4169static inline float128
f4014512 4170 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
158142c2
FB
4171{
4172 float128 z;
4173
4174 z.low = zSig1;
bb98fe42 4175 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
158142c2
FB
4176 return z;
4177
4178}
4179
4180/*----------------------------------------------------------------------------
4181| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4182| and extended significand formed by the concatenation of `zSig0', `zSig1',
4183| and `zSig2', and returns the proper quadruple-precision floating-point value
4184| corresponding to the abstract input. Ordinarily, the abstract value is
4185| simply rounded and packed into the quadruple-precision format, with the
4186| inexact exception raised if the abstract input cannot be represented
4187| exactly. However, if the abstract value is too large, the overflow and
4188| inexact exceptions are raised and an infinity or maximal finite value is
4189| returned. If the abstract value is too small, the input value is rounded to
4190| a subnormal number, and the underflow and inexact exceptions are raised if
4191| the abstract input cannot be represented exactly as a subnormal quadruple-
4192| precision floating-point number.
4193| The input significand must be normalized or smaller. If the input
4194| significand is not normalized, `zExp' must be 0; in that case, the result
4195| returned is a subnormal number, and it must not require rounding. In the
4196| usual case that the input significand is normalized, `zExp' must be 1 less
4197| than the ``true'' floating-point exponent. The handling of underflow and
4198| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4199*----------------------------------------------------------------------------*/
4200
f4014512 4201static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
4202 uint64_t zSig0, uint64_t zSig1,
4203 uint64_t zSig2, float_status *status)
158142c2 4204{
8f506c70 4205 int8_t roundingMode;
158142c2
FB
4206 flag roundNearestEven, increment, isTiny;
4207
a2f2d288 4208 roundingMode = status->float_rounding_mode;
158142c2 4209 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4210 switch (roundingMode) {
4211 case float_round_nearest_even:
f9288a76 4212 case float_round_ties_away:
dc355b76
PM
4213 increment = ((int64_t)zSig2 < 0);
4214 break;
4215 case float_round_to_zero:
4216 increment = 0;
4217 break;
4218 case float_round_up:
4219 increment = !zSign && zSig2;
4220 break;
4221 case float_round_down:
4222 increment = zSign && zSig2;
4223 break;
9ee6f678
BR
4224 case float_round_to_odd:
4225 increment = !(zSig1 & 0x1) && zSig2;
4226 break;
dc355b76
PM
4227 default:
4228 abort();
158142c2 4229 }
bb98fe42 4230 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
4231 if ( ( 0x7FFD < zExp )
4232 || ( ( zExp == 0x7FFD )
4233 && eq128(
4234 LIT64( 0x0001FFFFFFFFFFFF ),
4235 LIT64( 0xFFFFFFFFFFFFFFFF ),
4236 zSig0,
4237 zSig1
4238 )
4239 && increment
4240 )
4241 ) {
ff32e16e 4242 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4243 if ( ( roundingMode == float_round_to_zero )
4244 || ( zSign && ( roundingMode == float_round_up ) )
4245 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4246 || (roundingMode == float_round_to_odd)
158142c2
FB
4247 ) {
4248 return
4249 packFloat128(
4250 zSign,
4251 0x7FFE,
4252 LIT64( 0x0000FFFFFFFFFFFF ),
4253 LIT64( 0xFFFFFFFFFFFFFFFF )
4254 );
4255 }
4256 return packFloat128( zSign, 0x7FFF, 0, 0 );
4257 }
4258 if ( zExp < 0 ) {
a2f2d288 4259 if (status->flush_to_zero) {
ff32e16e 4260 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4261 return packFloat128(zSign, 0, 0, 0);
4262 }
158142c2 4263 isTiny =
a2f2d288
PM
4264 (status->float_detect_tininess
4265 == float_tininess_before_rounding)
158142c2
FB
4266 || ( zExp < -1 )
4267 || ! increment
4268 || lt128(
4269 zSig0,
4270 zSig1,
4271 LIT64( 0x0001FFFFFFFFFFFF ),
4272 LIT64( 0xFFFFFFFFFFFFFFFF )
4273 );
4274 shift128ExtraRightJamming(
4275 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4276 zExp = 0;
ff32e16e
PM
4277 if (isTiny && zSig2) {
4278 float_raise(float_flag_underflow, status);
4279 }
dc355b76
PM
4280 switch (roundingMode) {
4281 case float_round_nearest_even:
f9288a76 4282 case float_round_ties_away:
dc355b76
PM
4283 increment = ((int64_t)zSig2 < 0);
4284 break;
4285 case float_round_to_zero:
4286 increment = 0;
4287 break;
4288 case float_round_up:
4289 increment = !zSign && zSig2;
4290 break;
4291 case float_round_down:
4292 increment = zSign && zSig2;
4293 break;
9ee6f678
BR
4294 case float_round_to_odd:
4295 increment = !(zSig1 & 0x1) && zSig2;
4296 break;
dc355b76
PM
4297 default:
4298 abort();
158142c2
FB
4299 }
4300 }
4301 }
a2f2d288
PM
4302 if (zSig2) {
4303 status->float_exception_flags |= float_flag_inexact;
4304 }
158142c2
FB
4305 if ( increment ) {
4306 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
4307 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
4308 }
4309 else {
4310 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4311 }
4312 return packFloat128( zSign, zExp, zSig0, zSig1 );
4313
4314}
4315
4316/*----------------------------------------------------------------------------
4317| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4318| and significand formed by the concatenation of `zSig0' and `zSig1', and
4319| returns the proper quadruple-precision floating-point value corresponding
4320| to the abstract input. This routine is just like `roundAndPackFloat128'
4321| except that the input significand has fewer bits and does not have to be
4322| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4323| point exponent.
4324*----------------------------------------------------------------------------*/
4325
f4014512 4326static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
4327 uint64_t zSig0, uint64_t zSig1,
4328 float_status *status)
158142c2 4329{
8f506c70 4330 int8_t shiftCount;
bb98fe42 4331 uint64_t zSig2;
158142c2
FB
4332
4333 if ( zSig0 == 0 ) {
4334 zSig0 = zSig1;
4335 zSig1 = 0;
4336 zExp -= 64;
4337 }
0019d5c3 4338 shiftCount = clz64(zSig0) - 15;
158142c2
FB
4339 if ( 0 <= shiftCount ) {
4340 zSig2 = 0;
4341 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4342 }
4343 else {
4344 shift128ExtraRightJamming(
4345 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4346 }
4347 zExp -= shiftCount;
ff32e16e 4348 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
4349
4350}
4351
158142c2 4352
158142c2
FB
4353/*----------------------------------------------------------------------------
4354| Returns the result of converting the 32-bit two's complement integer `a'
4355| to the extended double-precision floating-point format. The conversion
4356| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4357| Arithmetic.
4358*----------------------------------------------------------------------------*/
4359
e5a41ffa 4360floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2
FB
4361{
4362 flag zSign;
3a87d009 4363 uint32_t absA;
8f506c70 4364 int8_t shiftCount;
bb98fe42 4365 uint64_t zSig;
158142c2
FB
4366
4367 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4368 zSign = ( a < 0 );
4369 absA = zSign ? - a : a;
0019d5c3 4370 shiftCount = clz32(absA) + 32;
158142c2
FB
4371 zSig = absA;
4372 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4373
4374}
4375
158142c2
FB
4376/*----------------------------------------------------------------------------
4377| Returns the result of converting the 32-bit two's complement integer `a' to
4378| the quadruple-precision floating-point format. The conversion is performed
4379| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4380*----------------------------------------------------------------------------*/
4381
e5a41ffa 4382float128 int32_to_float128(int32_t a, float_status *status)
158142c2
FB
4383{
4384 flag zSign;
3a87d009 4385 uint32_t absA;
8f506c70 4386 int8_t shiftCount;
bb98fe42 4387 uint64_t zSig0;
158142c2
FB
4388
4389 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4390 zSign = ( a < 0 );
4391 absA = zSign ? - a : a;
0019d5c3 4392 shiftCount = clz32(absA) + 17;
158142c2
FB
4393 zSig0 = absA;
4394 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
4395
4396}
4397
158142c2
FB
4398/*----------------------------------------------------------------------------
4399| Returns the result of converting the 64-bit two's complement integer `a'
4400| to the extended double-precision floating-point format. The conversion
4401| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4402| Arithmetic.
4403*----------------------------------------------------------------------------*/
4404
e5a41ffa 4405floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2
FB
4406{
4407 flag zSign;
182f42fd 4408 uint64_t absA;
8f506c70 4409 int8_t shiftCount;
158142c2
FB
4410
4411 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4412 zSign = ( a < 0 );
4413 absA = zSign ? - a : a;
0019d5c3 4414 shiftCount = clz64(absA);
158142c2
FB
4415 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4416
4417}
4418
158142c2
FB
4419/*----------------------------------------------------------------------------
4420| Returns the result of converting the 64-bit two's complement integer `a' to
4421| the quadruple-precision floating-point format. The conversion is performed
4422| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4423*----------------------------------------------------------------------------*/
4424
e5a41ffa 4425float128 int64_to_float128(int64_t a, float_status *status)
158142c2
FB
4426{
4427 flag zSign;
182f42fd 4428 uint64_t absA;
8f506c70 4429 int8_t shiftCount;
f4014512 4430 int32_t zExp;
bb98fe42 4431 uint64_t zSig0, zSig1;
158142c2
FB
4432
4433 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4434 zSign = ( a < 0 );
4435 absA = zSign ? - a : a;
0019d5c3 4436 shiftCount = clz64(absA) + 49;
158142c2
FB
4437 zExp = 0x406E - shiftCount;
4438 if ( 64 <= shiftCount ) {
4439 zSig1 = 0;
4440 zSig0 = absA;
4441 shiftCount -= 64;
4442 }
4443 else {
4444 zSig1 = absA;
4445 zSig0 = 0;
4446 }
4447 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4448 return packFloat128( zSign, zExp, zSig0, zSig1 );
4449
4450}
4451
6bb8e0f1
PM
4452/*----------------------------------------------------------------------------
4453| Returns the result of converting the 64-bit unsigned integer `a'
4454| to the quadruple-precision floating-point format. The conversion is performed
4455| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4456*----------------------------------------------------------------------------*/
4457
e5a41ffa 4458float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
4459{
4460 if (a == 0) {
4461 return float128_zero;
4462 }
6603d506 4463 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
4464}
4465
158142c2
FB
4466/*----------------------------------------------------------------------------
4467| Returns the result of converting the single-precision floating-point value
4468| `a' to the extended double-precision floating-point format. The conversion
4469| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4470| Arithmetic.
4471*----------------------------------------------------------------------------*/
4472
e5a41ffa 4473floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2
FB
4474{
4475 flag aSign;
0c48262d 4476 int aExp;
bb98fe42 4477 uint32_t aSig;
158142c2 4478
ff32e16e 4479 a = float32_squash_input_denormal(a, status);
158142c2
FB
4480 aSig = extractFloat32Frac( a );
4481 aExp = extractFloat32Exp( a );
4482 aSign = extractFloat32Sign( a );
4483 if ( aExp == 0xFF ) {
ff32e16e
PM
4484 if (aSig) {
4485 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
4486 }
0f605c88
LV
4487 return packFloatx80(aSign,
4488 floatx80_infinity_high,
4489 floatx80_infinity_low);
158142c2
FB
4490 }
4491 if ( aExp == 0 ) {
4492 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4493 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4494 }
4495 aSig |= 0x00800000;
bb98fe42 4496 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
4497
4498}
4499
158142c2
FB
4500/*----------------------------------------------------------------------------
4501| Returns the result of converting the single-precision floating-point value
4502| `a' to the quadruple-precision floating-point format. The conversion is
4503| performed according to the IEC/IEEE Standard for Binary Floating-Point
4504| Arithmetic.
4505*----------------------------------------------------------------------------*/
4506
e5a41ffa 4507float128 float32_to_float128(float32 a, float_status *status)
158142c2
FB
4508{
4509 flag aSign;
0c48262d 4510 int aExp;
bb98fe42 4511 uint32_t aSig;
158142c2 4512
ff32e16e 4513 a = float32_squash_input_denormal(a, status);
158142c2
FB
4514 aSig = extractFloat32Frac( a );
4515 aExp = extractFloat32Exp( a );
4516 aSign = extractFloat32Sign( a );
4517 if ( aExp == 0xFF ) {
ff32e16e
PM
4518 if (aSig) {
4519 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
4520 }
158142c2
FB
4521 return packFloat128( aSign, 0x7FFF, 0, 0 );
4522 }
4523 if ( aExp == 0 ) {
4524 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4525 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4526 --aExp;
4527 }
bb98fe42 4528 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
4529
4530}
4531
158142c2
FB
4532/*----------------------------------------------------------------------------
4533| Returns the remainder of the single-precision floating-point value `a'
4534| with respect to the corresponding value `b'. The operation is performed
4535| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4536*----------------------------------------------------------------------------*/
4537
e5a41ffa 4538float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 4539{
ed086f3d 4540 flag aSign, zSign;
0c48262d 4541 int aExp, bExp, expDiff;
bb98fe42
AF
4542 uint32_t aSig, bSig;
4543 uint32_t q;
4544 uint64_t aSig64, bSig64, q64;
4545 uint32_t alternateASig;
4546 int32_t sigMean;
ff32e16e
PM
4547 a = float32_squash_input_denormal(a, status);
4548 b = float32_squash_input_denormal(b, status);
158142c2
FB
4549
4550 aSig = extractFloat32Frac( a );
4551 aExp = extractFloat32Exp( a );
4552 aSign = extractFloat32Sign( a );
4553 bSig = extractFloat32Frac( b );
4554 bExp = extractFloat32Exp( b );
158142c2
FB
4555 if ( aExp == 0xFF ) {
4556 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 4557 return propagateFloat32NaN(a, b, status);
158142c2 4558 }
ff32e16e 4559 float_raise(float_flag_invalid, status);
af39bc8c 4560 return float32_default_nan(status);
158142c2
FB
4561 }
4562 if ( bExp == 0xFF ) {
ff32e16e
PM
4563 if (bSig) {
4564 return propagateFloat32NaN(a, b, status);
4565 }
158142c2
FB
4566 return a;
4567 }
4568 if ( bExp == 0 ) {
4569 if ( bSig == 0 ) {
ff32e16e 4570 float_raise(float_flag_invalid, status);
af39bc8c 4571 return float32_default_nan(status);
158142c2
FB
4572 }
4573 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
4574 }
4575 if ( aExp == 0 ) {
4576 if ( aSig == 0 ) return a;
4577 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4578 }
4579 expDiff = aExp - bExp;
4580 aSig |= 0x00800000;
4581 bSig |= 0x00800000;
4582 if ( expDiff < 32 ) {
4583 aSig <<= 8;
4584 bSig <<= 8;
4585 if ( expDiff < 0 ) {
4586 if ( expDiff < -1 ) return a;
4587 aSig >>= 1;
4588 }
4589 q = ( bSig <= aSig );
4590 if ( q ) aSig -= bSig;
4591 if ( 0 < expDiff ) {
bb98fe42 4592 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
4593 q >>= 32 - expDiff;
4594 bSig >>= 2;
4595 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4596 }
4597 else {
4598 aSig >>= 2;
4599 bSig >>= 2;
4600 }
4601 }
4602 else {
4603 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
4604 aSig64 = ( (uint64_t) aSig )<<40;
4605 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
4606 expDiff -= 64;
4607 while ( 0 < expDiff ) {
4608 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
4609 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
4610 aSig64 = - ( ( bSig * q64 )<<38 );
4611 expDiff -= 62;
4612 }
4613 expDiff += 64;
4614 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
4615 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
4616 q = q64>>( 64 - expDiff );
4617 bSig <<= 6;
4618 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
4619 }
4620 do {
4621 alternateASig = aSig;
4622 ++q;
4623 aSig -= bSig;
bb98fe42 4624 } while ( 0 <= (int32_t) aSig );
158142c2
FB
4625 sigMean = aSig + alternateASig;
4626 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4627 aSig = alternateASig;
4628 }
bb98fe42 4629 zSign = ( (int32_t) aSig < 0 );
158142c2 4630 if ( zSign ) aSig = - aSig;
ff32e16e 4631 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
4632}
4633
369be8f6 4634
158142c2 4635
8229c991
AJ
4636/*----------------------------------------------------------------------------
4637| Returns the binary exponential of the single-precision floating-point value
4638| `a'. The operation is performed according to the IEC/IEEE Standard for
4639| Binary Floating-Point Arithmetic.
4640|
4641| Uses the following identities:
4642|
4643| 1. -------------------------------------------------------------------------
4644|      x    x*ln(2)
4645|     2  = e
4646|
4647| 2. -------------------------------------------------------------------------
4648|                      2     3     4     5           n
4649|      x        x     x     x     x     x           x
4650|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
4651|               1!    2!    3!    4!    5!          n!
4652*----------------------------------------------------------------------------*/
4653
4654static const float64 float32_exp2_coefficients[15] =
4655{
d5138cf4
PM
4656 const_float64( 0x3ff0000000000000ll ), /* 1 */
4657 const_float64( 0x3fe0000000000000ll ), /* 2 */
4658 const_float64( 0x3fc5555555555555ll ), /* 3 */
4659 const_float64( 0x3fa5555555555555ll ), /* 4 */
4660 const_float64( 0x3f81111111111111ll ), /* 5 */
4661 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
4662 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
4663 const_float64( 0x3efa01a01a01a01all ), /* 8 */
4664 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
4665 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
4666 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
4667 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
4668 const_float64( 0x3de6124613a86d09ll ), /* 13 */
4669 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
4670 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
4671};
4672
e5a41ffa 4673float32 float32_exp2(float32 a, float_status *status)
8229c991
AJ
4674{
4675 flag aSign;
0c48262d 4676 int aExp;
bb98fe42 4677 uint32_t aSig;
8229c991
AJ
4678 float64 r, x, xn;
4679 int i;
ff32e16e 4680 a = float32_squash_input_denormal(a, status);
8229c991
AJ
4681
4682 aSig = extractFloat32Frac( a );
4683 aExp = extractFloat32Exp( a );
4684 aSign = extractFloat32Sign( a );
4685
4686 if ( aExp == 0xFF) {
ff32e16e
PM
4687 if (aSig) {
4688 return propagateFloat32NaN(a, float32_zero, status);
4689 }
8229c991
AJ
4690 return (aSign) ? float32_zero : a;
4691 }
4692 if (aExp == 0) {
4693 if (aSig == 0) return float32_one;
4694 }
4695
ff32e16e 4696 float_raise(float_flag_inexact, status);
8229c991
AJ
4697
4698 /* ******************************* */
4699 /* using float64 for approximation */
4700 /* ******************************* */
ff32e16e
PM
4701 x = float32_to_float64(a, status);
4702 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
4703
4704 xn = x;
4705 r = float64_one;
4706 for (i = 0 ; i < 15 ; i++) {
4707 float64 f;
4708
ff32e16e
PM
4709 f = float64_mul(xn, float32_exp2_coefficients[i], status);
4710 r = float64_add(r, f, status);
8229c991 4711
ff32e16e 4712 xn = float64_mul(xn, x, status);
8229c991
AJ
4713 }
4714
4715 return float64_to_float32(r, status);
4716}
4717
374dfc33
AJ
4718/*----------------------------------------------------------------------------
4719| Returns the binary log of the single-precision floating-point value `a'.
4720| The operation is performed according to the IEC/IEEE Standard for Binary
4721| Floating-Point Arithmetic.
4722*----------------------------------------------------------------------------*/
e5a41ffa 4723float32 float32_log2(float32 a, float_status *status)
374dfc33
AJ
4724{
4725 flag aSign, zSign;
0c48262d 4726 int aExp;
bb98fe42 4727 uint32_t aSig, zSig, i;
374dfc33 4728
ff32e16e 4729 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
4730 aSig = extractFloat32Frac( a );
4731 aExp = extractFloat32Exp( a );
4732 aSign = extractFloat32Sign( a );
4733
4734 if ( aExp == 0 ) {
4735 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
4736 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4737 }
4738 if ( aSign ) {
ff32e16e 4739 float_raise(float_flag_invalid, status);
af39bc8c 4740 return float32_default_nan(status);
374dfc33
AJ
4741 }
4742 if ( aExp == 0xFF ) {
ff32e16e
PM
4743 if (aSig) {
4744 return propagateFloat32NaN(a, float32_zero, status);
4745 }
374dfc33
AJ
4746 return a;
4747 }
4748
4749 aExp -= 0x7F;
4750 aSig |= 0x00800000;
4751 zSign = aExp < 0;
4752 zSig = aExp << 23;
4753
4754 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 4755 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
4756 if ( aSig & 0x01000000 ) {
4757 aSig >>= 1;
4758 zSig |= i;
4759 }
4760 }
4761
4762 if ( zSign )
4763 zSig = -zSig;
4764
ff32e16e 4765 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
4766}
4767
158142c2
FB
4768/*----------------------------------------------------------------------------
4769| Returns 1 if the single-precision floating-point value `a' is equal to
b689362d
AJ
4770| the corresponding value `b', and 0 otherwise. The invalid exception is
4771| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
4772| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4773*----------------------------------------------------------------------------*/
4774
e5a41ffa 4775int float32_eq(float32 a, float32 b, float_status *status)
158142c2 4776{
b689362d 4777 uint32_t av, bv;
ff32e16e
PM
4778 a = float32_squash_input_denormal(a, status);
4779 b = float32_squash_input_denormal(b, status);
158142c2
FB
4780
4781 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4782 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4783 ) {
ff32e16e 4784 float_raise(float_flag_invalid, status);
158142c2
FB
4785 return 0;
4786 }
b689362d
AJ
4787 av = float32_val(a);
4788 bv = float32_val(b);
4789 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4790}
4791
4792/*----------------------------------------------------------------------------
4793| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
4794| or equal to the corresponding value `b', and 0 otherwise. The invalid
4795| exception is raised if either operand is a NaN. The comparison is performed
4796| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4797*----------------------------------------------------------------------------*/
4798
e5a41ffa 4799int float32_le(float32 a, float32 b, float_status *status)
158142c2
FB
4800{
4801 flag aSign, bSign;
bb98fe42 4802 uint32_t av, bv;
ff32e16e
PM
4803 a = float32_squash_input_denormal(a, status);
4804 b = float32_squash_input_denormal(b, status);
158142c2
FB
4805
4806 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4807 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4808 ) {
ff32e16e 4809 float_raise(float_flag_invalid, status);
158142c2
FB
4810 return 0;
4811 }
4812 aSign = extractFloat32Sign( a );
4813 bSign = extractFloat32Sign( b );
f090c9d4
PB
4814 av = float32_val(a);
4815 bv = float32_val(b);
bb98fe42 4816 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4817 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4818
4819}
4820
4821/*----------------------------------------------------------------------------
4822| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
4823| the corresponding value `b', and 0 otherwise. The invalid exception is
4824| raised if either operand is a NaN. The comparison is performed according
4825| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4826*----------------------------------------------------------------------------*/
4827
e5a41ffa 4828int float32_lt(float32 a, float32 b, float_status *status)
158142c2
FB
4829{
4830 flag aSign, bSign;
bb98fe42 4831 uint32_t av, bv;
ff32e16e
PM
4832 a = float32_squash_input_denormal(a, status);
4833 b = float32_squash_input_denormal(b, status);
158142c2
FB
4834
4835 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4836 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4837 ) {
ff32e16e 4838 float_raise(float_flag_invalid, status);
158142c2
FB
4839 return 0;
4840 }
4841 aSign = extractFloat32Sign( a );
4842 bSign = extractFloat32Sign( b );
f090c9d4
PB
4843 av = float32_val(a);
4844 bv = float32_val(b);
bb98fe42 4845 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4846 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4847
4848}
4849
67b7861d
AJ
4850/*----------------------------------------------------------------------------
4851| Returns 1 if the single-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4852| be compared, and 0 otherwise. The invalid exception is raised if either
4853| operand is a NaN. The comparison is performed according to the IEC/IEEE
4854| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4855*----------------------------------------------------------------------------*/
4856
e5a41ffa 4857int float32_unordered(float32 a, float32 b, float_status *status)
67b7861d 4858{
ff32e16e
PM
4859 a = float32_squash_input_denormal(a, status);
4860 b = float32_squash_input_denormal(b, status);
67b7861d
AJ
4861
4862 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4863 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4864 ) {
ff32e16e 4865 float_raise(float_flag_invalid, status);
67b7861d
AJ
4866 return 1;
4867 }
4868 return 0;
4869}
b689362d 4870
158142c2
FB
4871/*----------------------------------------------------------------------------
4872| Returns 1 if the single-precision floating-point value `a' is equal to
f5a64251
AJ
4873| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4874| exception. The comparison is performed according to the IEC/IEEE Standard
4875| for Binary Floating-Point Arithmetic.
158142c2
FB
4876*----------------------------------------------------------------------------*/
4877
e5a41ffa 4878int float32_eq_quiet(float32 a, float32 b, float_status *status)
158142c2 4879{
ff32e16e
PM
4880 a = float32_squash_input_denormal(a, status);
4881 b = float32_squash_input_denormal(b, status);
158142c2
FB
4882
4883 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4884 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4885 ) {
af39bc8c
AM
4886 if (float32_is_signaling_nan(a, status)
4887 || float32_is_signaling_nan(b, status)) {
ff32e16e 4888 float_raise(float_flag_invalid, status);
b689362d 4889 }
158142c2
FB
4890 return 0;
4891 }
b689362d
AJ
4892 return ( float32_val(a) == float32_val(b) ) ||
4893 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
158142c2
FB
4894}
4895
4896/*----------------------------------------------------------------------------
4897| Returns 1 if the single-precision floating-point value `a' is less than or
4898| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4899| cause an exception. Otherwise, the comparison is performed according to the
4900| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4901*----------------------------------------------------------------------------*/
4902
e5a41ffa 4903int float32_le_quiet(float32 a, float32 b, float_status *status)
158142c2
FB
4904{
4905 flag aSign, bSign;
bb98fe42 4906 uint32_t av, bv;
ff32e16e
PM
4907 a = float32_squash_input_denormal(a, status);
4908 b = float32_squash_input_denormal(b, status);
158142c2
FB
4909
4910 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4911 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4912 ) {
af39bc8c
AM
4913 if (float32_is_signaling_nan(a, status)
4914 || float32_is_signaling_nan(b, status)) {
ff32e16e 4915 float_raise(float_flag_invalid, status);
158142c2
FB
4916 }
4917 return 0;
4918 }
4919 aSign = extractFloat32Sign( a );
4920 bSign = extractFloat32Sign( b );
f090c9d4
PB
4921 av = float32_val(a);
4922 bv = float32_val(b);
bb98fe42 4923 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4924 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4925
4926}
4927
4928/*----------------------------------------------------------------------------
4929| Returns 1 if the single-precision floating-point value `a' is less than
4930| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4931| exception. Otherwise, the comparison is performed according to the IEC/IEEE
ab52f973 4932| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4933*----------------------------------------------------------------------------*/
4934
ab52f973 4935int float32_lt_quiet(float32 a, float32 b, float_status *status)
158142c2 4936{
ab52f973
AB
4937 flag aSign, bSign;
4938 uint32_t av, bv;
4939 a = float32_squash_input_denormal(a, status);
4940 b = float32_squash_input_denormal(b, status);
158142c2 4941
ab52f973
AB
4942 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4943 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4944 ) {
4945 if (float32_is_signaling_nan(a, status)
4946 || float32_is_signaling_nan(b, status)) {
ff32e16e 4947 float_raise(float_flag_invalid, status);
158142c2 4948 }
ab52f973 4949 return 0;
158142c2 4950 }
ab52f973
AB
4951 aSign = extractFloat32Sign( a );
4952 bSign = extractFloat32Sign( b );
4953 av = float32_val(a);
4954 bv = float32_val(b);
4955 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
4956 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4957
4958}
4959
4960/*----------------------------------------------------------------------------
ab52f973
AB
4961| Returns 1 if the single-precision floating-point values `a' and `b' cannot
4962| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4963| comparison is performed according to the IEC/IEEE Standard for Binary
4964| Floating-Point Arithmetic.
158142c2
FB
4965*----------------------------------------------------------------------------*/
4966
ab52f973 4967int float32_unordered_quiet(float32 a, float32 b, float_status *status)
158142c2 4968{
ab52f973
AB
4969 a = float32_squash_input_denormal(a, status);
4970 b = float32_squash_input_denormal(b, status);
158142c2 4971
ab52f973
AB
4972 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4973 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4974 ) {
4975 if (float32_is_signaling_nan(a, status)
4976 || float32_is_signaling_nan(b, status)) {
4977 float_raise(float_flag_invalid, status);
158142c2 4978 }
ab52f973 4979 return 1;
158142c2 4980 }
ab52f973 4981 return 0;
158142c2
FB
4982}
4983
210cbd49
AB
4984/*----------------------------------------------------------------------------
4985| If `a' is denormal and we are in flush-to-zero mode then set the
4986| input-denormal exception and return zero. Otherwise just return the value.
4987*----------------------------------------------------------------------------*/
4988float16 float16_squash_input_denormal(float16 a, float_status *status)
4989{
4990 if (status->flush_inputs_to_zero) {
4991 if (extractFloat16Exp(a) == 0 && extractFloat16Frac(a) != 0) {
4992 float_raise(float_flag_input_denormal, status);
4993 return make_float16(float16_val(a) & 0x8000);
4994 }
4995 }
4996 return a;
4997}
4998
158142c2
FB
4999/*----------------------------------------------------------------------------
5000| Returns the result of converting the double-precision floating-point value
5001| `a' to the extended double-precision floating-point format. The conversion
5002| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5003| Arithmetic.
5004*----------------------------------------------------------------------------*/
5005
e5a41ffa 5006floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2
FB
5007{
5008 flag aSign;
0c48262d 5009 int aExp;
bb98fe42 5010 uint64_t aSig;
158142c2 5011
ff32e16e 5012 a = float64_squash_input_denormal(a, status);
158142c2
FB
5013 aSig = extractFloat64Frac( a );
5014 aExp = extractFloat64Exp( a );
5015 aSign = extractFloat64Sign( a );
5016 if ( aExp == 0x7FF ) {
ff32e16e
PM
5017 if (aSig) {
5018 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
5019 }
0f605c88
LV
5020 return packFloatx80(aSign,
5021 floatx80_infinity_high,
5022 floatx80_infinity_low);
158142c2
FB
5023 }
5024 if ( aExp == 0 ) {
5025 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5026 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5027 }
5028 return
5029 packFloatx80(
5030 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
5031
5032}
5033
158142c2
FB
5034/*----------------------------------------------------------------------------
5035| Returns the result of converting the double-precision floating-point value
5036| `a' to the quadruple-precision floating-point format. The conversion is
5037| performed according to the IEC/IEEE Standard for Binary Floating-Point
5038| Arithmetic.
5039*----------------------------------------------------------------------------*/
5040
e5a41ffa 5041float128 float64_to_float128(float64 a, float_status *status)
158142c2
FB
5042{
5043 flag aSign;
0c48262d 5044 int aExp;
bb98fe42 5045 uint64_t aSig, zSig0, zSig1;
158142c2 5046
ff32e16e 5047 a = float64_squash_input_denormal(a, status);
158142c2
FB
5048 aSig = extractFloat64Frac( a );
5049 aExp = extractFloat64Exp( a );
5050 aSign = extractFloat64Sign( a );
5051 if ( aExp == 0x7FF ) {
ff32e16e
PM
5052 if (aSig) {
5053 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5054 }
158142c2
FB
5055 return packFloat128( aSign, 0x7FFF, 0, 0 );
5056 }
5057 if ( aExp == 0 ) {
5058 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5059 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5060 --aExp;
5061 }
5062 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
5063 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
5064
5065}
5066
158142c2
FB
5067
5068/*----------------------------------------------------------------------------
5069| Returns the remainder of the double-precision floating-point value `a'
5070| with respect to the corresponding value `b'. The operation is performed
5071| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5072*----------------------------------------------------------------------------*/
5073
e5a41ffa 5074float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5075{
ed086f3d 5076 flag aSign, zSign;
0c48262d 5077 int aExp, bExp, expDiff;
bb98fe42
AF
5078 uint64_t aSig, bSig;
5079 uint64_t q, alternateASig;
5080 int64_t sigMean;
158142c2 5081
ff32e16e
PM
5082 a = float64_squash_input_denormal(a, status);
5083 b = float64_squash_input_denormal(b, status);
158142c2
FB
5084 aSig = extractFloat64Frac( a );
5085 aExp = extractFloat64Exp( a );
5086 aSign = extractFloat64Sign( a );
5087 bSig = extractFloat64Frac( b );
5088 bExp = extractFloat64Exp( b );
158142c2
FB
5089 if ( aExp == 0x7FF ) {
5090 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5091 return propagateFloat64NaN(a, b, status);
158142c2 5092 }
ff32e16e 5093 float_raise(float_flag_invalid, status);
af39bc8c 5094 return float64_default_nan(status);
158142c2
FB
5095 }
5096 if ( bExp == 0x7FF ) {
ff32e16e
PM
5097 if (bSig) {
5098 return propagateFloat64NaN(a, b, status);
5099 }
158142c2
FB
5100 return a;
5101 }
5102 if ( bExp == 0 ) {
5103 if ( bSig == 0 ) {
ff32e16e 5104 float_raise(float_flag_invalid, status);
af39bc8c 5105 return float64_default_nan(status);
158142c2
FB
5106 }
5107 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5108 }
5109 if ( aExp == 0 ) {
5110 if ( aSig == 0 ) return a;
5111 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5112 }
5113 expDiff = aExp - bExp;
5114 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
5115 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
5116 if ( expDiff < 0 ) {
5117 if ( expDiff < -1 ) return a;
5118 aSig >>= 1;
5119 }
5120 q = ( bSig <= aSig );
5121 if ( q ) aSig -= bSig;
5122 expDiff -= 64;
5123 while ( 0 < expDiff ) {
5124 q = estimateDiv128To64( aSig, 0, bSig );
5125 q = ( 2 < q ) ? q - 2 : 0;
5126 aSig = - ( ( bSig>>2 ) * q );
5127 expDiff -= 62;
5128 }
5129 expDiff += 64;
5130 if ( 0 < expDiff ) {
5131 q = estimateDiv128To64( aSig, 0, bSig );
5132 q = ( 2 < q ) ? q - 2 : 0;
5133 q >>= 64 - expDiff;
5134 bSig >>= 2;
5135 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5136 }
5137 else {
5138 aSig >>= 2;
5139 bSig >>= 2;
5140 }
5141 do {
5142 alternateASig = aSig;
5143 ++q;
5144 aSig -= bSig;
bb98fe42 5145 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5146 sigMean = aSig + alternateASig;
5147 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5148 aSig = alternateASig;
5149 }
bb98fe42 5150 zSign = ( (int64_t) aSig < 0 );
158142c2 5151 if ( zSign ) aSig = - aSig;
ff32e16e 5152 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5153
5154}
5155
374dfc33
AJ
5156/*----------------------------------------------------------------------------
5157| Returns the binary log of the double-precision floating-point value `a'.
5158| The operation is performed according to the IEC/IEEE Standard for Binary
5159| Floating-Point Arithmetic.
5160*----------------------------------------------------------------------------*/
e5a41ffa 5161float64 float64_log2(float64 a, float_status *status)
374dfc33
AJ
5162{
5163 flag aSign, zSign;
0c48262d 5164 int aExp;
bb98fe42 5165 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5166 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5167
5168 aSig = extractFloat64Frac( a );
5169 aExp = extractFloat64Exp( a );
5170 aSign = extractFloat64Sign( a );
5171
5172 if ( aExp == 0 ) {
5173 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5174 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5175 }
5176 if ( aSign ) {
ff32e16e 5177 float_raise(float_flag_invalid, status);
af39bc8c 5178 return float64_default_nan(status);
374dfc33
AJ
5179 }
5180 if ( aExp == 0x7FF ) {
ff32e16e
PM
5181 if (aSig) {
5182 return propagateFloat64NaN(a, float64_zero, status);
5183 }
374dfc33
AJ
5184 return a;
5185 }
5186
5187 aExp -= 0x3FF;
5188 aSig |= LIT64( 0x0010000000000000 );
5189 zSign = aExp < 0;
bb98fe42 5190 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5191 for (i = 1LL << 51; i > 0; i >>= 1) {
5192 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5193 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
5194 if ( aSig & LIT64( 0x0020000000000000 ) ) {
5195 aSig >>= 1;
5196 zSig |= i;
5197 }
5198 }
5199
5200 if ( zSign )
5201 zSig = -zSig;
ff32e16e 5202 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5203}
5204
158142c2
FB
5205/*----------------------------------------------------------------------------
5206| Returns 1 if the double-precision floating-point value `a' is equal to the
b689362d
AJ
5207| corresponding value `b', and 0 otherwise. The invalid exception is raised
5208| if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
5209| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5210*----------------------------------------------------------------------------*/
5211
e5a41ffa 5212int float64_eq(float64 a, float64 b, float_status *status)
158142c2 5213{
bb98fe42 5214 uint64_t av, bv;
ff32e16e
PM
5215 a = float64_squash_input_denormal(a, status);
5216 b = float64_squash_input_denormal(b, status);
158142c2
FB
5217
5218 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5219 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5220 ) {
ff32e16e 5221 float_raise(float_flag_invalid, status);
158142c2
FB
5222 return 0;
5223 }
f090c9d4 5224 av = float64_val(a);
a1b91bb4 5225 bv = float64_val(b);
bb98fe42 5226 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
5227
5228}
5229
5230/*----------------------------------------------------------------------------
5231| Returns 1 if the double-precision floating-point value `a' is less than or
f5a64251
AJ
5232| equal to the corresponding value `b', and 0 otherwise. The invalid
5233| exception is raised if either operand is a NaN. The comparison is performed
5234| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5235*----------------------------------------------------------------------------*/
5236
e5a41ffa 5237int float64_le(float64 a, float64 b, float_status *status)
158142c2
FB
5238{
5239 flag aSign, bSign;
bb98fe42 5240 uint64_t av, bv;
ff32e16e
PM
5241 a = float64_squash_input_denormal(a, status);
5242 b = float64_squash_input_denormal(b, status);
158142c2
FB
5243
5244 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5245 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5246 ) {
ff32e16e 5247 float_raise(float_flag_invalid, status);
158142c2
FB
5248 return 0;
5249 }
5250 aSign = extractFloat64Sign( a );
5251 bSign = extractFloat64Sign( b );
f090c9d4 5252 av = float64_val(a);
a1b91bb4 5253 bv = float64_val(b);
bb98fe42 5254 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 5255 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
5256
5257}
5258
5259/*----------------------------------------------------------------------------
5260| Returns 1 if the double-precision floating-point value `a' is less than
f5a64251
AJ
5261| the corresponding value `b', and 0 otherwise. The invalid exception is
5262| raised if either operand is a NaN. The comparison is performed according
5263| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5264*----------------------------------------------------------------------------*/
5265
e5a41ffa 5266int float64_lt(float64 a, float64 b, float_status *status)
158142c2
FB
5267{
5268 flag aSign, bSign;
bb98fe42 5269 uint64_t av, bv;
158142c2 5270
ff32e16e
PM
5271 a = float64_squash_input_denormal(a, status);
5272 b = float64_squash_input_denormal(b, status);
158142c2
FB
5273 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5274 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5275 ) {
ff32e16e 5276 float_raise(float_flag_invalid, status);
158142c2
FB
5277 return 0;
5278 }
5279 aSign = extractFloat64Sign( a );
5280 bSign = extractFloat64Sign( b );
f090c9d4 5281 av = float64_val(a);
a1b91bb4 5282 bv = float64_val(b);
bb98fe42 5283 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 5284 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
5285
5286}
5287
67b7861d
AJ
5288/*----------------------------------------------------------------------------
5289| Returns 1 if the double-precision floating-point values `a' and `b' cannot
f5a64251
AJ
5290| be compared, and 0 otherwise. The invalid exception is raised if either
5291| operand is a NaN. The comparison is performed according to the IEC/IEEE
5292| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
5293*----------------------------------------------------------------------------*/
5294
e5a41ffa 5295int float64_unordered(float64 a, float64 b, float_status *status)
67b7861d 5296{
ff32e16e
PM
5297 a = float64_squash_input_denormal(a, status);
5298 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
5299
5300 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5301 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5302 ) {
ff32e16e 5303 float_raise(float_flag_invalid, status);
67b7861d
AJ
5304 return 1;
5305 }
5306 return 0;
5307}
5308
158142c2
FB
5309/*----------------------------------------------------------------------------
5310| Returns 1 if the double-precision floating-point value `a' is equal to the
f5a64251
AJ
5311| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
5312| exception.The comparison is performed according to the IEC/IEEE Standard
5313| for Binary Floating-Point Arithmetic.
158142c2
FB
5314*----------------------------------------------------------------------------*/
5315
e5a41ffa 5316int float64_eq_quiet(float64 a, float64 b, float_status *status)
158142c2 5317{
bb98fe42 5318 uint64_t av, bv;
ff32e16e
PM
5319 a = float64_squash_input_denormal(a, status);
5320 b = float64_squash_input_denormal(b, status);
158142c2
FB
5321
5322 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5323 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5324 ) {
af39bc8c
AM
5325 if (float64_is_signaling_nan(a, status)
5326 || float64_is_signaling_nan(b, status)) {
ff32e16e 5327 float_raise(float_flag_invalid, status);
b689362d 5328 }
158142c2
FB
5329 return 0;
5330 }
f090c9d4 5331 av = float64_val(a);
a1b91bb4 5332 bv = float64_val(b);
bb98fe42 5333 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
5334
5335}
5336
5337/*----------------------------------------------------------------------------
5338| Returns 1 if the double-precision floating-point value `a' is less than or
5339| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5340| cause an exception. Otherwise, the comparison is performed according to the
5341| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5342*----------------------------------------------------------------------------*/
5343
e5a41ffa 5344int float64_le_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
5345{
5346 flag aSign, bSign;
bb98fe42 5347 uint64_t av, bv;
ff32e16e
PM
5348 a = float64_squash_input_denormal(a, status);
5349 b = float64_squash_input_denormal(b, status);
158142c2
FB
5350
5351 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5352 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5353 ) {
af39bc8c
AM
5354 if (float64_is_signaling_nan(a, status)
5355 || float64_is_signaling_nan(b, status)) {
ff32e16e 5356 float_raise(float_flag_invalid, status);
158142c2
FB
5357 }
5358 return 0;
5359 }
5360 aSign = extractFloat64Sign( a );
5361 bSign = extractFloat64Sign( b );
f090c9d4 5362 av = float64_val(a);
a1b91bb4 5363 bv = float64_val(b);
bb98fe42 5364 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 5365 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
5366
5367}
5368
5369/*----------------------------------------------------------------------------
5370| Returns 1 if the double-precision floating-point value `a' is less than
5371| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
5372| exception. Otherwise, the comparison is performed according to the IEC/IEEE
5373| Standard for Binary Floating-Point Arithmetic.
5374*----------------------------------------------------------------------------*/
5375
e5a41ffa 5376int float64_lt_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
5377{
5378 flag aSign, bSign;
bb98fe42 5379 uint64_t av, bv;
ff32e16e
PM
5380 a = float64_squash_input_denormal(a, status);
5381 b = float64_squash_input_denormal(b, status);
158142c2
FB
5382
5383 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5384 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5385 ) {
af39bc8c
AM
5386 if (float64_is_signaling_nan(a, status)
5387 || float64_is_signaling_nan(b, status)) {
ff32e16e 5388 float_raise(float_flag_invalid, status);
158142c2
FB
5389 }
5390 return 0;
5391 }
5392 aSign = extractFloat64Sign( a );
5393 bSign = extractFloat64Sign( b );
f090c9d4 5394 av = float64_val(a);
a1b91bb4 5395 bv = float64_val(b);
bb98fe42 5396 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 5397 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
5398
5399}
5400
67b7861d
AJ
5401/*----------------------------------------------------------------------------
5402| Returns 1 if the double-precision floating-point values `a' and `b' cannot
5403| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
5404| comparison is performed according to the IEC/IEEE Standard for Binary
5405| Floating-Point Arithmetic.
5406*----------------------------------------------------------------------------*/
5407
e5a41ffa 5408int float64_unordered_quiet(float64 a, float64 b, float_status *status)
67b7861d 5409{
ff32e16e
PM
5410 a = float64_squash_input_denormal(a, status);
5411 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
5412
5413 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5414 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5415 ) {
af39bc8c
AM
5416 if (float64_is_signaling_nan(a, status)
5417 || float64_is_signaling_nan(b, status)) {
ff32e16e 5418 float_raise(float_flag_invalid, status);
67b7861d
AJ
5419 }
5420 return 1;
5421 }
5422 return 0;
5423}
5424
158142c2
FB
5425/*----------------------------------------------------------------------------
5426| Returns the result of converting the extended double-precision floating-
5427| point value `a' to the 32-bit two's complement integer format. The
5428| conversion is performed according to the IEC/IEEE Standard for Binary
5429| Floating-Point Arithmetic---which means in particular that the conversion
5430| is rounded according to the current rounding mode. If `a' is a NaN, the
5431| largest positive integer is returned. Otherwise, if the conversion
5432| overflows, the largest integer with the same sign as `a' is returned.
5433*----------------------------------------------------------------------------*/
5434
f4014512 5435int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2
FB
5436{
5437 flag aSign;
f4014512 5438 int32_t aExp, shiftCount;
bb98fe42 5439 uint64_t aSig;
158142c2 5440
d1eb8f2a
AD
5441 if (floatx80_invalid_encoding(a)) {
5442 float_raise(float_flag_invalid, status);
5443 return 1 << 31;
5444 }
158142c2
FB
5445 aSig = extractFloatx80Frac( a );
5446 aExp = extractFloatx80Exp( a );
5447 aSign = extractFloatx80Sign( a );
bb98fe42 5448 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5449 shiftCount = 0x4037 - aExp;
5450 if ( shiftCount <= 0 ) shiftCount = 1;
5451 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5452 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5453
5454}
5455
5456/*----------------------------------------------------------------------------
5457| Returns the result of converting the extended double-precision floating-
5458| point value `a' to the 32-bit two's complement integer format. The
5459| conversion is performed according to the IEC/IEEE Standard for Binary
5460| Floating-Point Arithmetic, except that the conversion is always rounded
5461| toward zero. If `a' is a NaN, the largest positive integer is returned.
5462| Otherwise, if the conversion overflows, the largest integer with the same
5463| sign as `a' is returned.
5464*----------------------------------------------------------------------------*/
5465
f4014512 5466int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
5467{
5468 flag aSign;
f4014512 5469 int32_t aExp, shiftCount;
bb98fe42 5470 uint64_t aSig, savedASig;
b3a6a2e0 5471 int32_t z;
158142c2 5472
d1eb8f2a
AD
5473 if (floatx80_invalid_encoding(a)) {
5474 float_raise(float_flag_invalid, status);
5475 return 1 << 31;
5476 }
158142c2
FB
5477 aSig = extractFloatx80Frac( a );
5478 aExp = extractFloatx80Exp( a );
5479 aSign = extractFloatx80Sign( a );
5480 if ( 0x401E < aExp ) {
bb98fe42 5481 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5482 goto invalid;
5483 }
5484 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5485 if (aExp || aSig) {
5486 status->float_exception_flags |= float_flag_inexact;
5487 }
158142c2
FB
5488 return 0;
5489 }
5490 shiftCount = 0x403E - aExp;
5491 savedASig = aSig;
5492 aSig >>= shiftCount;
5493 z = aSig;
5494 if ( aSign ) z = - z;
5495 if ( ( z < 0 ) ^ aSign ) {
5496 invalid:
ff32e16e 5497 float_raise(float_flag_invalid, status);
bb98fe42 5498 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5499 }
5500 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 5501 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5502 }
5503 return z;
5504
5505}
5506
5507/*----------------------------------------------------------------------------
5508| Returns the result of converting the extended double-precision floating-
5509| point value `a' to the 64-bit two's complement integer format. The
5510| conversion is performed according to the IEC/IEEE Standard for Binary
5511| Floating-Point Arithmetic---which means in particular that the conversion
5512| is rounded according to the current rounding mode. If `a' is a NaN,
5513| the largest positive integer is returned. Otherwise, if the conversion
5514| overflows, the largest integer with the same sign as `a' is returned.
5515*----------------------------------------------------------------------------*/
5516
f42c2224 5517int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2
FB
5518{
5519 flag aSign;
f4014512 5520 int32_t aExp, shiftCount;
bb98fe42 5521 uint64_t aSig, aSigExtra;
158142c2 5522
d1eb8f2a
AD
5523 if (floatx80_invalid_encoding(a)) {
5524 float_raise(float_flag_invalid, status);
5525 return 1ULL << 63;
5526 }
158142c2
FB
5527 aSig = extractFloatx80Frac( a );
5528 aExp = extractFloatx80Exp( a );
5529 aSign = extractFloatx80Sign( a );
5530 shiftCount = 0x403E - aExp;
5531 if ( shiftCount <= 0 ) {
5532 if ( shiftCount ) {
ff32e16e 5533 float_raise(float_flag_invalid, status);
0f605c88 5534 if (!aSign || floatx80_is_any_nan(a)) {
158142c2
FB
5535 return LIT64( 0x7FFFFFFFFFFFFFFF );
5536 }
bb98fe42 5537 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5538 }
5539 aSigExtra = 0;
5540 }
5541 else {
5542 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5543 }
ff32e16e 5544 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5545
5546}
5547
5548/*----------------------------------------------------------------------------
5549| Returns the result of converting the extended double-precision floating-
5550| point value `a' to the 64-bit two's complement integer format. The
5551| conversion is performed according to the IEC/IEEE Standard for Binary
5552| Floating-Point Arithmetic, except that the conversion is always rounded
5553| toward zero. If `a' is a NaN, the largest positive integer is returned.
5554| Otherwise, if the conversion overflows, the largest integer with the same
5555| sign as `a' is returned.
5556*----------------------------------------------------------------------------*/
5557
f42c2224 5558int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
5559{
5560 flag aSign;
f4014512 5561 int32_t aExp, shiftCount;
bb98fe42 5562 uint64_t aSig;
f42c2224 5563 int64_t z;
158142c2 5564
d1eb8f2a
AD
5565 if (floatx80_invalid_encoding(a)) {
5566 float_raise(float_flag_invalid, status);
5567 return 1ULL << 63;
5568 }
158142c2
FB
5569 aSig = extractFloatx80Frac( a );
5570 aExp = extractFloatx80Exp( a );
5571 aSign = extractFloatx80Sign( a );
5572 shiftCount = aExp - 0x403E;
5573 if ( 0 <= shiftCount ) {
5574 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
5575 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5576 float_raise(float_flag_invalid, status);
158142c2
FB
5577 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5578 return LIT64( 0x7FFFFFFFFFFFFFFF );
5579 }
5580 }
bb98fe42 5581 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5582 }
5583 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5584 if (aExp | aSig) {
5585 status->float_exception_flags |= float_flag_inexact;
5586 }
158142c2
FB
5587 return 0;
5588 }
5589 z = aSig>>( - shiftCount );
bb98fe42 5590 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 5591 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5592 }
5593 if ( aSign ) z = - z;
5594 return z;
5595
5596}
5597
5598/*----------------------------------------------------------------------------
5599| Returns the result of converting the extended double-precision floating-
5600| point value `a' to the single-precision floating-point format. The
5601| conversion is performed according to the IEC/IEEE Standard for Binary
5602| Floating-Point Arithmetic.
5603*----------------------------------------------------------------------------*/
5604
e5a41ffa 5605float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2
FB
5606{
5607 flag aSign;
f4014512 5608 int32_t aExp;
bb98fe42 5609 uint64_t aSig;
158142c2 5610
d1eb8f2a
AD
5611 if (floatx80_invalid_encoding(a)) {
5612 float_raise(float_flag_invalid, status);
5613 return float32_default_nan(status);
5614 }
158142c2
FB
5615 aSig = extractFloatx80Frac( a );
5616 aExp = extractFloatx80Exp( a );
5617 aSign = extractFloatx80Sign( a );
5618 if ( aExp == 0x7FFF ) {
bb98fe42 5619 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 5620 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
158142c2
FB
5621 }
5622 return packFloat32( aSign, 0xFF, 0 );
5623 }
5624 shift64RightJamming( aSig, 33, &aSig );
5625 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5626 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5627
5628}
5629
5630/*----------------------------------------------------------------------------
5631| Returns the result of converting the extended double-precision floating-
5632| point value `a' to the double-precision floating-point format. The
5633| conversion is performed according to the IEC/IEEE Standard for Binary
5634| Floating-Point Arithmetic.
5635*----------------------------------------------------------------------------*/
5636
e5a41ffa 5637float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2
FB
5638{
5639 flag aSign;
f4014512 5640 int32_t aExp;
bb98fe42 5641 uint64_t aSig, zSig;
158142c2 5642
d1eb8f2a
AD
5643 if (floatx80_invalid_encoding(a)) {
5644 float_raise(float_flag_invalid, status);
5645 return float64_default_nan(status);
5646 }
158142c2
FB
5647 aSig = extractFloatx80Frac( a );
5648 aExp = extractFloatx80Exp( a );
5649 aSign = extractFloatx80Sign( a );
5650 if ( aExp == 0x7FFF ) {
bb98fe42 5651 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 5652 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
158142c2
FB
5653 }
5654 return packFloat64( aSign, 0x7FF, 0 );
5655 }
5656 shift64RightJamming( aSig, 1, &zSig );
5657 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5658 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5659
5660}
5661
158142c2
FB
5662/*----------------------------------------------------------------------------
5663| Returns the result of converting the extended double-precision floating-
5664| point value `a' to the quadruple-precision floating-point format. The
5665| conversion is performed according to the IEC/IEEE Standard for Binary
5666| Floating-Point Arithmetic.
5667*----------------------------------------------------------------------------*/
5668
e5a41ffa 5669float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2
FB
5670{
5671 flag aSign;
0c48262d 5672 int aExp;
bb98fe42 5673 uint64_t aSig, zSig0, zSig1;
158142c2 5674
d1eb8f2a
AD
5675 if (floatx80_invalid_encoding(a)) {
5676 float_raise(float_flag_invalid, status);
5677 return float128_default_nan(status);
5678 }
158142c2
FB
5679 aSig = extractFloatx80Frac( a );
5680 aExp = extractFloatx80Exp( a );
5681 aSign = extractFloatx80Sign( a );
bb98fe42 5682 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
ff32e16e 5683 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
158142c2
FB
5684 }
5685 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5686 return packFloat128( aSign, aExp, zSig0, zSig1 );
5687
5688}
5689
0f721292
LV
5690/*----------------------------------------------------------------------------
5691| Rounds the extended double-precision floating-point value `a'
5692| to the precision provided by floatx80_rounding_precision and returns the
5693| result as an extended double-precision floating-point value.
5694| The operation is performed according to the IEC/IEEE Standard for Binary
5695| Floating-Point Arithmetic.
5696*----------------------------------------------------------------------------*/
5697
5698floatx80 floatx80_round(floatx80 a, float_status *status)
5699{
5700 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5701 extractFloatx80Sign(a),
5702 extractFloatx80Exp(a),
5703 extractFloatx80Frac(a), 0, status);
5704}
5705
158142c2
FB
5706/*----------------------------------------------------------------------------
5707| Rounds the extended double-precision floating-point value `a' to an integer,
5708| and returns the result as an extended quadruple-precision floating-point
5709| value. The operation is performed according to the IEC/IEEE Standard for
5710| Binary Floating-Point Arithmetic.
5711*----------------------------------------------------------------------------*/
5712
e5a41ffa 5713floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2
FB
5714{
5715 flag aSign;
f4014512 5716 int32_t aExp;
bb98fe42 5717 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5718 floatx80 z;
5719
d1eb8f2a
AD
5720 if (floatx80_invalid_encoding(a)) {
5721 float_raise(float_flag_invalid, status);
5722 return floatx80_default_nan(status);
5723 }
158142c2
FB
5724 aExp = extractFloatx80Exp( a );
5725 if ( 0x403E <= aExp ) {
bb98fe42 5726 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5727 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5728 }
5729 return a;
5730 }
5731 if ( aExp < 0x3FFF ) {
5732 if ( ( aExp == 0 )
bb98fe42 5733 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
158142c2
FB
5734 return a;
5735 }
a2f2d288 5736 status->float_exception_flags |= float_flag_inexact;
158142c2 5737 aSign = extractFloatx80Sign( a );
a2f2d288 5738 switch (status->float_rounding_mode) {
158142c2 5739 case float_round_nearest_even:
bb98fe42 5740 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5741 ) {
5742 return
5743 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
5744 }
5745 break;
f9288a76
PM
5746 case float_round_ties_away:
5747 if (aExp == 0x3FFE) {
5748 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
5749 }
5750 break;
158142c2
FB
5751 case float_round_down:
5752 return
5753 aSign ?
5754 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
5755 : packFloatx80( 0, 0, 0 );
5756 case float_round_up:
5757 return
5758 aSign ? packFloatx80( 1, 0, 0 )
5759 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
5760 }
5761 return packFloatx80( aSign, 0, 0 );
5762 }
5763 lastBitMask = 1;
5764 lastBitMask <<= 0x403E - aExp;
5765 roundBitsMask = lastBitMask - 1;
5766 z = a;
a2f2d288 5767 switch (status->float_rounding_mode) {
dc355b76 5768 case float_round_nearest_even:
158142c2 5769 z.low += lastBitMask>>1;
dc355b76
PM
5770 if ((z.low & roundBitsMask) == 0) {
5771 z.low &= ~lastBitMask;
5772 }
5773 break;
f9288a76
PM
5774 case float_round_ties_away:
5775 z.low += lastBitMask >> 1;
5776 break;
dc355b76
PM
5777 case float_round_to_zero:
5778 break;
5779 case float_round_up:
5780 if (!extractFloatx80Sign(z)) {
5781 z.low += roundBitsMask;
5782 }
5783 break;
5784 case float_round_down:
5785 if (extractFloatx80Sign(z)) {
158142c2
FB
5786 z.low += roundBitsMask;
5787 }
dc355b76
PM
5788 break;
5789 default:
5790 abort();
158142c2
FB
5791 }
5792 z.low &= ~ roundBitsMask;
5793 if ( z.low == 0 ) {
5794 ++z.high;
5795 z.low = LIT64( 0x8000000000000000 );
5796 }
a2f2d288
PM
5797 if (z.low != a.low) {
5798 status->float_exception_flags |= float_flag_inexact;
5799 }
158142c2
FB
5800 return z;
5801
5802}
5803
5804/*----------------------------------------------------------------------------
5805| Returns the result of adding the absolute values of the extended double-
5806| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5807| negated before being returned. `zSign' is ignored if the result is a NaN.
5808| The addition is performed according to the IEC/IEEE Standard for Binary
5809| Floating-Point Arithmetic.
5810*----------------------------------------------------------------------------*/
5811
e5a41ffa
PM
5812static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5813 float_status *status)
158142c2 5814{
f4014512 5815 int32_t aExp, bExp, zExp;
bb98fe42 5816 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5817 int32_t expDiff;
158142c2
FB
5818
5819 aSig = extractFloatx80Frac( a );
5820 aExp = extractFloatx80Exp( a );
5821 bSig = extractFloatx80Frac( b );
5822 bExp = extractFloatx80Exp( b );
5823 expDiff = aExp - bExp;
5824 if ( 0 < expDiff ) {
5825 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5826 if ((uint64_t)(aSig << 1)) {
5827 return propagateFloatx80NaN(a, b, status);
5828 }
158142c2
FB
5829 return a;
5830 }
5831 if ( bExp == 0 ) --expDiff;
5832 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5833 zExp = aExp;
5834 }
5835 else if ( expDiff < 0 ) {
5836 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5837 if ((uint64_t)(bSig << 1)) {
5838 return propagateFloatx80NaN(a, b, status);
5839 }
0f605c88
LV
5840 return packFloatx80(zSign,
5841 floatx80_infinity_high,
5842 floatx80_infinity_low);
158142c2
FB
5843 }
5844 if ( aExp == 0 ) ++expDiff;
5845 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5846 zExp = bExp;
5847 }
5848 else {
5849 if ( aExp == 0x7FFF ) {
bb98fe42 5850 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5851 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5852 }
5853 return a;
5854 }
5855 zSig1 = 0;
5856 zSig0 = aSig + bSig;
5857 if ( aExp == 0 ) {
5858 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5859 goto roundAndPack;
5860 }
5861 zExp = aExp;
5862 goto shiftRight1;
5863 }
5864 zSig0 = aSig + bSig;
bb98fe42 5865 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5866 shiftRight1:
5867 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
5868 zSig0 |= LIT64( 0x8000000000000000 );
5869 ++zExp;
5870 roundAndPack:
a2f2d288 5871 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5872 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5873}
5874
5875/*----------------------------------------------------------------------------
5876| Returns the result of subtracting the absolute values of the extended
5877| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5878| difference is negated before being returned. `zSign' is ignored if the
5879| result is a NaN. The subtraction is performed according to the IEC/IEEE
5880| Standard for Binary Floating-Point Arithmetic.
5881*----------------------------------------------------------------------------*/
5882
e5a41ffa
PM
5883static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5884 float_status *status)
158142c2 5885{
f4014512 5886 int32_t aExp, bExp, zExp;
bb98fe42 5887 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5888 int32_t expDiff;
158142c2
FB
5889
5890 aSig = extractFloatx80Frac( a );
5891 aExp = extractFloatx80Exp( a );
5892 bSig = extractFloatx80Frac( b );
5893 bExp = extractFloatx80Exp( b );
5894 expDiff = aExp - bExp;
5895 if ( 0 < expDiff ) goto aExpBigger;
5896 if ( expDiff < 0 ) goto bExpBigger;
5897 if ( aExp == 0x7FFF ) {
bb98fe42 5898 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5899 return propagateFloatx80NaN(a, b, status);
158142c2 5900 }
ff32e16e 5901 float_raise(float_flag_invalid, status);
af39bc8c 5902 return floatx80_default_nan(status);
158142c2
FB
5903 }
5904 if ( aExp == 0 ) {
5905 aExp = 1;
5906 bExp = 1;
5907 }
5908 zSig1 = 0;
5909 if ( bSig < aSig ) goto aBigger;
5910 if ( aSig < bSig ) goto bBigger;
a2f2d288 5911 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5912 bExpBigger:
5913 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5914 if ((uint64_t)(bSig << 1)) {
5915 return propagateFloatx80NaN(a, b, status);
5916 }
0f605c88
LV
5917 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5918 floatx80_infinity_low);
158142c2
FB
5919 }
5920 if ( aExp == 0 ) ++expDiff;
5921 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5922 bBigger:
5923 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5924 zExp = bExp;
5925 zSign ^= 1;
5926 goto normalizeRoundAndPack;
5927 aExpBigger:
5928 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5929 if ((uint64_t)(aSig << 1)) {
5930 return propagateFloatx80NaN(a, b, status);
5931 }
158142c2
FB
5932 return a;
5933 }
5934 if ( bExp == 0 ) --expDiff;
5935 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5936 aBigger:
5937 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5938 zExp = aExp;
5939 normalizeRoundAndPack:
a2f2d288 5940 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5941 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5942}
5943
5944/*----------------------------------------------------------------------------
5945| Returns the result of adding the extended double-precision floating-point
5946| values `a' and `b'. The operation is performed according to the IEC/IEEE
5947| Standard for Binary Floating-Point Arithmetic.
5948*----------------------------------------------------------------------------*/
5949
e5a41ffa 5950floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5951{
5952 flag aSign, bSign;
5953
d1eb8f2a
AD
5954 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5955 float_raise(float_flag_invalid, status);
5956 return floatx80_default_nan(status);
5957 }
158142c2
FB
5958 aSign = extractFloatx80Sign( a );
5959 bSign = extractFloatx80Sign( b );
5960 if ( aSign == bSign ) {
ff32e16e 5961 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5962 }
5963 else {
ff32e16e 5964 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5965 }
5966
5967}
5968
5969/*----------------------------------------------------------------------------
5970| Returns the result of subtracting the extended double-precision floating-
5971| point values `a' and `b'. The operation is performed according to the
5972| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5973*----------------------------------------------------------------------------*/
5974
e5a41ffa 5975floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5976{
5977 flag aSign, bSign;
5978
d1eb8f2a
AD
5979 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5980 float_raise(float_flag_invalid, status);
5981 return floatx80_default_nan(status);
5982 }
158142c2
FB
5983 aSign = extractFloatx80Sign( a );
5984 bSign = extractFloatx80Sign( b );
5985 if ( aSign == bSign ) {
ff32e16e 5986 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5987 }
5988 else {
ff32e16e 5989 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5990 }
5991
5992}
5993
5994/*----------------------------------------------------------------------------
5995| Returns the result of multiplying the extended double-precision floating-
5996| point values `a' and `b'. The operation is performed according to the
5997| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5998*----------------------------------------------------------------------------*/
5999
e5a41ffa 6000floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6001{
6002 flag aSign, bSign, zSign;
f4014512 6003 int32_t aExp, bExp, zExp;
bb98fe42 6004 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 6005
d1eb8f2a
AD
6006 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6007 float_raise(float_flag_invalid, status);
6008 return floatx80_default_nan(status);
6009 }
158142c2
FB
6010 aSig = extractFloatx80Frac( a );
6011 aExp = extractFloatx80Exp( a );
6012 aSign = extractFloatx80Sign( a );
6013 bSig = extractFloatx80Frac( b );
6014 bExp = extractFloatx80Exp( b );
6015 bSign = extractFloatx80Sign( b );
6016 zSign = aSign ^ bSign;
6017 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6018 if ( (uint64_t) ( aSig<<1 )
6019 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6020 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6021 }
6022 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
6023 return packFloatx80(zSign, floatx80_infinity_high,
6024 floatx80_infinity_low);
158142c2
FB
6025 }
6026 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6027 if ((uint64_t)(bSig << 1)) {
6028 return propagateFloatx80NaN(a, b, status);
6029 }
158142c2
FB
6030 if ( ( aExp | aSig ) == 0 ) {
6031 invalid:
ff32e16e 6032 float_raise(float_flag_invalid, status);
af39bc8c 6033 return floatx80_default_nan(status);
158142c2 6034 }
0f605c88
LV
6035 return packFloatx80(zSign, floatx80_infinity_high,
6036 floatx80_infinity_low);
158142c2
FB
6037 }
6038 if ( aExp == 0 ) {
6039 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6040 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6041 }
6042 if ( bExp == 0 ) {
6043 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6044 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6045 }
6046 zExp = aExp + bExp - 0x3FFE;
6047 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 6048 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
6049 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6050 --zExp;
6051 }
a2f2d288 6052 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6053 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6054}
6055
6056/*----------------------------------------------------------------------------
6057| Returns the result of dividing the extended double-precision floating-point
6058| value `a' by the corresponding value `b'. The operation is performed
6059| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6060*----------------------------------------------------------------------------*/
6061
e5a41ffa 6062floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6063{
6064 flag aSign, bSign, zSign;
f4014512 6065 int32_t aExp, bExp, zExp;
bb98fe42
AF
6066 uint64_t aSig, bSig, zSig0, zSig1;
6067 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6068
d1eb8f2a
AD
6069 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6070 float_raise(float_flag_invalid, status);
6071 return floatx80_default_nan(status);
6072 }
158142c2
FB
6073 aSig = extractFloatx80Frac( a );
6074 aExp = extractFloatx80Exp( a );
6075 aSign = extractFloatx80Sign( a );
6076 bSig = extractFloatx80Frac( b );
6077 bExp = extractFloatx80Exp( b );
6078 bSign = extractFloatx80Sign( b );
6079 zSign = aSign ^ bSign;
6080 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6081 if ((uint64_t)(aSig << 1)) {
6082 return propagateFloatx80NaN(a, b, status);
6083 }
158142c2 6084 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6085 if ((uint64_t)(bSig << 1)) {
6086 return propagateFloatx80NaN(a, b, status);
6087 }
158142c2
FB
6088 goto invalid;
6089 }
0f605c88
LV
6090 return packFloatx80(zSign, floatx80_infinity_high,
6091 floatx80_infinity_low);
158142c2
FB
6092 }
6093 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6094 if ((uint64_t)(bSig << 1)) {
6095 return propagateFloatx80NaN(a, b, status);
6096 }
158142c2
FB
6097 return packFloatx80( zSign, 0, 0 );
6098 }
6099 if ( bExp == 0 ) {
6100 if ( bSig == 0 ) {
6101 if ( ( aExp | aSig ) == 0 ) {
6102 invalid:
ff32e16e 6103 float_raise(float_flag_invalid, status);
af39bc8c 6104 return floatx80_default_nan(status);
158142c2 6105 }
ff32e16e 6106 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6107 return packFloatx80(zSign, floatx80_infinity_high,
6108 floatx80_infinity_low);
158142c2
FB
6109 }
6110 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6111 }
6112 if ( aExp == 0 ) {
6113 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6114 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6115 }
6116 zExp = aExp - bExp + 0x3FFE;
6117 rem1 = 0;
6118 if ( bSig <= aSig ) {
6119 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6120 ++zExp;
6121 }
6122 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6123 mul64To128( bSig, zSig0, &term0, &term1 );
6124 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 6125 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6126 --zSig0;
6127 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6128 }
6129 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 6130 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6131 mul64To128( bSig, zSig1, &term1, &term2 );
6132 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6133 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6134 --zSig1;
6135 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6136 }
6137 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6138 }
a2f2d288 6139 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6140 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6141}
6142
6143/*----------------------------------------------------------------------------
6144| Returns the remainder of the extended double-precision floating-point value
6145| `a' with respect to the corresponding value `b'. The operation is performed
6146| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6147*----------------------------------------------------------------------------*/
6148
e5a41ffa 6149floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
158142c2 6150{
ed086f3d 6151 flag aSign, zSign;
f4014512 6152 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6153 uint64_t aSig0, aSig1, bSig;
6154 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6155
d1eb8f2a
AD
6156 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6157 float_raise(float_flag_invalid, status);
6158 return floatx80_default_nan(status);
6159 }
158142c2
FB
6160 aSig0 = extractFloatx80Frac( a );
6161 aExp = extractFloatx80Exp( a );
6162 aSign = extractFloatx80Sign( a );
6163 bSig = extractFloatx80Frac( b );
6164 bExp = extractFloatx80Exp( b );
158142c2 6165 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6166 if ( (uint64_t) ( aSig0<<1 )
6167 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6168 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6169 }
6170 goto invalid;
6171 }
6172 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6173 if ((uint64_t)(bSig << 1)) {
6174 return propagateFloatx80NaN(a, b, status);
6175 }
158142c2
FB
6176 return a;
6177 }
6178 if ( bExp == 0 ) {
6179 if ( bSig == 0 ) {
6180 invalid:
ff32e16e 6181 float_raise(float_flag_invalid, status);
af39bc8c 6182 return floatx80_default_nan(status);
158142c2
FB
6183 }
6184 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6185 }
6186 if ( aExp == 0 ) {
bb98fe42 6187 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
158142c2
FB
6188 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6189 }
6190 bSig |= LIT64( 0x8000000000000000 );
6191 zSign = aSign;
6192 expDiff = aExp - bExp;
6193 aSig1 = 0;
6194 if ( expDiff < 0 ) {
6195 if ( expDiff < -1 ) return a;
6196 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6197 expDiff = 0;
6198 }
6199 q = ( bSig <= aSig0 );
6200 if ( q ) aSig0 -= bSig;
6201 expDiff -= 64;
6202 while ( 0 < expDiff ) {
6203 q = estimateDiv128To64( aSig0, aSig1, bSig );
6204 q = ( 2 < q ) ? q - 2 : 0;
6205 mul64To128( bSig, q, &term0, &term1 );
6206 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6207 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6208 expDiff -= 62;
6209 }
6210 expDiff += 64;
6211 if ( 0 < expDiff ) {
6212 q = estimateDiv128To64( aSig0, aSig1, bSig );
6213 q = ( 2 < q ) ? q - 2 : 0;
6214 q >>= 64 - expDiff;
6215 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6216 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6217 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6218 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6219 ++q;
6220 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6221 }
6222 }
6223 else {
6224 term1 = 0;
6225 term0 = bSig;
6226 }
6227 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6228 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6229 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6230 && ( q & 1 ) )
6231 ) {
6232 aSig0 = alternateASig0;
6233 aSig1 = alternateASig1;
6234 zSign = ! zSign;
6235 }
6236 return
6237 normalizeRoundAndPackFloatx80(
ff32e16e 6238 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6239
6240}
6241
6242/*----------------------------------------------------------------------------
6243| Returns the square root of the extended double-precision floating-point
6244| value `a'. The operation is performed according to the IEC/IEEE Standard
6245| for Binary Floating-Point Arithmetic.
6246*----------------------------------------------------------------------------*/
6247
e5a41ffa 6248floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2
FB
6249{
6250 flag aSign;
f4014512 6251 int32_t aExp, zExp;
bb98fe42
AF
6252 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6253 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6254
d1eb8f2a
AD
6255 if (floatx80_invalid_encoding(a)) {
6256 float_raise(float_flag_invalid, status);
6257 return floatx80_default_nan(status);
6258 }
158142c2
FB
6259 aSig0 = extractFloatx80Frac( a );
6260 aExp = extractFloatx80Exp( a );
6261 aSign = extractFloatx80Sign( a );
6262 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6263 if ((uint64_t)(aSig0 << 1)) {
6264 return propagateFloatx80NaN(a, a, status);
6265 }
158142c2
FB
6266 if ( ! aSign ) return a;
6267 goto invalid;
6268 }
6269 if ( aSign ) {
6270 if ( ( aExp | aSig0 ) == 0 ) return a;
6271 invalid:
ff32e16e 6272 float_raise(float_flag_invalid, status);
af39bc8c 6273 return floatx80_default_nan(status);
158142c2
FB
6274 }
6275 if ( aExp == 0 ) {
6276 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6277 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6278 }
6279 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6280 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6281 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6282 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6283 doubleZSig0 = zSig0<<1;
6284 mul64To128( zSig0, zSig0, &term0, &term1 );
6285 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6286 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6287 --zSig0;
6288 doubleZSig0 -= 2;
6289 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6290 }
6291 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6292 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
6293 if ( zSig1 == 0 ) zSig1 = 1;
6294 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6295 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6296 mul64To128( zSig1, zSig1, &term2, &term3 );
6297 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6298 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6299 --zSig1;
6300 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6301 term3 |= 1;
6302 term2 |= doubleZSig0;
6303 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6304 }
6305 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6306 }
6307 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6308 zSig0 |= doubleZSig0;
a2f2d288
PM
6309 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6310 0, zExp, zSig0, zSig1, status);
158142c2
FB
6311}
6312
6313/*----------------------------------------------------------------------------
b689362d
AJ
6314| Returns 1 if the extended double-precision floating-point value `a' is equal
6315| to the corresponding value `b', and 0 otherwise. The invalid exception is
6316| raised if either operand is a NaN. Otherwise, the comparison is performed
6317| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6318*----------------------------------------------------------------------------*/
6319
e5a41ffa 6320int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6321{
6322
d1eb8f2a
AD
6323 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6324 || (extractFloatx80Exp(a) == 0x7FFF
6325 && (uint64_t) (extractFloatx80Frac(a) << 1))
6326 || (extractFloatx80Exp(b) == 0x7FFF
6327 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6328 ) {
ff32e16e 6329 float_raise(float_flag_invalid, status);
158142c2
FB
6330 return 0;
6331 }
6332 return
6333 ( a.low == b.low )
6334 && ( ( a.high == b.high )
6335 || ( ( a.low == 0 )
bb98fe42 6336 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6337 );
6338
6339}
6340
6341/*----------------------------------------------------------------------------
6342| Returns 1 if the extended double-precision floating-point value `a' is
6343| less than or equal to the corresponding value `b', and 0 otherwise. The
f5a64251
AJ
6344| invalid exception is raised if either operand is a NaN. The comparison is
6345| performed according to the IEC/IEEE Standard for Binary Floating-Point
6346| Arithmetic.
158142c2
FB
6347*----------------------------------------------------------------------------*/
6348
e5a41ffa 6349int floatx80_le(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6350{
6351 flag aSign, bSign;
6352
d1eb8f2a
AD
6353 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6354 || (extractFloatx80Exp(a) == 0x7FFF
6355 && (uint64_t) (extractFloatx80Frac(a) << 1))
6356 || (extractFloatx80Exp(b) == 0x7FFF
6357 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6358 ) {
ff32e16e 6359 float_raise(float_flag_invalid, status);
158142c2
FB
6360 return 0;
6361 }
6362 aSign = extractFloatx80Sign( a );
6363 bSign = extractFloatx80Sign( b );
6364 if ( aSign != bSign ) {
6365 return
6366 aSign
bb98fe42 6367 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6368 == 0 );
6369 }
6370 return
6371 aSign ? le128( b.high, b.low, a.high, a.low )
6372 : le128( a.high, a.low, b.high, b.low );
6373
6374}
6375
6376/*----------------------------------------------------------------------------
6377| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
6378| less than the corresponding value `b', and 0 otherwise. The invalid
6379| exception is raised if either operand is a NaN. The comparison is performed
6380| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6381*----------------------------------------------------------------------------*/
6382
e5a41ffa 6383int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6384{
6385 flag aSign, bSign;
6386
d1eb8f2a
AD
6387 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6388 || (extractFloatx80Exp(a) == 0x7FFF
6389 && (uint64_t) (extractFloatx80Frac(a) << 1))
6390 || (extractFloatx80Exp(b) == 0x7FFF
6391 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6392 ) {
ff32e16e 6393 float_raise(float_flag_invalid, status);
158142c2
FB
6394 return 0;
6395 }
6396 aSign = extractFloatx80Sign( a );
6397 bSign = extractFloatx80Sign( b );
6398 if ( aSign != bSign ) {
6399 return
6400 aSign
bb98fe42 6401 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6402 != 0 );
6403 }
6404 return
6405 aSign ? lt128( b.high, b.low, a.high, a.low )
6406 : lt128( a.high, a.low, b.high, b.low );
6407
6408}
6409
67b7861d
AJ
6410/*----------------------------------------------------------------------------
6411| Returns 1 if the extended double-precision floating-point values `a' and `b'
f5a64251
AJ
6412| cannot be compared, and 0 otherwise. The invalid exception is raised if
6413| either operand is a NaN. The comparison is performed according to the
6414| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
67b7861d 6415*----------------------------------------------------------------------------*/
e5a41ffa 6416int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
67b7861d 6417{
d1eb8f2a
AD
6418 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6419 || (extractFloatx80Exp(a) == 0x7FFF
6420 && (uint64_t) (extractFloatx80Frac(a) << 1))
6421 || (extractFloatx80Exp(b) == 0x7FFF
6422 && (uint64_t) (extractFloatx80Frac(b) << 1))
67b7861d 6423 ) {
ff32e16e 6424 float_raise(float_flag_invalid, status);
67b7861d
AJ
6425 return 1;
6426 }
6427 return 0;
6428}
6429
158142c2 6430/*----------------------------------------------------------------------------
b689362d 6431| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
6432| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6433| cause an exception. The comparison is performed according to the IEC/IEEE
6434| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6435*----------------------------------------------------------------------------*/
6436
e5a41ffa 6437int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6438{
6439
d1eb8f2a
AD
6440 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6441 float_raise(float_flag_invalid, status);
6442 return 0;
6443 }
158142c2 6444 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6445 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6446 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6447 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6448 ) {
af39bc8c
AM
6449 if (floatx80_is_signaling_nan(a, status)
6450 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6451 float_raise(float_flag_invalid, status);
b689362d 6452 }
158142c2
FB
6453 return 0;
6454 }
6455 return
6456 ( a.low == b.low )
6457 && ( ( a.high == b.high )
6458 || ( ( a.low == 0 )
bb98fe42 6459 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6460 );
6461
6462}
6463
6464/*----------------------------------------------------------------------------
6465| Returns 1 if the extended double-precision floating-point value `a' is less
6466| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
6467| do not cause an exception. Otherwise, the comparison is performed according
6468| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6469*----------------------------------------------------------------------------*/
6470
e5a41ffa 6471int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6472{
6473 flag aSign, bSign;
6474
d1eb8f2a
AD
6475 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6476 float_raise(float_flag_invalid, status);
6477 return 0;
6478 }
158142c2 6479 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6480 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6481 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6482 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6483 ) {
af39bc8c
AM
6484 if (floatx80_is_signaling_nan(a, status)
6485 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6486 float_raise(float_flag_invalid, status);
158142c2
FB
6487 }
6488 return 0;
6489 }
6490 aSign = extractFloatx80Sign( a );
6491 bSign = extractFloatx80Sign( b );
6492 if ( aSign != bSign ) {
6493 return
6494 aSign
bb98fe42 6495 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6496 == 0 );
6497 }
6498 return
6499 aSign ? le128( b.high, b.low, a.high, a.low )
6500 : le128( a.high, a.low, b.high, b.low );
6501
6502}
6503
6504/*----------------------------------------------------------------------------
6505| Returns 1 if the extended double-precision floating-point value `a' is less
6506| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
6507| an exception. Otherwise, the comparison is performed according to the
6508| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6509*----------------------------------------------------------------------------*/
6510
e5a41ffa 6511int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6512{
6513 flag aSign, bSign;
6514
d1eb8f2a
AD
6515 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6516 float_raise(float_flag_invalid, status);
6517 return 0;
6518 }
158142c2 6519 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6520 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6521 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6522 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6523 ) {
af39bc8c
AM
6524 if (floatx80_is_signaling_nan(a, status)
6525 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6526 float_raise(float_flag_invalid, status);
158142c2
FB
6527 }
6528 return 0;
6529 }
6530 aSign = extractFloatx80Sign( a );
6531 bSign = extractFloatx80Sign( b );
6532 if ( aSign != bSign ) {
6533 return
6534 aSign
bb98fe42 6535 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6536 != 0 );
6537 }
6538 return
6539 aSign ? lt128( b.high, b.low, a.high, a.low )
6540 : lt128( a.high, a.low, b.high, b.low );
6541
6542}
6543
67b7861d
AJ
6544/*----------------------------------------------------------------------------
6545| Returns 1 if the extended double-precision floating-point values `a' and `b'
6546| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
6547| The comparison is performed according to the IEC/IEEE Standard for Binary
6548| Floating-Point Arithmetic.
6549*----------------------------------------------------------------------------*/
e5a41ffa 6550int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
67b7861d 6551{
d1eb8f2a
AD
6552 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6553 float_raise(float_flag_invalid, status);
6554 return 1;
6555 }
67b7861d
AJ
6556 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
6557 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
6558 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
6559 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
6560 ) {
af39bc8c
AM
6561 if (floatx80_is_signaling_nan(a, status)
6562 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6563 float_raise(float_flag_invalid, status);
67b7861d
AJ
6564 }
6565 return 1;
6566 }
6567 return 0;
6568}
6569
158142c2
FB
6570/*----------------------------------------------------------------------------
6571| Returns the result of converting the quadruple-precision floating-point
6572| value `a' to the 32-bit two's complement integer format. The conversion
6573| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6574| Arithmetic---which means in particular that the conversion is rounded
6575| according to the current rounding mode. If `a' is a NaN, the largest
6576| positive integer is returned. Otherwise, if the conversion overflows, the
6577| largest integer with the same sign as `a' is returned.
6578*----------------------------------------------------------------------------*/
6579
f4014512 6580int32_t float128_to_int32(float128 a, float_status *status)
158142c2
FB
6581{
6582 flag aSign;
f4014512 6583 int32_t aExp, shiftCount;
bb98fe42 6584 uint64_t aSig0, aSig1;
158142c2
FB
6585
6586 aSig1 = extractFloat128Frac1( a );
6587 aSig0 = extractFloat128Frac0( a );
6588 aExp = extractFloat128Exp( a );
6589 aSign = extractFloat128Sign( a );
6590 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
6591 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
6592 aSig0 |= ( aSig1 != 0 );
6593 shiftCount = 0x4028 - aExp;
6594 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 6595 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
6596
6597}
6598
6599/*----------------------------------------------------------------------------
6600| Returns the result of converting the quadruple-precision floating-point
6601| value `a' to the 32-bit two's complement integer format. The conversion
6602| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6603| Arithmetic, except that the conversion is always rounded toward zero. If
6604| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
6605| conversion overflows, the largest integer with the same sign as `a' is
6606| returned.
6607*----------------------------------------------------------------------------*/
6608
f4014512 6609int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2
FB
6610{
6611 flag aSign;
f4014512 6612 int32_t aExp, shiftCount;
bb98fe42 6613 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 6614 int32_t z;
158142c2
FB
6615
6616 aSig1 = extractFloat128Frac1( a );
6617 aSig0 = extractFloat128Frac0( a );
6618 aExp = extractFloat128Exp( a );
6619 aSign = extractFloat128Sign( a );
6620 aSig0 |= ( aSig1 != 0 );
6621 if ( 0x401E < aExp ) {
6622 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6623 goto invalid;
6624 }
6625 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
6626 if (aExp || aSig0) {
6627 status->float_exception_flags |= float_flag_inexact;
6628 }
158142c2
FB
6629 return 0;
6630 }
6631 aSig0 |= LIT64( 0x0001000000000000 );
6632 shiftCount = 0x402F - aExp;
6633 savedASig = aSig0;
6634 aSig0 >>= shiftCount;
6635 z = aSig0;
6636 if ( aSign ) z = - z;
6637 if ( ( z < 0 ) ^ aSign ) {
6638 invalid:
ff32e16e 6639 float_raise(float_flag_invalid, status);
bb98fe42 6640 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
6641 }
6642 if ( ( aSig0<<shiftCount ) != savedASig ) {
a2f2d288 6643 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6644 }
6645 return z;
6646
6647}
6648
6649/*----------------------------------------------------------------------------
6650| Returns the result of converting the quadruple-precision floating-point
6651| value `a' to the 64-bit two's complement integer format. The conversion
6652| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6653| Arithmetic---which means in particular that the conversion is rounded
6654| according to the current rounding mode. If `a' is a NaN, the largest
6655| positive integer is returned. Otherwise, if the conversion overflows, the
6656| largest integer with the same sign as `a' is returned.
6657*----------------------------------------------------------------------------*/
6658
f42c2224 6659int64_t float128_to_int64(float128 a, float_status *status)
158142c2
FB
6660{
6661 flag aSign;
f4014512 6662 int32_t aExp, shiftCount;
bb98fe42 6663 uint64_t aSig0, aSig1;
158142c2
FB
6664
6665 aSig1 = extractFloat128Frac1( a );
6666 aSig0 = extractFloat128Frac0( a );
6667 aExp = extractFloat128Exp( a );
6668 aSign = extractFloat128Sign( a );
6669 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
6670 shiftCount = 0x402F - aExp;
6671 if ( shiftCount <= 0 ) {
6672 if ( 0x403E < aExp ) {
ff32e16e 6673 float_raise(float_flag_invalid, status);
158142c2
FB
6674 if ( ! aSign
6675 || ( ( aExp == 0x7FFF )
6676 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
6677 )
6678 ) {
6679 return LIT64( 0x7FFFFFFFFFFFFFFF );
6680 }
bb98fe42 6681 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
6682 }
6683 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6684 }
6685 else {
6686 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6687 }
ff32e16e 6688 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
6689
6690}
6691
6692/*----------------------------------------------------------------------------
6693| Returns the result of converting the quadruple-precision floating-point
6694| value `a' to the 64-bit two's complement integer format. The conversion
6695| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6696| Arithmetic, except that the conversion is always rounded toward zero.
6697| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
6698| the conversion overflows, the largest integer with the same sign as `a' is
6699| returned.
6700*----------------------------------------------------------------------------*/
6701
f42c2224 6702int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2
FB
6703{
6704 flag aSign;
f4014512 6705 int32_t aExp, shiftCount;
bb98fe42 6706 uint64_t aSig0, aSig1;
f42c2224 6707 int64_t z;
158142c2
FB
6708
6709 aSig1 = extractFloat128Frac1( a );
6710 aSig0 = extractFloat128Frac0( a );
6711 aExp = extractFloat128Exp( a );
6712 aSign = extractFloat128Sign( a );
6713 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
6714 shiftCount = aExp - 0x402F;
6715 if ( 0 < shiftCount ) {
6716 if ( 0x403E <= aExp ) {
6717 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
6718 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
6719 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
a2f2d288
PM
6720 if (aSig1) {
6721 status->float_exception_flags |= float_flag_inexact;
6722 }
158142c2
FB
6723 }
6724 else {
ff32e16e 6725 float_raise(float_flag_invalid, status);
158142c2
FB
6726 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
6727 return LIT64( 0x7FFFFFFFFFFFFFFF );
6728 }
6729 }
bb98fe42 6730 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
6731 }
6732 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 6733 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
a2f2d288 6734 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6735 }
6736 }
6737 else {
6738 if ( aExp < 0x3FFF ) {
6739 if ( aExp | aSig0 | aSig1 ) {
a2f2d288 6740 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6741 }
6742 return 0;
6743 }
6744 z = aSig0>>( - shiftCount );
6745 if ( aSig1
bb98fe42 6746 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
a2f2d288 6747 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6748 }
6749 }
6750 if ( aSign ) z = - z;
6751 return z;
6752
6753}
6754
2e6d8568
BR
6755/*----------------------------------------------------------------------------
6756| Returns the result of converting the quadruple-precision floating-point value
6757| `a' to the 64-bit unsigned integer format. The conversion is
6758| performed according to the IEC/IEEE Standard for Binary Floating-Point
6759| Arithmetic---which means in particular that the conversion is rounded
6760| according to the current rounding mode. If `a' is a NaN, the largest
6761| positive integer is returned. If the conversion overflows, the
6762| largest unsigned integer is returned. If 'a' is negative, the value is
6763| rounded and zero is returned; negative values that do not round to zero
6764| will raise the inexact exception.
6765*----------------------------------------------------------------------------*/
6766
6767uint64_t float128_to_uint64(float128 a, float_status *status)
6768{
6769 flag aSign;
6770 int aExp;
6771 int shiftCount;
6772 uint64_t aSig0, aSig1;
6773
6774 aSig0 = extractFloat128Frac0(a);
6775 aSig1 = extractFloat128Frac1(a);
6776 aExp = extractFloat128Exp(a);
6777 aSign = extractFloat128Sign(a);
6778 if (aSign && (aExp > 0x3FFE)) {
6779 float_raise(float_flag_invalid, status);
6780 if (float128_is_any_nan(a)) {
6781 return LIT64(0xFFFFFFFFFFFFFFFF);
6782 } else {
6783 return 0;
6784 }
6785 }
6786 if (aExp) {
6787 aSig0 |= LIT64(0x0001000000000000);
6788 }
6789 shiftCount = 0x402F - aExp;
6790 if (shiftCount <= 0) {
6791 if (0x403E < aExp) {
6792 float_raise(float_flag_invalid, status);
6793 return LIT64(0xFFFFFFFFFFFFFFFF);
6794 }
6795 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6796 } else {
6797 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6798 }
6799 return roundAndPackUint64(aSign, aSig0, aSig1, status);
6800}
6801
6802uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6803{
6804 uint64_t v;
6805 signed char current_rounding_mode = status->float_rounding_mode;
6806
6807 set_float_rounding_mode(float_round_to_zero, status);
6808 v = float128_to_uint64(a, status);
6809 set_float_rounding_mode(current_rounding_mode, status);
6810
6811 return v;
6812}
6813
158142c2
FB
6814/*----------------------------------------------------------------------------
6815| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
6816| value `a' to the 32-bit unsigned integer format. The conversion
6817| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6818| Arithmetic except that the conversion is always rounded toward zero.
6819| If `a' is a NaN, the largest positive integer is returned. Otherwise,
6820| if the conversion overflows, the largest unsigned integer is returned.
6821| If 'a' is negative, the value is rounded and zero is returned; negative
6822| values that do not round to zero will raise the inexact exception.
6823*----------------------------------------------------------------------------*/
6824
6825uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6826{
6827 uint64_t v;
6828 uint32_t res;
6829 int old_exc_flags = get_float_exception_flags(status);
6830
6831 v = float128_to_uint64_round_to_zero(a, status);
6832 if (v > 0xffffffff) {
6833 res = 0xffffffff;
6834 } else {
6835 return v;
6836 }
6837 set_float_exception_flags(old_exc_flags, status);
e45de992
DH
6838 float_raise(float_flag_invalid, status);
6839 return res;
6840}
6841
6842/*----------------------------------------------------------------------------
6843| Returns the result of converting the quadruple-precision floating-point value
6844| `a' to the 32-bit unsigned integer format. The conversion is
6845| performed according to the IEC/IEEE Standard for Binary Floating-Point
6846| Arithmetic---which means in particular that the conversion is rounded
6847| according to the current rounding mode. If `a' is a NaN, the largest
6848| positive integer is returned. If the conversion overflows, the
6849| largest unsigned integer is returned. If 'a' is negative, the value is
6850| rounded and zero is returned; negative values that do not round to zero
6851| will raise the inexact exception.
6852*----------------------------------------------------------------------------*/
6853
6854uint32_t float128_to_uint32(float128 a, float_status *status)
6855{
6856 uint64_t v;
6857 uint32_t res;
6858 int old_exc_flags = get_float_exception_flags(status);
6859
6860 v = float128_to_uint64(a, status);
6861 if (v > 0xffffffff) {
6862 res = 0xffffffff;
6863 } else {
6864 return v;
6865 }
6866 set_float_exception_flags(old_exc_flags, status);
fd425037
BR
6867 float_raise(float_flag_invalid, status);
6868 return res;
6869}
6870
6871/*----------------------------------------------------------------------------
6872| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
6873| value `a' to the single-precision floating-point format. The conversion
6874| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6875| Arithmetic.
6876*----------------------------------------------------------------------------*/
6877
e5a41ffa 6878float32 float128_to_float32(float128 a, float_status *status)
158142c2
FB
6879{
6880 flag aSign;
f4014512 6881 int32_t aExp;
bb98fe42
AF
6882 uint64_t aSig0, aSig1;
6883 uint32_t zSig;
158142c2
FB
6884
6885 aSig1 = extractFloat128Frac1( a );
6886 aSig0 = extractFloat128Frac0( a );
6887 aExp = extractFloat128Exp( a );
6888 aSign = extractFloat128Sign( a );
6889 if ( aExp == 0x7FFF ) {
6890 if ( aSig0 | aSig1 ) {
ff32e16e 6891 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
6892 }
6893 return packFloat32( aSign, 0xFF, 0 );
6894 }
6895 aSig0 |= ( aSig1 != 0 );
6896 shift64RightJamming( aSig0, 18, &aSig0 );
6897 zSig = aSig0;
6898 if ( aExp || zSig ) {
6899 zSig |= 0x40000000;
6900 aExp -= 0x3F81;
6901 }
ff32e16e 6902 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
6903
6904}
6905
6906/*----------------------------------------------------------------------------
6907| Returns the result of converting the quadruple-precision floating-point
6908| value `a' to the double-precision floating-point format. The conversion
6909| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6910| Arithmetic.
6911*----------------------------------------------------------------------------*/
6912
e5a41ffa 6913float64 float128_to_float64(float128 a, float_status *status)
158142c2
FB
6914{
6915 flag aSign;
f4014512 6916 int32_t aExp;
bb98fe42 6917 uint64_t aSig0, aSig1;
158142c2
FB
6918
6919 aSig1 = extractFloat128Frac1( a );
6920 aSig0 = extractFloat128Frac0( a );
6921 aExp = extractFloat128Exp( a );
6922 aSign = extractFloat128Sign( a );
6923 if ( aExp == 0x7FFF ) {
6924 if ( aSig0 | aSig1 ) {
ff32e16e 6925 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
6926 }
6927 return packFloat64( aSign, 0x7FF, 0 );
6928 }
6929 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6930 aSig0 |= ( aSig1 != 0 );
6931 if ( aExp || aSig0 ) {
6932 aSig0 |= LIT64( 0x4000000000000000 );
6933 aExp -= 0x3C01;
6934 }
ff32e16e 6935 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
6936
6937}
6938
158142c2
FB
6939/*----------------------------------------------------------------------------
6940| Returns the result of converting the quadruple-precision floating-point
6941| value `a' to the extended double-precision floating-point format. The
6942| conversion is performed according to the IEC/IEEE Standard for Binary
6943| Floating-Point Arithmetic.
6944*----------------------------------------------------------------------------*/
6945
e5a41ffa 6946floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2
FB
6947{
6948 flag aSign;
f4014512 6949 int32_t aExp;
bb98fe42 6950 uint64_t aSig0, aSig1;
158142c2
FB
6951
6952 aSig1 = extractFloat128Frac1( a );
6953 aSig0 = extractFloat128Frac0( a );
6954 aExp = extractFloat128Exp( a );
6955 aSign = extractFloat128Sign( a );
6956 if ( aExp == 0x7FFF ) {
6957 if ( aSig0 | aSig1 ) {
ff32e16e 6958 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
158142c2 6959 }
0f605c88
LV
6960 return packFloatx80(aSign, floatx80_infinity_high,
6961 floatx80_infinity_low);
158142c2
FB
6962 }
6963 if ( aExp == 0 ) {
6964 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6965 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6966 }
6967 else {
6968 aSig0 |= LIT64( 0x0001000000000000 );
6969 }
6970 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6971 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6972
6973}
6974
158142c2
FB
6975/*----------------------------------------------------------------------------
6976| Rounds the quadruple-precision floating-point value `a' to an integer, and
6977| returns the result as a quadruple-precision floating-point value. The
6978| operation is performed according to the IEC/IEEE Standard for Binary
6979| Floating-Point Arithmetic.
6980*----------------------------------------------------------------------------*/
6981
e5a41ffa 6982float128 float128_round_to_int(float128 a, float_status *status)
158142c2
FB
6983{
6984 flag aSign;
f4014512 6985 int32_t aExp;
bb98fe42 6986 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
6987 float128 z;
6988
6989 aExp = extractFloat128Exp( a );
6990 if ( 0x402F <= aExp ) {
6991 if ( 0x406F <= aExp ) {
6992 if ( ( aExp == 0x7FFF )
6993 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6994 ) {
ff32e16e 6995 return propagateFloat128NaN(a, a, status);
158142c2
FB
6996 }
6997 return a;
6998 }
6999 lastBitMask = 1;
7000 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
7001 roundBitsMask = lastBitMask - 1;
7002 z = a;
a2f2d288 7003 switch (status->float_rounding_mode) {
dc355b76 7004 case float_round_nearest_even:
158142c2
FB
7005 if ( lastBitMask ) {
7006 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
7007 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
7008 }
7009 else {
bb98fe42 7010 if ( (int64_t) z.low < 0 ) {
158142c2 7011 ++z.high;
bb98fe42 7012 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
7013 }
7014 }
dc355b76 7015 break;
f9288a76
PM
7016 case float_round_ties_away:
7017 if (lastBitMask) {
7018 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
7019 } else {
7020 if ((int64_t) z.low < 0) {
7021 ++z.high;
7022 }
7023 }
7024 break;
dc355b76
PM
7025 case float_round_to_zero:
7026 break;
7027 case float_round_up:
7028 if (!extractFloat128Sign(z)) {
7029 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7030 }
7031 break;
7032 case float_round_down:
7033 if (extractFloat128Sign(z)) {
7034 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 7035 }
dc355b76 7036 break;
5d64abb3
RH
7037 case float_round_to_odd:
7038 /*
7039 * Note that if lastBitMask == 0, the last bit is the lsb
7040 * of high, and roundBitsMask == -1.
7041 */
7042 if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
7043 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7044 }
7045 break;
dc355b76
PM
7046 default:
7047 abort();
158142c2
FB
7048 }
7049 z.low &= ~ roundBitsMask;
7050 }
7051 else {
7052 if ( aExp < 0x3FFF ) {
bb98fe42 7053 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
a2f2d288 7054 status->float_exception_flags |= float_flag_inexact;
158142c2 7055 aSign = extractFloat128Sign( a );
a2f2d288 7056 switch (status->float_rounding_mode) {
5d64abb3 7057 case float_round_nearest_even:
158142c2
FB
7058 if ( ( aExp == 0x3FFE )
7059 && ( extractFloat128Frac0( a )
7060 | extractFloat128Frac1( a ) )
7061 ) {
7062 return packFloat128( aSign, 0x3FFF, 0, 0 );
7063 }
7064 break;
f9288a76
PM
7065 case float_round_ties_away:
7066 if (aExp == 0x3FFE) {
7067 return packFloat128(aSign, 0x3FFF, 0, 0);
7068 }
7069 break;
5d64abb3 7070 case float_round_down:
158142c2
FB
7071 return
7072 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
7073 : packFloat128( 0, 0, 0, 0 );
5d64abb3 7074 case float_round_up:
158142c2
FB
7075 return
7076 aSign ? packFloat128( 1, 0, 0, 0 )
7077 : packFloat128( 0, 0x3FFF, 0, 0 );
5d64abb3
RH
7078
7079 case float_round_to_odd:
7080 return packFloat128(aSign, 0x3FFF, 0, 0);
158142c2
FB
7081 }
7082 return packFloat128( aSign, 0, 0, 0 );
7083 }
7084 lastBitMask = 1;
7085 lastBitMask <<= 0x402F - aExp;
7086 roundBitsMask = lastBitMask - 1;
7087 z.low = 0;
7088 z.high = a.high;
a2f2d288 7089 switch (status->float_rounding_mode) {
dc355b76 7090 case float_round_nearest_even:
158142c2
FB
7091 z.high += lastBitMask>>1;
7092 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
7093 z.high &= ~ lastBitMask;
7094 }
dc355b76 7095 break;
f9288a76
PM
7096 case float_round_ties_away:
7097 z.high += lastBitMask>>1;
7098 break;
dc355b76
PM
7099 case float_round_to_zero:
7100 break;
7101 case float_round_up:
7102 if (!extractFloat128Sign(z)) {
158142c2
FB
7103 z.high |= ( a.low != 0 );
7104 z.high += roundBitsMask;
7105 }
dc355b76
PM
7106 break;
7107 case float_round_down:
7108 if (extractFloat128Sign(z)) {
7109 z.high |= (a.low != 0);
7110 z.high += roundBitsMask;
7111 }
7112 break;
5d64abb3
RH
7113 case float_round_to_odd:
7114 if ((z.high & lastBitMask) == 0) {
7115 z.high |= (a.low != 0);
7116 z.high += roundBitsMask;
7117 }
7118 break;
dc355b76
PM
7119 default:
7120 abort();
158142c2
FB
7121 }
7122 z.high &= ~ roundBitsMask;
7123 }
7124 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
a2f2d288 7125 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
7126 }
7127 return z;
7128
7129}
7130
7131/*----------------------------------------------------------------------------
7132| Returns the result of adding the absolute values of the quadruple-precision
7133| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
7134| before being returned. `zSign' is ignored if the result is a NaN.
7135| The addition is performed according to the IEC/IEEE Standard for Binary
7136| Floating-Point Arithmetic.
7137*----------------------------------------------------------------------------*/
7138
e5a41ffa
PM
7139static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
7140 float_status *status)
158142c2 7141{
f4014512 7142 int32_t aExp, bExp, zExp;
bb98fe42 7143 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 7144 int32_t expDiff;
158142c2
FB
7145
7146 aSig1 = extractFloat128Frac1( a );
7147 aSig0 = extractFloat128Frac0( a );
7148 aExp = extractFloat128Exp( a );
7149 bSig1 = extractFloat128Frac1( b );
7150 bSig0 = extractFloat128Frac0( b );
7151 bExp = extractFloat128Exp( b );
7152 expDiff = aExp - bExp;
7153 if ( 0 < expDiff ) {
7154 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7155 if (aSig0 | aSig1) {
7156 return propagateFloat128NaN(a, b, status);
7157 }
158142c2
FB
7158 return a;
7159 }
7160 if ( bExp == 0 ) {
7161 --expDiff;
7162 }
7163 else {
7164 bSig0 |= LIT64( 0x0001000000000000 );
7165 }
7166 shift128ExtraRightJamming(
7167 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
7168 zExp = aExp;
7169 }
7170 else if ( expDiff < 0 ) {
7171 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7172 if (bSig0 | bSig1) {
7173 return propagateFloat128NaN(a, b, status);
7174 }
158142c2
FB
7175 return packFloat128( zSign, 0x7FFF, 0, 0 );
7176 }
7177 if ( aExp == 0 ) {
7178 ++expDiff;
7179 }
7180 else {
7181 aSig0 |= LIT64( 0x0001000000000000 );
7182 }
7183 shift128ExtraRightJamming(
7184 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
7185 zExp = bExp;
7186 }
7187 else {
7188 if ( aExp == 0x7FFF ) {
7189 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 7190 return propagateFloat128NaN(a, b, status);
158142c2
FB
7191 }
7192 return a;
7193 }
7194 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 7195 if ( aExp == 0 ) {
a2f2d288 7196 if (status->flush_to_zero) {
e6afc87f 7197 if (zSig0 | zSig1) {
ff32e16e 7198 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
7199 }
7200 return packFloat128(zSign, 0, 0, 0);
7201 }
fe76d976
PB
7202 return packFloat128( zSign, 0, zSig0, zSig1 );
7203 }
158142c2
FB
7204 zSig2 = 0;
7205 zSig0 |= LIT64( 0x0002000000000000 );
7206 zExp = aExp;
7207 goto shiftRight1;
7208 }
7209 aSig0 |= LIT64( 0x0001000000000000 );
7210 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7211 --zExp;
7212 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
7213 ++zExp;
7214 shiftRight1:
7215 shift128ExtraRightJamming(
7216 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7217 roundAndPack:
ff32e16e 7218 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7219
7220}
7221
7222/*----------------------------------------------------------------------------
7223| Returns the result of subtracting the absolute values of the quadruple-
7224| precision floating-point values `a' and `b'. If `zSign' is 1, the
7225| difference is negated before being returned. `zSign' is ignored if the
7226| result is a NaN. The subtraction is performed according to the IEC/IEEE
7227| Standard for Binary Floating-Point Arithmetic.
7228*----------------------------------------------------------------------------*/
7229
e5a41ffa
PM
7230static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
7231 float_status *status)
158142c2 7232{
f4014512 7233 int32_t aExp, bExp, zExp;
bb98fe42 7234 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 7235 int32_t expDiff;
158142c2
FB
7236
7237 aSig1 = extractFloat128Frac1( a );
7238 aSig0 = extractFloat128Frac0( a );
7239 aExp = extractFloat128Exp( a );
7240 bSig1 = extractFloat128Frac1( b );
7241 bSig0 = extractFloat128Frac0( b );
7242 bExp = extractFloat128Exp( b );
7243 expDiff = aExp - bExp;
7244 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
7245 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
7246 if ( 0 < expDiff ) goto aExpBigger;
7247 if ( expDiff < 0 ) goto bExpBigger;
7248 if ( aExp == 0x7FFF ) {
7249 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 7250 return propagateFloat128NaN(a, b, status);
158142c2 7251 }
ff32e16e 7252 float_raise(float_flag_invalid, status);
af39bc8c 7253 return float128_default_nan(status);
158142c2
FB
7254 }
7255 if ( aExp == 0 ) {
7256 aExp = 1;
7257 bExp = 1;
7258 }
7259 if ( bSig0 < aSig0 ) goto aBigger;
7260 if ( aSig0 < bSig0 ) goto bBigger;
7261 if ( bSig1 < aSig1 ) goto aBigger;
7262 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
7263 return packFloat128(status->float_rounding_mode == float_round_down,
7264 0, 0, 0);
158142c2
FB
7265 bExpBigger:
7266 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7267 if (bSig0 | bSig1) {
7268 return propagateFloat128NaN(a, b, status);
7269 }
158142c2
FB
7270 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
7271 }
7272 if ( aExp == 0 ) {
7273 ++expDiff;
7274 }
7275 else {
7276 aSig0 |= LIT64( 0x4000000000000000 );
7277 }
7278 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7279 bSig0 |= LIT64( 0x4000000000000000 );
7280 bBigger:
7281 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
7282 zExp = bExp;
7283 zSign ^= 1;
7284 goto normalizeRoundAndPack;
7285 aExpBigger:
7286 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7287 if (aSig0 | aSig1) {
7288 return propagateFloat128NaN(a, b, status);
7289 }
158142c2
FB
7290 return a;
7291 }
7292 if ( bExp == 0 ) {
7293 --expDiff;
7294 }
7295 else {
7296 bSig0 |= LIT64( 0x4000000000000000 );
7297 }
7298 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
7299 aSig0 |= LIT64( 0x4000000000000000 );
7300 aBigger:
7301 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7302 zExp = aExp;
7303 normalizeRoundAndPack:
7304 --zExp;
ff32e16e
PM
7305 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
7306 status);
158142c2
FB
7307
7308}
7309
7310/*----------------------------------------------------------------------------
7311| Returns the result of adding the quadruple-precision floating-point values
7312| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
7313| for Binary Floating-Point Arithmetic.
7314*----------------------------------------------------------------------------*/
7315
e5a41ffa 7316float128 float128_add(float128 a, float128 b, float_status *status)
158142c2
FB
7317{
7318 flag aSign, bSign;
7319
7320 aSign = extractFloat128Sign( a );
7321 bSign = extractFloat128Sign( b );
7322 if ( aSign == bSign ) {
ff32e16e 7323 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7324 }
7325 else {
ff32e16e 7326 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7327 }
7328
7329}
7330
7331/*----------------------------------------------------------------------------
7332| Returns the result of subtracting the quadruple-precision floating-point
7333| values `a' and `b'. The operation is performed according to the IEC/IEEE
7334| Standard for Binary Floating-Point Arithmetic.
7335*----------------------------------------------------------------------------*/
7336
e5a41ffa 7337float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2
FB
7338{
7339 flag aSign, bSign;
7340
7341 aSign = extractFloat128Sign( a );
7342 bSign = extractFloat128Sign( b );
7343 if ( aSign == bSign ) {
ff32e16e 7344 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7345 }
7346 else {
ff32e16e 7347 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7348 }
7349
7350}
7351
7352/*----------------------------------------------------------------------------
7353| Returns the result of multiplying the quadruple-precision floating-point
7354| values `a' and `b'. The operation is performed according to the IEC/IEEE
7355| Standard for Binary Floating-Point Arithmetic.
7356*----------------------------------------------------------------------------*/
7357
e5a41ffa 7358float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2
FB
7359{
7360 flag aSign, bSign, zSign;
f4014512 7361 int32_t aExp, bExp, zExp;
bb98fe42 7362 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
7363
7364 aSig1 = extractFloat128Frac1( a );
7365 aSig0 = extractFloat128Frac0( a );
7366 aExp = extractFloat128Exp( a );
7367 aSign = extractFloat128Sign( a );
7368 bSig1 = extractFloat128Frac1( b );
7369 bSig0 = extractFloat128Frac0( b );
7370 bExp = extractFloat128Exp( b );
7371 bSign = extractFloat128Sign( b );
7372 zSign = aSign ^ bSign;
7373 if ( aExp == 0x7FFF ) {
7374 if ( ( aSig0 | aSig1 )
7375 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7376 return propagateFloat128NaN(a, b, status);
158142c2
FB
7377 }
7378 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
7379 return packFloat128( zSign, 0x7FFF, 0, 0 );
7380 }
7381 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7382 if (bSig0 | bSig1) {
7383 return propagateFloat128NaN(a, b, status);
7384 }
158142c2
FB
7385 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7386 invalid:
ff32e16e 7387 float_raise(float_flag_invalid, status);
af39bc8c 7388 return float128_default_nan(status);
158142c2
FB
7389 }
7390 return packFloat128( zSign, 0x7FFF, 0, 0 );
7391 }
7392 if ( aExp == 0 ) {
7393 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7394 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7395 }
7396 if ( bExp == 0 ) {
7397 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7398 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7399 }
7400 zExp = aExp + bExp - 0x4000;
7401 aSig0 |= LIT64( 0x0001000000000000 );
7402 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
7403 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
7404 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
7405 zSig2 |= ( zSig3 != 0 );
7406 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
7407 shift128ExtraRightJamming(
7408 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7409 ++zExp;
7410 }
ff32e16e 7411 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7412
7413}
7414
7415/*----------------------------------------------------------------------------
7416| Returns the result of dividing the quadruple-precision floating-point value
7417| `a' by the corresponding value `b'. The operation is performed according to
7418| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7419*----------------------------------------------------------------------------*/
7420
e5a41ffa 7421float128 float128_div(float128 a, float128 b, float_status *status)
158142c2
FB
7422{
7423 flag aSign, bSign, zSign;
f4014512 7424 int32_t aExp, bExp, zExp;
bb98fe42
AF
7425 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
7426 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7427
7428 aSig1 = extractFloat128Frac1( a );
7429 aSig0 = extractFloat128Frac0( a );
7430 aExp = extractFloat128Exp( a );
7431 aSign = extractFloat128Sign( a );
7432 bSig1 = extractFloat128Frac1( b );
7433 bSig0 = extractFloat128Frac0( b );
7434 bExp = extractFloat128Exp( b );
7435 bSign = extractFloat128Sign( b );
7436 zSign = aSign ^ bSign;
7437 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7438 if (aSig0 | aSig1) {
7439 return propagateFloat128NaN(a, b, status);
7440 }
158142c2 7441 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7442 if (bSig0 | bSig1) {
7443 return propagateFloat128NaN(a, b, status);
7444 }
158142c2
FB
7445 goto invalid;
7446 }
7447 return packFloat128( zSign, 0x7FFF, 0, 0 );
7448 }
7449 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7450 if (bSig0 | bSig1) {
7451 return propagateFloat128NaN(a, b, status);
7452 }
158142c2
FB
7453 return packFloat128( zSign, 0, 0, 0 );
7454 }
7455 if ( bExp == 0 ) {
7456 if ( ( bSig0 | bSig1 ) == 0 ) {
7457 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7458 invalid:
ff32e16e 7459 float_raise(float_flag_invalid, status);
af39bc8c 7460 return float128_default_nan(status);
158142c2 7461 }
ff32e16e 7462 float_raise(float_flag_divbyzero, status);
158142c2
FB
7463 return packFloat128( zSign, 0x7FFF, 0, 0 );
7464 }
7465 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7466 }
7467 if ( aExp == 0 ) {
7468 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7469 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7470 }
7471 zExp = aExp - bExp + 0x3FFD;
7472 shortShift128Left(
7473 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
7474 shortShift128Left(
7475 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
7476 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
7477 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
7478 ++zExp;
7479 }
7480 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
7481 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
7482 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 7483 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7484 --zSig0;
7485 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
7486 }
7487 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
7488 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
7489 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
7490 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7491 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7492 --zSig1;
7493 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
7494 }
7495 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7496 }
7497 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 7498 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7499
7500}
7501
7502/*----------------------------------------------------------------------------
7503| Returns the remainder of the quadruple-precision floating-point value `a'
7504| with respect to the corresponding value `b'. The operation is performed
7505| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7506*----------------------------------------------------------------------------*/
7507
e5a41ffa 7508float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 7509{
ed086f3d 7510 flag aSign, zSign;
f4014512 7511 int32_t aExp, bExp, expDiff;
bb98fe42
AF
7512 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7513 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
7514 int64_t sigMean0;
158142c2
FB
7515
7516 aSig1 = extractFloat128Frac1( a );
7517 aSig0 = extractFloat128Frac0( a );
7518 aExp = extractFloat128Exp( a );
7519 aSign = extractFloat128Sign( a );
7520 bSig1 = extractFloat128Frac1( b );
7521 bSig0 = extractFloat128Frac0( b );
7522 bExp = extractFloat128Exp( b );
158142c2
FB
7523 if ( aExp == 0x7FFF ) {
7524 if ( ( aSig0 | aSig1 )
7525 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7526 return propagateFloat128NaN(a, b, status);
158142c2
FB
7527 }
7528 goto invalid;
7529 }
7530 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7531 if (bSig0 | bSig1) {
7532 return propagateFloat128NaN(a, b, status);
7533 }
158142c2
FB
7534 return a;
7535 }
7536 if ( bExp == 0 ) {
7537 if ( ( bSig0 | bSig1 ) == 0 ) {
7538 invalid:
ff32e16e 7539 float_raise(float_flag_invalid, status);
af39bc8c 7540 return float128_default_nan(status);
158142c2
FB
7541 }
7542 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7543 }
7544 if ( aExp == 0 ) {
7545 if ( ( aSig0 | aSig1 ) == 0 ) return a;
7546 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7547 }
7548 expDiff = aExp - bExp;
7549 if ( expDiff < -1 ) return a;
7550 shortShift128Left(
7551 aSig0 | LIT64( 0x0001000000000000 ),
7552 aSig1,
7553 15 - ( expDiff < 0 ),
7554 &aSig0,
7555 &aSig1
7556 );
7557 shortShift128Left(
7558 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
7559 q = le128( bSig0, bSig1, aSig0, aSig1 );
7560 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7561 expDiff -= 64;
7562 while ( 0 < expDiff ) {
7563 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7564 q = ( 4 < q ) ? q - 4 : 0;
7565 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7566 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
7567 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
7568 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
7569 expDiff -= 61;
7570 }
7571 if ( -64 < expDiff ) {
7572 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7573 q = ( 4 < q ) ? q - 4 : 0;
7574 q >>= - expDiff;
7575 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7576 expDiff += 52;
7577 if ( expDiff < 0 ) {
7578 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7579 }
7580 else {
7581 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7582 }
7583 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7584 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7585 }
7586 else {
7587 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
7588 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7589 }
7590 do {
7591 alternateASig0 = aSig0;
7592 alternateASig1 = aSig1;
7593 ++q;
7594 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 7595 } while ( 0 <= (int64_t) aSig0 );
158142c2 7596 add128(
bb98fe42 7597 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
7598 if ( ( sigMean0 < 0 )
7599 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
7600 aSig0 = alternateASig0;
7601 aSig1 = alternateASig1;
7602 }
bb98fe42 7603 zSign = ( (int64_t) aSig0 < 0 );
158142c2 7604 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
7605 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
7606 status);
158142c2
FB
7607}
7608
7609/*----------------------------------------------------------------------------
7610| Returns the square root of the quadruple-precision floating-point value `a'.
7611| The operation is performed according to the IEC/IEEE Standard for Binary
7612| Floating-Point Arithmetic.
7613*----------------------------------------------------------------------------*/
7614
e5a41ffa 7615float128 float128_sqrt(float128 a, float_status *status)
158142c2
FB
7616{
7617 flag aSign;
f4014512 7618 int32_t aExp, zExp;
bb98fe42
AF
7619 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7620 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7621
7622 aSig1 = extractFloat128Frac1( a );
7623 aSig0 = extractFloat128Frac0( a );
7624 aExp = extractFloat128Exp( a );
7625 aSign = extractFloat128Sign( a );
7626 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7627 if (aSig0 | aSig1) {
7628 return propagateFloat128NaN(a, a, status);
7629 }
158142c2
FB
7630 if ( ! aSign ) return a;
7631 goto invalid;
7632 }
7633 if ( aSign ) {
7634 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
7635 invalid:
ff32e16e 7636 float_raise(float_flag_invalid, status);
af39bc8c 7637 return float128_default_nan(status);
158142c2
FB
7638 }
7639 if ( aExp == 0 ) {
7640 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
7641 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7642 }
7643 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
7644 aSig0 |= LIT64( 0x0001000000000000 );
7645 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
7646 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
7647 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
7648 doubleZSig0 = zSig0<<1;
7649 mul64To128( zSig0, zSig0, &term0, &term1 );
7650 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 7651 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7652 --zSig0;
7653 doubleZSig0 -= 2;
7654 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
7655 }
7656 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
7657 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7658 if ( zSig1 == 0 ) zSig1 = 1;
7659 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7660 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7661 mul64To128( zSig1, zSig1, &term2, &term3 );
7662 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7663 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7664 --zSig1;
7665 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7666 term3 |= 1;
7667 term2 |= doubleZSig0;
7668 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7669 }
7670 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7671 }
7672 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 7673 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7674
7675}
7676
7677/*----------------------------------------------------------------------------
7678| Returns 1 if the quadruple-precision floating-point value `a' is equal to
b689362d
AJ
7679| the corresponding value `b', and 0 otherwise. The invalid exception is
7680| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
7681| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7682*----------------------------------------------------------------------------*/
7683
e5a41ffa 7684int float128_eq(float128 a, float128 b, float_status *status)
158142c2
FB
7685{
7686
7687 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7688 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7689 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7690 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7691 ) {
ff32e16e 7692 float_raise(float_flag_invalid, status);
158142c2
FB
7693 return 0;
7694 }
7695 return
7696 ( a.low == b.low )
7697 && ( ( a.high == b.high )
7698 || ( ( a.low == 0 )
bb98fe42 7699 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
7700 );
7701
7702}
7703
7704/*----------------------------------------------------------------------------
7705| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
7706| or equal to the corresponding value `b', and 0 otherwise. The invalid
7707| exception is raised if either operand is a NaN. The comparison is performed
7708| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
7709*----------------------------------------------------------------------------*/
7710
e5a41ffa 7711int float128_le(float128 a, float128 b, float_status *status)
158142c2
FB
7712{
7713 flag aSign, bSign;
7714
7715 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7716 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7717 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7718 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7719 ) {
ff32e16e 7720 float_raise(float_flag_invalid, status);
158142c2
FB
7721 return 0;
7722 }
7723 aSign = extractFloat128Sign( a );
7724 bSign = extractFloat128Sign( b );
7725 if ( aSign != bSign ) {
7726 return
7727 aSign
bb98fe42 7728 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7729 == 0 );
7730 }
7731 return
7732 aSign ? le128( b.high, b.low, a.high, a.low )
7733 : le128( a.high, a.low, b.high, b.low );
7734
7735}
7736
7737/*----------------------------------------------------------------------------
7738| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
7739| the corresponding value `b', and 0 otherwise. The invalid exception is
7740| raised if either operand is a NaN. The comparison is performed according
7741| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
7742*----------------------------------------------------------------------------*/
7743
e5a41ffa 7744int float128_lt(float128 a, float128 b, float_status *status)
158142c2
FB
7745{
7746 flag aSign, bSign;
7747
7748 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7749 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7750 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7751 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7752 ) {
ff32e16e 7753 float_raise(float_flag_invalid, status);
158142c2
FB
7754 return 0;
7755 }
7756 aSign = extractFloat128Sign( a );
7757 bSign = extractFloat128Sign( b );
7758 if ( aSign != bSign ) {
7759 return
7760 aSign
bb98fe42 7761 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7762 != 0 );
7763 }
7764 return
7765 aSign ? lt128( b.high, b.low, a.high, a.low )
7766 : lt128( a.high, a.low, b.high, b.low );
7767
7768}
7769
67b7861d
AJ
7770/*----------------------------------------------------------------------------
7771| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
f5a64251
AJ
7772| be compared, and 0 otherwise. The invalid exception is raised if either
7773| operand is a NaN. The comparison is performed according to the IEC/IEEE
7774| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
7775*----------------------------------------------------------------------------*/
7776
e5a41ffa 7777int float128_unordered(float128 a, float128 b, float_status *status)
67b7861d
AJ
7778{
7779 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7780 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7781 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7782 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7783 ) {
ff32e16e 7784 float_raise(float_flag_invalid, status);
67b7861d
AJ
7785 return 1;
7786 }
7787 return 0;
7788}
7789
158142c2
FB
7790/*----------------------------------------------------------------------------
7791| Returns 1 if the quadruple-precision floating-point value `a' is equal to
f5a64251
AJ
7792| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
7793| exception. The comparison is performed according to the IEC/IEEE Standard
7794| for Binary Floating-Point Arithmetic.
158142c2
FB
7795*----------------------------------------------------------------------------*/
7796
e5a41ffa 7797int float128_eq_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7798{
7799
7800 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7801 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7802 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7803 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7804 ) {
af39bc8c
AM
7805 if (float128_is_signaling_nan(a, status)
7806 || float128_is_signaling_nan(b, status)) {
ff32e16e 7807 float_raise(float_flag_invalid, status);
b689362d 7808 }
158142c2
FB
7809 return 0;
7810 }
7811 return
7812 ( a.low == b.low )
7813 && ( ( a.high == b.high )
7814 || ( ( a.low == 0 )
bb98fe42 7815 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
7816 );
7817
7818}
7819
7820/*----------------------------------------------------------------------------
7821| Returns 1 if the quadruple-precision floating-point value `a' is less than
7822| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
7823| cause an exception. Otherwise, the comparison is performed according to the
7824| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7825*----------------------------------------------------------------------------*/
7826
e5a41ffa 7827int float128_le_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7828{
7829 flag aSign, bSign;
7830
7831 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7832 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7833 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7834 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7835 ) {
af39bc8c
AM
7836 if (float128_is_signaling_nan(a, status)
7837 || float128_is_signaling_nan(b, status)) {
ff32e16e 7838 float_raise(float_flag_invalid, status);
158142c2
FB
7839 }
7840 return 0;
7841 }
7842 aSign = extractFloat128Sign( a );
7843 bSign = extractFloat128Sign( b );
7844 if ( aSign != bSign ) {
7845 return
7846 aSign
bb98fe42 7847 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7848 == 0 );
7849 }
7850 return
7851 aSign ? le128( b.high, b.low, a.high, a.low )
7852 : le128( a.high, a.low, b.high, b.low );
7853
7854}
7855
7856/*----------------------------------------------------------------------------
7857| Returns 1 if the quadruple-precision floating-point value `a' is less than
7858| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
7859| exception. Otherwise, the comparison is performed according to the IEC/IEEE
7860| Standard for Binary Floating-Point Arithmetic.
7861*----------------------------------------------------------------------------*/
7862
e5a41ffa 7863int float128_lt_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7864{
7865 flag aSign, bSign;
7866
7867 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7868 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7869 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7870 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7871 ) {
af39bc8c
AM
7872 if (float128_is_signaling_nan(a, status)
7873 || float128_is_signaling_nan(b, status)) {
ff32e16e 7874 float_raise(float_flag_invalid, status);
158142c2
FB
7875 }
7876 return 0;
7877 }
7878 aSign = extractFloat128Sign( a );
7879 bSign = extractFloat128Sign( b );
7880 if ( aSign != bSign ) {
7881 return
7882 aSign
bb98fe42 7883 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7884 != 0 );
7885 }
7886 return
7887 aSign ? lt128( b.high, b.low, a.high, a.low )
7888 : lt128( a.high, a.low, b.high, b.low );
7889
7890}
7891
67b7861d
AJ
7892/*----------------------------------------------------------------------------
7893| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
7894| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
7895| comparison is performed according to the IEC/IEEE Standard for Binary
7896| Floating-Point Arithmetic.
7897*----------------------------------------------------------------------------*/
7898
e5a41ffa 7899int float128_unordered_quiet(float128 a, float128 b, float_status *status)
67b7861d
AJ
7900{
7901 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7902 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7903 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7904 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7905 ) {
af39bc8c
AM
7906 if (float128_is_signaling_nan(a, status)
7907 || float128_is_signaling_nan(b, status)) {
ff32e16e 7908 float_raise(float_flag_invalid, status);
67b7861d
AJ
7909 }
7910 return 1;
7911 }
7912 return 0;
7913}
7914
e5a41ffa
PM
7915static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
7916 int is_quiet, float_status *status)
f6714d36
AJ
7917{
7918 flag aSign, bSign;
7919
d1eb8f2a
AD
7920 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7921 float_raise(float_flag_invalid, status);
7922 return float_relation_unordered;
7923 }
f6714d36
AJ
7924 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7925 ( extractFloatx80Frac( a )<<1 ) ) ||
7926 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7927 ( extractFloatx80Frac( b )<<1 ) )) {
7928 if (!is_quiet ||
af39bc8c
AM
7929 floatx80_is_signaling_nan(a, status) ||
7930 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7931 float_raise(float_flag_invalid, status);
f6714d36
AJ
7932 }
7933 return float_relation_unordered;
7934 }
7935 aSign = extractFloatx80Sign( a );
7936 bSign = extractFloatx80Sign( b );
7937 if ( aSign != bSign ) {
7938
7939 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7940 ( ( a.low | b.low ) == 0 ) ) {
7941 /* zero case */
7942 return float_relation_equal;
7943 } else {
7944 return 1 - (2 * aSign);
7945 }
7946 } else {
7947 if (a.low == b.low && a.high == b.high) {
7948 return float_relation_equal;
7949 } else {
7950 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7951 }
7952 }
7953}
7954
e5a41ffa 7955int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7956{
ff32e16e 7957 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7958}
7959
e5a41ffa 7960int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
f6714d36 7961{
ff32e16e 7962 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7963}
7964
e5a41ffa
PM
7965static inline int float128_compare_internal(float128 a, float128 b,
7966 int is_quiet, float_status *status)
1f587329
BS
7967{
7968 flag aSign, bSign;
7969
7970 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7971 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7972 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7973 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7974 if (!is_quiet ||
af39bc8c
AM
7975 float128_is_signaling_nan(a, status) ||
7976 float128_is_signaling_nan(b, status)) {
ff32e16e 7977 float_raise(float_flag_invalid, status);
1f587329
BS
7978 }
7979 return float_relation_unordered;
7980 }
7981 aSign = extractFloat128Sign( a );
7982 bSign = extractFloat128Sign( b );
7983 if ( aSign != bSign ) {
7984 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7985 /* zero case */
7986 return float_relation_equal;
7987 } else {
7988 return 1 - (2 * aSign);
7989 }
7990 } else {
7991 if (a.low == b.low && a.high == b.high) {
7992 return float_relation_equal;
7993 } else {
7994 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7995 }
7996 }
7997}
7998
e5a41ffa 7999int float128_compare(float128 a, float128 b, float_status *status)
1f587329 8000{
ff32e16e 8001 return float128_compare_internal(a, b, 0, status);
1f587329
BS
8002}
8003
e5a41ffa 8004int float128_compare_quiet(float128 a, float128 b, float_status *status)
1f587329 8005{
ff32e16e 8006 return float128_compare_internal(a, b, 1, status);
1f587329
BS
8007}
8008
e5a41ffa 8009floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb
PB
8010{
8011 flag aSign;
326b9e98 8012 int32_t aExp;
bb98fe42 8013 uint64_t aSig;
9ee6e8bb 8014
d1eb8f2a
AD
8015 if (floatx80_invalid_encoding(a)) {
8016 float_raise(float_flag_invalid, status);
8017 return floatx80_default_nan(status);
8018 }
9ee6e8bb
PB
8019 aSig = extractFloatx80Frac( a );
8020 aExp = extractFloatx80Exp( a );
8021 aSign = extractFloatx80Sign( a );
8022
326b9e98
AJ
8023 if ( aExp == 0x7FFF ) {
8024 if ( aSig<<1 ) {
ff32e16e 8025 return propagateFloatx80NaN(a, a, status);
326b9e98 8026 }
9ee6e8bb
PB
8027 return a;
8028 }
326b9e98 8029
3c85c37f
PM
8030 if (aExp == 0) {
8031 if (aSig == 0) {
8032 return a;
8033 }
8034 aExp++;
8035 }
69397542 8036
326b9e98
AJ
8037 if (n > 0x10000) {
8038 n = 0x10000;
8039 } else if (n < -0x10000) {
8040 n = -0x10000;
8041 }
8042
9ee6e8bb 8043 aExp += n;
a2f2d288
PM
8044 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
8045 aSign, aExp, aSig, 0, status);
9ee6e8bb 8046}
9ee6e8bb 8047
e5a41ffa 8048float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb
PB
8049{
8050 flag aSign;
326b9e98 8051 int32_t aExp;
bb98fe42 8052 uint64_t aSig0, aSig1;
9ee6e8bb
PB
8053
8054 aSig1 = extractFloat128Frac1( a );
8055 aSig0 = extractFloat128Frac0( a );
8056 aExp = extractFloat128Exp( a );
8057 aSign = extractFloat128Sign( a );
8058 if ( aExp == 0x7FFF ) {
326b9e98 8059 if ( aSig0 | aSig1 ) {
ff32e16e 8060 return propagateFloat128NaN(a, a, status);
326b9e98 8061 }
9ee6e8bb
PB
8062 return a;
8063 }
3c85c37f 8064 if (aExp != 0) {
69397542 8065 aSig0 |= LIT64( 0x0001000000000000 );
3c85c37f 8066 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 8067 return a;
3c85c37f
PM
8068 } else {
8069 aExp++;
8070 }
69397542 8071
326b9e98
AJ
8072 if (n > 0x10000) {
8073 n = 0x10000;
8074 } else if (n < -0x10000) {
8075 n = -0x10000;
8076 }
8077
69397542
PB
8078 aExp += n - 1;
8079 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 8080 , status);
9ee6e8bb
PB
8081
8082}
f6b3b108
EC
8083
8084static void __attribute__((constructor)) softfloat_init(void)
8085{
8086 union_float64 ua, ub, uc, ur;
8087
8088 if (QEMU_NO_HARDFLOAT) {
8089 return;
8090 }
8091 /*
8092 * Test that the host's FMA is not obviously broken. For example,
8093 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
8094 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
8095 */
8096 ua.s = 0x0020000000000001ULL;
8097 ub.s = 0x3ca0000000000000ULL;
8098 uc.s = 0x0020000000000000ULL;
8099 ur.h = fma(ua.h, ub.h, uc.h);
8100 if (ur.s != 0x0020000000000001ULL) {
8101 force_soft_fma = true;
8102 }
8103}