]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
migration/postcopy: not necessary to do discard when canonicalizing bitmap
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 s->float_exception_flags |= float_flag_input_denormal; \
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
185/*
186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
187 * hardfloat functions. Each combination of number of inputs and float size
188 * gets its own value.
189 */
/* The float32 variants use the softfloat primitives on every host. */
#define QEMU_HARDFLOAT_1F32_USE_FP 0
#define QEMU_HARDFLOAT_2F32_USE_FP 0
#define QEMU_HARDFLOAT_3F32_USE_FP 0

/* The float64 variants use fpclassify() on x86_64 hosts only. */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205
206/*
207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
208 * float{32,64}_is_infinity when !USE_FP.
209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
211 */
/* isinf() is selected on x86_64 and aarch64 hosts, and nowhere else. */
#if !defined(__x86_64__) && !defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   0
#else
# define QEMU_HARDFLOAT_USE_ISINF   1
#endif
217
218/*
219 * Some targets clear the FP flags before most FP operations. This prevents
220 * the use of hardfloat, since hardfloat relies on the inexact flag being
221 * already set.
222 */
/* Building with -ffast-math disables hardfloat; say so at compile time. */
#if defined(__FAST_MATH__)
# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
#endif

/*
 * QEMU_NO_HARDFLOAT is 1 for PPC targets and for -ffast-math builds.
 * When hardfloat stays enabled the softfloat entry points are additionally
 * marked noinline.
 */
#if !defined(TARGET_PPC) && !defined(__FAST_MATH__)
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#else
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
342/* Note: @fast_test and @post can be NULL */
343static inline float32
344float32_gen2(float32 xa, float32 xb, float_status *s,
345 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
346 f32_check_fn pre, f32_check_fn post,
347 f32_check_fn fast_test, soft_f32_op2_fn fast_op)
348{
349 union_float32 ua, ub, ur;
350
351 ua.s = xa;
352 ub.s = xb;
353
354 if (unlikely(!can_use_fpu(s))) {
355 goto soft;
356 }
357
358 float32_input_flush2(&ua.s, &ub.s, s);
359 if (unlikely(!pre(ua, ub))) {
360 goto soft;
361 }
362 if (fast_test && fast_test(ua, ub)) {
363 return fast_op(ua.s, ub.s, s);
364 }
365
366 ur.h = hard(ua.h, ub.h);
367 if (unlikely(f32_is_inf(ur))) {
368 s->float_exception_flags |= float_flag_overflow;
369 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
370 if (post == NULL || post(ua, ub)) {
371 goto soft;
372 }
373 }
374 return ur.s;
375
376 soft:
377 return soft(ua.s, ub.s, s);
378}
379
380static inline float64
381float64_gen2(float64 xa, float64 xb, float_status *s,
382 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
383 f64_check_fn pre, f64_check_fn post,
384 f64_check_fn fast_test, soft_f64_op2_fn fast_op)
385{
386 union_float64 ua, ub, ur;
387
388 ua.s = xa;
389 ub.s = xb;
390
391 if (unlikely(!can_use_fpu(s))) {
392 goto soft;
393 }
394
395 float64_input_flush2(&ua.s, &ub.s, s);
396 if (unlikely(!pre(ua, ub))) {
397 goto soft;
398 }
399 if (fast_test && fast_test(ua, ub)) {
400 return fast_op(ua.s, ub.s, s);
401 }
402
403 ur.h = hard(ua.h, ub.h);
404 if (unlikely(f64_is_inf(ur))) {
405 s->float_exception_flags |= float_flag_overflow;
406 } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
407 if (post == NULL || post(ua, ub)) {
408 goto soft;
409 }
410 }
411 return ur.s;
412
413 soft:
414 return soft(ua.s, ub.s, s);
415}
416
d97544c9
AB
417/*----------------------------------------------------------------------------
418| Returns the fraction bits of the single-precision floating-point value `a'.
419*----------------------------------------------------------------------------*/
420
421static inline uint32_t extractFloat32Frac(float32 a)
422{
423 return float32_val(a) & 0x007FFFFF;
424}
425
426/*----------------------------------------------------------------------------
427| Returns the exponent bits of the single-precision floating-point value `a'.
428*----------------------------------------------------------------------------*/
429
430static inline int extractFloat32Exp(float32 a)
431{
432 return (float32_val(a) >> 23) & 0xFF;
433}
434
435/*----------------------------------------------------------------------------
436| Returns the sign bit of the single-precision floating-point value `a'.
437*----------------------------------------------------------------------------*/
438
439static inline flag extractFloat32Sign(float32 a)
440{
441 return float32_val(a) >> 31;
442}
443
444/*----------------------------------------------------------------------------
445| Returns the fraction bits of the double-precision floating-point value `a'.
446*----------------------------------------------------------------------------*/
447
448static inline uint64_t extractFloat64Frac(float64 a)
449{
e9321124 450 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
d97544c9
AB
451}
452
453/*----------------------------------------------------------------------------
454| Returns the exponent bits of the double-precision floating-point value `a'.
455*----------------------------------------------------------------------------*/
456
457static inline int extractFloat64Exp(float64 a)
458{
459 return (float64_val(a) >> 52) & 0x7FF;
460}
461
462/*----------------------------------------------------------------------------
463| Returns the sign bit of the double-precision floating-point value `a'.
464*----------------------------------------------------------------------------*/
465
466static inline flag extractFloat64Sign(float64 a)
467{
468 return float64_val(a) >> 63;
469}
470
a90119b5
AB
/*
 * Classification of a decomposed floating point number.  The numeric
 * ordering matters: both NaN kinds sort at or above float_class_qnan, so
 * "cls >= float_class_qnan" tests for any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified = 0,
    float_class_zero         = 1,
    float_class_normal       = 2,
    float_class_inf          = 3,
    float_class_qnan         = 4,  /* all NaNs from here */
    float_class_snan         = 5,
} FloatClass;
484
247d1f21
RH
485/* Simple helpers for checking if, or what kind of, NaN we have */
486static inline __attribute__((unused)) bool is_nan(FloatClass c)
487{
488 return unlikely(c >= float_class_qnan);
489}
490
491static inline __attribute__((unused)) bool is_snan(FloatClass c)
492{
493 return c == float_class_snan;
494}
495
496static inline __attribute__((unused)) bool is_qnan(FloatClass c)
497{
498 return c == float_class_qnan;
499}
500
a90119b5
AB
501/*
502 * Structure holding all of the decomposed parts of a float. The
503 * exponent is unbiased and the fraction is normalized. All
504 * calculations are done with a 64 bit fraction and then rounded as
505 * appropriate for the final format.
506 *
507 * Thanks to the packed FloatClass a decent compiler should be able to
508 * fit the whole structure into registers and avoid using the stack
509 * for parameter passing.
510 */
511
512typedef struct {
513 uint64_t frac;
514 int32_t exp;
515 FloatClass cls;
516 bool sign;
517} FloatParts;
518
/* Bit position of the binary point within a decomposed 64-bit fraction. */
#define DECOMPOSED_BINARY_POINT    (64 - 2)
/* The implicit leading one sits at the binary point ... */
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
/* ... and a carry out of it lands one bit higher. */
#define DECOMPOSED_OVERFLOW_BIT    (1ull << (DECOMPOSED_BINARY_POINT + 1))
522
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction:
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    /* Field geometry. */
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    /* Rounding helpers derived from frac_size. */
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    /* Optional modifiers. */
    bool arm_althp;
} FloatFmt;
548
/*
 * Expand the FloatFmt designated initializers for a format with an E-bit
 * exponent field and an F-bit fraction field.
 *
 * Both arguments are parenthesized at every use so that expression
 * arguments (e.g. FLOAT_PARAMS(8, 20 + 3)) expand with the intended
 * precedence.  Each argument is evaluated several times, so it must be free
 * of side effects.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = (E),                                           \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                         \
    .exp_max        = (1 << (E)) - 1,                                \
    .frac_size      = (F),                                           \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                 \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),       \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1), \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1, \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1
560
561static const FloatFmt float16_params = {
562 FLOAT_PARAMS(5, 10)
563};
564
6fed16b2
AB
565static const FloatFmt float16_params_ahp = {
566 FLOAT_PARAMS(5, 10),
567 .arm_althp = true
568};
569
a90119b5
AB
570static const FloatFmt float32_params = {
571 FLOAT_PARAMS(8, 23)
572};
573
574static const FloatFmt float64_params = {
575 FLOAT_PARAMS(11, 52)
576};
577
6fff2167
AB
578/* Unpack a float to parts, but do not canonicalize. */
579static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
580{
581 const int sign_pos = fmt.frac_size + fmt.exp_size;
582
583 return (FloatParts) {
584 .cls = float_class_unclassified,
585 .sign = extract64(raw, sign_pos, 1),
586 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
587 .frac = extract64(raw, 0, fmt.frac_size),
588 };
589}
590
591static inline FloatParts float16_unpack_raw(float16 f)
592{
593 return unpack_raw(float16_params, f);
594}
595
596static inline FloatParts float32_unpack_raw(float32 f)
597{
598 return unpack_raw(float32_params, f);
599}
600
601static inline FloatParts float64_unpack_raw(float64 f)
602{
603 return unpack_raw(float64_params, f);
604}
605
606/* Pack a float from parts, but do not canonicalize. */
607static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
608{
609 const int sign_pos = fmt.frac_size + fmt.exp_size;
610 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
611 return deposit64(ret, sign_pos, 1, p.sign);
612}
613
614static inline float16 float16_pack_raw(FloatParts p)
615{
616 return make_float16(pack_raw(float16_params, p));
617}
618
619static inline float32 float32_pack_raw(FloatParts p)
620{
621 return make_float32(pack_raw(float32_params, p));
622}
623
624static inline float64 float64_pack_raw(FloatParts p)
625{
626 return make_float64(pack_raw(float64_params, p));
627}
628
0664335a
RH
629/*----------------------------------------------------------------------------
630| Functions and definitions to determine: (1) whether tininess for underflow
631| is detected before or after rounding by default, (2) what (if anything)
632| happens when exceptions are raised, (3) how signaling NaNs are distinguished
633| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
634| are propagated from function inputs to output. These details are target-
635| specific.
636*----------------------------------------------------------------------------*/
00f43279 637#include "softfloat-specialize.inc.c"
0664335a 638
6fff2167 639/* Canonicalize EXP and FRAC, setting CLS. */
f9943c7f
EC
640static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
641 float_status *status)
6fff2167 642{
ca3a3d5a 643 if (part.exp == parm->exp_max && !parm->arm_althp) {
6fff2167
AB
644 if (part.frac == 0) {
645 part.cls = float_class_inf;
646 } else {
94933df0 647 part.frac <<= parm->frac_shift;
298b468e
RH
648 part.cls = (parts_is_snan_frac(part.frac, status)
649 ? float_class_snan : float_class_qnan);
6fff2167
AB
650 }
651 } else if (part.exp == 0) {
652 if (likely(part.frac == 0)) {
653 part.cls = float_class_zero;
654 } else if (status->flush_inputs_to_zero) {
655 float_raise(float_flag_input_denormal, status);
656 part.cls = float_class_zero;
657 part.frac = 0;
658 } else {
659 int shift = clz64(part.frac) - 1;
660 part.cls = float_class_normal;
661 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
662 part.frac <<= shift;
663 }
664 } else {
665 part.cls = float_class_normal;
666 part.exp -= parm->exp_bias;
667 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
668 }
669 return part;
670}
671
672/* Round and uncanonicalize a floating-point number by parts. There
673 * are FRAC_SHIFT bits that may require rounding at the bottom of the
674 * fraction; these bits will be removed. The exponent will be biased
675 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
676 */
677
678static FloatParts round_canonical(FloatParts p, float_status *s,
679 const FloatFmt *parm)
680{
5d64abb3 681 const uint64_t frac_lsb = parm->frac_lsb;
6fff2167
AB
682 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
683 const uint64_t round_mask = parm->round_mask;
684 const uint64_t roundeven_mask = parm->roundeven_mask;
685 const int exp_max = parm->exp_max;
686 const int frac_shift = parm->frac_shift;
687 uint64_t frac, inc;
688 int exp, flags = 0;
689 bool overflow_norm;
690
691 frac = p.frac;
692 exp = p.exp;
693
694 switch (p.cls) {
695 case float_class_normal:
696 switch (s->float_rounding_mode) {
697 case float_round_nearest_even:
698 overflow_norm = false;
699 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
700 break;
701 case float_round_ties_away:
702 overflow_norm = false;
703 inc = frac_lsbm1;
704 break;
705 case float_round_to_zero:
706 overflow_norm = true;
707 inc = 0;
708 break;
709 case float_round_up:
710 inc = p.sign ? 0 : round_mask;
711 overflow_norm = p.sign;
712 break;
713 case float_round_down:
714 inc = p.sign ? round_mask : 0;
715 overflow_norm = !p.sign;
716 break;
5d64abb3
RH
717 case float_round_to_odd:
718 overflow_norm = true;
719 inc = frac & frac_lsb ? 0 : round_mask;
720 break;
6fff2167
AB
721 default:
722 g_assert_not_reached();
723 }
724
725 exp += parm->exp_bias;
726 if (likely(exp > 0)) {
727 if (frac & round_mask) {
728 flags |= float_flag_inexact;
729 frac += inc;
730 if (frac & DECOMPOSED_OVERFLOW_BIT) {
731 frac >>= 1;
732 exp++;
733 }
734 }
735 frac >>= frac_shift;
736
ca3a3d5a
AB
737 if (parm->arm_althp) {
738 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
739 if (unlikely(exp > exp_max)) {
740 /* Overflow. Return the maximum normal. */
741 flags = float_flag_invalid;
742 exp = exp_max;
743 frac = -1;
744 }
745 } else if (unlikely(exp >= exp_max)) {
6fff2167
AB
746 flags |= float_flag_overflow | float_flag_inexact;
747 if (overflow_norm) {
748 exp = exp_max - 1;
749 frac = -1;
750 } else {
751 p.cls = float_class_inf;
752 goto do_inf;
753 }
754 }
755 } else if (s->flush_to_zero) {
756 flags |= float_flag_output_denormal;
757 p.cls = float_class_zero;
758 goto do_zero;
759 } else {
760 bool is_tiny = (s->float_detect_tininess
761 == float_tininess_before_rounding)
762 || (exp < 0)
763 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
764
765 shift64RightJamming(frac, 1 - exp, &frac);
766 if (frac & round_mask) {
767 /* Need to recompute round-to-even. */
5d64abb3
RH
768 switch (s->float_rounding_mode) {
769 case float_round_nearest_even:
6fff2167
AB
770 inc = ((frac & roundeven_mask) != frac_lsbm1
771 ? frac_lsbm1 : 0);
5d64abb3
RH
772 break;
773 case float_round_to_odd:
774 inc = frac & frac_lsb ? 0 : round_mask;
775 break;
6fff2167
AB
776 }
777 flags |= float_flag_inexact;
778 frac += inc;
779 }
780
781 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
782 frac >>= frac_shift;
783
784 if (is_tiny && (flags & float_flag_inexact)) {
785 flags |= float_flag_underflow;
786 }
787 if (exp == 0 && frac == 0) {
788 p.cls = float_class_zero;
789 }
790 }
791 break;
792
793 case float_class_zero:
794 do_zero:
795 exp = 0;
796 frac = 0;
797 break;
798
799 case float_class_inf:
800 do_inf:
ca3a3d5a 801 assert(!parm->arm_althp);
6fff2167
AB
802 exp = exp_max;
803 frac = 0;
804 break;
805
806 case float_class_qnan:
807 case float_class_snan:
ca3a3d5a 808 assert(!parm->arm_althp);
6fff2167 809 exp = exp_max;
94933df0 810 frac >>= parm->frac_shift;
6fff2167
AB
811 break;
812
813 default:
814 g_assert_not_reached();
815 }
816
817 float_raise(flags, s);
818 p.exp = exp;
819 p.frac = frac;
820 return p;
821}
822
6fed16b2
AB
823/* Explicit FloatFmt version */
824static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
825 const FloatFmt *params)
826{
f9943c7f 827 return sf_canonicalize(float16_unpack_raw(f), params, s);
6fed16b2
AB
828}
829
6fff2167
AB
830static FloatParts float16_unpack_canonical(float16 f, float_status *s)
831{
6fed16b2
AB
832 return float16a_unpack_canonical(f, s, &float16_params);
833}
834
835static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
836 const FloatFmt *params)
837{
838 return float16_pack_raw(round_canonical(p, s, params));
6fff2167
AB
839}
840
841static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
842{
6fed16b2 843 return float16a_round_pack_canonical(p, s, &float16_params);
6fff2167
AB
844}
845
846static FloatParts float32_unpack_canonical(float32 f, float_status *s)
847{
f9943c7f 848 return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
6fff2167
AB
849}
850
851static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
852{
0bcfbcbe 853 return float32_pack_raw(round_canonical(p, s, &float32_params));
6fff2167
AB
854}
855
856static FloatParts float64_unpack_canonical(float64 f, float_status *s)
857{
f9943c7f 858 return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
6fff2167
AB
859}
860
861static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
862{
0bcfbcbe 863 return float64_pack_raw(round_canonical(p, s, &float64_params));
6fff2167
AB
864}
865
dbe4d53a
AB
866static FloatParts return_nan(FloatParts a, float_status *s)
867{
868 switch (a.cls) {
869 case float_class_snan:
870 s->float_exception_flags |= float_flag_invalid;
0bcfbcbe 871 a = parts_silence_nan(a, s);
dbe4d53a
AB
872 /* fall through */
873 case float_class_qnan:
874 if (s->default_nan_mode) {
f7e598e2 875 return parts_default_nan(s);
dbe4d53a
AB
876 }
877 break;
878
879 default:
880 g_assert_not_reached();
881 }
882 return a;
883}
884
6fff2167
AB
885static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
886{
887 if (is_snan(a.cls) || is_snan(b.cls)) {
888 s->float_exception_flags |= float_flag_invalid;
889 }
890
891 if (s->default_nan_mode) {
f7e598e2 892 return parts_default_nan(s);
6fff2167 893 } else {
4f251cfd 894 if (pickNaN(a.cls, b.cls,
6fff2167
AB
895 a.frac > b.frac ||
896 (a.frac == b.frac && a.sign < b.sign))) {
897 a = b;
898 }
0bcfbcbe
RH
899 if (is_snan(a.cls)) {
900 return parts_silence_nan(a, s);
901 }
6fff2167
AB
902 }
903 return a;
904}
905
d446830a
AB
906static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
907 bool inf_zero, float_status *s)
908{
1839189b
PM
909 int which;
910
d446830a
AB
911 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
912 s->float_exception_flags |= float_flag_invalid;
913 }
914
3bd2dec1 915 which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
1839189b 916
d446830a 917 if (s->default_nan_mode) {
1839189b
PM
918 /* Note that this check is after pickNaNMulAdd so that function
919 * has an opportunity to set the Invalid flag.
920 */
f7e598e2 921 which = 3;
1839189b 922 }
d446830a 923
1839189b
PM
924 switch (which) {
925 case 0:
926 break;
927 case 1:
928 a = b;
929 break;
930 case 2:
931 a = c;
932 break;
933 case 3:
f7e598e2 934 return parts_default_nan(s);
1839189b
PM
935 default:
936 g_assert_not_reached();
d446830a 937 }
1839189b 938
0bcfbcbe
RH
939 if (is_snan(a.cls)) {
940 return parts_silence_nan(a, s);
941 }
d446830a
AB
942 return a;
943}
944
6fff2167
AB
945/*
946 * Returns the result of adding or subtracting the values of the
947 * floating-point values `a' and `b'. The operation is performed
948 * according to the IEC/IEEE Standard for Binary Floating-Point
949 * Arithmetic.
950 */
951
952static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
953 float_status *s)
954{
955 bool a_sign = a.sign;
956 bool b_sign = b.sign ^ subtract;
957
958 if (a_sign != b_sign) {
959 /* Subtraction */
960
961 if (a.cls == float_class_normal && b.cls == float_class_normal) {
962 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
963 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
964 a.frac = a.frac - b.frac;
965 } else {
966 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
967 a.frac = b.frac - a.frac;
968 a.exp = b.exp;
969 a_sign ^= 1;
970 }
971
972 if (a.frac == 0) {
973 a.cls = float_class_zero;
974 a.sign = s->float_rounding_mode == float_round_down;
975 } else {
976 int shift = clz64(a.frac) - 1;
977 a.frac = a.frac << shift;
978 a.exp = a.exp - shift;
979 a.sign = a_sign;
980 }
981 return a;
982 }
983 if (is_nan(a.cls) || is_nan(b.cls)) {
984 return pick_nan(a, b, s);
985 }
986 if (a.cls == float_class_inf) {
987 if (b.cls == float_class_inf) {
988 float_raise(float_flag_invalid, s);
f7e598e2 989 return parts_default_nan(s);
6fff2167
AB
990 }
991 return a;
992 }
993 if (a.cls == float_class_zero && b.cls == float_class_zero) {
994 a.sign = s->float_rounding_mode == float_round_down;
995 return a;
996 }
997 if (a.cls == float_class_zero || b.cls == float_class_inf) {
998 b.sign = a_sign ^ 1;
999 return b;
1000 }
1001 if (b.cls == float_class_zero) {
1002 return a;
1003 }
1004 } else {
1005 /* Addition */
1006 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1007 if (a.exp > b.exp) {
1008 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
1009 } else if (a.exp < b.exp) {
1010 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
1011 a.exp = b.exp;
1012 }
1013 a.frac += b.frac;
1014 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
64d450a0 1015 shift64RightJamming(a.frac, 1, &a.frac);
6fff2167
AB
1016 a.exp += 1;
1017 }
1018 return a;
1019 }
1020 if (is_nan(a.cls) || is_nan(b.cls)) {
1021 return pick_nan(a, b, s);
1022 }
1023 if (a.cls == float_class_inf || b.cls == float_class_zero) {
1024 return a;
1025 }
1026 if (b.cls == float_class_inf || a.cls == float_class_zero) {
1027 b.sign = b_sign;
1028 return b;
1029 }
1030 }
1031 g_assert_not_reached();
1032}
1033
1034/*
1035 * Returns the result of adding or subtracting the floating-point
1036 * values `a' and `b'. The operation is performed according to the
1037 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1038 */
1039
97ff87c0 1040float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
6fff2167
AB
1041{
1042 FloatParts pa = float16_unpack_canonical(a, status);
1043 FloatParts pb = float16_unpack_canonical(b, status);
1044 FloatParts pr = addsub_floats(pa, pb, false, status);
1045
1046 return float16_round_pack_canonical(pr, status);
1047}
1048
1b615d48
EC
1049float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
1050{
1051 FloatParts pa = float16_unpack_canonical(a, status);
1052 FloatParts pb = float16_unpack_canonical(b, status);
1053 FloatParts pr = addsub_floats(pa, pb, true, status);
1054
1055 return float16_round_pack_canonical(pr, status);
1056}
1057
1058static float32 QEMU_SOFTFLOAT_ATTR
1059soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
6fff2167
AB
1060{
1061 FloatParts pa = float32_unpack_canonical(a, status);
1062 FloatParts pb = float32_unpack_canonical(b, status);
1b615d48 1063 FloatParts pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1064
1065 return float32_round_pack_canonical(pr, status);
1066}
1067
1b615d48
EC
1068static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
1069{
1070 return soft_f32_addsub(a, b, false, status);
1071}
1072
1073static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1074{
1075 return soft_f32_addsub(a, b, true, status);
1076}
1077
1078static float64 QEMU_SOFTFLOAT_ATTR
1079soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
6fff2167
AB
1080{
1081 FloatParts pa = float64_unpack_canonical(a, status);
1082 FloatParts pb = float64_unpack_canonical(b, status);
1b615d48 1083 FloatParts pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1084
1085 return float64_round_pack_canonical(pr, status);
1086}
1087
1b615d48 1088static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1089{
1b615d48
EC
1090 return soft_f64_addsub(a, b, false, status);
1091}
6fff2167 1092
1b615d48
EC
1093static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1094{
1095 return soft_f64_addsub(a, b, true, status);
6fff2167
AB
1096}
1097
/* Host-FPU single-precision addition; passed to float32_addsub() as `hard'. */
static float hard_f32_add(float a, float b)
{
    return a + b;
}
6fff2167 1102
1b615d48
EC
/* Host-FPU single-precision subtraction; passed to float32_addsub() as `hard'. */
static float hard_f32_sub(float a, float b)
{
    return a - b;
}
1107
/* Host-FPU double-precision addition; passed to float64_addsub() as `hard'. */
static double hard_f64_add(double a, double b)
{
    return a + b;
}
6fff2167 1112
1b615d48
EC
/* Host-FPU double-precision subtraction; passed to float64_addsub() as `hard'. */
static double hard_f64_sub(double a, double b)
{
    return a - b;
}
1117
1118static bool f32_addsub_post(union_float32 a, union_float32 b)
1119{
1120 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1121 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1122 }
1123 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1124}
1125
1126static bool f64_addsub_post(union_float64 a, union_float64 b)
1127{
1128 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1129 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1130 } else {
1131 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1132 }
1133}
1134
1135static float32 float32_addsub(float32 a, float32 b, float_status *s,
1136 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1137{
1138 return float32_gen2(a, b, s, hard, soft,
1139 f32_is_zon2, f32_addsub_post, NULL, NULL);
1140}
1141
1142static float64 float64_addsub(float64 a, float64 b, float_status *s,
1143 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1144{
1145 return float64_gen2(a, b, s, hard, soft,
1146 f64_is_zon2, f64_addsub_post, NULL, NULL);
1147}
1148
1149float32 QEMU_FLATTEN
1150float32_add(float32 a, float32 b, float_status *s)
1151{
1152 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1153}
1154
1155float32 QEMU_FLATTEN
1156float32_sub(float32 a, float32 b, float_status *s)
1157{
1158 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1159}
1160
1161float64 QEMU_FLATTEN
1162float64_add(float64 a, float64 b, float_status *s)
1163{
1164 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1165}
1166
1167float64 QEMU_FLATTEN
1168float64_sub(float64 a, float64 b, float_status *s)
1169{
1170 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1171}
1172
74d707e2
AB
1173/*
1174 * Returns the result of multiplying the floating-point values `a' and
1175 * `b'. The operation is performed according to the IEC/IEEE Standard
1176 * for Binary Floating-Point Arithmetic.
1177 */
1178
1179static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
1180{
1181 bool sign = a.sign ^ b.sign;
1182
1183 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1184 uint64_t hi, lo;
1185 int exp = a.exp + b.exp;
1186
1187 mul64To128(a.frac, b.frac, &hi, &lo);
1188 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1189 if (lo & DECOMPOSED_OVERFLOW_BIT) {
1190 shift64RightJamming(lo, 1, &lo);
1191 exp += 1;
1192 }
1193
1194 /* Re-use a */
1195 a.exp = exp;
1196 a.sign = sign;
1197 a.frac = lo;
1198 return a;
1199 }
1200 /* handle all the NaN cases */
1201 if (is_nan(a.cls) || is_nan(b.cls)) {
1202 return pick_nan(a, b, s);
1203 }
1204 /* Inf * Zero == NaN */
1205 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
1206 (a.cls == float_class_zero && b.cls == float_class_inf)) {
1207 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1208 return parts_default_nan(s);
74d707e2
AB
1209 }
1210 /* Multiply by 0 or Inf */
1211 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1212 a.sign = sign;
1213 return a;
1214 }
1215 if (b.cls == float_class_inf || b.cls == float_class_zero) {
1216 b.sign = sign;
1217 return b;
1218 }
1219 g_assert_not_reached();
1220}
1221
97ff87c0 1222float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2
AB
1223{
1224 FloatParts pa = float16_unpack_canonical(a, status);
1225 FloatParts pb = float16_unpack_canonical(b, status);
1226 FloatParts pr = mul_floats(pa, pb, status);
1227
1228 return float16_round_pack_canonical(pr, status);
1229}
1230
2dfabc86
EC
1231static float32 QEMU_SOFTFLOAT_ATTR
1232soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2
AB
1233{
1234 FloatParts pa = float32_unpack_canonical(a, status);
1235 FloatParts pb = float32_unpack_canonical(b, status);
1236 FloatParts pr = mul_floats(pa, pb, status);
1237
1238 return float32_round_pack_canonical(pr, status);
1239}
1240
2dfabc86
EC
1241static float64 QEMU_SOFTFLOAT_ATTR
1242soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2
AB
1243{
1244 FloatParts pa = float64_unpack_canonical(a, status);
1245 FloatParts pb = float64_unpack_canonical(b, status);
1246 FloatParts pr = mul_floats(pa, pb, status);
1247
1248 return float64_round_pack_canonical(pr, status);
1249}
1250
2dfabc86
EC
/* Host-FPU single-precision multiplication; passed to float32_gen2() as `hard'. */
static float hard_f32_mul(float a, float b)
{
    return a * b;
}
1255
/* Host-FPU double-precision multiplication; passed to float64_gen2() as `hard'. */
static double hard_f64_mul(double a, double b)
{
    return a * b;
}
1260
1261static bool f32_mul_fast_test(union_float32 a, union_float32 b)
1262{
1263 return float32_is_zero(a.s) || float32_is_zero(b.s);
1264}
1265
1266static bool f64_mul_fast_test(union_float64 a, union_float64 b)
1267{
1268 return float64_is_zero(a.s) || float64_is_zero(b.s);
1269}
1270
1271static float32 f32_mul_fast_op(float32 a, float32 b, float_status *s)
1272{
1273 bool signbit = float32_is_neg(a) ^ float32_is_neg(b);
1274
1275 return float32_set_sign(float32_zero, signbit);
1276}
1277
1278static float64 f64_mul_fast_op(float64 a, float64 b, float_status *s)
1279{
1280 bool signbit = float64_is_neg(a) ^ float64_is_neg(b);
1281
1282 return float64_set_sign(float64_zero, signbit);
1283}
1284
1285float32 QEMU_FLATTEN
1286float32_mul(float32 a, float32 b, float_status *s)
1287{
1288 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
1289 f32_is_zon2, NULL, f32_mul_fast_test, f32_mul_fast_op);
1290}
1291
1292float64 QEMU_FLATTEN
1293float64_mul(float64 a, float64 b, float_status *s)
1294{
1295 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
1296 f64_is_zon2, NULL, f64_mul_fast_test, f64_mul_fast_op);
1297}
1298
d446830a
AB
1299/*
1300 * Returns the result of multiplying the floating-point values `a' and
1301 * `b' then adding 'c', with no intermediate rounding step after the
1302 * multiplication. The operation is performed according to the
1303 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
1304 * The flags argument allows the caller to select negation of the
1305 * addend, the intermediate product, or the final result. (The
1306 * difference between this and having the caller do a separate
1307 * negation is that negating externally will flip the sign bit on
1308 * NaNs.)
1309 */
1310
1311static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
1312 int flags, float_status *s)
1313{
1314 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
1315 ((1 << float_class_inf) | (1 << float_class_zero));
1316 bool p_sign;
1317 bool sign_flip = flags & float_muladd_negate_result;
1318 FloatClass p_class;
1319 uint64_t hi, lo;
1320 int p_exp;
1321
1322 /* It is implementation-defined whether the cases of (0,inf,qnan)
1323 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
1324 * they return if they do), so we have to hand this information
1325 * off to the target-specific pick-a-NaN routine.
1326 */
1327 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
1328 return pick_nan_muladd(a, b, c, inf_zero, s);
1329 }
1330
1331 if (inf_zero) {
1332 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1333 return parts_default_nan(s);
d446830a
AB
1334 }
1335
1336 if (flags & float_muladd_negate_c) {
1337 c.sign ^= 1;
1338 }
1339
1340 p_sign = a.sign ^ b.sign;
1341
1342 if (flags & float_muladd_negate_product) {
1343 p_sign ^= 1;
1344 }
1345
1346 if (a.cls == float_class_inf || b.cls == float_class_inf) {
1347 p_class = float_class_inf;
1348 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
1349 p_class = float_class_zero;
1350 } else {
1351 p_class = float_class_normal;
1352 }
1353
1354 if (c.cls == float_class_inf) {
1355 if (p_class == float_class_inf && p_sign != c.sign) {
1356 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1357 return parts_default_nan(s);
d446830a
AB
1358 } else {
1359 a.cls = float_class_inf;
1360 a.sign = c.sign ^ sign_flip;
f7e598e2 1361 return a;
d446830a 1362 }
d446830a
AB
1363 }
1364
1365 if (p_class == float_class_inf) {
1366 a.cls = float_class_inf;
1367 a.sign = p_sign ^ sign_flip;
1368 return a;
1369 }
1370
1371 if (p_class == float_class_zero) {
1372 if (c.cls == float_class_zero) {
1373 if (p_sign != c.sign) {
1374 p_sign = s->float_rounding_mode == float_round_down;
1375 }
1376 c.sign = p_sign;
1377 } else if (flags & float_muladd_halve_result) {
1378 c.exp -= 1;
1379 }
1380 c.sign ^= sign_flip;
1381 return c;
1382 }
1383
1384 /* a & b should be normals now... */
1385 assert(a.cls == float_class_normal &&
1386 b.cls == float_class_normal);
1387
1388 p_exp = a.exp + b.exp;
1389
1390 /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
1391 * result.
1392 */
1393 mul64To128(a.frac, b.frac, &hi, &lo);
1394 /* binary point now at bit 124 */
1395
1396 /* check for overflow */
1397 if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
1398 shift128RightJamming(hi, lo, 1, &hi, &lo);
1399 p_exp += 1;
1400 }
1401
1402 /* + add/sub */
1403 if (c.cls == float_class_zero) {
1404 /* move binary point back to 62 */
1405 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1406 } else {
1407 int exp_diff = p_exp - c.exp;
1408 if (p_sign == c.sign) {
1409 /* Addition */
1410 if (exp_diff <= 0) {
1411 shift128RightJamming(hi, lo,
1412 DECOMPOSED_BINARY_POINT - exp_diff,
1413 &hi, &lo);
1414 lo += c.frac;
1415 p_exp = c.exp;
1416 } else {
1417 uint64_t c_hi, c_lo;
1418 /* shift c to the same binary point as the product (124) */
1419 c_hi = c.frac >> 2;
1420 c_lo = 0;
1421 shift128RightJamming(c_hi, c_lo,
1422 exp_diff,
1423 &c_hi, &c_lo);
1424 add128(hi, lo, c_hi, c_lo, &hi, &lo);
1425 /* move binary point back to 62 */
1426 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1427 }
1428
1429 if (lo & DECOMPOSED_OVERFLOW_BIT) {
1430 shift64RightJamming(lo, 1, &lo);
1431 p_exp += 1;
1432 }
1433
1434 } else {
1435 /* Subtraction */
1436 uint64_t c_hi, c_lo;
1437 /* make C binary point match product at bit 124 */
1438 c_hi = c.frac >> 2;
1439 c_lo = 0;
1440
1441 if (exp_diff <= 0) {
1442 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1443 if (exp_diff == 0
1444 &&
1445 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1446 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1447 } else {
1448 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1449 p_sign ^= 1;
1450 p_exp = c.exp;
1451 }
1452 } else {
1453 shift128RightJamming(c_hi, c_lo,
1454 exp_diff,
1455 &c_hi, &c_lo);
1456 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1457 }
1458
1459 if (hi == 0 && lo == 0) {
1460 a.cls = float_class_zero;
1461 a.sign = s->float_rounding_mode == float_round_down;
1462 a.sign ^= sign_flip;
1463 return a;
1464 } else {
1465 int shift;
1466 if (hi != 0) {
1467 shift = clz64(hi);
1468 } else {
1469 shift = clz64(lo) + 64;
1470 }
1471 /* Normalizing to a binary point of 124 is the
1472 correct adjust for the exponent. However since we're
1473 shifting, we might as well put the binary point back
1474 at 62 where we really want it. Therefore shift as
1475 if we're leaving 1 bit at the top of the word, but
1476 adjust the exponent as if we're leaving 3 bits. */
1477 shift -= 1;
1478 if (shift >= 64) {
1479 lo = lo << (shift - 64);
1480 } else {
1481 hi = (hi << shift) | (lo >> (64 - shift));
1482 lo = hi | ((lo << shift) != 0);
1483 }
1484 p_exp -= shift - 2;
1485 }
1486 }
1487 }
1488
1489 if (flags & float_muladd_halve_result) {
1490 p_exp -= 1;
1491 }
1492
1493 /* finally prepare our result */
1494 a.cls = float_class_normal;
1495 a.sign = p_sign ^ sign_flip;
1496 a.exp = p_exp;
1497 a.frac = lo;
1498
1499 return a;
1500}
1501
97ff87c0 1502float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
d446830a
AB
1503 int flags, float_status *status)
1504{
1505 FloatParts pa = float16_unpack_canonical(a, status);
1506 FloatParts pb = float16_unpack_canonical(b, status);
1507 FloatParts pc = float16_unpack_canonical(c, status);
1508 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1509
1510 return float16_round_pack_canonical(pr, status);
1511}
1512
ccf770ba
EC
1513static float32 QEMU_SOFTFLOAT_ATTR
1514soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1515 float_status *status)
d446830a
AB
1516{
1517 FloatParts pa = float32_unpack_canonical(a, status);
1518 FloatParts pb = float32_unpack_canonical(b, status);
1519 FloatParts pc = float32_unpack_canonical(c, status);
1520 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1521
1522 return float32_round_pack_canonical(pr, status);
1523}
1524
ccf770ba
EC
1525static float64 QEMU_SOFTFLOAT_ATTR
1526soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1527 float_status *status)
d446830a
AB
1528{
1529 FloatParts pa = float64_unpack_canonical(a, status);
1530 FloatParts pb = float64_unpack_canonical(b, status);
1531 FloatParts pc = float64_unpack_canonical(c, status);
1532 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1533
1534 return float64_round_pack_canonical(pr, status);
1535}
1536
f6b3b108
EC
1537static bool force_soft_fma;
1538
ccf770ba
EC
1539float32 QEMU_FLATTEN
1540float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1541{
1542 union_float32 ua, ub, uc, ur;
1543
1544 ua.s = xa;
1545 ub.s = xb;
1546 uc.s = xc;
1547
1548 if (unlikely(!can_use_fpu(s))) {
1549 goto soft;
1550 }
1551 if (unlikely(flags & float_muladd_halve_result)) {
1552 goto soft;
1553 }
1554
1555 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1556 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1557 goto soft;
1558 }
f6b3b108
EC
1559
1560 if (unlikely(force_soft_fma)) {
1561 goto soft;
1562 }
1563
ccf770ba
EC
1564 /*
1565 * When (a || b) == 0, there's no need to check for under/over flow,
1566 * since we know the addend is (normal || 0) and the product is 0.
1567 */
1568 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1569 union_float32 up;
1570 bool prod_sign;
1571
1572 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1573 prod_sign ^= !!(flags & float_muladd_negate_product);
1574 up.s = float32_set_sign(float32_zero, prod_sign);
1575
1576 if (flags & float_muladd_negate_c) {
1577 uc.h = -uc.h;
1578 }
1579 ur.h = up.h + uc.h;
1580 } else {
896f51fb
KC
1581 union_float32 ua_orig = ua;
1582 union_float32 uc_orig = uc;
1583
ccf770ba
EC
1584 if (flags & float_muladd_negate_product) {
1585 ua.h = -ua.h;
1586 }
1587 if (flags & float_muladd_negate_c) {
1588 uc.h = -uc.h;
1589 }
1590
1591 ur.h = fmaf(ua.h, ub.h, uc.h);
1592
1593 if (unlikely(f32_is_inf(ur))) {
1594 s->float_exception_flags |= float_flag_overflow;
1595 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
1596 ua = ua_orig;
1597 uc = uc_orig;
ccf770ba
EC
1598 goto soft;
1599 }
1600 }
1601 if (flags & float_muladd_negate_result) {
1602 return float32_chs(ur.s);
1603 }
1604 return ur.s;
1605
1606 soft:
1607 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1608}
1609
1610float64 QEMU_FLATTEN
1611float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1612{
1613 union_float64 ua, ub, uc, ur;
1614
1615 ua.s = xa;
1616 ub.s = xb;
1617 uc.s = xc;
1618
1619 if (unlikely(!can_use_fpu(s))) {
1620 goto soft;
1621 }
1622 if (unlikely(flags & float_muladd_halve_result)) {
1623 goto soft;
1624 }
1625
1626 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1627 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1628 goto soft;
1629 }
f6b3b108
EC
1630
1631 if (unlikely(force_soft_fma)) {
1632 goto soft;
1633 }
1634
ccf770ba
EC
1635 /*
1636 * When (a || b) == 0, there's no need to check for under/over flow,
1637 * since we know the addend is (normal || 0) and the product is 0.
1638 */
1639 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1640 union_float64 up;
1641 bool prod_sign;
1642
1643 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1644 prod_sign ^= !!(flags & float_muladd_negate_product);
1645 up.s = float64_set_sign(float64_zero, prod_sign);
1646
1647 if (flags & float_muladd_negate_c) {
1648 uc.h = -uc.h;
1649 }
1650 ur.h = up.h + uc.h;
1651 } else {
896f51fb
KC
1652 union_float64 ua_orig = ua;
1653 union_float64 uc_orig = uc;
1654
ccf770ba
EC
1655 if (flags & float_muladd_negate_product) {
1656 ua.h = -ua.h;
1657 }
1658 if (flags & float_muladd_negate_c) {
1659 uc.h = -uc.h;
1660 }
1661
1662 ur.h = fma(ua.h, ub.h, uc.h);
1663
1664 if (unlikely(f64_is_inf(ur))) {
1665 s->float_exception_flags |= float_flag_overflow;
1666 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
1667 ua = ua_orig;
1668 uc = uc_orig;
ccf770ba
EC
1669 goto soft;
1670 }
1671 }
1672 if (flags & float_muladd_negate_result) {
1673 return float64_chs(ur.s);
1674 }
1675 return ur.s;
1676
1677 soft:
1678 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1679}
1680
cf07323d
AB
1681/*
1682 * Returns the result of dividing the floating-point value `a' by the
1683 * corresponding value `b'. The operation is performed according to
1684 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1685 */
1686
1687static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1688{
1689 bool sign = a.sign ^ b.sign;
1690
1691 if (a.cls == float_class_normal && b.cls == float_class_normal) {
5dfbc9e4 1692 uint64_t n0, n1, q, r;
cf07323d 1693 int exp = a.exp - b.exp;
5dfbc9e4
RH
1694
1695 /*
1696 * We want a 2*N / N-bit division to produce exactly an N-bit
1697 * result, so that we do not lose any precision and so that we
1698 * do not have to renormalize afterward. If A.frac < B.frac,
1699 * then division would produce an (N-1)-bit result; shift A left
1700 * by one to produce the an N-bit result, and decrement the
1701 * exponent to match.
1702 *
1703 * The udiv_qrnnd algorithm that we're using requires normalization,
1704 * i.e. the msb of the denominator must be set. Since we know that
1705 * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
1706 * by one (more), and the remainder must be shifted right by one.
1707 */
cf07323d
AB
1708 if (a.frac < b.frac) {
1709 exp -= 1;
5dfbc9e4 1710 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
cf07323d 1711 } else {
5dfbc9e4 1712 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
cf07323d 1713 }
5dfbc9e4
RH
1714 q = udiv_qrnnd(&r, n1, n0, b.frac << 1);
1715
1716 /*
1717 * Set lsb if there is a remainder, to set inexact.
1718 * As mentioned above, to find the actual value of the remainder we
1719 * would need to shift right, but (1) we are only concerned about
1720 * non-zero-ness, and (2) the remainder will always be even because
1721 * both inputs to the division primitive are even.
1722 */
1723 a.frac = q | (r != 0);
cf07323d
AB
1724 a.sign = sign;
1725 a.exp = exp;
1726 return a;
1727 }
1728 /* handle all the NaN cases */
1729 if (is_nan(a.cls) || is_nan(b.cls)) {
1730 return pick_nan(a, b, s);
1731 }
1732 /* 0/0 or Inf/Inf */
1733 if (a.cls == b.cls
1734 &&
1735 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1736 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1737 return parts_default_nan(s);
cf07323d 1738 }
9cb4e398
AB
1739 /* Inf / x or 0 / x */
1740 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1741 a.sign = sign;
1742 return a;
1743 }
cf07323d
AB
1744 /* Div 0 => Inf */
1745 if (b.cls == float_class_zero) {
1746 s->float_exception_flags |= float_flag_divbyzero;
1747 a.cls = float_class_inf;
1748 a.sign = sign;
1749 return a;
1750 }
cf07323d
AB
1751 /* Div by Inf */
1752 if (b.cls == float_class_inf) {
1753 a.cls = float_class_zero;
1754 a.sign = sign;
1755 return a;
1756 }
1757 g_assert_not_reached();
1758}
1759
1760float16 float16_div(float16 a, float16 b, float_status *status)
1761{
1762 FloatParts pa = float16_unpack_canonical(a, status);
1763 FloatParts pb = float16_unpack_canonical(b, status);
1764 FloatParts pr = div_floats(pa, pb, status);
1765
1766 return float16_round_pack_canonical(pr, status);
1767}
1768
4a629561
EC
1769static float32 QEMU_SOFTFLOAT_ATTR
1770soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d
AB
1771{
1772 FloatParts pa = float32_unpack_canonical(a, status);
1773 FloatParts pb = float32_unpack_canonical(b, status);
1774 FloatParts pr = div_floats(pa, pb, status);
1775
1776 return float32_round_pack_canonical(pr, status);
1777}
1778
4a629561
EC
1779static float64 QEMU_SOFTFLOAT_ATTR
1780soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d
AB
1781{
1782 FloatParts pa = float64_unpack_canonical(a, status);
1783 FloatParts pb = float64_unpack_canonical(b, status);
1784 FloatParts pr = div_floats(pa, pb, status);
1785
1786 return float64_round_pack_canonical(pr, status);
1787}
1788
4a629561
EC
/* Host-FPU single-precision division; passed to float32_gen2() as `hard'. */
static float hard_f32_div(float a, float b)
{
    return a / b;
}
1793
/* Host-FPU double-precision division; passed to float64_gen2() as `hard'. */
static double hard_f64_div(double a, double b)
{
    return a / b;
}
1798
1799static bool f32_div_pre(union_float32 a, union_float32 b)
1800{
1801 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1802 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1803 fpclassify(b.h) == FP_NORMAL;
1804 }
1805 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1806}
1807
1808static bool f64_div_pre(union_float64 a, union_float64 b)
1809{
1810 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1811 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1812 fpclassify(b.h) == FP_NORMAL;
1813 }
1814 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1815}
1816
1817static bool f32_div_post(union_float32 a, union_float32 b)
1818{
1819 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1820 return fpclassify(a.h) != FP_ZERO;
1821 }
1822 return !float32_is_zero(a.s);
1823}
1824
1825static bool f64_div_post(union_float64 a, union_float64 b)
1826{
1827 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1828 return fpclassify(a.h) != FP_ZERO;
1829 }
1830 return !float64_is_zero(a.s);
1831}
1832
1833float32 QEMU_FLATTEN
1834float32_div(float32 a, float32 b, float_status *s)
1835{
1836 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
1837 f32_div_pre, f32_div_post, NULL, NULL);
1838}
1839
1840float64 QEMU_FLATTEN
1841float64_div(float64 a, float64 b, float_status *s)
1842{
1843 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
1844 f64_div_pre, f64_div_post, NULL, NULL);
1845}
1846
6fed16b2
AB
1847/*
1848 * Float to Float conversions
1849 *
1850 * Returns the result of converting one float format to another. The
1851 * conversion is performed according to the IEC/IEEE Standard for
1852 * Binary Floating-Point Arithmetic.
1853 *
1854 * The float_to_float helper only needs to take care of raising
1855 * invalid exceptions and handling the conversion on NaNs.
1856 */
1857
1858static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1859 float_status *s)
1860{
1861 if (dstf->arm_althp) {
1862 switch (a.cls) {
1863 case float_class_qnan:
1864 case float_class_snan:
1865 /* There is no NaN in the destination format. Raise Invalid
1866 * and return a zero with the sign of the input NaN.
1867 */
1868 s->float_exception_flags |= float_flag_invalid;
1869 a.cls = float_class_zero;
1870 a.frac = 0;
1871 a.exp = 0;
1872 break;
1873
1874 case float_class_inf:
1875 /* There is no Inf in the destination format. Raise Invalid
1876 * and return the maximum normal with the correct sign.
1877 */
1878 s->float_exception_flags |= float_flag_invalid;
1879 a.cls = float_class_normal;
1880 a.exp = dstf->exp_max;
1881 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1882 break;
1883
1884 default:
1885 break;
1886 }
1887 } else if (is_nan(a.cls)) {
1888 if (is_snan(a.cls)) {
1889 s->float_exception_flags |= float_flag_invalid;
1890 a = parts_silence_nan(a, s);
1891 }
1892 if (s->default_nan_mode) {
1893 return parts_default_nan(s);
1894 }
1895 }
1896 return a;
1897}
1898
1899float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1900{
1901 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1902 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1903 FloatParts pr = float_to_float(p, &float32_params, s);
1904 return float32_round_pack_canonical(pr, s);
1905}
1906
1907float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1908{
1909 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1910 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1911 FloatParts pr = float_to_float(p, &float64_params, s);
1912 return float64_round_pack_canonical(pr, s);
1913}
1914
1915float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1916{
1917 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1918 FloatParts p = float32_unpack_canonical(a, s);
1919 FloatParts pr = float_to_float(p, fmt16, s);
1920 return float16a_round_pack_canonical(pr, s, fmt16);
1921}
1922
1923float64 float32_to_float64(float32 a, float_status *s)
1924{
1925 FloatParts p = float32_unpack_canonical(a, s);
1926 FloatParts pr = float_to_float(p, &float64_params, s);
1927 return float64_round_pack_canonical(pr, s);
1928}
1929
1930float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1931{
1932 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1933 FloatParts p = float64_unpack_canonical(a, s);
1934 FloatParts pr = float_to_float(p, fmt16, s);
1935 return float16a_round_pack_canonical(pr, s, fmt16);
1936}
1937
1938float32 float64_to_float32(float64 a, float_status *s)
1939{
1940 FloatParts p = float64_unpack_canonical(a, s);
1941 FloatParts pr = float_to_float(p, &float32_params, s);
1942 return float32_round_pack_canonical(pr, s);
1943}
1944
dbe4d53a
AB
1945/*
1946 * Rounds the floating-point value `a' to an integer, and returns the
1947 * result as a floating-point value. The operation is performed
1948 * according to the IEC/IEEE Standard for Binary Floating-Point
1949 * Arithmetic.
1950 */
1951
2f6c74be
RH
1952static FloatParts round_to_int(FloatParts a, int rmode,
1953 int scale, float_status *s)
dbe4d53a 1954{
2f6c74be
RH
1955 switch (a.cls) {
1956 case float_class_qnan:
1957 case float_class_snan:
dbe4d53a 1958 return return_nan(a, s);
dbe4d53a 1959
dbe4d53a
AB
1960 case float_class_zero:
1961 case float_class_inf:
dbe4d53a
AB
1962 /* already "integral" */
1963 break;
2f6c74be 1964
dbe4d53a 1965 case float_class_normal:
2f6c74be
RH
1966 scale = MIN(MAX(scale, -0x10000), 0x10000);
1967 a.exp += scale;
1968
dbe4d53a
AB
1969 if (a.exp >= DECOMPOSED_BINARY_POINT) {
1970 /* already integral */
1971 break;
1972 }
1973 if (a.exp < 0) {
1974 bool one;
1975 /* all fractional */
1976 s->float_exception_flags |= float_flag_inexact;
2f6c74be 1977 switch (rmode) {
dbe4d53a
AB
1978 case float_round_nearest_even:
1979 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
1980 break;
1981 case float_round_ties_away:
1982 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
1983 break;
1984 case float_round_to_zero:
1985 one = false;
1986 break;
1987 case float_round_up:
1988 one = !a.sign;
1989 break;
1990 case float_round_down:
1991 one = a.sign;
1992 break;
5d64abb3
RH
1993 case float_round_to_odd:
1994 one = true;
1995 break;
dbe4d53a
AB
1996 default:
1997 g_assert_not_reached();
1998 }
1999
2000 if (one) {
2001 a.frac = DECOMPOSED_IMPLICIT_BIT;
2002 a.exp = 0;
2003 } else {
2004 a.cls = float_class_zero;
2005 }
2006 } else {
2007 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
2008 uint64_t frac_lsbm1 = frac_lsb >> 1;
2009 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
2010 uint64_t rnd_mask = rnd_even_mask >> 1;
2011 uint64_t inc;
2012
2f6c74be 2013 switch (rmode) {
dbe4d53a
AB
2014 case float_round_nearest_even:
2015 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
2016 break;
2017 case float_round_ties_away:
2018 inc = frac_lsbm1;
2019 break;
2020 case float_round_to_zero:
2021 inc = 0;
2022 break;
2023 case float_round_up:
2024 inc = a.sign ? 0 : rnd_mask;
2025 break;
2026 case float_round_down:
2027 inc = a.sign ? rnd_mask : 0;
2028 break;
5d64abb3
RH
2029 case float_round_to_odd:
2030 inc = a.frac & frac_lsb ? 0 : rnd_mask;
2031 break;
dbe4d53a
AB
2032 default:
2033 g_assert_not_reached();
2034 }
2035
2036 if (a.frac & rnd_mask) {
2037 s->float_exception_flags |= float_flag_inexact;
2038 a.frac += inc;
2039 a.frac &= ~rnd_mask;
2040 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
2041 a.frac >>= 1;
2042 a.exp++;
2043 }
2044 }
2045 }
2046 break;
2047 default:
2048 g_assert_not_reached();
2049 }
2050 return a;
2051}
2052
2053float16 float16_round_to_int(float16 a, float_status *s)
2054{
2055 FloatParts pa = float16_unpack_canonical(a, s);
2f6c74be 2056 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2057 return float16_round_pack_canonical(pr, s);
2058}
2059
2060float32 float32_round_to_int(float32 a, float_status *s)
2061{
2062 FloatParts pa = float32_unpack_canonical(a, s);
2f6c74be 2063 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2064 return float32_round_pack_canonical(pr, s);
2065}
2066
2067float64 float64_round_to_int(float64 a, float_status *s)
2068{
2069 FloatParts pa = float64_unpack_canonical(a, s);
2f6c74be 2070 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2071 return float64_round_pack_canonical(pr, s);
2072}
2073
ab52f973
AB
2074/*
2075 * Returns the result of converting the floating-point value `a' to
2076 * the two's complement integer format. The conversion is performed
2077 * according to the IEC/IEEE Standard for Binary Floating-Point
2078 * Arithmetic---which means in particular that the conversion is
2079 * rounded according to the current rounding mode. If `a' is a NaN,
2080 * the largest positive integer is returned. Otherwise, if the
2081 * conversion overflows, the largest integer with the same sign as `a'
2082 * is returned.
2083*/
2084
2f6c74be 2085static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale,
ab52f973
AB
2086 int64_t min, int64_t max,
2087 float_status *s)
2088{
2089 uint64_t r;
2090 int orig_flags = get_float_exception_flags(s);
2f6c74be 2091 FloatParts p = round_to_int(in, rmode, scale, s);
ab52f973
AB
2092
2093 switch (p.cls) {
2094 case float_class_snan:
2095 case float_class_qnan:
801bc563 2096 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2097 return max;
2098 case float_class_inf:
801bc563 2099 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2100 return p.sign ? min : max;
2101 case float_class_zero:
2102 return 0;
2103 case float_class_normal:
2104 if (p.exp < DECOMPOSED_BINARY_POINT) {
2105 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2106 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2107 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2108 } else {
2109 r = UINT64_MAX;
2110 }
2111 if (p.sign) {
33358375 2112 if (r <= -(uint64_t) min) {
ab52f973
AB
2113 return -r;
2114 } else {
2115 s->float_exception_flags = orig_flags | float_flag_invalid;
2116 return min;
2117 }
2118 } else {
33358375 2119 if (r <= max) {
ab52f973
AB
2120 return r;
2121 } else {
2122 s->float_exception_flags = orig_flags | float_flag_invalid;
2123 return max;
2124 }
2125 }
2126 default:
2127 g_assert_not_reached();
2128 }
2129}
2130
2f6c74be
RH
2131int16_t float16_to_int16_scalbn(float16 a, int rmode, int scale,
2132 float_status *s)
2133{
2134 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2135 rmode, scale, INT16_MIN, INT16_MAX, s);
2136}
2137
2138int32_t float16_to_int32_scalbn(float16 a, int rmode, int scale,
2139 float_status *s)
2140{
2141 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2142 rmode, scale, INT32_MIN, INT32_MAX, s);
2143}
2144
2145int64_t float16_to_int64_scalbn(float16 a, int rmode, int scale,
2146 float_status *s)
2147{
2148 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2149 rmode, scale, INT64_MIN, INT64_MAX, s);
2150}
2151
2152int16_t float32_to_int16_scalbn(float32 a, int rmode, int scale,
2153 float_status *s)
2154{
2155 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2156 rmode, scale, INT16_MIN, INT16_MAX, s);
2157}
2158
2159int32_t float32_to_int32_scalbn(float32 a, int rmode, int scale,
2160 float_status *s)
2161{
2162 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2163 rmode, scale, INT32_MIN, INT32_MAX, s);
2164}
2165
2166int64_t float32_to_int64_scalbn(float32 a, int rmode, int scale,
2167 float_status *s)
2168{
2169 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2170 rmode, scale, INT64_MIN, INT64_MAX, s);
2171}
2172
2173int16_t float64_to_int16_scalbn(float64 a, int rmode, int scale,
2174 float_status *s)
2175{
2176 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2177 rmode, scale, INT16_MIN, INT16_MAX, s);
2178}
2179
2180int32_t float64_to_int32_scalbn(float64 a, int rmode, int scale,
2181 float_status *s)
2182{
2183 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2184 rmode, scale, INT32_MIN, INT32_MAX, s);
2185}
2186
2187int64_t float64_to_int64_scalbn(float64 a, int rmode, int scale,
2188 float_status *s)
2189{
2190 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2191 rmode, scale, INT64_MIN, INT64_MAX, s);
2192}
2193
2194int16_t float16_to_int16(float16 a, float_status *s)
2195{
2196 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2197}
2198
2199int32_t float16_to_int32(float16 a, float_status *s)
2200{
2201 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2202}
2203
2204int64_t float16_to_int64(float16 a, float_status *s)
2205{
2206 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2207}
2208
2209int16_t float32_to_int16(float32 a, float_status *s)
2210{
2211 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2212}
2213
2214int32_t float32_to_int32(float32 a, float_status *s)
2215{
2216 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2217}
2218
2219int64_t float32_to_int64(float32 a, float_status *s)
2220{
2221 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2222}
2223
2224int16_t float64_to_int16(float64 a, float_status *s)
2225{
2226 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2227}
2228
2229int32_t float64_to_int32(float64 a, float_status *s)
2230{
2231 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2232}
2233
2234int64_t float64_to_int64(float64 a, float_status *s)
2235{
2236 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2237}
2238
2239int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2240{
2241 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2242}
2243
2244int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2245{
2246 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2247}
2248
2249int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2250{
2251 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2252}
2253
2f6c74be
RH
2254int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2255{
2256 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2257}
ab52f973 2258
2f6c74be
RH
2259int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2260{
2261 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2262}
2263
2264int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2265{
2266 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2267}
2268
2269int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2270{
2271 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2272}
ab52f973 2273
2f6c74be
RH
2274int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2275{
2276 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2277}
ab52f973 2278
2f6c74be
RH
2279int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2280{
2281 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2282}
ab52f973
AB
2283
2284/*
2285 * Returns the result of converting the floating-point value `a' to
2286 * the unsigned integer format. The conversion is performed according
2287 * to the IEC/IEEE Standard for Binary Floating-Point
2288 * Arithmetic---which means in particular that the conversion is
2289 * rounded according to the current rounding mode. If `a' is a NaN,
2290 * the largest unsigned integer is returned. Otherwise, if the
2291 * conversion overflows, the largest unsigned integer is returned. If
2292 * the 'a' is negative, the result is rounded and zero is returned;
2293 * values that do not round to zero will raise the inexact exception
2294 * flag.
2295 */
2296
2f6c74be
RH
2297static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale,
2298 uint64_t max, float_status *s)
ab52f973
AB
2299{
2300 int orig_flags = get_float_exception_flags(s);
2f6c74be
RH
2301 FloatParts p = round_to_int(in, rmode, scale, s);
2302 uint64_t r;
ab52f973
AB
2303
2304 switch (p.cls) {
2305 case float_class_snan:
2306 case float_class_qnan:
2307 s->float_exception_flags = orig_flags | float_flag_invalid;
2308 return max;
2309 case float_class_inf:
801bc563 2310 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2311 return p.sign ? 0 : max;
2312 case float_class_zero:
2313 return 0;
2314 case float_class_normal:
ab52f973
AB
2315 if (p.sign) {
2316 s->float_exception_flags = orig_flags | float_flag_invalid;
2317 return 0;
2318 }
2319
2320 if (p.exp < DECOMPOSED_BINARY_POINT) {
2321 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2322 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2323 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2324 } else {
2325 s->float_exception_flags = orig_flags | float_flag_invalid;
2326 return max;
2327 }
2328
2329 /* For uint64 this will never trip, but if p.exp is too large
2330 * to shift a decomposed fraction we shall have exited via the
2331 * 3rd leg above.
2332 */
2333 if (r > max) {
2334 s->float_exception_flags = orig_flags | float_flag_invalid;
2335 return max;
ab52f973 2336 }
2f6c74be 2337 return r;
ab52f973
AB
2338 default:
2339 g_assert_not_reached();
2340 }
2341}
2342
2f6c74be
RH
2343uint16_t float16_to_uint16_scalbn(float16 a, int rmode, int scale,
2344 float_status *s)
2345{
2346 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2347 rmode, scale, UINT16_MAX, s);
2348}
2349
2350uint32_t float16_to_uint32_scalbn(float16 a, int rmode, int scale,
2351 float_status *s)
2352{
2353 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2354 rmode, scale, UINT32_MAX, s);
2355}
2356
2357uint64_t float16_to_uint64_scalbn(float16 a, int rmode, int scale,
2358 float_status *s)
2359{
2360 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2361 rmode, scale, UINT64_MAX, s);
2362}
2363
2364uint16_t float32_to_uint16_scalbn(float32 a, int rmode, int scale,
2365 float_status *s)
2366{
2367 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2368 rmode, scale, UINT16_MAX, s);
2369}
2370
2371uint32_t float32_to_uint32_scalbn(float32 a, int rmode, int scale,
2372 float_status *s)
2373{
2374 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2375 rmode, scale, UINT32_MAX, s);
2376}
2377
2378uint64_t float32_to_uint64_scalbn(float32 a, int rmode, int scale,
2379 float_status *s)
2380{
2381 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2382 rmode, scale, UINT64_MAX, s);
2383}
2384
2385uint16_t float64_to_uint16_scalbn(float64 a, int rmode, int scale,
2386 float_status *s)
2387{
2388 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2389 rmode, scale, UINT16_MAX, s);
2390}
2391
2392uint32_t float64_to_uint32_scalbn(float64 a, int rmode, int scale,
2393 float_status *s)
2394{
2395 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2396 rmode, scale, UINT32_MAX, s);
2397}
2398
2399uint64_t float64_to_uint64_scalbn(float64 a, int rmode, int scale,
2400 float_status *s)
2401{
2402 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2403 rmode, scale, UINT64_MAX, s);
2404}
2405
2406uint16_t float16_to_uint16(float16 a, float_status *s)
2407{
2408 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2409}
2410
2411uint32_t float16_to_uint32(float16 a, float_status *s)
2412{
2413 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2414}
2415
2416uint64_t float16_to_uint64(float16 a, float_status *s)
2417{
2418 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2419}
2420
2421uint16_t float32_to_uint16(float32 a, float_status *s)
2422{
2423 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2424}
2425
2426uint32_t float32_to_uint32(float32 a, float_status *s)
2427{
2428 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2429}
2430
2431uint64_t float32_to_uint64(float32 a, float_status *s)
2432{
2433 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2434}
2435
2436uint16_t float64_to_uint16(float64 a, float_status *s)
2437{
2438 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2439}
2440
2441uint32_t float64_to_uint32(float64 a, float_status *s)
2442{
2443 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2444}
2445
2446uint64_t float64_to_uint64(float64 a, float_status *s)
2447{
2448 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2449}
2450
2451uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2452{
2453 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2454}
2455
2456uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2457{
2458 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2459}
2460
2461uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2462{
2463 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2464}
2465
2466uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2467{
2468 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2469}
2470
2471uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2472{
2473 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2474}
2475
2476uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2477{
2478 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2479}
2480
2481uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2482{
2483 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2484}
2485
2486uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2487{
2488 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2489}
2490
2491uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2492{
2493 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2494}
ab52f973 2495
c02e1fb8
AB
2496/*
2497 * Integer to float conversions
2498 *
2499 * Returns the result of converting the two's complement integer `a'
2500 * to the floating-point format. The conversion is performed according
2501 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2502 */
2503
2abdfe24 2504static FloatParts int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 2505{
2abdfe24
RH
2506 FloatParts r = { .sign = false };
2507
c02e1fb8
AB
2508 if (a == 0) {
2509 r.cls = float_class_zero;
c02e1fb8 2510 } else {
2abdfe24
RH
2511 uint64_t f = a;
2512 int shift;
2513
2514 r.cls = float_class_normal;
c02e1fb8 2515 if (a < 0) {
2abdfe24 2516 f = -f;
c02e1fb8 2517 r.sign = true;
c02e1fb8 2518 }
2abdfe24
RH
2519 shift = clz64(f) - 1;
2520 scale = MIN(MAX(scale, -0x10000), 0x10000);
2521
2522 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2523 r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
c02e1fb8
AB
2524 }
2525
2526 return r;
2527}
2528
2abdfe24 2529float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2530{
2abdfe24 2531 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2532 return float16_round_pack_canonical(pa, status);
2533}
2534
2abdfe24
RH
2535float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2536{
2537 return int64_to_float16_scalbn(a, scale, status);
2538}
2539
2540float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2541{
2542 return int64_to_float16_scalbn(a, scale, status);
2543}
2544
2545float16 int64_to_float16(int64_t a, float_status *status)
2546{
2547 return int64_to_float16_scalbn(a, 0, status);
2548}
2549
c02e1fb8
AB
2550float16 int32_to_float16(int32_t a, float_status *status)
2551{
2abdfe24 2552 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2553}
2554
2555float16 int16_to_float16(int16_t a, float_status *status)
2556{
2abdfe24 2557 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2558}
2559
2abdfe24 2560float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2561{
2abdfe24 2562 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2563 return float32_round_pack_canonical(pa, status);
2564}
2565
2abdfe24
RH
2566float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
2567{
2568 return int64_to_float32_scalbn(a, scale, status);
2569}
2570
2571float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
2572{
2573 return int64_to_float32_scalbn(a, scale, status);
2574}
2575
2576float32 int64_to_float32(int64_t a, float_status *status)
2577{
2578 return int64_to_float32_scalbn(a, 0, status);
2579}
2580
c02e1fb8
AB
2581float32 int32_to_float32(int32_t a, float_status *status)
2582{
2abdfe24 2583 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2584}
2585
2586float32 int16_to_float32(int16_t a, float_status *status)
2587{
2abdfe24 2588 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2589}
2590
2abdfe24 2591float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2592{
2abdfe24 2593 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2594 return float64_round_pack_canonical(pa, status);
2595}
2596
2abdfe24
RH
2597float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
2598{
2599 return int64_to_float64_scalbn(a, scale, status);
2600}
2601
2602float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
2603{
2604 return int64_to_float64_scalbn(a, scale, status);
2605}
2606
2607float64 int64_to_float64(int64_t a, float_status *status)
2608{
2609 return int64_to_float64_scalbn(a, 0, status);
2610}
2611
c02e1fb8
AB
2612float64 int32_to_float64(int32_t a, float_status *status)
2613{
2abdfe24 2614 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2615}
2616
2617float64 int16_to_float64(int16_t a, float_status *status)
2618{
2abdfe24 2619 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2620}
2621
2622
2623/*
2624 * Unsigned Integer to float conversions
2625 *
2626 * Returns the result of converting the unsigned integer `a' to the
2627 * floating-point format. The conversion is performed according to the
2628 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2629 */
2630
2abdfe24 2631static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 2632{
2abdfe24 2633 FloatParts r = { .sign = false };
c02e1fb8
AB
2634
2635 if (a == 0) {
2636 r.cls = float_class_zero;
2637 } else {
2abdfe24 2638 scale = MIN(MAX(scale, -0x10000), 0x10000);
c02e1fb8 2639 r.cls = float_class_normal;
2abdfe24
RH
2640 if ((int64_t)a < 0) {
2641 r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
2642 shift64RightJamming(a, 1, &a);
c02e1fb8
AB
2643 r.frac = a;
2644 } else {
2abdfe24
RH
2645 int shift = clz64(a) - 1;
2646 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2647 r.frac = a << shift;
c02e1fb8
AB
2648 }
2649 }
2650
2651 return r;
2652}
2653
2abdfe24 2654float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2655{
2abdfe24 2656 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2657 return float16_round_pack_canonical(pa, status);
2658}
2659
2abdfe24
RH
2660float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
2661{
2662 return uint64_to_float16_scalbn(a, scale, status);
2663}
2664
2665float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
2666{
2667 return uint64_to_float16_scalbn(a, scale, status);
2668}
2669
2670float16 uint64_to_float16(uint64_t a, float_status *status)
2671{
2672 return uint64_to_float16_scalbn(a, 0, status);
2673}
2674
c02e1fb8
AB
2675float16 uint32_to_float16(uint32_t a, float_status *status)
2676{
2abdfe24 2677 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2678}
2679
2680float16 uint16_to_float16(uint16_t a, float_status *status)
2681{
2abdfe24 2682 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2683}
2684
2abdfe24 2685float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2686{
2abdfe24 2687 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2688 return float32_round_pack_canonical(pa, status);
2689}
2690
2abdfe24
RH
2691float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
2692{
2693 return uint64_to_float32_scalbn(a, scale, status);
2694}
2695
2696float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
2697{
2698 return uint64_to_float32_scalbn(a, scale, status);
2699}
2700
2701float32 uint64_to_float32(uint64_t a, float_status *status)
2702{
2703 return uint64_to_float32_scalbn(a, 0, status);
2704}
2705
c02e1fb8
AB
2706float32 uint32_to_float32(uint32_t a, float_status *status)
2707{
2abdfe24 2708 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2709}
2710
2711float32 uint16_to_float32(uint16_t a, float_status *status)
2712{
2abdfe24 2713 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2714}
2715
2abdfe24 2716float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2717{
2abdfe24 2718 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2719 return float64_round_pack_canonical(pa, status);
2720}
2721
2abdfe24
RH
2722float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
2723{
2724 return uint64_to_float64_scalbn(a, scale, status);
2725}
2726
2727float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
2728{
2729 return uint64_to_float64_scalbn(a, scale, status);
2730}
2731
2732float64 uint64_to_float64(uint64_t a, float_status *status)
2733{
2734 return uint64_to_float64_scalbn(a, 0, status);
2735}
2736
c02e1fb8
AB
2737float64 uint32_to_float64(uint32_t a, float_status *status)
2738{
2abdfe24 2739 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2740}
2741
2742float64 uint16_to_float64(uint16_t a, float_status *status)
2743{
2abdfe24 2744 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2745}
2746
89360067
AB
2747/* Float Min/Max */
2748/* min() and max() functions. These can't be implemented as
2749 * 'compare and pick one input' because that would mishandle
2750 * NaNs and +0 vs -0.
2751 *
2752 * minnum() and maxnum() functions. These are similar to the min()
2753 * and max() functions but if one of the arguments is a QNaN and
2754 * the other is numerical then the numerical argument is returned.
2755 * SNaNs will get quietened before being returned.
2756 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
2757 * and maxNum() operations. min() and max() are the typical min/max
2758 * semantics provided by many CPUs which predate that specification.
2759 *
2760 * minnummag() and maxnummag() functions correspond to minNumMag()
2761 * and minNumMag() from the IEEE-754 2008.
2762 */
2763static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
2764 bool ieee, bool ismag, float_status *s)
2765{
2766 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
2767 if (ieee) {
2768 /* Takes two floating-point values `a' and `b', one of
2769 * which is a NaN, and returns the appropriate NaN
2770 * result. If either `a' or `b' is a signaling NaN,
2771 * the invalid exception is raised.
2772 */
2773 if (is_snan(a.cls) || is_snan(b.cls)) {
2774 return pick_nan(a, b, s);
2775 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
2776 return b;
2777 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
2778 return a;
2779 }
2780 }
2781 return pick_nan(a, b, s);
2782 } else {
2783 int a_exp, b_exp;
89360067
AB
2784
2785 switch (a.cls) {
2786 case float_class_normal:
2787 a_exp = a.exp;
2788 break;
2789 case float_class_inf:
2790 a_exp = INT_MAX;
2791 break;
2792 case float_class_zero:
2793 a_exp = INT_MIN;
2794 break;
2795 default:
2796 g_assert_not_reached();
2797 break;
2798 }
2799 switch (b.cls) {
2800 case float_class_normal:
2801 b_exp = b.exp;
2802 break;
2803 case float_class_inf:
2804 b_exp = INT_MAX;
2805 break;
2806 case float_class_zero:
2807 b_exp = INT_MIN;
2808 break;
2809 default:
2810 g_assert_not_reached();
2811 break;
2812 }
2813
6245327a
EC
2814 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
2815 bool a_less = a_exp < b_exp;
2816 if (a_exp == b_exp) {
2817 a_less = a.frac < b.frac;
2818 }
2819 return a_less ^ ismin ? b : a;
89360067
AB
2820 }
2821
6245327a 2822 if (a.sign == b.sign) {
89360067
AB
2823 bool a_less = a_exp < b_exp;
2824 if (a_exp == b_exp) {
2825 a_less = a.frac < b.frac;
2826 }
6245327a 2827 return a.sign ^ a_less ^ ismin ? b : a;
89360067 2828 } else {
6245327a 2829 return a.sign ^ ismin ? b : a;
89360067
AB
2830 }
2831 }
2832}
2833
2834#define MINMAX(sz, name, ismin, isiee, ismag) \
2835float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
2836 float_status *s) \
2837{ \
2838 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2839 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2840 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
2841 \
2842 return float ## sz ## _round_pack_canonical(pr, s); \
2843}
2844
2845MINMAX(16, min, true, false, false)
2846MINMAX(16, minnum, true, true, false)
2847MINMAX(16, minnummag, true, true, true)
2848MINMAX(16, max, false, false, false)
2849MINMAX(16, maxnum, false, true, false)
2850MINMAX(16, maxnummag, false, true, true)
2851
2852MINMAX(32, min, true, false, false)
2853MINMAX(32, minnum, true, true, false)
2854MINMAX(32, minnummag, true, true, true)
2855MINMAX(32, max, false, false, false)
2856MINMAX(32, maxnum, false, true, false)
2857MINMAX(32, maxnummag, false, true, true)
2858
2859MINMAX(64, min, true, false, false)
2860MINMAX(64, minnum, true, true, false)
2861MINMAX(64, minnummag, true, true, true)
2862MINMAX(64, max, false, false, false)
2863MINMAX(64, maxnum, false, true, false)
2864MINMAX(64, maxnummag, false, true, true)
2865
2866#undef MINMAX
2867
0c4c9092
AB
2868/* Floating point compare */
2869static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
2870 float_status *s)
2871{
2872 if (is_nan(a.cls) || is_nan(b.cls)) {
2873 if (!is_quiet ||
2874 a.cls == float_class_snan ||
2875 b.cls == float_class_snan) {
2876 s->float_exception_flags |= float_flag_invalid;
2877 }
2878 return float_relation_unordered;
2879 }
2880
2881 if (a.cls == float_class_zero) {
2882 if (b.cls == float_class_zero) {
2883 return float_relation_equal;
2884 }
2885 return b.sign ? float_relation_greater : float_relation_less;
2886 } else if (b.cls == float_class_zero) {
2887 return a.sign ? float_relation_less : float_relation_greater;
2888 }
2889
2890 /* The only really important thing about infinity is its sign. If
2891 * both are infinities the sign marks the smallest of the two.
2892 */
2893 if (a.cls == float_class_inf) {
2894 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
2895 return float_relation_equal;
2896 }
2897 return a.sign ? float_relation_less : float_relation_greater;
2898 } else if (b.cls == float_class_inf) {
2899 return b.sign ? float_relation_greater : float_relation_less;
2900 }
2901
2902 if (a.sign != b.sign) {
2903 return a.sign ? float_relation_less : float_relation_greater;
2904 }
2905
2906 if (a.exp == b.exp) {
2907 if (a.frac == b.frac) {
2908 return float_relation_equal;
2909 }
2910 if (a.sign) {
2911 return a.frac > b.frac ?
2912 float_relation_less : float_relation_greater;
2913 } else {
2914 return a.frac > b.frac ?
2915 float_relation_greater : float_relation_less;
2916 }
2917 } else {
2918 if (a.sign) {
2919 return a.exp > b.exp ? float_relation_less : float_relation_greater;
2920 } else {
2921 return a.exp > b.exp ? float_relation_greater : float_relation_less;
2922 }
2923 }
2924}
2925
d9fe9db9
EC
2926#define COMPARE(name, attr, sz) \
2927static int attr \
2928name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
0c4c9092
AB
2929{ \
2930 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2931 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
d9fe9db9 2932 return compare_floats(pa, pb, is_quiet, s); \
0c4c9092
AB
2933}
2934
d9fe9db9
EC
2935COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
2936COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
2937COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
0c4c9092
AB
2938
2939#undef COMPARE
2940
d9fe9db9
EC
2941int float16_compare(float16 a, float16 b, float_status *s)
2942{
2943 return soft_f16_compare(a, b, false, s);
2944}
2945
2946int float16_compare_quiet(float16 a, float16 b, float_status *s)
2947{
2948 return soft_f16_compare(a, b, true, s);
2949}
2950
2951static int QEMU_FLATTEN
2952f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
2953{
2954 union_float32 ua, ub;
2955
2956 ua.s = xa;
2957 ub.s = xb;
2958
2959 if (QEMU_NO_HARDFLOAT) {
2960 goto soft;
2961 }
2962
2963 float32_input_flush2(&ua.s, &ub.s, s);
2964 if (isgreaterequal(ua.h, ub.h)) {
2965 if (isgreater(ua.h, ub.h)) {
2966 return float_relation_greater;
2967 }
2968 return float_relation_equal;
2969 }
2970 if (likely(isless(ua.h, ub.h))) {
2971 return float_relation_less;
2972 }
2973 /* The only condition remaining is unordered.
2974 * Fall through to set flags.
2975 */
2976 soft:
2977 return soft_f32_compare(ua.s, ub.s, is_quiet, s);
2978}
2979
2980int float32_compare(float32 a, float32 b, float_status *s)
2981{
2982 return f32_compare(a, b, false, s);
2983}
2984
2985int float32_compare_quiet(float32 a, float32 b, float_status *s)
2986{
2987 return f32_compare(a, b, true, s);
2988}
2989
2990static int QEMU_FLATTEN
2991f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
2992{
2993 union_float64 ua, ub;
2994
2995 ua.s = xa;
2996 ub.s = xb;
2997
2998 if (QEMU_NO_HARDFLOAT) {
2999 goto soft;
3000 }
3001
3002 float64_input_flush2(&ua.s, &ub.s, s);
3003 if (isgreaterequal(ua.h, ub.h)) {
3004 if (isgreater(ua.h, ub.h)) {
3005 return float_relation_greater;
3006 }
3007 return float_relation_equal;
3008 }
3009 if (likely(isless(ua.h, ub.h))) {
3010 return float_relation_less;
3011 }
3012 /* The only condition remaining is unordered.
3013 * Fall through to set flags.
3014 */
3015 soft:
3016 return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3017}
3018
3019int float64_compare(float64 a, float64 b, float_status *s)
3020{
3021 return f64_compare(a, b, false, s);
3022}
3023
3024int float64_compare_quiet(float64 a, float64 b, float_status *s)
3025{
3026 return f64_compare(a, b, true, s);
3027}
3028
0bfc9f19
AB
3029/* Multiply A by 2 raised to the power N. */
3030static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
3031{
3032 if (unlikely(is_nan(a.cls))) {
3033 return return_nan(a, s);
3034 }
3035 if (a.cls == float_class_normal) {
ce8d4082
RH
3036 /* The largest float type (even though not supported by FloatParts)
3037 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3038 * still allows rounding to infinity, without allowing overflow
3039 * within the int32_t that backs FloatParts.exp.
3040 */
3041 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
3042 a.exp += n;
3043 }
3044 return a;
3045}
3046
3047float16 float16_scalbn(float16 a, int n, float_status *status)
3048{
3049 FloatParts pa = float16_unpack_canonical(a, status);
3050 FloatParts pr = scalbn_decomposed(pa, n, status);
3051 return float16_round_pack_canonical(pr, status);
3052}
3053
3054float32 float32_scalbn(float32 a, int n, float_status *status)
3055{
3056 FloatParts pa = float32_unpack_canonical(a, status);
3057 FloatParts pr = scalbn_decomposed(pa, n, status);
3058 return float32_round_pack_canonical(pr, status);
3059}
3060
3061float64 float64_scalbn(float64 a, int n, float_status *status)
3062{
3063 FloatParts pa = float64_unpack_canonical(a, status);
3064 FloatParts pr = scalbn_decomposed(pa, n, status);
3065 return float64_round_pack_canonical(pr, status);
3066}
3067
c13bb2da
AB
3068/*
3069 * Square Root
3070 *
3071 * The old softfloat code did an approximation step before zeroing in
3072 * on the final result. However for simplicity we just compute the
3073 * square root by iterating down from the implicit bit to enough extra
3074 * bits to ensure we get a correctly rounded result.
3075 *
3076 * This does mean however the calculation is slower than before,
3077 * especially for 64 bit floats.
3078 */
3079
3080static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
3081{
3082 uint64_t a_frac, r_frac, s_frac;
3083 int bit, last_bit;
3084
3085 if (is_nan(a.cls)) {
3086 return return_nan(a, s);
3087 }
3088 if (a.cls == float_class_zero) {
3089 return a; /* sqrt(+-0) = +-0 */
3090 }
3091 if (a.sign) {
3092 s->float_exception_flags |= float_flag_invalid;
f7e598e2 3093 return parts_default_nan(s);
c13bb2da
AB
3094 }
3095 if (a.cls == float_class_inf) {
3096 return a; /* sqrt(+inf) = +inf */
3097 }
3098
3099 assert(a.cls == float_class_normal);
3100
3101 /* We need two overflow bits at the top. Adding room for that is a
3102 * right shift. If the exponent is odd, we can discard the low bit
3103 * by multiplying the fraction by 2; that's a left shift. Combine
3104 * those and we shift right if the exponent is even.
3105 */
3106 a_frac = a.frac;
3107 if (!(a.exp & 1)) {
3108 a_frac >>= 1;
3109 }
3110 a.exp >>= 1;
3111
3112 /* Bit-by-bit computation of sqrt. */
3113 r_frac = 0;
3114 s_frac = 0;
3115
3116 /* Iterate from implicit bit down to the 3 extra bits to compute a
3117 * properly rounded result. Remember we've inserted one more bit
3118 * at the top, so these positions are one less.
3119 */
3120 bit = DECOMPOSED_BINARY_POINT - 1;
3121 last_bit = MAX(p->frac_shift - 4, 0);
3122 do {
3123 uint64_t q = 1ULL << bit;
3124 uint64_t t_frac = s_frac + q;
3125 if (t_frac <= a_frac) {
3126 s_frac = t_frac + q;
3127 a_frac -= t_frac;
3128 r_frac += q;
3129 }
3130 a_frac <<= 1;
3131 } while (--bit >= last_bit);
3132
3133 /* Undo the right shift done above. If there is any remaining
3134 * fraction, the result is inexact. Set the sticky bit.
3135 */
3136 a.frac = (r_frac << 1) + (a_frac != 0);
3137
3138 return a;
3139}
3140
97ff87c0 3141float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da
AB
3142{
3143 FloatParts pa = float16_unpack_canonical(a, status);
3144 FloatParts pr = sqrt_float(pa, status, &float16_params);
3145 return float16_round_pack_canonical(pr, status);
3146}
3147
f131bae8
EC
3148static float32 QEMU_SOFTFLOAT_ATTR
3149soft_f32_sqrt(float32 a, float_status *status)
c13bb2da
AB
3150{
3151 FloatParts pa = float32_unpack_canonical(a, status);
3152 FloatParts pr = sqrt_float(pa, status, &float32_params);
3153 return float32_round_pack_canonical(pr, status);
3154}
3155
f131bae8
EC
3156static float64 QEMU_SOFTFLOAT_ATTR
3157soft_f64_sqrt(float64 a, float_status *status)
c13bb2da
AB
3158{
3159 FloatParts pa = float64_unpack_canonical(a, status);
3160 FloatParts pr = sqrt_float(pa, status, &float64_params);
3161 return float64_round_pack_canonical(pr, status);
3162}
3163
f131bae8
EC
3164float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3165{
3166 union_float32 ua, ur;
3167
3168 ua.s = xa;
3169 if (unlikely(!can_use_fpu(s))) {
3170 goto soft;
3171 }
3172
3173 float32_input_flush1(&ua.s, s);
3174 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3175 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3176 fpclassify(ua.h) == FP_ZERO) ||
3177 signbit(ua.h))) {
3178 goto soft;
3179 }
3180 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3181 float32_is_neg(ua.s))) {
3182 goto soft;
3183 }
3184 ur.h = sqrtf(ua.h);
3185 return ur.s;
3186
3187 soft:
3188 return soft_f32_sqrt(ua.s, s);
3189}
3190
3191float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3192{
3193 union_float64 ua, ur;
3194
3195 ua.s = xa;
3196 if (unlikely(!can_use_fpu(s))) {
3197 goto soft;
3198 }
3199
3200 float64_input_flush1(&ua.s, s);
3201 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3202 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3203 fpclassify(ua.h) == FP_ZERO) ||
3204 signbit(ua.h))) {
3205 goto soft;
3206 }
3207 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3208 float64_is_neg(ua.s))) {
3209 goto soft;
3210 }
3211 ur.h = sqrt(ua.h);
3212 return ur.s;
3213
3214 soft:
3215 return soft_f64_sqrt(ua.s, s);
3216}
3217
0218a16e
RH
3218/*----------------------------------------------------------------------------
3219| The pattern for a default generated NaN.
3220*----------------------------------------------------------------------------*/
3221
3222float16 float16_default_nan(float_status *status)
3223{
3224 FloatParts p = parts_default_nan(status);
3225 p.frac >>= float16_params.frac_shift;
3226 return float16_pack_raw(p);
3227}
3228
3229float32 float32_default_nan(float_status *status)
3230{
3231 FloatParts p = parts_default_nan(status);
3232 p.frac >>= float32_params.frac_shift;
3233 return float32_pack_raw(p);
3234}
3235
3236float64 float64_default_nan(float_status *status)
3237{
3238 FloatParts p = parts_default_nan(status);
3239 p.frac >>= float64_params.frac_shift;
3240 return float64_pack_raw(p);
3241}
3242
3243float128 float128_default_nan(float_status *status)
3244{
3245 FloatParts p = parts_default_nan(status);
3246 float128 r;
3247
3248 /* Extrapolate from the choices made by parts_default_nan to fill
3249 * in the quad-floating format. If the low bit is set, assume we
3250 * want to set all non-snan bits.
3251 */
3252 r.low = -(p.frac & 1);
3253 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
e9321124 3254 r.high |= UINT64_C(0x7FFF000000000000);
0218a16e
RH
3255 r.high |= (uint64_t)p.sign << 63;
3256
3257 return r;
3258}
c13bb2da 3259
158142c2 3260/*----------------------------------------------------------------------------
377ed926
RH
3261| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3262*----------------------------------------------------------------------------*/
3263
3264float16 float16_silence_nan(float16 a, float_status *status)
3265{
3266 FloatParts p = float16_unpack_raw(a);
3267 p.frac <<= float16_params.frac_shift;
3268 p = parts_silence_nan(p, status);
3269 p.frac >>= float16_params.frac_shift;
3270 return float16_pack_raw(p);
3271}
3272
3273float32 float32_silence_nan(float32 a, float_status *status)
3274{
3275 FloatParts p = float32_unpack_raw(a);
3276 p.frac <<= float32_params.frac_shift;
3277 p = parts_silence_nan(p, status);
3278 p.frac >>= float32_params.frac_shift;
3279 return float32_pack_raw(p);
3280}
3281
3282float64 float64_silence_nan(float64 a, float_status *status)
3283{
3284 FloatParts p = float64_unpack_raw(a);
3285 p.frac <<= float64_params.frac_shift;
3286 p = parts_silence_nan(p, status);
3287 p.frac >>= float64_params.frac_shift;
3288 return float64_pack_raw(p);
3289}
3290
e6b405fe
AB
3291
3292/*----------------------------------------------------------------------------
3293| If `a' is denormal and we are in flush-to-zero mode then set the
3294| input-denormal exception and return zero. Otherwise just return the value.
3295*----------------------------------------------------------------------------*/
3296
3297static bool parts_squash_denormal(FloatParts p, float_status *status)
3298{
3299 if (p.exp == 0 && p.frac != 0) {
3300 float_raise(float_flag_input_denormal, status);
3301 return true;
3302 }
3303
3304 return false;
3305}
3306
3307float16 float16_squash_input_denormal(float16 a, float_status *status)
3308{
3309 if (status->flush_inputs_to_zero) {
3310 FloatParts p = float16_unpack_raw(a);
3311 if (parts_squash_denormal(p, status)) {
3312 return float16_set_sign(float16_zero, p.sign);
3313 }
3314 }
3315 return a;
3316}
3317
3318float32 float32_squash_input_denormal(float32 a, float_status *status)
3319{
3320 if (status->flush_inputs_to_zero) {
3321 FloatParts p = float32_unpack_raw(a);
3322 if (parts_squash_denormal(p, status)) {
3323 return float32_set_sign(float32_zero, p.sign);
3324 }
3325 }
3326 return a;
3327}
3328
3329float64 float64_squash_input_denormal(float64 a, float_status *status)
3330{
3331 if (status->flush_inputs_to_zero) {
3332 FloatParts p = float64_unpack_raw(a);
3333 if (parts_squash_denormal(p, status)) {
3334 return float64_set_sign(float64_zero, p.sign);
3335 }
3336 }
3337 return a;
3338}
3339
377ed926 3340/*----------------------------------------------------------------------------
158142c2
FB
3341| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3342| and 7, and returns the properly rounded 32-bit integer corresponding to the
3343| input. If `zSign' is 1, the input is negated before being converted to an
3344| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3345| is simply rounded to an integer, with the inexact exception raised if the
3346| input cannot be represented exactly as an integer. However, if the fixed-
3347| point input is too large, the invalid exception is raised and the largest
3348| positive or negative integer is returned.
3349*----------------------------------------------------------------------------*/
3350
f4014512 3351static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
158142c2 3352{
8f506c70 3353 int8_t roundingMode;
158142c2 3354 flag roundNearestEven;
8f506c70 3355 int8_t roundIncrement, roundBits;
760e1416 3356 int32_t z;
158142c2 3357
a2f2d288 3358 roundingMode = status->float_rounding_mode;
158142c2 3359 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3360 switch (roundingMode) {
3361 case float_round_nearest_even:
f9288a76 3362 case float_round_ties_away:
dc355b76
PM
3363 roundIncrement = 0x40;
3364 break;
3365 case float_round_to_zero:
3366 roundIncrement = 0;
3367 break;
3368 case float_round_up:
3369 roundIncrement = zSign ? 0 : 0x7f;
3370 break;
3371 case float_round_down:
3372 roundIncrement = zSign ? 0x7f : 0;
3373 break;
5d64abb3
RH
3374 case float_round_to_odd:
3375 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
3376 break;
dc355b76
PM
3377 default:
3378 abort();
158142c2
FB
3379 }
3380 roundBits = absZ & 0x7F;
3381 absZ = ( absZ + roundIncrement )>>7;
3382 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
3383 z = absZ;
3384 if ( zSign ) z = - z;
3385 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 3386 float_raise(float_flag_invalid, status);
2c217da0 3387 return zSign ? INT32_MIN : INT32_MAX;
158142c2 3388 }
a2f2d288
PM
3389 if (roundBits) {
3390 status->float_exception_flags |= float_flag_inexact;
3391 }
158142c2
FB
3392 return z;
3393
3394}
3395
3396/*----------------------------------------------------------------------------
3397| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3398| `absZ1', with binary point between bits 63 and 64 (between the input words),
3399| and returns the properly rounded 64-bit integer corresponding to the input.
3400| If `zSign' is 1, the input is negated before being converted to an integer.
3401| Ordinarily, the fixed-point input is simply rounded to an integer, with
3402| the inexact exception raised if the input cannot be represented exactly as
3403| an integer. However, if the fixed-point input is too large, the invalid
3404| exception is raised and the largest positive or negative integer is
3405| returned.
3406*----------------------------------------------------------------------------*/
3407
f42c2224 3408static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 3409 float_status *status)
158142c2 3410{
8f506c70 3411 int8_t roundingMode;
158142c2 3412 flag roundNearestEven, increment;
760e1416 3413 int64_t z;
158142c2 3414
a2f2d288 3415 roundingMode = status->float_rounding_mode;
158142c2 3416 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3417 switch (roundingMode) {
3418 case float_round_nearest_even:
f9288a76 3419 case float_round_ties_away:
dc355b76
PM
3420 increment = ((int64_t) absZ1 < 0);
3421 break;
3422 case float_round_to_zero:
3423 increment = 0;
3424 break;
3425 case float_round_up:
3426 increment = !zSign && absZ1;
3427 break;
3428 case float_round_down:
3429 increment = zSign && absZ1;
3430 break;
5d64abb3
RH
3431 case float_round_to_odd:
3432 increment = !(absZ0 & 1) && absZ1;
3433 break;
dc355b76
PM
3434 default:
3435 abort();
158142c2
FB
3436 }
3437 if ( increment ) {
3438 ++absZ0;
3439 if ( absZ0 == 0 ) goto overflow;
bb98fe42 3440 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
3441 }
3442 z = absZ0;
3443 if ( zSign ) z = - z;
3444 if ( z && ( ( z < 0 ) ^ zSign ) ) {
3445 overflow:
ff32e16e 3446 float_raise(float_flag_invalid, status);
2c217da0 3447 return zSign ? INT64_MIN : INT64_MAX;
158142c2 3448 }
a2f2d288
PM
3449 if (absZ1) {
3450 status->float_exception_flags |= float_flag_inexact;
3451 }
158142c2
FB
3452 return z;
3453
3454}
3455
fb3ea83a
TM
3456/*----------------------------------------------------------------------------
3457| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3458| `absZ1', with binary point between bits 63 and 64 (between the input words),
3459| and returns the properly rounded 64-bit unsigned integer corresponding to the
3460| input. Ordinarily, the fixed-point input is simply rounded to an integer,
3461| with the inexact exception raised if the input cannot be represented exactly
3462| as an integer. However, if the fixed-point input is too large, the invalid
3463| exception is raised and the largest unsigned integer is returned.
3464*----------------------------------------------------------------------------*/
3465
f42c2224 3466static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
e5a41ffa 3467 uint64_t absZ1, float_status *status)
fb3ea83a 3468{
8f506c70 3469 int8_t roundingMode;
fb3ea83a
TM
3470 flag roundNearestEven, increment;
3471
a2f2d288 3472 roundingMode = status->float_rounding_mode;
fb3ea83a 3473 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
3474 switch (roundingMode) {
3475 case float_round_nearest_even:
f9288a76 3476 case float_round_ties_away:
dc355b76
PM
3477 increment = ((int64_t)absZ1 < 0);
3478 break;
3479 case float_round_to_zero:
3480 increment = 0;
3481 break;
3482 case float_round_up:
3483 increment = !zSign && absZ1;
3484 break;
3485 case float_round_down:
3486 increment = zSign && absZ1;
3487 break;
5d64abb3
RH
3488 case float_round_to_odd:
3489 increment = !(absZ0 & 1) && absZ1;
3490 break;
dc355b76
PM
3491 default:
3492 abort();
fb3ea83a
TM
3493 }
3494 if (increment) {
3495 ++absZ0;
3496 if (absZ0 == 0) {
ff32e16e 3497 float_raise(float_flag_invalid, status);
2c217da0 3498 return UINT64_MAX;
fb3ea83a
TM
3499 }
3500 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
3501 }
3502
3503 if (zSign && absZ0) {
ff32e16e 3504 float_raise(float_flag_invalid, status);
fb3ea83a
TM
3505 return 0;
3506 }
3507
3508 if (absZ1) {
a2f2d288 3509 status->float_exception_flags |= float_flag_inexact;
fb3ea83a
TM
3510 }
3511 return absZ0;
3512}
3513
158142c2
FB
3514/*----------------------------------------------------------------------------
3515| Normalizes the subnormal single-precision floating-point value represented
3516| by the denormalized significand `aSig'. The normalized exponent and
3517| significand are stored at the locations pointed to by `zExpPtr' and
3518| `zSigPtr', respectively.
3519*----------------------------------------------------------------------------*/
3520
static void
normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* Shift needed to move the leading one of aSig up to bit 23. */
    const int8_t shift = clz32(aSig) - 8;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
3531
158142c2
FB
3532/*----------------------------------------------------------------------------
3533| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3534| and significand `zSig', and returns the proper single-precision floating-
3535| point value corresponding to the abstract input. Ordinarily, the abstract
3536| value is simply rounded and packed into the single-precision format, with
3537| the inexact exception raised if the abstract input cannot be represented
3538| exactly. However, if the abstract value is too large, the overflow and
3539| inexact exceptions are raised and an infinity or maximal finite value is
3540| returned. If the abstract value is too small, the input value is rounded to
3541| a subnormal number, and the underflow and inexact exceptions are raised if
3542| the abstract input cannot be represented exactly as a subnormal single-
3543| precision floating-point number.
3544| The input significand `zSig' has its binary point between bits 30
3545| and 29, which is 7 bits to the left of the usual location. This shifted
3546| significand must be normalized or smaller. If `zSig' is not normalized,
3547| `zExp' must be 0; in that case, the result returned is a subnormal number,
3548| and it must not require rounding. In the usual case that `zSig' is
3549| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3550| The handling of underflow and overflow follows the IEC/IEEE Standard for
3551| Binary Floating-Point Arithmetic.
3552*----------------------------------------------------------------------------*/
3553
0c48262d 3554static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 3555 float_status *status)
158142c2 3556{
8f506c70 3557 int8_t roundingMode;
158142c2 3558 flag roundNearestEven;
8f506c70 3559 int8_t roundIncrement, roundBits;
158142c2
FB
3560 flag isTiny;
3561
a2f2d288 3562 roundingMode = status->float_rounding_mode;
158142c2 3563 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3564 switch (roundingMode) {
3565 case float_round_nearest_even:
f9288a76 3566 case float_round_ties_away:
dc355b76
PM
3567 roundIncrement = 0x40;
3568 break;
3569 case float_round_to_zero:
3570 roundIncrement = 0;
3571 break;
3572 case float_round_up:
3573 roundIncrement = zSign ? 0 : 0x7f;
3574 break;
3575 case float_round_down:
3576 roundIncrement = zSign ? 0x7f : 0;
3577 break;
5d64abb3
RH
3578 case float_round_to_odd:
3579 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
3580 break;
dc355b76
PM
3581 default:
3582 abort();
3583 break;
158142c2
FB
3584 }
3585 roundBits = zSig & 0x7F;
bb98fe42 3586 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
3587 if ( ( 0xFD < zExp )
3588 || ( ( zExp == 0xFD )
bb98fe42 3589 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 3590 ) {
5d64abb3
RH
3591 bool overflow_to_inf = roundingMode != float_round_to_odd &&
3592 roundIncrement != 0;
ff32e16e 3593 float_raise(float_flag_overflow | float_flag_inexact, status);
5d64abb3 3594 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
158142c2
FB
3595 }
3596 if ( zExp < 0 ) {
a2f2d288 3597 if (status->flush_to_zero) {
ff32e16e 3598 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3599 return packFloat32(zSign, 0, 0);
3600 }
158142c2 3601 isTiny =
a2f2d288
PM
3602 (status->float_detect_tininess
3603 == float_tininess_before_rounding)
158142c2
FB
3604 || ( zExp < -1 )
3605 || ( zSig + roundIncrement < 0x80000000 );
3606 shift32RightJamming( zSig, - zExp, &zSig );
3607 zExp = 0;
3608 roundBits = zSig & 0x7F;
ff32e16e
PM
3609 if (isTiny && roundBits) {
3610 float_raise(float_flag_underflow, status);
3611 }
5d64abb3
RH
3612 if (roundingMode == float_round_to_odd) {
3613 /*
3614 * For round-to-odd case, the roundIncrement depends on
3615 * zSig which just changed.
3616 */
3617 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
3618 }
158142c2
FB
3619 }
3620 }
a2f2d288
PM
3621 if (roundBits) {
3622 status->float_exception_flags |= float_flag_inexact;
3623 }
158142c2
FB
3624 zSig = ( zSig + roundIncrement )>>7;
3625 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
3626 if ( zSig == 0 ) zExp = 0;
3627 return packFloat32( zSign, zExp, zSig );
3628
3629}
3630
3631/*----------------------------------------------------------------------------
3632| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3633| and significand `zSig', and returns the proper single-precision floating-
3634| point value corresponding to the abstract input. This routine is just like
3635| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
3636| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
3637| floating-point exponent.
3638*----------------------------------------------------------------------------*/
3639
3640static float32
0c48262d 3641 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 3642 float_status *status)
158142c2 3643{
8f506c70 3644 int8_t shiftCount;
158142c2 3645
0019d5c3 3646 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
3647 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
3648 status);
158142c2
FB
3649
3650}
3651
158142c2
FB
3652/*----------------------------------------------------------------------------
3653| Normalizes the subnormal double-precision floating-point value represented
3654| by the denormalized significand `aSig'. The normalized exponent and
3655| significand are stored at the locations pointed to by `zExpPtr' and
3656| `zSigPtr', respectively.
3657*----------------------------------------------------------------------------*/
3658
static void
normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* Shift needed to move the leading one of aSig up to bit 52. */
    const int8_t shift = clz64(aSig) - 11;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
3669
3670/*----------------------------------------------------------------------------
3671| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
3672| double-precision floating-point value, returning the result. After being
3673| shifted into the proper positions, the three fields are simply added
3674| together to form the result. This means that any integer portion of `zSig'
3675| will be added into the exponent. Since a properly normalized significand
3676| will have an integer portion equal to 1, the `zExp' input should be 1 less
3677| than the desired result exponent whenever `zSig' is a complete, normalized
3678| significand.
3679*----------------------------------------------------------------------------*/
3680
0c48262d 3681static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
158142c2
FB
3682{
3683
f090c9d4 3684 return make_float64(
bb98fe42 3685 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
3686
3687}
3688
3689/*----------------------------------------------------------------------------
3690| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3691| and significand `zSig', and returns the proper double-precision floating-
3692| point value corresponding to the abstract input. Ordinarily, the abstract
3693| value is simply rounded and packed into the double-precision format, with
3694| the inexact exception raised if the abstract input cannot be represented
3695| exactly. However, if the abstract value is too large, the overflow and
3696| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
3697| returned. If the abstract value is too small, the input value is rounded to
3698| a subnormal number, and the underflow and inexact exceptions are raised if
3699| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
3700| precision floating-point number.
3701| The input significand `zSig' has its binary point between bits 62
3702| and 61, which is 10 bits to the left of the usual location. This shifted
3703| significand must be normalized or smaller. If `zSig' is not normalized,
3704| `zExp' must be 0; in that case, the result returned is a subnormal number,
3705| and it must not require rounding. In the usual case that `zSig' is
3706| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3707| The handling of underflow and overflow follows the IEC/IEEE Standard for
3708| Binary Floating-Point Arithmetic.
3709*----------------------------------------------------------------------------*/
3710
0c48262d 3711static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 3712 float_status *status)
158142c2 3713{
8f506c70 3714 int8_t roundingMode;
158142c2 3715 flag roundNearestEven;
0c48262d 3716 int roundIncrement, roundBits;
158142c2
FB
3717 flag isTiny;
3718
a2f2d288 3719 roundingMode = status->float_rounding_mode;
158142c2 3720 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3721 switch (roundingMode) {
3722 case float_round_nearest_even:
f9288a76 3723 case float_round_ties_away:
dc355b76
PM
3724 roundIncrement = 0x200;
3725 break;
3726 case float_round_to_zero:
3727 roundIncrement = 0;
3728 break;
3729 case float_round_up:
3730 roundIncrement = zSign ? 0 : 0x3ff;
3731 break;
3732 case float_round_down:
3733 roundIncrement = zSign ? 0x3ff : 0;
3734 break;
9ee6f678
BR
3735 case float_round_to_odd:
3736 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
3737 break;
dc355b76
PM
3738 default:
3739 abort();
158142c2
FB
3740 }
3741 roundBits = zSig & 0x3FF;
bb98fe42 3742 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
3743 if ( ( 0x7FD < zExp )
3744 || ( ( zExp == 0x7FD )
bb98fe42 3745 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 3746 ) {
9ee6f678
BR
3747 bool overflow_to_inf = roundingMode != float_round_to_odd &&
3748 roundIncrement != 0;
ff32e16e 3749 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 3750 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
3751 }
3752 if ( zExp < 0 ) {
a2f2d288 3753 if (status->flush_to_zero) {
ff32e16e 3754 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3755 return packFloat64(zSign, 0, 0);
3756 }
158142c2 3757 isTiny =
a2f2d288
PM
3758 (status->float_detect_tininess
3759 == float_tininess_before_rounding)
158142c2 3760 || ( zExp < -1 )
e9321124 3761 || ( zSig + roundIncrement < UINT64_C(0x8000000000000000) );
158142c2
FB
3762 shift64RightJamming( zSig, - zExp, &zSig );
3763 zExp = 0;
3764 roundBits = zSig & 0x3FF;
ff32e16e
PM
3765 if (isTiny && roundBits) {
3766 float_raise(float_flag_underflow, status);
3767 }
9ee6f678
BR
3768 if (roundingMode == float_round_to_odd) {
3769 /*
3770 * For round-to-odd case, the roundIncrement depends on
3771 * zSig which just changed.
3772 */
3773 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
3774 }
158142c2
FB
3775 }
3776 }
a2f2d288
PM
3777 if (roundBits) {
3778 status->float_exception_flags |= float_flag_inexact;
3779 }
158142c2
FB
3780 zSig = ( zSig + roundIncrement )>>10;
3781 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
3782 if ( zSig == 0 ) zExp = 0;
3783 return packFloat64( zSign, zExp, zSig );
3784
3785}
3786
3787/*----------------------------------------------------------------------------
3788| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3789| and significand `zSig', and returns the proper double-precision floating-
3790| point value corresponding to the abstract input. This routine is just like
3791| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
3792| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
3793| floating-point exponent.
3794*----------------------------------------------------------------------------*/
3795
3796static float64
0c48262d 3797 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 3798 float_status *status)
158142c2 3799{
8f506c70 3800 int8_t shiftCount;
158142c2 3801
0019d5c3 3802 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
3803 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
3804 status);
158142c2
FB
3805
3806}
3807
158142c2
FB
3808/*----------------------------------------------------------------------------
3809| Normalizes the subnormal extended double-precision floating-point value
3810| represented by the denormalized significand `aSig'. The normalized exponent
3811| and significand are stored at the locations pointed to by `zExpPtr' and
3812| `zSigPtr', respectively.
3813*----------------------------------------------------------------------------*/
3814
88857aca
LV
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* Shift that brings the leading one of aSig up to bit 63. */
    const int8_t shift = clz64(aSig);

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
3824
3825/*----------------------------------------------------------------------------
3826| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3827| and extended significand formed by the concatenation of `zSig0' and `zSig1',
3828| and returns the proper extended double-precision floating-point value
3829| corresponding to the abstract input. Ordinarily, the abstract value is
3830| rounded and packed into the extended double-precision format, with the
3831| inexact exception raised if the abstract input cannot be represented
3832| exactly. However, if the abstract value is too large, the overflow and
3833| inexact exceptions are raised and an infinity or maximal finite value is
3834| returned. If the abstract value is too small, the input value is rounded to
3835| a subnormal number, and the underflow and inexact exceptions are raised if
3836| the abstract input cannot be represented exactly as a subnormal extended
3837| double-precision floating-point number.
3838| If `roundingPrecision' is 32 or 64, the result is rounded to the same
3839| number of bits as single or double precision, respectively. Otherwise, the
3840| result is rounded to the full precision of the extended double-precision
3841| format.
3842| The input significand must be normalized or smaller. If the input
3843| significand is not normalized, `zExp' must be 0; in that case, the result
3844| returned is a subnormal number, and it must not require rounding. The
3845| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
3846| Floating-Point Arithmetic.
3847*----------------------------------------------------------------------------*/
3848
88857aca
LV
3849floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
3850 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
3851 float_status *status)
158142c2 3852{
8f506c70 3853 int8_t roundingMode;
158142c2 3854 flag roundNearestEven, increment, isTiny;
f42c2224 3855 int64_t roundIncrement, roundMask, roundBits;
158142c2 3856
a2f2d288 3857 roundingMode = status->float_rounding_mode;
158142c2
FB
3858 roundNearestEven = ( roundingMode == float_round_nearest_even );
3859 if ( roundingPrecision == 80 ) goto precision80;
3860 if ( roundingPrecision == 64 ) {
e9321124
AB
3861 roundIncrement = UINT64_C(0x0000000000000400);
3862 roundMask = UINT64_C(0x00000000000007FF);
158142c2
FB
3863 }
3864 else if ( roundingPrecision == 32 ) {
e9321124
AB
3865 roundIncrement = UINT64_C(0x0000008000000000);
3866 roundMask = UINT64_C(0x000000FFFFFFFFFF);
158142c2
FB
3867 }
3868 else {
3869 goto precision80;
3870 }
3871 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
3872 switch (roundingMode) {
3873 case float_round_nearest_even:
f9288a76 3874 case float_round_ties_away:
dc355b76
PM
3875 break;
3876 case float_round_to_zero:
3877 roundIncrement = 0;
3878 break;
3879 case float_round_up:
3880 roundIncrement = zSign ? 0 : roundMask;
3881 break;
3882 case float_round_down:
3883 roundIncrement = zSign ? roundMask : 0;
3884 break;
3885 default:
3886 abort();
158142c2
FB
3887 }
3888 roundBits = zSig0 & roundMask;
bb98fe42 3889 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
3890 if ( ( 0x7FFE < zExp )
3891 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
3892 ) {
3893 goto overflow;
3894 }
3895 if ( zExp <= 0 ) {
a2f2d288 3896 if (status->flush_to_zero) {
ff32e16e 3897 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3898 return packFloatx80(zSign, 0, 0);
3899 }
158142c2 3900 isTiny =
a2f2d288
PM
3901 (status->float_detect_tininess
3902 == float_tininess_before_rounding)
158142c2
FB
3903 || ( zExp < 0 )
3904 || ( zSig0 <= zSig0 + roundIncrement );
3905 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
3906 zExp = 0;
3907 roundBits = zSig0 & roundMask;
ff32e16e
PM
3908 if (isTiny && roundBits) {
3909 float_raise(float_flag_underflow, status);
3910 }
a2f2d288
PM
3911 if (roundBits) {
3912 status->float_exception_flags |= float_flag_inexact;
3913 }
158142c2 3914 zSig0 += roundIncrement;
bb98fe42 3915 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
3916 roundIncrement = roundMask + 1;
3917 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3918 roundMask |= roundIncrement;
3919 }
3920 zSig0 &= ~ roundMask;
3921 return packFloatx80( zSign, zExp, zSig0 );
3922 }
3923 }
a2f2d288
PM
3924 if (roundBits) {
3925 status->float_exception_flags |= float_flag_inexact;
3926 }
158142c2
FB
3927 zSig0 += roundIncrement;
3928 if ( zSig0 < roundIncrement ) {
3929 ++zExp;
e9321124 3930 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
3931 }
3932 roundIncrement = roundMask + 1;
3933 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3934 roundMask |= roundIncrement;
3935 }
3936 zSig0 &= ~ roundMask;
3937 if ( zSig0 == 0 ) zExp = 0;
3938 return packFloatx80( zSign, zExp, zSig0 );
3939 precision80:
dc355b76
PM
3940 switch (roundingMode) {
3941 case float_round_nearest_even:
f9288a76 3942 case float_round_ties_away:
dc355b76
PM
3943 increment = ((int64_t)zSig1 < 0);
3944 break;
3945 case float_round_to_zero:
3946 increment = 0;
3947 break;
3948 case float_round_up:
3949 increment = !zSign && zSig1;
3950 break;
3951 case float_round_down:
3952 increment = zSign && zSig1;
3953 break;
3954 default:
3955 abort();
158142c2 3956 }
bb98fe42 3957 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
3958 if ( ( 0x7FFE < zExp )
3959 || ( ( zExp == 0x7FFE )
e9321124 3960 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
3961 && increment
3962 )
3963 ) {
3964 roundMask = 0;
3965 overflow:
ff32e16e 3966 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
3967 if ( ( roundingMode == float_round_to_zero )
3968 || ( zSign && ( roundingMode == float_round_up ) )
3969 || ( ! zSign && ( roundingMode == float_round_down ) )
3970 ) {
3971 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
3972 }
0f605c88
LV
3973 return packFloatx80(zSign,
3974 floatx80_infinity_high,
3975 floatx80_infinity_low);
158142c2
FB
3976 }
3977 if ( zExp <= 0 ) {
3978 isTiny =
a2f2d288
PM
3979 (status->float_detect_tininess
3980 == float_tininess_before_rounding)
158142c2
FB
3981 || ( zExp < 0 )
3982 || ! increment
e9321124 3983 || ( zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF) );
158142c2
FB
3984 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
3985 zExp = 0;
ff32e16e
PM
3986 if (isTiny && zSig1) {
3987 float_raise(float_flag_underflow, status);
3988 }
a2f2d288
PM
3989 if (zSig1) {
3990 status->float_exception_flags |= float_flag_inexact;
3991 }
dc355b76
PM
3992 switch (roundingMode) {
3993 case float_round_nearest_even:
f9288a76 3994 case float_round_ties_away:
dc355b76
PM
3995 increment = ((int64_t)zSig1 < 0);
3996 break;
3997 case float_round_to_zero:
3998 increment = 0;
3999 break;
4000 case float_round_up:
4001 increment = !zSign && zSig1;
4002 break;
4003 case float_round_down:
4004 increment = zSign && zSig1;
4005 break;
4006 default:
4007 abort();
158142c2
FB
4008 }
4009 if ( increment ) {
4010 ++zSig0;
4011 zSig0 &=
bb98fe42
AF
4012 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
4013 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4014 }
4015 return packFloatx80( zSign, zExp, zSig0 );
4016 }
4017 }
a2f2d288
PM
4018 if (zSig1) {
4019 status->float_exception_flags |= float_flag_inexact;
4020 }
158142c2
FB
4021 if ( increment ) {
4022 ++zSig0;
4023 if ( zSig0 == 0 ) {
4024 ++zExp;
e9321124 4025 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4026 }
4027 else {
bb98fe42 4028 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
4029 }
4030 }
4031 else {
4032 if ( zSig0 == 0 ) zExp = 0;
4033 }
4034 return packFloatx80( zSign, zExp, zSig0 );
4035
4036}
4037
4038/*----------------------------------------------------------------------------
4039| Takes an abstract floating-point value having sign `zSign', exponent
4040| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4041| and returns the proper extended double-precision floating-point value
4042| corresponding to the abstract input. This routine is just like
4043| `roundAndPackFloatx80' except that the input significand does not have to be
4044| normalized.
4045*----------------------------------------------------------------------------*/
4046
88857aca
LV
4047floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4048 flag zSign, int32_t zExp,
4049 uint64_t zSig0, uint64_t zSig1,
4050 float_status *status)
158142c2 4051{
8f506c70 4052 int8_t shiftCount;
158142c2
FB
4053
4054 if ( zSig0 == 0 ) {
4055 zSig0 = zSig1;
4056 zSig1 = 0;
4057 zExp -= 64;
4058 }
0019d5c3 4059 shiftCount = clz64(zSig0);
158142c2
FB
4060 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4061 zExp -= shiftCount;
ff32e16e
PM
4062 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4063 zSig0, zSig1, status);
158142c2
FB
4064
4065}
4066
158142c2
FB
4067/*----------------------------------------------------------------------------
4068| Returns the least-significant 64 fraction bits of the quadruple-precision
4069| floating-point value `a'.
4070*----------------------------------------------------------------------------*/
4071
a49db98d 4072static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4073{
4074
4075 return a.low;
4076
4077}
4078
4079/*----------------------------------------------------------------------------
4080| Returns the most-significant 48 fraction bits of the quadruple-precision
4081| floating-point value `a'.
4082*----------------------------------------------------------------------------*/
4083
a49db98d 4084static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4085{
4086
e9321124 4087 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
158142c2
FB
4088
4089}
4090
4091/*----------------------------------------------------------------------------
4092| Returns the exponent bits of the quadruple-precision floating-point value
4093| `a'.
4094*----------------------------------------------------------------------------*/
4095
f4014512 4096static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4097{
4098
4099 return ( a.high>>48 ) & 0x7FFF;
4100
4101}
4102
4103/*----------------------------------------------------------------------------
4104| Returns the sign bit of the quadruple-precision floating-point value `a'.
4105*----------------------------------------------------------------------------*/
4106
a49db98d 4107static inline flag extractFloat128Sign( float128 a )
158142c2
FB
4108{
4109
4110 return a.high>>63;
4111
4112}
4113
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision significand `aSig0':`aSig1'.
| The significand is shifted so that its leading 1 lands 15 bits below the
| top of the high word; the high part is written to `*zSig0Ptr', the low
| 64 bits to `*zSig1Ptr', and the matching exponent to `*zExpPtr'.
*----------------------------------------------------------------------------*/

static void normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                                       int32_t *zExpPtr,
                                       uint64_t *zSig0Ptr,
                                       uint64_t *zSig1Ptr)
{
    int8_t shiftCount;

    if (aSig0 != 0) {
        /* Leading 1 is already in the high word: plain 128-bit left shift. */
        shiftCount = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shiftCount;
        return;
    }

    /* High word empty: the low word supplies the whole significand. */
    shiftCount = clz64(aSig1) - 15;
    if (shiftCount < 0) {
        /* Low word has more than 49 significant bits; split it in two. */
        *zSig0Ptr = aSig1 >> (-shiftCount);
        *zSig1Ptr = aSig1 << (shiftCount & 63);
    } else {
        *zSig0Ptr = aSig1 << shiftCount;
        *zSig1Ptr = 0;
    }
    *zExpPtr = -shiftCount - 63;
}
4154
4155/*----------------------------------------------------------------------------
4156| Packs the sign `zSign', the exponent `zExp', and the significand formed
4157| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4158| floating-point value, returning the result. After being shifted into the
4159| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4160| added together to form the most significant 32 bits of the result. This
4161| means that any integer portion of `zSig0' will be added into the exponent.
4162| Since a properly normalized significand will have an integer portion equal
4163| to 1, the `zExp' input should be 1 less than the desired result exponent
4164| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4165| significand.
4166*----------------------------------------------------------------------------*/
4167
a49db98d 4168static inline float128
f4014512 4169 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
158142c2
FB
4170{
4171 float128 z;
4172
4173 z.low = zSig1;
bb98fe42 4174 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
158142c2
FB
4175 return z;
4176
4177}
4178
4179/*----------------------------------------------------------------------------
4180| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4181| and extended significand formed by the concatenation of `zSig0', `zSig1',
4182| and `zSig2', and returns the proper quadruple-precision floating-point value
4183| corresponding to the abstract input. Ordinarily, the abstract value is
4184| simply rounded and packed into the quadruple-precision format, with the
4185| inexact exception raised if the abstract input cannot be represented
4186| exactly. However, if the abstract value is too large, the overflow and
4187| inexact exceptions are raised and an infinity or maximal finite value is
4188| returned. If the abstract value is too small, the input value is rounded to
4189| a subnormal number, and the underflow and inexact exceptions are raised if
4190| the abstract input cannot be represented exactly as a subnormal quadruple-
4191| precision floating-point number.
4192| The input significand must be normalized or smaller. If the input
4193| significand is not normalized, `zExp' must be 0; in that case, the result
4194| returned is a subnormal number, and it must not require rounding. In the
4195| usual case that the input significand is normalized, `zExp' must be 1 less
4196| than the ``true'' floating-point exponent. The handling of underflow and
4197| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4198*----------------------------------------------------------------------------*/
4199
f4014512 4200static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
4201 uint64_t zSig0, uint64_t zSig1,
4202 uint64_t zSig2, float_status *status)
158142c2 4203{
8f506c70 4204 int8_t roundingMode;
158142c2
FB
4205 flag roundNearestEven, increment, isTiny;
4206
a2f2d288 4207 roundingMode = status->float_rounding_mode;
158142c2 4208 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4209 switch (roundingMode) {
4210 case float_round_nearest_even:
f9288a76 4211 case float_round_ties_away:
dc355b76
PM
4212 increment = ((int64_t)zSig2 < 0);
4213 break;
4214 case float_round_to_zero:
4215 increment = 0;
4216 break;
4217 case float_round_up:
4218 increment = !zSign && zSig2;
4219 break;
4220 case float_round_down:
4221 increment = zSign && zSig2;
4222 break;
9ee6f678
BR
4223 case float_round_to_odd:
4224 increment = !(zSig1 & 0x1) && zSig2;
4225 break;
dc355b76
PM
4226 default:
4227 abort();
158142c2 4228 }
bb98fe42 4229 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
4230 if ( ( 0x7FFD < zExp )
4231 || ( ( zExp == 0x7FFD )
4232 && eq128(
e9321124
AB
4233 UINT64_C(0x0001FFFFFFFFFFFF),
4234 UINT64_C(0xFFFFFFFFFFFFFFFF),
158142c2
FB
4235 zSig0,
4236 zSig1
4237 )
4238 && increment
4239 )
4240 ) {
ff32e16e 4241 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4242 if ( ( roundingMode == float_round_to_zero )
4243 || ( zSign && ( roundingMode == float_round_up ) )
4244 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4245 || (roundingMode == float_round_to_odd)
158142c2
FB
4246 ) {
4247 return
4248 packFloat128(
4249 zSign,
4250 0x7FFE,
e9321124
AB
4251 UINT64_C(0x0000FFFFFFFFFFFF),
4252 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
4253 );
4254 }
4255 return packFloat128( zSign, 0x7FFF, 0, 0 );
4256 }
4257 if ( zExp < 0 ) {
a2f2d288 4258 if (status->flush_to_zero) {
ff32e16e 4259 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4260 return packFloat128(zSign, 0, 0, 0);
4261 }
158142c2 4262 isTiny =
a2f2d288
PM
4263 (status->float_detect_tininess
4264 == float_tininess_before_rounding)
158142c2
FB
4265 || ( zExp < -1 )
4266 || ! increment
4267 || lt128(
4268 zSig0,
4269 zSig1,
e9321124
AB
4270 UINT64_C(0x0001FFFFFFFFFFFF),
4271 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
4272 );
4273 shift128ExtraRightJamming(
4274 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4275 zExp = 0;
ff32e16e
PM
4276 if (isTiny && zSig2) {
4277 float_raise(float_flag_underflow, status);
4278 }
dc355b76
PM
4279 switch (roundingMode) {
4280 case float_round_nearest_even:
f9288a76 4281 case float_round_ties_away:
dc355b76
PM
4282 increment = ((int64_t)zSig2 < 0);
4283 break;
4284 case float_round_to_zero:
4285 increment = 0;
4286 break;
4287 case float_round_up:
4288 increment = !zSign && zSig2;
4289 break;
4290 case float_round_down:
4291 increment = zSign && zSig2;
4292 break;
9ee6f678
BR
4293 case float_round_to_odd:
4294 increment = !(zSig1 & 0x1) && zSig2;
4295 break;
dc355b76
PM
4296 default:
4297 abort();
158142c2
FB
4298 }
4299 }
4300 }
a2f2d288
PM
4301 if (zSig2) {
4302 status->float_exception_flags |= float_flag_inexact;
4303 }
158142c2
FB
4304 if ( increment ) {
4305 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
4306 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
4307 }
4308 else {
4309 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4310 }
4311 return packFloat128( zSign, zExp, zSig0, zSig1 );
4312
4313}
4314
4315/*----------------------------------------------------------------------------
4316| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4317| and significand formed by the concatenation of `zSig0' and `zSig1', and
4318| returns the proper quadruple-precision floating-point value corresponding
4319| to the abstract input. This routine is just like `roundAndPackFloat128'
4320| except that the input significand has fewer bits and does not have to be
4321| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4322| point exponent.
4323*----------------------------------------------------------------------------*/
4324
f4014512 4325static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
4326 uint64_t zSig0, uint64_t zSig1,
4327 float_status *status)
158142c2 4328{
8f506c70 4329 int8_t shiftCount;
bb98fe42 4330 uint64_t zSig2;
158142c2
FB
4331
4332 if ( zSig0 == 0 ) {
4333 zSig0 = zSig1;
4334 zSig1 = 0;
4335 zExp -= 64;
4336 }
0019d5c3 4337 shiftCount = clz64(zSig0) - 15;
158142c2
FB
4338 if ( 0 <= shiftCount ) {
4339 zSig2 = 0;
4340 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4341 }
4342 else {
4343 shift128ExtraRightJamming(
4344 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4345 }
4346 zExp -= shiftCount;
ff32e16e 4347 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
4348
4349}
4350
158142c2 4351
158142c2
FB
4352/*----------------------------------------------------------------------------
4353| Returns the result of converting the 32-bit two's complement integer `a'
4354| to the extended double-precision floating-point format. The conversion
4355| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4356| Arithmetic.
4357*----------------------------------------------------------------------------*/
4358
e5a41ffa 4359floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2
FB
4360{
4361 flag zSign;
3a87d009 4362 uint32_t absA;
8f506c70 4363 int8_t shiftCount;
bb98fe42 4364 uint64_t zSig;
158142c2
FB
4365
4366 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4367 zSign = ( a < 0 );
4368 absA = zSign ? - a : a;
0019d5c3 4369 shiftCount = clz32(absA) + 32;
158142c2
FB
4370 zSig = absA;
4371 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4372
4373}
4374
158142c2
FB
4375/*----------------------------------------------------------------------------
4376| Returns the result of converting the 32-bit two's complement integer `a' to
4377| the quadruple-precision floating-point format. The conversion is performed
4378| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4379*----------------------------------------------------------------------------*/
4380
e5a41ffa 4381float128 int32_to_float128(int32_t a, float_status *status)
158142c2
FB
4382{
4383 flag zSign;
3a87d009 4384 uint32_t absA;
8f506c70 4385 int8_t shiftCount;
bb98fe42 4386 uint64_t zSig0;
158142c2
FB
4387
4388 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4389 zSign = ( a < 0 );
4390 absA = zSign ? - a : a;
0019d5c3 4391 shiftCount = clz32(absA) + 17;
158142c2
FB
4392 zSig0 = absA;
4393 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
4394
4395}
4396
158142c2
FB
4397/*----------------------------------------------------------------------------
4398| Returns the result of converting the 64-bit two's complement integer `a'
4399| to the extended double-precision floating-point format. The conversion
4400| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4401| Arithmetic.
4402*----------------------------------------------------------------------------*/
4403
e5a41ffa 4404floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2
FB
4405{
4406 flag zSign;
182f42fd 4407 uint64_t absA;
8f506c70 4408 int8_t shiftCount;
158142c2
FB
4409
4410 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4411 zSign = ( a < 0 );
4412 absA = zSign ? - a : a;
0019d5c3 4413 shiftCount = clz64(absA);
158142c2
FB
4414 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4415
4416}
4417
158142c2
FB
4418/*----------------------------------------------------------------------------
4419| Returns the result of converting the 64-bit two's complement integer `a' to
4420| the quadruple-precision floating-point format. The conversion is performed
4421| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4422*----------------------------------------------------------------------------*/
4423
e5a41ffa 4424float128 int64_to_float128(int64_t a, float_status *status)
158142c2
FB
4425{
4426 flag zSign;
182f42fd 4427 uint64_t absA;
8f506c70 4428 int8_t shiftCount;
f4014512 4429 int32_t zExp;
bb98fe42 4430 uint64_t zSig0, zSig1;
158142c2
FB
4431
4432 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4433 zSign = ( a < 0 );
4434 absA = zSign ? - a : a;
0019d5c3 4435 shiftCount = clz64(absA) + 49;
158142c2
FB
4436 zExp = 0x406E - shiftCount;
4437 if ( 64 <= shiftCount ) {
4438 zSig1 = 0;
4439 zSig0 = absA;
4440 shiftCount -= 64;
4441 }
4442 else {
4443 zSig1 = absA;
4444 zSig0 = 0;
4445 }
4446 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4447 return packFloat128( zSign, zExp, zSig0, zSig1 );
4448
4449}
4450
6bb8e0f1
PM
4451/*----------------------------------------------------------------------------
4452| Returns the result of converting the 64-bit unsigned integer `a'
4453| to the quadruple-precision floating-point format. The conversion is performed
4454| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4455*----------------------------------------------------------------------------*/
4456
e5a41ffa 4457float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
4458{
4459 if (a == 0) {
4460 return float128_zero;
4461 }
6603d506 4462 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
4463}
4464
158142c2
FB
4465/*----------------------------------------------------------------------------
4466| Returns the result of converting the single-precision floating-point value
4467| `a' to the extended double-precision floating-point format. The conversion
4468| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4469| Arithmetic.
4470*----------------------------------------------------------------------------*/
4471
e5a41ffa 4472floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2
FB
4473{
4474 flag aSign;
0c48262d 4475 int aExp;
bb98fe42 4476 uint32_t aSig;
158142c2 4477
ff32e16e 4478 a = float32_squash_input_denormal(a, status);
158142c2
FB
4479 aSig = extractFloat32Frac( a );
4480 aExp = extractFloat32Exp( a );
4481 aSign = extractFloat32Sign( a );
4482 if ( aExp == 0xFF ) {
ff32e16e
PM
4483 if (aSig) {
4484 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
4485 }
0f605c88
LV
4486 return packFloatx80(aSign,
4487 floatx80_infinity_high,
4488 floatx80_infinity_low);
158142c2
FB
4489 }
4490 if ( aExp == 0 ) {
4491 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4492 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4493 }
4494 aSig |= 0x00800000;
bb98fe42 4495 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
4496
4497}
4498
158142c2
FB
4499/*----------------------------------------------------------------------------
4500| Returns the result of converting the single-precision floating-point value
4501| `a' to the double-precision floating-point format. The conversion is
4502| performed according to the IEC/IEEE Standard for Binary Floating-Point
4503| Arithmetic.
4504*----------------------------------------------------------------------------*/
4505
e5a41ffa 4506float128 float32_to_float128(float32 a, float_status *status)
158142c2
FB
4507{
4508 flag aSign;
0c48262d 4509 int aExp;
bb98fe42 4510 uint32_t aSig;
158142c2 4511
ff32e16e 4512 a = float32_squash_input_denormal(a, status);
158142c2
FB
4513 aSig = extractFloat32Frac( a );
4514 aExp = extractFloat32Exp( a );
4515 aSign = extractFloat32Sign( a );
4516 if ( aExp == 0xFF ) {
ff32e16e
PM
4517 if (aSig) {
4518 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
4519 }
158142c2
FB
4520 return packFloat128( aSign, 0x7FFF, 0, 0 );
4521 }
4522 if ( aExp == 0 ) {
4523 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4524 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4525 --aExp;
4526 }
bb98fe42 4527 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
4528
4529}
4530
158142c2
FB
4531/*----------------------------------------------------------------------------
4532| Returns the remainder of the single-precision floating-point value `a'
4533| with respect to the corresponding value `b'. The operation is performed
4534| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4535*----------------------------------------------------------------------------*/
4536
e5a41ffa 4537float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 4538{
ed086f3d 4539 flag aSign, zSign;
0c48262d 4540 int aExp, bExp, expDiff;
bb98fe42
AF
4541 uint32_t aSig, bSig;
4542 uint32_t q;
4543 uint64_t aSig64, bSig64, q64;
4544 uint32_t alternateASig;
4545 int32_t sigMean;
ff32e16e
PM
4546 a = float32_squash_input_denormal(a, status);
4547 b = float32_squash_input_denormal(b, status);
158142c2
FB
4548
4549 aSig = extractFloat32Frac( a );
4550 aExp = extractFloat32Exp( a );
4551 aSign = extractFloat32Sign( a );
4552 bSig = extractFloat32Frac( b );
4553 bExp = extractFloat32Exp( b );
158142c2
FB
4554 if ( aExp == 0xFF ) {
4555 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 4556 return propagateFloat32NaN(a, b, status);
158142c2 4557 }
ff32e16e 4558 float_raise(float_flag_invalid, status);
af39bc8c 4559 return float32_default_nan(status);
158142c2
FB
4560 }
4561 if ( bExp == 0xFF ) {
ff32e16e
PM
4562 if (bSig) {
4563 return propagateFloat32NaN(a, b, status);
4564 }
158142c2
FB
4565 return a;
4566 }
4567 if ( bExp == 0 ) {
4568 if ( bSig == 0 ) {
ff32e16e 4569 float_raise(float_flag_invalid, status);
af39bc8c 4570 return float32_default_nan(status);
158142c2
FB
4571 }
4572 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
4573 }
4574 if ( aExp == 0 ) {
4575 if ( aSig == 0 ) return a;
4576 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4577 }
4578 expDiff = aExp - bExp;
4579 aSig |= 0x00800000;
4580 bSig |= 0x00800000;
4581 if ( expDiff < 32 ) {
4582 aSig <<= 8;
4583 bSig <<= 8;
4584 if ( expDiff < 0 ) {
4585 if ( expDiff < -1 ) return a;
4586 aSig >>= 1;
4587 }
4588 q = ( bSig <= aSig );
4589 if ( q ) aSig -= bSig;
4590 if ( 0 < expDiff ) {
bb98fe42 4591 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
4592 q >>= 32 - expDiff;
4593 bSig >>= 2;
4594 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4595 }
4596 else {
4597 aSig >>= 2;
4598 bSig >>= 2;
4599 }
4600 }
4601 else {
4602 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
4603 aSig64 = ( (uint64_t) aSig )<<40;
4604 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
4605 expDiff -= 64;
4606 while ( 0 < expDiff ) {
4607 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
4608 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
4609 aSig64 = - ( ( bSig * q64 )<<38 );
4610 expDiff -= 62;
4611 }
4612 expDiff += 64;
4613 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
4614 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
4615 q = q64>>( 64 - expDiff );
4616 bSig <<= 6;
4617 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
4618 }
4619 do {
4620 alternateASig = aSig;
4621 ++q;
4622 aSig -= bSig;
bb98fe42 4623 } while ( 0 <= (int32_t) aSig );
158142c2
FB
4624 sigMean = aSig + alternateASig;
4625 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4626 aSig = alternateASig;
4627 }
bb98fe42 4628 zSign = ( (int32_t) aSig < 0 );
158142c2 4629 if ( zSign ) aSig = - aSig;
ff32e16e 4630 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
4631}
4632
369be8f6 4633
158142c2 4634
8229c991
AJ
4635/*----------------------------------------------------------------------------
4636| Returns the binary exponential of the single-precision floating-point value
4637| `a'. The operation is performed according to the IEC/IEEE Standard for
4638| Binary Floating-Point Arithmetic.
4639|
4640| Uses the following identities:
4641|
4642| 1. -------------------------------------------------------------------------
4643| x x*ln(2)
4644| 2 = e
4645|
4646| 2. -------------------------------------------------------------------------
4647| 2 3 4 5 n
4648| x x x x x x x
4649| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
4650| 1! 2! 3! 4! 5! n!
4651*----------------------------------------------------------------------------*/
4652
4653static const float64 float32_exp2_coefficients[15] =
4654{
d5138cf4
PM
4655 const_float64( 0x3ff0000000000000ll ), /* 1 */
4656 const_float64( 0x3fe0000000000000ll ), /* 2 */
4657 const_float64( 0x3fc5555555555555ll ), /* 3 */
4658 const_float64( 0x3fa5555555555555ll ), /* 4 */
4659 const_float64( 0x3f81111111111111ll ), /* 5 */
4660 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
4661 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
4662 const_float64( 0x3efa01a01a01a01all ), /* 8 */
4663 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
4664 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
4665 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
4666 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
4667 const_float64( 0x3de6124613a86d09ll ), /* 13 */
4668 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
4669 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
4670};
4671
e5a41ffa 4672float32 float32_exp2(float32 a, float_status *status)
8229c991
AJ
4673{
4674 flag aSign;
0c48262d 4675 int aExp;
bb98fe42 4676 uint32_t aSig;
8229c991
AJ
4677 float64 r, x, xn;
4678 int i;
ff32e16e 4679 a = float32_squash_input_denormal(a, status);
8229c991
AJ
4680
4681 aSig = extractFloat32Frac( a );
4682 aExp = extractFloat32Exp( a );
4683 aSign = extractFloat32Sign( a );
4684
4685 if ( aExp == 0xFF) {
ff32e16e
PM
4686 if (aSig) {
4687 return propagateFloat32NaN(a, float32_zero, status);
4688 }
8229c991
AJ
4689 return (aSign) ? float32_zero : a;
4690 }
4691 if (aExp == 0) {
4692 if (aSig == 0) return float32_one;
4693 }
4694
ff32e16e 4695 float_raise(float_flag_inexact, status);
8229c991
AJ
4696
4697 /* ******************************* */
4698 /* using float64 for approximation */
4699 /* ******************************* */
ff32e16e
PM
4700 x = float32_to_float64(a, status);
4701 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
4702
4703 xn = x;
4704 r = float64_one;
4705 for (i = 0 ; i < 15 ; i++) {
4706 float64 f;
4707
ff32e16e
PM
4708 f = float64_mul(xn, float32_exp2_coefficients[i], status);
4709 r = float64_add(r, f, status);
8229c991 4710
ff32e16e 4711 xn = float64_mul(xn, x, status);
8229c991
AJ
4712 }
4713
4714 return float64_to_float32(r, status);
4715}
4716
374dfc33
AJ
4717/*----------------------------------------------------------------------------
4718| Returns the binary log of the single-precision floating-point value `a'.
4719| The operation is performed according to the IEC/IEEE Standard for Binary
4720| Floating-Point Arithmetic.
4721*----------------------------------------------------------------------------*/
e5a41ffa 4722float32 float32_log2(float32 a, float_status *status)
374dfc33
AJ
4723{
4724 flag aSign, zSign;
0c48262d 4725 int aExp;
bb98fe42 4726 uint32_t aSig, zSig, i;
374dfc33 4727
ff32e16e 4728 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
4729 aSig = extractFloat32Frac( a );
4730 aExp = extractFloat32Exp( a );
4731 aSign = extractFloat32Sign( a );
4732
4733 if ( aExp == 0 ) {
4734 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
4735 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4736 }
4737 if ( aSign ) {
ff32e16e 4738 float_raise(float_flag_invalid, status);
af39bc8c 4739 return float32_default_nan(status);
374dfc33
AJ
4740 }
4741 if ( aExp == 0xFF ) {
ff32e16e
PM
4742 if (aSig) {
4743 return propagateFloat32NaN(a, float32_zero, status);
4744 }
374dfc33
AJ
4745 return a;
4746 }
4747
4748 aExp -= 0x7F;
4749 aSig |= 0x00800000;
4750 zSign = aExp < 0;
4751 zSig = aExp << 23;
4752
4753 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 4754 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
4755 if ( aSig & 0x01000000 ) {
4756 aSig >>= 1;
4757 zSig |= i;
4758 }
4759 }
4760
4761 if ( zSign )
4762 zSig = -zSig;
4763
ff32e16e 4764 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
4765}
4766
158142c2
FB
4767/*----------------------------------------------------------------------------
4768| Returns 1 if the single-precision floating-point value `a' is equal to
b689362d
AJ
4769| the corresponding value `b', and 0 otherwise. The invalid exception is
4770| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
4771| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4772*----------------------------------------------------------------------------*/
4773
e5a41ffa 4774int float32_eq(float32 a, float32 b, float_status *status)
158142c2 4775{
b689362d 4776 uint32_t av, bv;
ff32e16e
PM
4777 a = float32_squash_input_denormal(a, status);
4778 b = float32_squash_input_denormal(b, status);
158142c2
FB
4779
4780 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4781 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4782 ) {
ff32e16e 4783 float_raise(float_flag_invalid, status);
158142c2
FB
4784 return 0;
4785 }
b689362d
AJ
4786 av = float32_val(a);
4787 bv = float32_val(b);
4788 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4789}
4790
4791/*----------------------------------------------------------------------------
4792| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
4793| or equal to the corresponding value `b', and 0 otherwise. The invalid
4794| exception is raised if either operand is a NaN. The comparison is performed
4795| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4796*----------------------------------------------------------------------------*/
4797
e5a41ffa 4798int float32_le(float32 a, float32 b, float_status *status)
158142c2
FB
4799{
4800 flag aSign, bSign;
bb98fe42 4801 uint32_t av, bv;
ff32e16e
PM
4802 a = float32_squash_input_denormal(a, status);
4803 b = float32_squash_input_denormal(b, status);
158142c2
FB
4804
4805 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4806 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4807 ) {
ff32e16e 4808 float_raise(float_flag_invalid, status);
158142c2
FB
4809 return 0;
4810 }
4811 aSign = extractFloat32Sign( a );
4812 bSign = extractFloat32Sign( b );
f090c9d4
PB
4813 av = float32_val(a);
4814 bv = float32_val(b);
bb98fe42 4815 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4816 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4817
4818}
4819
4820/*----------------------------------------------------------------------------
4821| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
4822| the corresponding value `b', and 0 otherwise. The invalid exception is
4823| raised if either operand is a NaN. The comparison is performed according
4824| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4825*----------------------------------------------------------------------------*/
4826
e5a41ffa 4827int float32_lt(float32 a, float32 b, float_status *status)
158142c2
FB
4828{
4829 flag aSign, bSign;
bb98fe42 4830 uint32_t av, bv;
ff32e16e
PM
4831 a = float32_squash_input_denormal(a, status);
4832 b = float32_squash_input_denormal(b, status);
158142c2
FB
4833
4834 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4835 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4836 ) {
ff32e16e 4837 float_raise(float_flag_invalid, status);
158142c2
FB
4838 return 0;
4839 }
4840 aSign = extractFloat32Sign( a );
4841 bSign = extractFloat32Sign( b );
f090c9d4
PB
4842 av = float32_val(a);
4843 bv = float32_val(b);
bb98fe42 4844 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4845 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4846
4847}
4848
67b7861d
AJ
4849/*----------------------------------------------------------------------------
4850| Returns 1 if the single-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4851| be compared, and 0 otherwise. The invalid exception is raised if either
4852| operand is a NaN. The comparison is performed according to the IEC/IEEE
4853| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4854*----------------------------------------------------------------------------*/
4855
e5a41ffa 4856int float32_unordered(float32 a, float32 b, float_status *status)
67b7861d 4857{
ff32e16e
PM
4858 a = float32_squash_input_denormal(a, status);
4859 b = float32_squash_input_denormal(b, status);
67b7861d
AJ
4860
4861 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4862 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4863 ) {
ff32e16e 4864 float_raise(float_flag_invalid, status);
67b7861d
AJ
4865 return 1;
4866 }
4867 return 0;
4868}
b689362d 4869
158142c2
FB
4870/*----------------------------------------------------------------------------
4871| Returns 1 if the single-precision floating-point value `a' is equal to
f5a64251
AJ
4872| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4873| exception. The comparison is performed according to the IEC/IEEE Standard
4874| for Binary Floating-Point Arithmetic.
158142c2
FB
4875*----------------------------------------------------------------------------*/
4876
e5a41ffa 4877int float32_eq_quiet(float32 a, float32 b, float_status *status)
158142c2 4878{
ff32e16e
PM
4879 a = float32_squash_input_denormal(a, status);
4880 b = float32_squash_input_denormal(b, status);
158142c2
FB
4881
4882 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4883 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4884 ) {
af39bc8c
AM
4885 if (float32_is_signaling_nan(a, status)
4886 || float32_is_signaling_nan(b, status)) {
ff32e16e 4887 float_raise(float_flag_invalid, status);
b689362d 4888 }
158142c2
FB
4889 return 0;
4890 }
b689362d
AJ
4891 return ( float32_val(a) == float32_val(b) ) ||
4892 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
158142c2
FB
4893}
4894
4895/*----------------------------------------------------------------------------
4896| Returns 1 if the single-precision floating-point value `a' is less than or
4897| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4898| cause an exception. Otherwise, the comparison is performed according to the
4899| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4900*----------------------------------------------------------------------------*/
4901
e5a41ffa 4902int float32_le_quiet(float32 a, float32 b, float_status *status)
158142c2
FB
4903{
4904 flag aSign, bSign;
bb98fe42 4905 uint32_t av, bv;
ff32e16e
PM
4906 a = float32_squash_input_denormal(a, status);
4907 b = float32_squash_input_denormal(b, status);
158142c2
FB
4908
4909 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4910 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4911 ) {
af39bc8c
AM
4912 if (float32_is_signaling_nan(a, status)
4913 || float32_is_signaling_nan(b, status)) {
ff32e16e 4914 float_raise(float_flag_invalid, status);
158142c2
FB
4915 }
4916 return 0;
4917 }
4918 aSign = extractFloat32Sign( a );
4919 bSign = extractFloat32Sign( b );
f090c9d4
PB
4920 av = float32_val(a);
4921 bv = float32_val(b);
bb98fe42 4922 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4923 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4924
4925}
4926
4927/*----------------------------------------------------------------------------
4928| Returns 1 if the single-precision floating-point value `a' is less than
4929| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4930| exception. Otherwise, the comparison is performed according to the IEC/IEEE
ab52f973 4931| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4932*----------------------------------------------------------------------------*/
4933
ab52f973 4934int float32_lt_quiet(float32 a, float32 b, float_status *status)
158142c2 4935{
ab52f973
AB
4936 flag aSign, bSign;
4937 uint32_t av, bv;
4938 a = float32_squash_input_denormal(a, status);
4939 b = float32_squash_input_denormal(b, status);
158142c2 4940
ab52f973
AB
4941 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4942 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4943 ) {
4944 if (float32_is_signaling_nan(a, status)
4945 || float32_is_signaling_nan(b, status)) {
ff32e16e 4946 float_raise(float_flag_invalid, status);
158142c2 4947 }
ab52f973 4948 return 0;
158142c2 4949 }
ab52f973
AB
4950 aSign = extractFloat32Sign( a );
4951 bSign = extractFloat32Sign( b );
4952 av = float32_val(a);
4953 bv = float32_val(b);
4954 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
4955 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4956
4957}
4958
4959/*----------------------------------------------------------------------------
ab52f973
AB
4960| Returns 1 if the single-precision floating-point values `a' and `b' cannot
4961| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4962| comparison is performed according to the IEC/IEEE Standard for Binary
4963| Floating-Point Arithmetic.
158142c2
FB
4964*----------------------------------------------------------------------------*/
4965
ab52f973 4966int float32_unordered_quiet(float32 a, float32 b, float_status *status)
158142c2 4967{
ab52f973
AB
4968 a = float32_squash_input_denormal(a, status);
4969 b = float32_squash_input_denormal(b, status);
158142c2 4970
ab52f973
AB
4971 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4972 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4973 ) {
4974 if (float32_is_signaling_nan(a, status)
4975 || float32_is_signaling_nan(b, status)) {
4976 float_raise(float_flag_invalid, status);
158142c2 4977 }
ab52f973 4978 return 1;
158142c2 4979 }
ab52f973 4980 return 0;
158142c2
FB
4981}
4982
158142c2
FB
4983/*----------------------------------------------------------------------------
4984| Returns the result of converting the double-precision floating-point value
4985| `a' to the extended double-precision floating-point format. The conversion
4986| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4987| Arithmetic.
4988*----------------------------------------------------------------------------*/
4989
e5a41ffa 4990floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2
FB
4991{
4992 flag aSign;
0c48262d 4993 int aExp;
bb98fe42 4994 uint64_t aSig;
158142c2 4995
ff32e16e 4996 a = float64_squash_input_denormal(a, status);
158142c2
FB
4997 aSig = extractFloat64Frac( a );
4998 aExp = extractFloat64Exp( a );
4999 aSign = extractFloat64Sign( a );
5000 if ( aExp == 0x7FF ) {
ff32e16e
PM
5001 if (aSig) {
5002 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
5003 }
0f605c88
LV
5004 return packFloatx80(aSign,
5005 floatx80_infinity_high,
5006 floatx80_infinity_low);
158142c2
FB
5007 }
5008 if ( aExp == 0 ) {
5009 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5010 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5011 }
5012 return
5013 packFloatx80(
e9321124 5014 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
158142c2
FB
5015
5016}
5017
158142c2
FB
5018/*----------------------------------------------------------------------------
5019| Returns the result of converting the double-precision floating-point value
5020| `a' to the quadruple-precision floating-point format. The conversion is
5021| performed according to the IEC/IEEE Standard for Binary Floating-Point
5022| Arithmetic.
5023*----------------------------------------------------------------------------*/
5024
e5a41ffa 5025float128 float64_to_float128(float64 a, float_status *status)
158142c2
FB
5026{
5027 flag aSign;
0c48262d 5028 int aExp;
bb98fe42 5029 uint64_t aSig, zSig0, zSig1;
158142c2 5030
ff32e16e 5031 a = float64_squash_input_denormal(a, status);
158142c2
FB
5032 aSig = extractFloat64Frac( a );
5033 aExp = extractFloat64Exp( a );
5034 aSign = extractFloat64Sign( a );
5035 if ( aExp == 0x7FF ) {
ff32e16e
PM
5036 if (aSig) {
5037 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5038 }
158142c2
FB
5039 return packFloat128( aSign, 0x7FFF, 0, 0 );
5040 }
5041 if ( aExp == 0 ) {
5042 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5043 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5044 --aExp;
5045 }
5046 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
5047 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
5048
5049}
5050
158142c2
FB
5051
5052/*----------------------------------------------------------------------------
5053| Returns the remainder of the double-precision floating-point value `a'
5054| with respect to the corresponding value `b'. The operation is performed
5055| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5056*----------------------------------------------------------------------------*/
5057
e5a41ffa 5058float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5059{
ed086f3d 5060 flag aSign, zSign;
0c48262d 5061 int aExp, bExp, expDiff;
bb98fe42
AF
5062 uint64_t aSig, bSig;
5063 uint64_t q, alternateASig;
5064 int64_t sigMean;
158142c2 5065
ff32e16e
PM
5066 a = float64_squash_input_denormal(a, status);
5067 b = float64_squash_input_denormal(b, status);
158142c2
FB
5068 aSig = extractFloat64Frac( a );
5069 aExp = extractFloat64Exp( a );
5070 aSign = extractFloat64Sign( a );
5071 bSig = extractFloat64Frac( b );
5072 bExp = extractFloat64Exp( b );
158142c2
FB
5073 if ( aExp == 0x7FF ) {
5074 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5075 return propagateFloat64NaN(a, b, status);
158142c2 5076 }
ff32e16e 5077 float_raise(float_flag_invalid, status);
af39bc8c 5078 return float64_default_nan(status);
158142c2
FB
5079 }
5080 if ( bExp == 0x7FF ) {
ff32e16e
PM
5081 if (bSig) {
5082 return propagateFloat64NaN(a, b, status);
5083 }
158142c2
FB
5084 return a;
5085 }
5086 if ( bExp == 0 ) {
5087 if ( bSig == 0 ) {
ff32e16e 5088 float_raise(float_flag_invalid, status);
af39bc8c 5089 return float64_default_nan(status);
158142c2
FB
5090 }
5091 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5092 }
5093 if ( aExp == 0 ) {
5094 if ( aSig == 0 ) return a;
5095 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5096 }
5097 expDiff = aExp - bExp;
e9321124
AB
5098 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5099 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
158142c2
FB
5100 if ( expDiff < 0 ) {
5101 if ( expDiff < -1 ) return a;
5102 aSig >>= 1;
5103 }
5104 q = ( bSig <= aSig );
5105 if ( q ) aSig -= bSig;
5106 expDiff -= 64;
5107 while ( 0 < expDiff ) {
5108 q = estimateDiv128To64( aSig, 0, bSig );
5109 q = ( 2 < q ) ? q - 2 : 0;
5110 aSig = - ( ( bSig>>2 ) * q );
5111 expDiff -= 62;
5112 }
5113 expDiff += 64;
5114 if ( 0 < expDiff ) {
5115 q = estimateDiv128To64( aSig, 0, bSig );
5116 q = ( 2 < q ) ? q - 2 : 0;
5117 q >>= 64 - expDiff;
5118 bSig >>= 2;
5119 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5120 }
5121 else {
5122 aSig >>= 2;
5123 bSig >>= 2;
5124 }
5125 do {
5126 alternateASig = aSig;
5127 ++q;
5128 aSig -= bSig;
bb98fe42 5129 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5130 sigMean = aSig + alternateASig;
5131 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5132 aSig = alternateASig;
5133 }
bb98fe42 5134 zSign = ( (int64_t) aSig < 0 );
158142c2 5135 if ( zSign ) aSig = - aSig;
ff32e16e 5136 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5137
5138}
5139
374dfc33
AJ
5140/*----------------------------------------------------------------------------
5141| Returns the binary log of the double-precision floating-point value `a'.
5142| The operation is performed according to the IEC/IEEE Standard for Binary
5143| Floating-Point Arithmetic.
5144*----------------------------------------------------------------------------*/
e5a41ffa 5145float64 float64_log2(float64 a, float_status *status)
374dfc33
AJ
5146{
5147 flag aSign, zSign;
0c48262d 5148 int aExp;
bb98fe42 5149 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5150 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5151
5152 aSig = extractFloat64Frac( a );
5153 aExp = extractFloat64Exp( a );
5154 aSign = extractFloat64Sign( a );
5155
5156 if ( aExp == 0 ) {
5157 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5158 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5159 }
5160 if ( aSign ) {
ff32e16e 5161 float_raise(float_flag_invalid, status);
af39bc8c 5162 return float64_default_nan(status);
374dfc33
AJ
5163 }
5164 if ( aExp == 0x7FF ) {
ff32e16e
PM
5165 if (aSig) {
5166 return propagateFloat64NaN(a, float64_zero, status);
5167 }
374dfc33
AJ
5168 return a;
5169 }
5170
5171 aExp -= 0x3FF;
e9321124 5172 aSig |= UINT64_C(0x0010000000000000);
374dfc33 5173 zSign = aExp < 0;
bb98fe42 5174 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5175 for (i = 1LL << 51; i > 0; i >>= 1) {
5176 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5177 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
e9321124 5178 if ( aSig & UINT64_C(0x0020000000000000) ) {
374dfc33
AJ
5179 aSig >>= 1;
5180 zSig |= i;
5181 }
5182 }
5183
5184 if ( zSign )
5185 zSig = -zSig;
ff32e16e 5186 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5187}
5188
158142c2
FB
5189/*----------------------------------------------------------------------------
5190| Returns 1 if the double-precision floating-point value `a' is equal to the
b689362d
AJ
5191| corresponding value `b', and 0 otherwise. The invalid exception is raised
5192| if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
5193| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5194*----------------------------------------------------------------------------*/
5195
e5a41ffa 5196int float64_eq(float64 a, float64 b, float_status *status)
158142c2 5197{
bb98fe42 5198 uint64_t av, bv;
ff32e16e
PM
5199 a = float64_squash_input_denormal(a, status);
5200 b = float64_squash_input_denormal(b, status);
158142c2
FB
5201
5202 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5203 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5204 ) {
ff32e16e 5205 float_raise(float_flag_invalid, status);
158142c2
FB
5206 return 0;
5207 }
f090c9d4 5208 av = float64_val(a);
a1b91bb4 5209 bv = float64_val(b);
bb98fe42 5210 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
5211
5212}
5213
5214/*----------------------------------------------------------------------------
5215| Returns 1 if the double-precision floating-point value `a' is less than or
f5a64251
AJ
5216| equal to the corresponding value `b', and 0 otherwise. The invalid
5217| exception is raised if either operand is a NaN. The comparison is performed
5218| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5219*----------------------------------------------------------------------------*/
5220
e5a41ffa 5221int float64_le(float64 a, float64 b, float_status *status)
158142c2
FB
5222{
5223 flag aSign, bSign;
bb98fe42 5224 uint64_t av, bv;
ff32e16e
PM
5225 a = float64_squash_input_denormal(a, status);
5226 b = float64_squash_input_denormal(b, status);
158142c2
FB
5227
5228 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5229 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5230 ) {
ff32e16e 5231 float_raise(float_flag_invalid, status);
158142c2
FB
5232 return 0;
5233 }
5234 aSign = extractFloat64Sign( a );
5235 bSign = extractFloat64Sign( b );
f090c9d4 5236 av = float64_val(a);
a1b91bb4 5237 bv = float64_val(b);
bb98fe42 5238 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 5239 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
5240
5241}
5242
5243/*----------------------------------------------------------------------------
5244| Returns 1 if the double-precision floating-point value `a' is less than
f5a64251
AJ
5245| the corresponding value `b', and 0 otherwise. The invalid exception is
5246| raised if either operand is a NaN. The comparison is performed according
5247| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5248*----------------------------------------------------------------------------*/
5249
e5a41ffa 5250int float64_lt(float64 a, float64 b, float_status *status)
158142c2
FB
5251{
5252 flag aSign, bSign;
bb98fe42 5253 uint64_t av, bv;
158142c2 5254
ff32e16e
PM
5255 a = float64_squash_input_denormal(a, status);
5256 b = float64_squash_input_denormal(b, status);
158142c2
FB
5257 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5258 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5259 ) {
ff32e16e 5260 float_raise(float_flag_invalid, status);
158142c2
FB
5261 return 0;
5262 }
5263 aSign = extractFloat64Sign( a );
5264 bSign = extractFloat64Sign( b );
f090c9d4 5265 av = float64_val(a);
a1b91bb4 5266 bv = float64_val(b);
bb98fe42 5267 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 5268 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
5269
5270}
5271
67b7861d
AJ
5272/*----------------------------------------------------------------------------
5273| Returns 1 if the double-precision floating-point values `a' and `b' cannot
f5a64251
AJ
5274| be compared, and 0 otherwise. The invalid exception is raised if either
5275| operand is a NaN. The comparison is performed according to the IEC/IEEE
5276| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
5277*----------------------------------------------------------------------------*/
5278
e5a41ffa 5279int float64_unordered(float64 a, float64 b, float_status *status)
67b7861d 5280{
ff32e16e
PM
5281 a = float64_squash_input_denormal(a, status);
5282 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
5283
5284 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5285 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5286 ) {
ff32e16e 5287 float_raise(float_flag_invalid, status);
67b7861d
AJ
5288 return 1;
5289 }
5290 return 0;
5291}
5292
158142c2
FB
5293/*----------------------------------------------------------------------------
5294| Returns 1 if the double-precision floating-point value `a' is equal to the
f5a64251
AJ
5295| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
5296| exception.The comparison is performed according to the IEC/IEEE Standard
5297| for Binary Floating-Point Arithmetic.
158142c2
FB
5298*----------------------------------------------------------------------------*/
5299
e5a41ffa 5300int float64_eq_quiet(float64 a, float64 b, float_status *status)
158142c2 5301{
bb98fe42 5302 uint64_t av, bv;
ff32e16e
PM
5303 a = float64_squash_input_denormal(a, status);
5304 b = float64_squash_input_denormal(b, status);
158142c2
FB
5305
5306 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5307 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5308 ) {
af39bc8c
AM
5309 if (float64_is_signaling_nan(a, status)
5310 || float64_is_signaling_nan(b, status)) {
ff32e16e 5311 float_raise(float_flag_invalid, status);
b689362d 5312 }
158142c2
FB
5313 return 0;
5314 }
f090c9d4 5315 av = float64_val(a);
a1b91bb4 5316 bv = float64_val(b);
bb98fe42 5317 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
5318
5319}
5320
5321/*----------------------------------------------------------------------------
5322| Returns 1 if the double-precision floating-point value `a' is less than or
5323| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5324| cause an exception. Otherwise, the comparison is performed according to the
5325| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5326*----------------------------------------------------------------------------*/
5327
e5a41ffa 5328int float64_le_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
5329{
5330 flag aSign, bSign;
bb98fe42 5331 uint64_t av, bv;
ff32e16e
PM
5332 a = float64_squash_input_denormal(a, status);
5333 b = float64_squash_input_denormal(b, status);
158142c2
FB
5334
5335 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5336 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5337 ) {
af39bc8c
AM
5338 if (float64_is_signaling_nan(a, status)
5339 || float64_is_signaling_nan(b, status)) {
ff32e16e 5340 float_raise(float_flag_invalid, status);
158142c2
FB
5341 }
5342 return 0;
5343 }
5344 aSign = extractFloat64Sign( a );
5345 bSign = extractFloat64Sign( b );
f090c9d4 5346 av = float64_val(a);
a1b91bb4 5347 bv = float64_val(b);
bb98fe42 5348 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 5349 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
5350
5351}
5352
5353/*----------------------------------------------------------------------------
5354| Returns 1 if the double-precision floating-point value `a' is less than
5355| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
5356| exception. Otherwise, the comparison is performed according to the IEC/IEEE
5357| Standard for Binary Floating-Point Arithmetic.
5358*----------------------------------------------------------------------------*/
5359
e5a41ffa 5360int float64_lt_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
5361{
5362 flag aSign, bSign;
bb98fe42 5363 uint64_t av, bv;
ff32e16e
PM
5364 a = float64_squash_input_denormal(a, status);
5365 b = float64_squash_input_denormal(b, status);
158142c2
FB
5366
5367 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5368 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5369 ) {
af39bc8c
AM
5370 if (float64_is_signaling_nan(a, status)
5371 || float64_is_signaling_nan(b, status)) {
ff32e16e 5372 float_raise(float_flag_invalid, status);
158142c2
FB
5373 }
5374 return 0;
5375 }
5376 aSign = extractFloat64Sign( a );
5377 bSign = extractFloat64Sign( b );
f090c9d4 5378 av = float64_val(a);
a1b91bb4 5379 bv = float64_val(b);
bb98fe42 5380 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 5381 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
5382
5383}
5384
67b7861d
AJ
5385/*----------------------------------------------------------------------------
5386| Returns 1 if the double-precision floating-point values `a' and `b' cannot
5387| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
5388| comparison is performed according to the IEC/IEEE Standard for Binary
5389| Floating-Point Arithmetic.
5390*----------------------------------------------------------------------------*/
5391
e5a41ffa 5392int float64_unordered_quiet(float64 a, float64 b, float_status *status)
67b7861d 5393{
ff32e16e
PM
5394 a = float64_squash_input_denormal(a, status);
5395 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
5396
5397 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5398 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5399 ) {
af39bc8c
AM
5400 if (float64_is_signaling_nan(a, status)
5401 || float64_is_signaling_nan(b, status)) {
ff32e16e 5402 float_raise(float_flag_invalid, status);
67b7861d
AJ
5403 }
5404 return 1;
5405 }
5406 return 0;
5407}
5408
158142c2
FB
5409/*----------------------------------------------------------------------------
5410| Returns the result of converting the extended double-precision floating-
5411| point value `a' to the 32-bit two's complement integer format. The
5412| conversion is performed according to the IEC/IEEE Standard for Binary
5413| Floating-Point Arithmetic---which means in particular that the conversion
5414| is rounded according to the current rounding mode. If `a' is a NaN, the
5415| largest positive integer is returned. Otherwise, if the conversion
5416| overflows, the largest integer with the same sign as `a' is returned.
5417*----------------------------------------------------------------------------*/
5418
f4014512 5419int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2
FB
5420{
5421 flag aSign;
f4014512 5422 int32_t aExp, shiftCount;
bb98fe42 5423 uint64_t aSig;
158142c2 5424
d1eb8f2a
AD
5425 if (floatx80_invalid_encoding(a)) {
5426 float_raise(float_flag_invalid, status);
5427 return 1 << 31;
5428 }
158142c2
FB
5429 aSig = extractFloatx80Frac( a );
5430 aExp = extractFloatx80Exp( a );
5431 aSign = extractFloatx80Sign( a );
bb98fe42 5432 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5433 shiftCount = 0x4037 - aExp;
5434 if ( shiftCount <= 0 ) shiftCount = 1;
5435 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5436 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5437
5438}
5439
5440/*----------------------------------------------------------------------------
5441| Returns the result of converting the extended double-precision floating-
5442| point value `a' to the 32-bit two's complement integer format. The
5443| conversion is performed according to the IEC/IEEE Standard for Binary
5444| Floating-Point Arithmetic, except that the conversion is always rounded
5445| toward zero. If `a' is a NaN, the largest positive integer is returned.
5446| Otherwise, if the conversion overflows, the largest integer with the same
5447| sign as `a' is returned.
5448*----------------------------------------------------------------------------*/
5449
f4014512 5450int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
5451{
5452 flag aSign;
f4014512 5453 int32_t aExp, shiftCount;
bb98fe42 5454 uint64_t aSig, savedASig;
b3a6a2e0 5455 int32_t z;
158142c2 5456
d1eb8f2a
AD
5457 if (floatx80_invalid_encoding(a)) {
5458 float_raise(float_flag_invalid, status);
5459 return 1 << 31;
5460 }
158142c2
FB
5461 aSig = extractFloatx80Frac( a );
5462 aExp = extractFloatx80Exp( a );
5463 aSign = extractFloatx80Sign( a );
5464 if ( 0x401E < aExp ) {
bb98fe42 5465 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5466 goto invalid;
5467 }
5468 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5469 if (aExp || aSig) {
5470 status->float_exception_flags |= float_flag_inexact;
5471 }
158142c2
FB
5472 return 0;
5473 }
5474 shiftCount = 0x403E - aExp;
5475 savedASig = aSig;
5476 aSig >>= shiftCount;
5477 z = aSig;
5478 if ( aSign ) z = - z;
5479 if ( ( z < 0 ) ^ aSign ) {
5480 invalid:
ff32e16e 5481 float_raise(float_flag_invalid, status);
bb98fe42 5482 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5483 }
5484 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 5485 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5486 }
5487 return z;
5488
5489}
5490
5491/*----------------------------------------------------------------------------
5492| Returns the result of converting the extended double-precision floating-
5493| point value `a' to the 64-bit two's complement integer format. The
5494| conversion is performed according to the IEC/IEEE Standard for Binary
5495| Floating-Point Arithmetic---which means in particular that the conversion
5496| is rounded according to the current rounding mode. If `a' is a NaN,
5497| the largest positive integer is returned. Otherwise, if the conversion
5498| overflows, the largest integer with the same sign as `a' is returned.
5499*----------------------------------------------------------------------------*/
5500
f42c2224 5501int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2
FB
5502{
5503 flag aSign;
f4014512 5504 int32_t aExp, shiftCount;
bb98fe42 5505 uint64_t aSig, aSigExtra;
158142c2 5506
d1eb8f2a
AD
5507 if (floatx80_invalid_encoding(a)) {
5508 float_raise(float_flag_invalid, status);
5509 return 1ULL << 63;
5510 }
158142c2
FB
5511 aSig = extractFloatx80Frac( a );
5512 aExp = extractFloatx80Exp( a );
5513 aSign = extractFloatx80Sign( a );
5514 shiftCount = 0x403E - aExp;
5515 if ( shiftCount <= 0 ) {
5516 if ( shiftCount ) {
ff32e16e 5517 float_raise(float_flag_invalid, status);
0f605c88 5518 if (!aSign || floatx80_is_any_nan(a)) {
2c217da0 5519 return INT64_MAX;
158142c2 5520 }
2c217da0 5521 return INT64_MIN;
158142c2
FB
5522 }
5523 aSigExtra = 0;
5524 }
5525 else {
5526 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5527 }
ff32e16e 5528 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5529
5530}
5531
5532/*----------------------------------------------------------------------------
5533| Returns the result of converting the extended double-precision floating-
5534| point value `a' to the 64-bit two's complement integer format. The
5535| conversion is performed according to the IEC/IEEE Standard for Binary
5536| Floating-Point Arithmetic, except that the conversion is always rounded
5537| toward zero. If `a' is a NaN, the largest positive integer is returned.
5538| Otherwise, if the conversion overflows, the largest integer with the same
5539| sign as `a' is returned.
5540*----------------------------------------------------------------------------*/
5541
f42c2224 5542int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
5543{
5544 flag aSign;
f4014512 5545 int32_t aExp, shiftCount;
bb98fe42 5546 uint64_t aSig;
f42c2224 5547 int64_t z;
158142c2 5548
d1eb8f2a
AD
5549 if (floatx80_invalid_encoding(a)) {
5550 float_raise(float_flag_invalid, status);
5551 return 1ULL << 63;
5552 }
158142c2
FB
5553 aSig = extractFloatx80Frac( a );
5554 aExp = extractFloatx80Exp( a );
5555 aSign = extractFloatx80Sign( a );
5556 shiftCount = aExp - 0x403E;
5557 if ( 0 <= shiftCount ) {
e9321124 5558 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
158142c2 5559 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5560 float_raise(float_flag_invalid, status);
158142c2 5561 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
2c217da0 5562 return INT64_MAX;
158142c2
FB
5563 }
5564 }
2c217da0 5565 return INT64_MIN;
158142c2
FB
5566 }
5567 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5568 if (aExp | aSig) {
5569 status->float_exception_flags |= float_flag_inexact;
5570 }
158142c2
FB
5571 return 0;
5572 }
5573 z = aSig>>( - shiftCount );
bb98fe42 5574 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 5575 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5576 }
5577 if ( aSign ) z = - z;
5578 return z;
5579
5580}
5581
5582/*----------------------------------------------------------------------------
5583| Returns the result of converting the extended double-precision floating-
5584| point value `a' to the single-precision floating-point format. The
5585| conversion is performed according to the IEC/IEEE Standard for Binary
5586| Floating-Point Arithmetic.
5587*----------------------------------------------------------------------------*/
5588
e5a41ffa 5589float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2
FB
5590{
5591 flag aSign;
f4014512 5592 int32_t aExp;
bb98fe42 5593 uint64_t aSig;
158142c2 5594
d1eb8f2a
AD
5595 if (floatx80_invalid_encoding(a)) {
5596 float_raise(float_flag_invalid, status);
5597 return float32_default_nan(status);
5598 }
158142c2
FB
5599 aSig = extractFloatx80Frac( a );
5600 aExp = extractFloatx80Exp( a );
5601 aSign = extractFloatx80Sign( a );
5602 if ( aExp == 0x7FFF ) {
bb98fe42 5603 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 5604 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
158142c2
FB
5605 }
5606 return packFloat32( aSign, 0xFF, 0 );
5607 }
5608 shift64RightJamming( aSig, 33, &aSig );
5609 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5610 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5611
5612}
5613
5614/*----------------------------------------------------------------------------
5615| Returns the result of converting the extended double-precision floating-
5616| point value `a' to the double-precision floating-point format. The
5617| conversion is performed according to the IEC/IEEE Standard for Binary
5618| Floating-Point Arithmetic.
5619*----------------------------------------------------------------------------*/
5620
e5a41ffa 5621float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2
FB
5622{
5623 flag aSign;
f4014512 5624 int32_t aExp;
bb98fe42 5625 uint64_t aSig, zSig;
158142c2 5626
d1eb8f2a
AD
5627 if (floatx80_invalid_encoding(a)) {
5628 float_raise(float_flag_invalid, status);
5629 return float64_default_nan(status);
5630 }
158142c2
FB
5631 aSig = extractFloatx80Frac( a );
5632 aExp = extractFloatx80Exp( a );
5633 aSign = extractFloatx80Sign( a );
5634 if ( aExp == 0x7FFF ) {
bb98fe42 5635 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 5636 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
158142c2
FB
5637 }
5638 return packFloat64( aSign, 0x7FF, 0 );
5639 }
5640 shift64RightJamming( aSig, 1, &zSig );
5641 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5642 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5643
5644}
5645
158142c2
FB
5646/*----------------------------------------------------------------------------
5647| Returns the result of converting the extended double-precision floating-
5648| point value `a' to the quadruple-precision floating-point format. The
5649| conversion is performed according to the IEC/IEEE Standard for Binary
5650| Floating-Point Arithmetic.
5651*----------------------------------------------------------------------------*/
5652
e5a41ffa 5653float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2
FB
5654{
5655 flag aSign;
0c48262d 5656 int aExp;
bb98fe42 5657 uint64_t aSig, zSig0, zSig1;
158142c2 5658
d1eb8f2a
AD
5659 if (floatx80_invalid_encoding(a)) {
5660 float_raise(float_flag_invalid, status);
5661 return float128_default_nan(status);
5662 }
158142c2
FB
5663 aSig = extractFloatx80Frac( a );
5664 aExp = extractFloatx80Exp( a );
5665 aSign = extractFloatx80Sign( a );
bb98fe42 5666 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
ff32e16e 5667 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
158142c2
FB
5668 }
5669 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5670 return packFloat128( aSign, aExp, zSig0, zSig1 );
5671
5672}
5673
0f721292
LV
5674/*----------------------------------------------------------------------------
5675| Rounds the extended double-precision floating-point value `a'
5676| to the precision provided by floatx80_rounding_precision and returns the
5677| result as an extended double-precision floating-point value.
5678| The operation is performed according to the IEC/IEEE Standard for Binary
5679| Floating-Point Arithmetic.
5680*----------------------------------------------------------------------------*/
5681
5682floatx80 floatx80_round(floatx80 a, float_status *status)
5683{
5684 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5685 extractFloatx80Sign(a),
5686 extractFloatx80Exp(a),
5687 extractFloatx80Frac(a), 0, status);
5688}
5689
158142c2
FB
5690/*----------------------------------------------------------------------------
5691| Rounds the extended double-precision floating-point value `a' to an integer,
5692| and returns the result as an extended quadruple-precision floating-point
5693| value. The operation is performed according to the IEC/IEEE Standard for
5694| Binary Floating-Point Arithmetic.
5695*----------------------------------------------------------------------------*/
5696
e5a41ffa 5697floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2
FB
5698{
5699 flag aSign;
f4014512 5700 int32_t aExp;
bb98fe42 5701 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5702 floatx80 z;
5703
d1eb8f2a
AD
5704 if (floatx80_invalid_encoding(a)) {
5705 float_raise(float_flag_invalid, status);
5706 return floatx80_default_nan(status);
5707 }
158142c2
FB
5708 aExp = extractFloatx80Exp( a );
5709 if ( 0x403E <= aExp ) {
bb98fe42 5710 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5711 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5712 }
5713 return a;
5714 }
5715 if ( aExp < 0x3FFF ) {
5716 if ( ( aExp == 0 )
bb98fe42 5717 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
158142c2
FB
5718 return a;
5719 }
a2f2d288 5720 status->float_exception_flags |= float_flag_inexact;
158142c2 5721 aSign = extractFloatx80Sign( a );
a2f2d288 5722 switch (status->float_rounding_mode) {
158142c2 5723 case float_round_nearest_even:
bb98fe42 5724 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5725 ) {
5726 return
e9321124 5727 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5728 }
5729 break;
f9288a76
PM
5730 case float_round_ties_away:
5731 if (aExp == 0x3FFE) {
e9321124 5732 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
f9288a76
PM
5733 }
5734 break;
158142c2
FB
5735 case float_round_down:
5736 return
5737 aSign ?
e9321124 5738 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
158142c2
FB
5739 : packFloatx80( 0, 0, 0 );
5740 case float_round_up:
5741 return
5742 aSign ? packFloatx80( 1, 0, 0 )
e9321124 5743 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5744 }
5745 return packFloatx80( aSign, 0, 0 );
5746 }
5747 lastBitMask = 1;
5748 lastBitMask <<= 0x403E - aExp;
5749 roundBitsMask = lastBitMask - 1;
5750 z = a;
a2f2d288 5751 switch (status->float_rounding_mode) {
dc355b76 5752 case float_round_nearest_even:
158142c2 5753 z.low += lastBitMask>>1;
dc355b76
PM
5754 if ((z.low & roundBitsMask) == 0) {
5755 z.low &= ~lastBitMask;
5756 }
5757 break;
f9288a76
PM
5758 case float_round_ties_away:
5759 z.low += lastBitMask >> 1;
5760 break;
dc355b76
PM
5761 case float_round_to_zero:
5762 break;
5763 case float_round_up:
5764 if (!extractFloatx80Sign(z)) {
5765 z.low += roundBitsMask;
5766 }
5767 break;
5768 case float_round_down:
5769 if (extractFloatx80Sign(z)) {
158142c2
FB
5770 z.low += roundBitsMask;
5771 }
dc355b76
PM
5772 break;
5773 default:
5774 abort();
158142c2
FB
5775 }
5776 z.low &= ~ roundBitsMask;
5777 if ( z.low == 0 ) {
5778 ++z.high;
e9321124 5779 z.low = UINT64_C(0x8000000000000000);
158142c2 5780 }
a2f2d288
PM
5781 if (z.low != a.low) {
5782 status->float_exception_flags |= float_flag_inexact;
5783 }
158142c2
FB
5784 return z;
5785
5786}
5787
5788/*----------------------------------------------------------------------------
5789| Returns the result of adding the absolute values of the extended double-
5790| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5791| negated before being returned. `zSign' is ignored if the result is a NaN.
5792| The addition is performed according to the IEC/IEEE Standard for Binary
5793| Floating-Point Arithmetic.
5794*----------------------------------------------------------------------------*/
5795
e5a41ffa
PM
5796static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5797 float_status *status)
158142c2 5798{
f4014512 5799 int32_t aExp, bExp, zExp;
bb98fe42 5800 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5801 int32_t expDiff;
158142c2
FB
5802
5803 aSig = extractFloatx80Frac( a );
5804 aExp = extractFloatx80Exp( a );
5805 bSig = extractFloatx80Frac( b );
5806 bExp = extractFloatx80Exp( b );
5807 expDiff = aExp - bExp;
5808 if ( 0 < expDiff ) {
5809 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5810 if ((uint64_t)(aSig << 1)) {
5811 return propagateFloatx80NaN(a, b, status);
5812 }
158142c2
FB
5813 return a;
5814 }
5815 if ( bExp == 0 ) --expDiff;
5816 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5817 zExp = aExp;
5818 }
5819 else if ( expDiff < 0 ) {
5820 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5821 if ((uint64_t)(bSig << 1)) {
5822 return propagateFloatx80NaN(a, b, status);
5823 }
0f605c88
LV
5824 return packFloatx80(zSign,
5825 floatx80_infinity_high,
5826 floatx80_infinity_low);
158142c2
FB
5827 }
5828 if ( aExp == 0 ) ++expDiff;
5829 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5830 zExp = bExp;
5831 }
5832 else {
5833 if ( aExp == 0x7FFF ) {
bb98fe42 5834 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5835 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5836 }
5837 return a;
5838 }
5839 zSig1 = 0;
5840 zSig0 = aSig + bSig;
5841 if ( aExp == 0 ) {
5842 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5843 goto roundAndPack;
5844 }
5845 zExp = aExp;
5846 goto shiftRight1;
5847 }
5848 zSig0 = aSig + bSig;
bb98fe42 5849 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5850 shiftRight1:
5851 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
e9321124 5852 zSig0 |= UINT64_C(0x8000000000000000);
158142c2
FB
5853 ++zExp;
5854 roundAndPack:
a2f2d288 5855 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5856 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5857}
5858
5859/*----------------------------------------------------------------------------
5860| Returns the result of subtracting the absolute values of the extended
5861| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5862| difference is negated before being returned. `zSign' is ignored if the
5863| result is a NaN. The subtraction is performed according to the IEC/IEEE
5864| Standard for Binary Floating-Point Arithmetic.
5865*----------------------------------------------------------------------------*/
5866
e5a41ffa
PM
5867static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5868 float_status *status)
158142c2 5869{
f4014512 5870 int32_t aExp, bExp, zExp;
bb98fe42 5871 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5872 int32_t expDiff;
158142c2
FB
5873
5874 aSig = extractFloatx80Frac( a );
5875 aExp = extractFloatx80Exp( a );
5876 bSig = extractFloatx80Frac( b );
5877 bExp = extractFloatx80Exp( b );
5878 expDiff = aExp - bExp;
5879 if ( 0 < expDiff ) goto aExpBigger;
5880 if ( expDiff < 0 ) goto bExpBigger;
5881 if ( aExp == 0x7FFF ) {
bb98fe42 5882 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5883 return propagateFloatx80NaN(a, b, status);
158142c2 5884 }
ff32e16e 5885 float_raise(float_flag_invalid, status);
af39bc8c 5886 return floatx80_default_nan(status);
158142c2
FB
5887 }
5888 if ( aExp == 0 ) {
5889 aExp = 1;
5890 bExp = 1;
5891 }
5892 zSig1 = 0;
5893 if ( bSig < aSig ) goto aBigger;
5894 if ( aSig < bSig ) goto bBigger;
a2f2d288 5895 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5896 bExpBigger:
5897 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5898 if ((uint64_t)(bSig << 1)) {
5899 return propagateFloatx80NaN(a, b, status);
5900 }
0f605c88
LV
5901 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5902 floatx80_infinity_low);
158142c2
FB
5903 }
5904 if ( aExp == 0 ) ++expDiff;
5905 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5906 bBigger:
5907 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5908 zExp = bExp;
5909 zSign ^= 1;
5910 goto normalizeRoundAndPack;
5911 aExpBigger:
5912 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5913 if ((uint64_t)(aSig << 1)) {
5914 return propagateFloatx80NaN(a, b, status);
5915 }
158142c2
FB
5916 return a;
5917 }
5918 if ( bExp == 0 ) --expDiff;
5919 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5920 aBigger:
5921 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5922 zExp = aExp;
5923 normalizeRoundAndPack:
a2f2d288 5924 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5925 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5926}
5927
5928/*----------------------------------------------------------------------------
5929| Returns the result of adding the extended double-precision floating-point
5930| values `a' and `b'. The operation is performed according to the IEC/IEEE
5931| Standard for Binary Floating-Point Arithmetic.
5932*----------------------------------------------------------------------------*/
5933
e5a41ffa 5934floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5935{
5936 flag aSign, bSign;
5937
d1eb8f2a
AD
5938 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5939 float_raise(float_flag_invalid, status);
5940 return floatx80_default_nan(status);
5941 }
158142c2
FB
5942 aSign = extractFloatx80Sign( a );
5943 bSign = extractFloatx80Sign( b );
5944 if ( aSign == bSign ) {
ff32e16e 5945 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5946 }
5947 else {
ff32e16e 5948 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5949 }
5950
5951}
5952
5953/*----------------------------------------------------------------------------
5954| Returns the result of subtracting the extended double-precision floating-
5955| point values `a' and `b'. The operation is performed according to the
5956| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5957*----------------------------------------------------------------------------*/
5958
e5a41ffa 5959floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5960{
5961 flag aSign, bSign;
5962
d1eb8f2a
AD
5963 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5964 float_raise(float_flag_invalid, status);
5965 return floatx80_default_nan(status);
5966 }
158142c2
FB
5967 aSign = extractFloatx80Sign( a );
5968 bSign = extractFloatx80Sign( b );
5969 if ( aSign == bSign ) {
ff32e16e 5970 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5971 }
5972 else {
ff32e16e 5973 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5974 }
5975
5976}
5977
5978/*----------------------------------------------------------------------------
5979| Returns the result of multiplying the extended double-precision floating-
5980| point values `a' and `b'. The operation is performed according to the
5981| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5982*----------------------------------------------------------------------------*/
5983
e5a41ffa 5984floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5985{
5986 flag aSign, bSign, zSign;
f4014512 5987 int32_t aExp, bExp, zExp;
bb98fe42 5988 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 5989
d1eb8f2a
AD
5990 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5991 float_raise(float_flag_invalid, status);
5992 return floatx80_default_nan(status);
5993 }
158142c2
FB
5994 aSig = extractFloatx80Frac( a );
5995 aExp = extractFloatx80Exp( a );
5996 aSign = extractFloatx80Sign( a );
5997 bSig = extractFloatx80Frac( b );
5998 bExp = extractFloatx80Exp( b );
5999 bSign = extractFloatx80Sign( b );
6000 zSign = aSign ^ bSign;
6001 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6002 if ( (uint64_t) ( aSig<<1 )
6003 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6004 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6005 }
6006 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
6007 return packFloatx80(zSign, floatx80_infinity_high,
6008 floatx80_infinity_low);
158142c2
FB
6009 }
6010 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6011 if ((uint64_t)(bSig << 1)) {
6012 return propagateFloatx80NaN(a, b, status);
6013 }
158142c2
FB
6014 if ( ( aExp | aSig ) == 0 ) {
6015 invalid:
ff32e16e 6016 float_raise(float_flag_invalid, status);
af39bc8c 6017 return floatx80_default_nan(status);
158142c2 6018 }
0f605c88
LV
6019 return packFloatx80(zSign, floatx80_infinity_high,
6020 floatx80_infinity_low);
158142c2
FB
6021 }
6022 if ( aExp == 0 ) {
6023 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6024 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6025 }
6026 if ( bExp == 0 ) {
6027 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6028 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6029 }
6030 zExp = aExp + bExp - 0x3FFE;
6031 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 6032 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
6033 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6034 --zExp;
6035 }
a2f2d288 6036 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6037 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6038}
6039
6040/*----------------------------------------------------------------------------
6041| Returns the result of dividing the extended double-precision floating-point
6042| value `a' by the corresponding value `b'. The operation is performed
6043| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6044*----------------------------------------------------------------------------*/
6045
e5a41ffa 6046floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6047{
6048 flag aSign, bSign, zSign;
f4014512 6049 int32_t aExp, bExp, zExp;
bb98fe42
AF
6050 uint64_t aSig, bSig, zSig0, zSig1;
6051 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6052
d1eb8f2a
AD
6053 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6054 float_raise(float_flag_invalid, status);
6055 return floatx80_default_nan(status);
6056 }
158142c2
FB
6057 aSig = extractFloatx80Frac( a );
6058 aExp = extractFloatx80Exp( a );
6059 aSign = extractFloatx80Sign( a );
6060 bSig = extractFloatx80Frac( b );
6061 bExp = extractFloatx80Exp( b );
6062 bSign = extractFloatx80Sign( b );
6063 zSign = aSign ^ bSign;
6064 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6065 if ((uint64_t)(aSig << 1)) {
6066 return propagateFloatx80NaN(a, b, status);
6067 }
158142c2 6068 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6069 if ((uint64_t)(bSig << 1)) {
6070 return propagateFloatx80NaN(a, b, status);
6071 }
158142c2
FB
6072 goto invalid;
6073 }
0f605c88
LV
6074 return packFloatx80(zSign, floatx80_infinity_high,
6075 floatx80_infinity_low);
158142c2
FB
6076 }
6077 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6078 if ((uint64_t)(bSig << 1)) {
6079 return propagateFloatx80NaN(a, b, status);
6080 }
158142c2
FB
6081 return packFloatx80( zSign, 0, 0 );
6082 }
6083 if ( bExp == 0 ) {
6084 if ( bSig == 0 ) {
6085 if ( ( aExp | aSig ) == 0 ) {
6086 invalid:
ff32e16e 6087 float_raise(float_flag_invalid, status);
af39bc8c 6088 return floatx80_default_nan(status);
158142c2 6089 }
ff32e16e 6090 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6091 return packFloatx80(zSign, floatx80_infinity_high,
6092 floatx80_infinity_low);
158142c2
FB
6093 }
6094 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6095 }
6096 if ( aExp == 0 ) {
6097 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6098 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6099 }
6100 zExp = aExp - bExp + 0x3FFE;
6101 rem1 = 0;
6102 if ( bSig <= aSig ) {
6103 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6104 ++zExp;
6105 }
6106 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6107 mul64To128( bSig, zSig0, &term0, &term1 );
6108 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 6109 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6110 --zSig0;
6111 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6112 }
6113 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 6114 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6115 mul64To128( bSig, zSig1, &term1, &term2 );
6116 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6117 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6118 --zSig1;
6119 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6120 }
6121 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6122 }
a2f2d288 6123 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6124 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6125}
6126
6127/*----------------------------------------------------------------------------
6128| Returns the remainder of the extended double-precision floating-point value
6129| `a' with respect to the corresponding value `b'. The operation is performed
6130| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6131*----------------------------------------------------------------------------*/
6132
e5a41ffa 6133floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
158142c2 6134{
ed086f3d 6135 flag aSign, zSign;
f4014512 6136 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6137 uint64_t aSig0, aSig1, bSig;
6138 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6139
d1eb8f2a
AD
6140 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6141 float_raise(float_flag_invalid, status);
6142 return floatx80_default_nan(status);
6143 }
158142c2
FB
6144 aSig0 = extractFloatx80Frac( a );
6145 aExp = extractFloatx80Exp( a );
6146 aSign = extractFloatx80Sign( a );
6147 bSig = extractFloatx80Frac( b );
6148 bExp = extractFloatx80Exp( b );
158142c2 6149 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6150 if ( (uint64_t) ( aSig0<<1 )
6151 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6152 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6153 }
6154 goto invalid;
6155 }
6156 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6157 if ((uint64_t)(bSig << 1)) {
6158 return propagateFloatx80NaN(a, b, status);
6159 }
158142c2
FB
6160 return a;
6161 }
6162 if ( bExp == 0 ) {
6163 if ( bSig == 0 ) {
6164 invalid:
ff32e16e 6165 float_raise(float_flag_invalid, status);
af39bc8c 6166 return floatx80_default_nan(status);
158142c2
FB
6167 }
6168 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6169 }
6170 if ( aExp == 0 ) {
bb98fe42 6171 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
158142c2
FB
6172 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6173 }
e9321124 6174 bSig |= UINT64_C(0x8000000000000000);
158142c2
FB
6175 zSign = aSign;
6176 expDiff = aExp - bExp;
6177 aSig1 = 0;
6178 if ( expDiff < 0 ) {
6179 if ( expDiff < -1 ) return a;
6180 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6181 expDiff = 0;
6182 }
6183 q = ( bSig <= aSig0 );
6184 if ( q ) aSig0 -= bSig;
6185 expDiff -= 64;
6186 while ( 0 < expDiff ) {
6187 q = estimateDiv128To64( aSig0, aSig1, bSig );
6188 q = ( 2 < q ) ? q - 2 : 0;
6189 mul64To128( bSig, q, &term0, &term1 );
6190 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6191 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6192 expDiff -= 62;
6193 }
6194 expDiff += 64;
6195 if ( 0 < expDiff ) {
6196 q = estimateDiv128To64( aSig0, aSig1, bSig );
6197 q = ( 2 < q ) ? q - 2 : 0;
6198 q >>= 64 - expDiff;
6199 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6200 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6201 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6202 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6203 ++q;
6204 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6205 }
6206 }
6207 else {
6208 term1 = 0;
6209 term0 = bSig;
6210 }
6211 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6212 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6213 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6214 && ( q & 1 ) )
6215 ) {
6216 aSig0 = alternateASig0;
6217 aSig1 = alternateASig1;
6218 zSign = ! zSign;
6219 }
6220 return
6221 normalizeRoundAndPackFloatx80(
ff32e16e 6222 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6223
6224}
6225
6226/*----------------------------------------------------------------------------
6227| Returns the square root of the extended double-precision floating-point
6228| value `a'. The operation is performed according to the IEC/IEEE Standard
6229| for Binary Floating-Point Arithmetic.
6230*----------------------------------------------------------------------------*/
6231
e5a41ffa 6232floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2
FB
6233{
6234 flag aSign;
f4014512 6235 int32_t aExp, zExp;
bb98fe42
AF
6236 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6237 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6238
d1eb8f2a
AD
6239 if (floatx80_invalid_encoding(a)) {
6240 float_raise(float_flag_invalid, status);
6241 return floatx80_default_nan(status);
6242 }
158142c2
FB
6243 aSig0 = extractFloatx80Frac( a );
6244 aExp = extractFloatx80Exp( a );
6245 aSign = extractFloatx80Sign( a );
6246 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6247 if ((uint64_t)(aSig0 << 1)) {
6248 return propagateFloatx80NaN(a, a, status);
6249 }
158142c2
FB
6250 if ( ! aSign ) return a;
6251 goto invalid;
6252 }
6253 if ( aSign ) {
6254 if ( ( aExp | aSig0 ) == 0 ) return a;
6255 invalid:
ff32e16e 6256 float_raise(float_flag_invalid, status);
af39bc8c 6257 return floatx80_default_nan(status);
158142c2
FB
6258 }
6259 if ( aExp == 0 ) {
6260 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6261 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6262 }
6263 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6264 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6265 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6266 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6267 doubleZSig0 = zSig0<<1;
6268 mul64To128( zSig0, zSig0, &term0, &term1 );
6269 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6270 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6271 --zSig0;
6272 doubleZSig0 -= 2;
6273 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6274 }
6275 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
e9321124 6276 if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
158142c2
FB
6277 if ( zSig1 == 0 ) zSig1 = 1;
6278 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6279 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6280 mul64To128( zSig1, zSig1, &term2, &term3 );
6281 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6282 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6283 --zSig1;
6284 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6285 term3 |= 1;
6286 term2 |= doubleZSig0;
6287 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6288 }
6289 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6290 }
6291 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6292 zSig0 |= doubleZSig0;
a2f2d288
PM
6293 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6294 0, zExp, zSig0, zSig1, status);
158142c2
FB
6295}
6296
6297/*----------------------------------------------------------------------------
b689362d
AJ
6298| Returns 1 if the extended double-precision floating-point value `a' is equal
6299| to the corresponding value `b', and 0 otherwise. The invalid exception is
6300| raised if either operand is a NaN. Otherwise, the comparison is performed
6301| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6302*----------------------------------------------------------------------------*/
6303
e5a41ffa 6304int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6305{
6306
d1eb8f2a
AD
6307 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6308 || (extractFloatx80Exp(a) == 0x7FFF
6309 && (uint64_t) (extractFloatx80Frac(a) << 1))
6310 || (extractFloatx80Exp(b) == 0x7FFF
6311 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6312 ) {
ff32e16e 6313 float_raise(float_flag_invalid, status);
158142c2
FB
6314 return 0;
6315 }
6316 return
6317 ( a.low == b.low )
6318 && ( ( a.high == b.high )
6319 || ( ( a.low == 0 )
bb98fe42 6320 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6321 );
6322
6323}
6324
6325/*----------------------------------------------------------------------------
6326| Returns 1 if the extended double-precision floating-point value `a' is
6327| less than or equal to the corresponding value `b', and 0 otherwise. The
f5a64251
AJ
6328| invalid exception is raised if either operand is a NaN. The comparison is
6329| performed according to the IEC/IEEE Standard for Binary Floating-Point
6330| Arithmetic.
158142c2
FB
6331*----------------------------------------------------------------------------*/
6332
e5a41ffa 6333int floatx80_le(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6334{
6335 flag aSign, bSign;
6336
d1eb8f2a
AD
6337 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6338 || (extractFloatx80Exp(a) == 0x7FFF
6339 && (uint64_t) (extractFloatx80Frac(a) << 1))
6340 || (extractFloatx80Exp(b) == 0x7FFF
6341 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6342 ) {
ff32e16e 6343 float_raise(float_flag_invalid, status);
158142c2
FB
6344 return 0;
6345 }
6346 aSign = extractFloatx80Sign( a );
6347 bSign = extractFloatx80Sign( b );
6348 if ( aSign != bSign ) {
6349 return
6350 aSign
bb98fe42 6351 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6352 == 0 );
6353 }
6354 return
6355 aSign ? le128( b.high, b.low, a.high, a.low )
6356 : le128( a.high, a.low, b.high, b.low );
6357
6358}
6359
6360/*----------------------------------------------------------------------------
6361| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
6362| less than the corresponding value `b', and 0 otherwise. The invalid
6363| exception is raised if either operand is a NaN. The comparison is performed
6364| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6365*----------------------------------------------------------------------------*/
6366
e5a41ffa 6367int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6368{
6369 flag aSign, bSign;
6370
d1eb8f2a
AD
6371 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6372 || (extractFloatx80Exp(a) == 0x7FFF
6373 && (uint64_t) (extractFloatx80Frac(a) << 1))
6374 || (extractFloatx80Exp(b) == 0x7FFF
6375 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6376 ) {
ff32e16e 6377 float_raise(float_flag_invalid, status);
158142c2
FB
6378 return 0;
6379 }
6380 aSign = extractFloatx80Sign( a );
6381 bSign = extractFloatx80Sign( b );
6382 if ( aSign != bSign ) {
6383 return
6384 aSign
bb98fe42 6385 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6386 != 0 );
6387 }
6388 return
6389 aSign ? lt128( b.high, b.low, a.high, a.low )
6390 : lt128( a.high, a.low, b.high, b.low );
6391
6392}
6393
67b7861d
AJ
6394/*----------------------------------------------------------------------------
6395| Returns 1 if the extended double-precision floating-point values `a' and `b'
f5a64251
AJ
6396| cannot be compared, and 0 otherwise. The invalid exception is raised if
6397| either operand is a NaN. The comparison is performed according to the
6398| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
67b7861d 6399*----------------------------------------------------------------------------*/
e5a41ffa 6400int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
67b7861d 6401{
d1eb8f2a
AD
6402 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6403 || (extractFloatx80Exp(a) == 0x7FFF
6404 && (uint64_t) (extractFloatx80Frac(a) << 1))
6405 || (extractFloatx80Exp(b) == 0x7FFF
6406 && (uint64_t) (extractFloatx80Frac(b) << 1))
67b7861d 6407 ) {
ff32e16e 6408 float_raise(float_flag_invalid, status);
67b7861d
AJ
6409 return 1;
6410 }
6411 return 0;
6412}
6413
158142c2 6414/*----------------------------------------------------------------------------
b689362d 6415| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
6416| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6417| cause an exception. The comparison is performed according to the IEC/IEEE
6418| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6419*----------------------------------------------------------------------------*/
6420
e5a41ffa 6421int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6422{
6423
d1eb8f2a
AD
6424 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6425 float_raise(float_flag_invalid, status);
6426 return 0;
6427 }
158142c2 6428 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6429 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6430 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6431 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6432 ) {
af39bc8c
AM
6433 if (floatx80_is_signaling_nan(a, status)
6434 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6435 float_raise(float_flag_invalid, status);
b689362d 6436 }
158142c2
FB
6437 return 0;
6438 }
6439 return
6440 ( a.low == b.low )
6441 && ( ( a.high == b.high )
6442 || ( ( a.low == 0 )
bb98fe42 6443 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6444 );
6445
6446}
6447
6448/*----------------------------------------------------------------------------
6449| Returns 1 if the extended double-precision floating-point value `a' is less
6450| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
6451| do not cause an exception. Otherwise, the comparison is performed according
6452| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6453*----------------------------------------------------------------------------*/
6454
e5a41ffa 6455int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6456{
6457 flag aSign, bSign;
6458
d1eb8f2a
AD
6459 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6460 float_raise(float_flag_invalid, status);
6461 return 0;
6462 }
158142c2 6463 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6464 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6465 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6466 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6467 ) {
af39bc8c
AM
6468 if (floatx80_is_signaling_nan(a, status)
6469 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6470 float_raise(float_flag_invalid, status);
158142c2
FB
6471 }
6472 return 0;
6473 }
6474 aSign = extractFloatx80Sign( a );
6475 bSign = extractFloatx80Sign( b );
6476 if ( aSign != bSign ) {
6477 return
6478 aSign
bb98fe42 6479 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6480 == 0 );
6481 }
6482 return
6483 aSign ? le128( b.high, b.low, a.high, a.low )
6484 : le128( a.high, a.low, b.high, b.low );
6485
6486}
6487
6488/*----------------------------------------------------------------------------
6489| Returns 1 if the extended double-precision floating-point value `a' is less
6490| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
6491| an exception. Otherwise, the comparison is performed according to the
6492| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6493*----------------------------------------------------------------------------*/
6494
e5a41ffa 6495int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6496{
6497 flag aSign, bSign;
6498
d1eb8f2a
AD
6499 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6500 float_raise(float_flag_invalid, status);
6501 return 0;
6502 }
158142c2 6503 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6504 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6505 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6506 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6507 ) {
af39bc8c
AM
6508 if (floatx80_is_signaling_nan(a, status)
6509 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6510 float_raise(float_flag_invalid, status);
158142c2
FB
6511 }
6512 return 0;
6513 }
6514 aSign = extractFloatx80Sign( a );
6515 bSign = extractFloatx80Sign( b );
6516 if ( aSign != bSign ) {
6517 return
6518 aSign
bb98fe42 6519 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6520 != 0 );
6521 }
6522 return
6523 aSign ? lt128( b.high, b.low, a.high, a.low )
6524 : lt128( a.high, a.low, b.high, b.low );
6525
6526}
6527
67b7861d
AJ
6528/*----------------------------------------------------------------------------
6529| Returns 1 if the extended double-precision floating-point values `a' and `b'
6530| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
6531| The comparison is performed according to the IEC/IEEE Standard for Binary
6532| Floating-Point Arithmetic.
6533*----------------------------------------------------------------------------*/
e5a41ffa 6534int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
67b7861d 6535{
d1eb8f2a
AD
6536 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6537 float_raise(float_flag_invalid, status);
6538 return 1;
6539 }
67b7861d
AJ
6540 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
6541 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
6542 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
6543 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
6544 ) {
af39bc8c
AM
6545 if (floatx80_is_signaling_nan(a, status)
6546 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6547 float_raise(float_flag_invalid, status);
67b7861d
AJ
6548 }
6549 return 1;
6550 }
6551 return 0;
6552}
6553
158142c2
FB
6554/*----------------------------------------------------------------------------
6555| Returns the result of converting the quadruple-precision floating-point
6556| value `a' to the 32-bit two's complement integer format. The conversion
6557| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6558| Arithmetic---which means in particular that the conversion is rounded
6559| according to the current rounding mode. If `a' is a NaN, the largest
6560| positive integer is returned. Otherwise, if the conversion overflows, the
6561| largest integer with the same sign as `a' is returned.
6562*----------------------------------------------------------------------------*/
6563
f4014512 6564int32_t float128_to_int32(float128 a, float_status *status)
158142c2
FB
6565{
6566 flag aSign;
f4014512 6567 int32_t aExp, shiftCount;
bb98fe42 6568 uint64_t aSig0, aSig1;
158142c2
FB
6569
6570 aSig1 = extractFloat128Frac1( a );
6571 aSig0 = extractFloat128Frac0( a );
6572 aExp = extractFloat128Exp( a );
6573 aSign = extractFloat128Sign( a );
6574 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
e9321124 6575 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6576 aSig0 |= ( aSig1 != 0 );
6577 shiftCount = 0x4028 - aExp;
6578 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 6579 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
6580
6581}
6582
6583/*----------------------------------------------------------------------------
6584| Returns the result of converting the quadruple-precision floating-point
6585| value `a' to the 32-bit two's complement integer format. The conversion
6586| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6587| Arithmetic, except that the conversion is always rounded toward zero. If
6588| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
6589| conversion overflows, the largest integer with the same sign as `a' is
6590| returned.
6591*----------------------------------------------------------------------------*/
6592
f4014512 6593int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2
FB
6594{
6595 flag aSign;
f4014512 6596 int32_t aExp, shiftCount;
bb98fe42 6597 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 6598 int32_t z;
158142c2
FB
6599
6600 aSig1 = extractFloat128Frac1( a );
6601 aSig0 = extractFloat128Frac0( a );
6602 aExp = extractFloat128Exp( a );
6603 aSign = extractFloat128Sign( a );
6604 aSig0 |= ( aSig1 != 0 );
6605 if ( 0x401E < aExp ) {
6606 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6607 goto invalid;
6608 }
6609 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
6610 if (aExp || aSig0) {
6611 status->float_exception_flags |= float_flag_inexact;
6612 }
158142c2
FB
6613 return 0;
6614 }
e9321124 6615 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6616 shiftCount = 0x402F - aExp;
6617 savedASig = aSig0;
6618 aSig0 >>= shiftCount;
6619 z = aSig0;
6620 if ( aSign ) z = - z;
6621 if ( ( z < 0 ) ^ aSign ) {
6622 invalid:
ff32e16e 6623 float_raise(float_flag_invalid, status);
2c217da0 6624 return aSign ? INT32_MIN : INT32_MAX;
158142c2
FB
6625 }
6626 if ( ( aSig0<<shiftCount ) != savedASig ) {
a2f2d288 6627 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6628 }
6629 return z;
6630
6631}
6632
6633/*----------------------------------------------------------------------------
6634| Returns the result of converting the quadruple-precision floating-point
6635| value `a' to the 64-bit two's complement integer format. The conversion
6636| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6637| Arithmetic---which means in particular that the conversion is rounded
6638| according to the current rounding mode. If `a' is a NaN, the largest
6639| positive integer is returned. Otherwise, if the conversion overflows, the
6640| largest integer with the same sign as `a' is returned.
6641*----------------------------------------------------------------------------*/
6642
f42c2224 6643int64_t float128_to_int64(float128 a, float_status *status)
158142c2
FB
6644{
6645 flag aSign;
f4014512 6646 int32_t aExp, shiftCount;
bb98fe42 6647 uint64_t aSig0, aSig1;
158142c2
FB
6648
6649 aSig1 = extractFloat128Frac1( a );
6650 aSig0 = extractFloat128Frac0( a );
6651 aExp = extractFloat128Exp( a );
6652 aSign = extractFloat128Sign( a );
e9321124 6653 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6654 shiftCount = 0x402F - aExp;
6655 if ( shiftCount <= 0 ) {
6656 if ( 0x403E < aExp ) {
ff32e16e 6657 float_raise(float_flag_invalid, status);
158142c2
FB
6658 if ( ! aSign
6659 || ( ( aExp == 0x7FFF )
e9321124 6660 && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
158142c2
FB
6661 )
6662 ) {
2c217da0 6663 return INT64_MAX;
158142c2 6664 }
2c217da0 6665 return INT64_MIN;
158142c2
FB
6666 }
6667 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6668 }
6669 else {
6670 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6671 }
ff32e16e 6672 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
6673
6674}
6675
6676/*----------------------------------------------------------------------------
6677| Returns the result of converting the quadruple-precision floating-point
6678| value `a' to the 64-bit two's complement integer format. The conversion
6679| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6680| Arithmetic, except that the conversion is always rounded toward zero.
6681| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
6682| the conversion overflows, the largest integer with the same sign as `a' is
6683| returned.
6684*----------------------------------------------------------------------------*/
6685
f42c2224 6686int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2
FB
6687{
6688 flag aSign;
f4014512 6689 int32_t aExp, shiftCount;
bb98fe42 6690 uint64_t aSig0, aSig1;
f42c2224 6691 int64_t z;
158142c2
FB
6692
6693 aSig1 = extractFloat128Frac1( a );
6694 aSig0 = extractFloat128Frac0( a );
6695 aExp = extractFloat128Exp( a );
6696 aSign = extractFloat128Sign( a );
e9321124 6697 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6698 shiftCount = aExp - 0x402F;
6699 if ( 0 < shiftCount ) {
6700 if ( 0x403E <= aExp ) {
e9321124
AB
6701 aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
6702 if ( ( a.high == UINT64_C(0xC03E000000000000) )
6703 && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
a2f2d288
PM
6704 if (aSig1) {
6705 status->float_exception_flags |= float_flag_inexact;
6706 }
158142c2
FB
6707 }
6708 else {
ff32e16e 6709 float_raise(float_flag_invalid, status);
158142c2 6710 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
2c217da0 6711 return INT64_MAX;
158142c2
FB
6712 }
6713 }
2c217da0 6714 return INT64_MIN;
158142c2
FB
6715 }
6716 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 6717 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
a2f2d288 6718 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6719 }
6720 }
6721 else {
6722 if ( aExp < 0x3FFF ) {
6723 if ( aExp | aSig0 | aSig1 ) {
a2f2d288 6724 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6725 }
6726 return 0;
6727 }
6728 z = aSig0>>( - shiftCount );
6729 if ( aSig1
bb98fe42 6730 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
a2f2d288 6731 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6732 }
6733 }
6734 if ( aSign ) z = - z;
6735 return z;
6736
6737}
6738
2e6d8568
BR
6739/*----------------------------------------------------------------------------
6740| Returns the result of converting the quadruple-precision floating-point value
6741| `a' to the 64-bit unsigned integer format. The conversion is
6742| performed according to the IEC/IEEE Standard for Binary Floating-Point
6743| Arithmetic---which means in particular that the conversion is rounded
6744| according to the current rounding mode. If `a' is a NaN, the largest
6745| positive integer is returned. If the conversion overflows, the
6746| largest unsigned integer is returned. If 'a' is negative, the value is
6747| rounded and zero is returned; negative values that do not round to zero
6748| will raise the inexact exception.
6749*----------------------------------------------------------------------------*/
6750
6751uint64_t float128_to_uint64(float128 a, float_status *status)
6752{
6753 flag aSign;
6754 int aExp;
6755 int shiftCount;
6756 uint64_t aSig0, aSig1;
6757
6758 aSig0 = extractFloat128Frac0(a);
6759 aSig1 = extractFloat128Frac1(a);
6760 aExp = extractFloat128Exp(a);
6761 aSign = extractFloat128Sign(a);
6762 if (aSign && (aExp > 0x3FFE)) {
6763 float_raise(float_flag_invalid, status);
6764 if (float128_is_any_nan(a)) {
2c217da0 6765 return UINT64_MAX;
2e6d8568
BR
6766 } else {
6767 return 0;
6768 }
6769 }
6770 if (aExp) {
2c217da0 6771 aSig0 |= UINT64_C(0x0001000000000000);
2e6d8568
BR
6772 }
6773 shiftCount = 0x402F - aExp;
6774 if (shiftCount <= 0) {
6775 if (0x403E < aExp) {
6776 float_raise(float_flag_invalid, status);
2c217da0 6777 return UINT64_MAX;
2e6d8568
BR
6778 }
6779 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6780 } else {
6781 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6782 }
6783 return roundAndPackUint64(aSign, aSig0, aSig1, status);
6784}
6785
6786uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6787{
6788 uint64_t v;
6789 signed char current_rounding_mode = status->float_rounding_mode;
6790
6791 set_float_rounding_mode(float_round_to_zero, status);
6792 v = float128_to_uint64(a, status);
6793 set_float_rounding_mode(current_rounding_mode, status);
6794
6795 return v;
6796}
6797
158142c2
FB
6798/*----------------------------------------------------------------------------
6799| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
6800| value `a' to the 32-bit unsigned integer format. The conversion
6801| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6802| Arithmetic except that the conversion is always rounded toward zero.
6803| If `a' is a NaN, the largest positive integer is returned. Otherwise,
6804| if the conversion overflows, the largest unsigned integer is returned.
6805| If 'a' is negative, the value is rounded and zero is returned; negative
6806| values that do not round to zero will raise the inexact exception.
6807*----------------------------------------------------------------------------*/
6808
6809uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6810{
6811 uint64_t v;
6812 uint32_t res;
6813 int old_exc_flags = get_float_exception_flags(status);
6814
6815 v = float128_to_uint64_round_to_zero(a, status);
6816 if (v > 0xffffffff) {
6817 res = 0xffffffff;
6818 } else {
6819 return v;
6820 }
6821 set_float_exception_flags(old_exc_flags, status);
e45de992
DH
6822 float_raise(float_flag_invalid, status);
6823 return res;
6824}
6825
6826/*----------------------------------------------------------------------------
6827| Returns the result of converting the quadruple-precision floating-point value
6828| `a' to the 32-bit unsigned integer format. The conversion is
6829| performed according to the IEC/IEEE Standard for Binary Floating-Point
6830| Arithmetic---which means in particular that the conversion is rounded
6831| according to the current rounding mode. If `a' is a NaN, the largest
6832| positive integer is returned. If the conversion overflows, the
6833| largest unsigned integer is returned. If 'a' is negative, the value is
6834| rounded and zero is returned; negative values that do not round to zero
6835| will raise the inexact exception.
6836*----------------------------------------------------------------------------*/
6837
6838uint32_t float128_to_uint32(float128 a, float_status *status)
6839{
6840 uint64_t v;
6841 uint32_t res;
6842 int old_exc_flags = get_float_exception_flags(status);
6843
6844 v = float128_to_uint64(a, status);
6845 if (v > 0xffffffff) {
6846 res = 0xffffffff;
6847 } else {
6848 return v;
6849 }
6850 set_float_exception_flags(old_exc_flags, status);
fd425037
BR
6851 float_raise(float_flag_invalid, status);
6852 return res;
6853}
6854
6855/*----------------------------------------------------------------------------
6856| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
6857| value `a' to the single-precision floating-point format. The conversion
6858| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6859| Arithmetic.
6860*----------------------------------------------------------------------------*/
6861
e5a41ffa 6862float32 float128_to_float32(float128 a, float_status *status)
158142c2
FB
6863{
6864 flag aSign;
f4014512 6865 int32_t aExp;
bb98fe42
AF
6866 uint64_t aSig0, aSig1;
6867 uint32_t zSig;
158142c2
FB
6868
6869 aSig1 = extractFloat128Frac1( a );
6870 aSig0 = extractFloat128Frac0( a );
6871 aExp = extractFloat128Exp( a );
6872 aSign = extractFloat128Sign( a );
6873 if ( aExp == 0x7FFF ) {
6874 if ( aSig0 | aSig1 ) {
ff32e16e 6875 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
6876 }
6877 return packFloat32( aSign, 0xFF, 0 );
6878 }
6879 aSig0 |= ( aSig1 != 0 );
6880 shift64RightJamming( aSig0, 18, &aSig0 );
6881 zSig = aSig0;
6882 if ( aExp || zSig ) {
6883 zSig |= 0x40000000;
6884 aExp -= 0x3F81;
6885 }
ff32e16e 6886 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
6887
6888}
6889
6890/*----------------------------------------------------------------------------
6891| Returns the result of converting the quadruple-precision floating-point
6892| value `a' to the double-precision floating-point format. The conversion
6893| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6894| Arithmetic.
6895*----------------------------------------------------------------------------*/
6896
e5a41ffa 6897float64 float128_to_float64(float128 a, float_status *status)
158142c2
FB
6898{
6899 flag aSign;
f4014512 6900 int32_t aExp;
bb98fe42 6901 uint64_t aSig0, aSig1;
158142c2
FB
6902
6903 aSig1 = extractFloat128Frac1( a );
6904 aSig0 = extractFloat128Frac0( a );
6905 aExp = extractFloat128Exp( a );
6906 aSign = extractFloat128Sign( a );
6907 if ( aExp == 0x7FFF ) {
6908 if ( aSig0 | aSig1 ) {
ff32e16e 6909 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
6910 }
6911 return packFloat64( aSign, 0x7FF, 0 );
6912 }
6913 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6914 aSig0 |= ( aSig1 != 0 );
6915 if ( aExp || aSig0 ) {
e9321124 6916 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
6917 aExp -= 0x3C01;
6918 }
ff32e16e 6919 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
6920
6921}
6922
158142c2
FB
6923/*----------------------------------------------------------------------------
6924| Returns the result of converting the quadruple-precision floating-point
6925| value `a' to the extended double-precision floating-point format. The
6926| conversion is performed according to the IEC/IEEE Standard for Binary
6927| Floating-Point Arithmetic.
6928*----------------------------------------------------------------------------*/
6929
e5a41ffa 6930floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2
FB
6931{
6932 flag aSign;
f4014512 6933 int32_t aExp;
bb98fe42 6934 uint64_t aSig0, aSig1;
158142c2
FB
6935
6936 aSig1 = extractFloat128Frac1( a );
6937 aSig0 = extractFloat128Frac0( a );
6938 aExp = extractFloat128Exp( a );
6939 aSign = extractFloat128Sign( a );
6940 if ( aExp == 0x7FFF ) {
6941 if ( aSig0 | aSig1 ) {
ff32e16e 6942 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
158142c2 6943 }
0f605c88
LV
6944 return packFloatx80(aSign, floatx80_infinity_high,
6945 floatx80_infinity_low);
158142c2
FB
6946 }
6947 if ( aExp == 0 ) {
6948 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6949 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6950 }
6951 else {
e9321124 6952 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6953 }
6954 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6955 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6956
6957}
6958
158142c2
FB
6959/*----------------------------------------------------------------------------
6960| Rounds the quadruple-precision floating-point value `a' to an integer, and
6961| returns the result as a quadruple-precision floating-point value. The
6962| operation is performed according to the IEC/IEEE Standard for Binary
6963| Floating-Point Arithmetic.
6964*----------------------------------------------------------------------------*/
6965
e5a41ffa 6966float128 float128_round_to_int(float128 a, float_status *status)
158142c2
FB
6967{
6968 flag aSign;
f4014512 6969 int32_t aExp;
bb98fe42 6970 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
6971 float128 z;
6972
6973 aExp = extractFloat128Exp( a );
6974 if ( 0x402F <= aExp ) {
6975 if ( 0x406F <= aExp ) {
6976 if ( ( aExp == 0x7FFF )
6977 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6978 ) {
ff32e16e 6979 return propagateFloat128NaN(a, a, status);
158142c2
FB
6980 }
6981 return a;
6982 }
6983 lastBitMask = 1;
6984 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6985 roundBitsMask = lastBitMask - 1;
6986 z = a;
a2f2d288 6987 switch (status->float_rounding_mode) {
dc355b76 6988 case float_round_nearest_even:
158142c2
FB
6989 if ( lastBitMask ) {
6990 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6991 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6992 }
6993 else {
bb98fe42 6994 if ( (int64_t) z.low < 0 ) {
158142c2 6995 ++z.high;
bb98fe42 6996 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
6997 }
6998 }
dc355b76 6999 break;
f9288a76
PM
7000 case float_round_ties_away:
7001 if (lastBitMask) {
7002 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
7003 } else {
7004 if ((int64_t) z.low < 0) {
7005 ++z.high;
7006 }
7007 }
7008 break;
dc355b76
PM
7009 case float_round_to_zero:
7010 break;
7011 case float_round_up:
7012 if (!extractFloat128Sign(z)) {
7013 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7014 }
7015 break;
7016 case float_round_down:
7017 if (extractFloat128Sign(z)) {
7018 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 7019 }
dc355b76 7020 break;
5d64abb3
RH
7021 case float_round_to_odd:
7022 /*
7023 * Note that if lastBitMask == 0, the last bit is the lsb
7024 * of high, and roundBitsMask == -1.
7025 */
7026 if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
7027 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7028 }
7029 break;
dc355b76
PM
7030 default:
7031 abort();
158142c2
FB
7032 }
7033 z.low &= ~ roundBitsMask;
7034 }
7035 else {
7036 if ( aExp < 0x3FFF ) {
bb98fe42 7037 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
a2f2d288 7038 status->float_exception_flags |= float_flag_inexact;
158142c2 7039 aSign = extractFloat128Sign( a );
a2f2d288 7040 switch (status->float_rounding_mode) {
5d64abb3 7041 case float_round_nearest_even:
158142c2
FB
7042 if ( ( aExp == 0x3FFE )
7043 && ( extractFloat128Frac0( a )
7044 | extractFloat128Frac1( a ) )
7045 ) {
7046 return packFloat128( aSign, 0x3FFF, 0, 0 );
7047 }
7048 break;
f9288a76
PM
7049 case float_round_ties_away:
7050 if (aExp == 0x3FFE) {
7051 return packFloat128(aSign, 0x3FFF, 0, 0);
7052 }
7053 break;
5d64abb3 7054 case float_round_down:
158142c2
FB
7055 return
7056 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
7057 : packFloat128( 0, 0, 0, 0 );
5d64abb3 7058 case float_round_up:
158142c2
FB
7059 return
7060 aSign ? packFloat128( 1, 0, 0, 0 )
7061 : packFloat128( 0, 0x3FFF, 0, 0 );
5d64abb3
RH
7062
7063 case float_round_to_odd:
7064 return packFloat128(aSign, 0x3FFF, 0, 0);
158142c2
FB
7065 }
7066 return packFloat128( aSign, 0, 0, 0 );
7067 }
7068 lastBitMask = 1;
7069 lastBitMask <<= 0x402F - aExp;
7070 roundBitsMask = lastBitMask - 1;
7071 z.low = 0;
7072 z.high = a.high;
a2f2d288 7073 switch (status->float_rounding_mode) {
dc355b76 7074 case float_round_nearest_even:
158142c2
FB
7075 z.high += lastBitMask>>1;
7076 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
7077 z.high &= ~ lastBitMask;
7078 }
dc355b76 7079 break;
f9288a76
PM
7080 case float_round_ties_away:
7081 z.high += lastBitMask>>1;
7082 break;
dc355b76
PM
7083 case float_round_to_zero:
7084 break;
7085 case float_round_up:
7086 if (!extractFloat128Sign(z)) {
158142c2
FB
7087 z.high |= ( a.low != 0 );
7088 z.high += roundBitsMask;
7089 }
dc355b76
PM
7090 break;
7091 case float_round_down:
7092 if (extractFloat128Sign(z)) {
7093 z.high |= (a.low != 0);
7094 z.high += roundBitsMask;
7095 }
7096 break;
5d64abb3
RH
7097 case float_round_to_odd:
7098 if ((z.high & lastBitMask) == 0) {
7099 z.high |= (a.low != 0);
7100 z.high += roundBitsMask;
7101 }
7102 break;
dc355b76
PM
7103 default:
7104 abort();
158142c2
FB
7105 }
7106 z.high &= ~ roundBitsMask;
7107 }
7108 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
a2f2d288 7109 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
7110 }
7111 return z;
7112
7113}
7114
7115/*----------------------------------------------------------------------------
7116| Returns the result of adding the absolute values of the quadruple-precision
7117| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
7118| before being returned. `zSign' is ignored if the result is a NaN.
7119| The addition is performed according to the IEC/IEEE Standard for Binary
7120| Floating-Point Arithmetic.
7121*----------------------------------------------------------------------------*/
7122
e5a41ffa
PM
7123static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
7124 float_status *status)
158142c2 7125{
f4014512 7126 int32_t aExp, bExp, zExp;
bb98fe42 7127 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 7128 int32_t expDiff;
158142c2
FB
7129
7130 aSig1 = extractFloat128Frac1( a );
7131 aSig0 = extractFloat128Frac0( a );
7132 aExp = extractFloat128Exp( a );
7133 bSig1 = extractFloat128Frac1( b );
7134 bSig0 = extractFloat128Frac0( b );
7135 bExp = extractFloat128Exp( b );
7136 expDiff = aExp - bExp;
7137 if ( 0 < expDiff ) {
7138 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7139 if (aSig0 | aSig1) {
7140 return propagateFloat128NaN(a, b, status);
7141 }
158142c2
FB
7142 return a;
7143 }
7144 if ( bExp == 0 ) {
7145 --expDiff;
7146 }
7147 else {
e9321124 7148 bSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7149 }
7150 shift128ExtraRightJamming(
7151 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
7152 zExp = aExp;
7153 }
7154 else if ( expDiff < 0 ) {
7155 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7156 if (bSig0 | bSig1) {
7157 return propagateFloat128NaN(a, b, status);
7158 }
158142c2
FB
7159 return packFloat128( zSign, 0x7FFF, 0, 0 );
7160 }
7161 if ( aExp == 0 ) {
7162 ++expDiff;
7163 }
7164 else {
e9321124 7165 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7166 }
7167 shift128ExtraRightJamming(
7168 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
7169 zExp = bExp;
7170 }
7171 else {
7172 if ( aExp == 0x7FFF ) {
7173 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 7174 return propagateFloat128NaN(a, b, status);
158142c2
FB
7175 }
7176 return a;
7177 }
7178 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 7179 if ( aExp == 0 ) {
a2f2d288 7180 if (status->flush_to_zero) {
e6afc87f 7181 if (zSig0 | zSig1) {
ff32e16e 7182 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
7183 }
7184 return packFloat128(zSign, 0, 0, 0);
7185 }
fe76d976
PB
7186 return packFloat128( zSign, 0, zSig0, zSig1 );
7187 }
158142c2 7188 zSig2 = 0;
e9321124 7189 zSig0 |= UINT64_C(0x0002000000000000);
158142c2
FB
7190 zExp = aExp;
7191 goto shiftRight1;
7192 }
e9321124 7193 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7194 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7195 --zExp;
e9321124 7196 if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack;
158142c2
FB
7197 ++zExp;
7198 shiftRight1:
7199 shift128ExtraRightJamming(
7200 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7201 roundAndPack:
ff32e16e 7202 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7203
7204}
7205
7206/*----------------------------------------------------------------------------
7207| Returns the result of subtracting the absolute values of the quadruple-
7208| precision floating-point values `a' and `b'. If `zSign' is 1, the
7209| difference is negated before being returned. `zSign' is ignored if the
7210| result is a NaN. The subtraction is performed according to the IEC/IEEE
7211| Standard for Binary Floating-Point Arithmetic.
7212*----------------------------------------------------------------------------*/
7213
e5a41ffa
PM
7214static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
7215 float_status *status)
158142c2 7216{
f4014512 7217 int32_t aExp, bExp, zExp;
bb98fe42 7218 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 7219 int32_t expDiff;
158142c2
FB
7220
7221 aSig1 = extractFloat128Frac1( a );
7222 aSig0 = extractFloat128Frac0( a );
7223 aExp = extractFloat128Exp( a );
7224 bSig1 = extractFloat128Frac1( b );
7225 bSig0 = extractFloat128Frac0( b );
7226 bExp = extractFloat128Exp( b );
7227 expDiff = aExp - bExp;
7228 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
7229 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
7230 if ( 0 < expDiff ) goto aExpBigger;
7231 if ( expDiff < 0 ) goto bExpBigger;
7232 if ( aExp == 0x7FFF ) {
7233 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 7234 return propagateFloat128NaN(a, b, status);
158142c2 7235 }
ff32e16e 7236 float_raise(float_flag_invalid, status);
af39bc8c 7237 return float128_default_nan(status);
158142c2
FB
7238 }
7239 if ( aExp == 0 ) {
7240 aExp = 1;
7241 bExp = 1;
7242 }
7243 if ( bSig0 < aSig0 ) goto aBigger;
7244 if ( aSig0 < bSig0 ) goto bBigger;
7245 if ( bSig1 < aSig1 ) goto aBigger;
7246 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
7247 return packFloat128(status->float_rounding_mode == float_round_down,
7248 0, 0, 0);
158142c2
FB
7249 bExpBigger:
7250 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7251 if (bSig0 | bSig1) {
7252 return propagateFloat128NaN(a, b, status);
7253 }
158142c2
FB
7254 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
7255 }
7256 if ( aExp == 0 ) {
7257 ++expDiff;
7258 }
7259 else {
e9321124 7260 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7261 }
7262 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
e9321124 7263 bSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7264 bBigger:
7265 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
7266 zExp = bExp;
7267 zSign ^= 1;
7268 goto normalizeRoundAndPack;
7269 aExpBigger:
7270 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7271 if (aSig0 | aSig1) {
7272 return propagateFloat128NaN(a, b, status);
7273 }
158142c2
FB
7274 return a;
7275 }
7276 if ( bExp == 0 ) {
7277 --expDiff;
7278 }
7279 else {
e9321124 7280 bSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7281 }
7282 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
e9321124 7283 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7284 aBigger:
7285 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7286 zExp = aExp;
7287 normalizeRoundAndPack:
7288 --zExp;
ff32e16e
PM
7289 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
7290 status);
158142c2
FB
7291
7292}
7293
7294/*----------------------------------------------------------------------------
7295| Returns the result of adding the quadruple-precision floating-point values
7296| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
7297| for Binary Floating-Point Arithmetic.
7298*----------------------------------------------------------------------------*/
7299
e5a41ffa 7300float128 float128_add(float128 a, float128 b, float_status *status)
158142c2
FB
7301{
7302 flag aSign, bSign;
7303
7304 aSign = extractFloat128Sign( a );
7305 bSign = extractFloat128Sign( b );
7306 if ( aSign == bSign ) {
ff32e16e 7307 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7308 }
7309 else {
ff32e16e 7310 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7311 }
7312
7313}
7314
7315/*----------------------------------------------------------------------------
7316| Returns the result of subtracting the quadruple-precision floating-point
7317| values `a' and `b'. The operation is performed according to the IEC/IEEE
7318| Standard for Binary Floating-Point Arithmetic.
7319*----------------------------------------------------------------------------*/
7320
e5a41ffa 7321float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2
FB
7322{
7323 flag aSign, bSign;
7324
7325 aSign = extractFloat128Sign( a );
7326 bSign = extractFloat128Sign( b );
7327 if ( aSign == bSign ) {
ff32e16e 7328 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7329 }
7330 else {
ff32e16e 7331 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7332 }
7333
7334}
7335
7336/*----------------------------------------------------------------------------
7337| Returns the result of multiplying the quadruple-precision floating-point
7338| values `a' and `b'. The operation is performed according to the IEC/IEEE
7339| Standard for Binary Floating-Point Arithmetic.
7340*----------------------------------------------------------------------------*/
7341
e5a41ffa 7342float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2
FB
7343{
7344 flag aSign, bSign, zSign;
f4014512 7345 int32_t aExp, bExp, zExp;
bb98fe42 7346 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
7347
7348 aSig1 = extractFloat128Frac1( a );
7349 aSig0 = extractFloat128Frac0( a );
7350 aExp = extractFloat128Exp( a );
7351 aSign = extractFloat128Sign( a );
7352 bSig1 = extractFloat128Frac1( b );
7353 bSig0 = extractFloat128Frac0( b );
7354 bExp = extractFloat128Exp( b );
7355 bSign = extractFloat128Sign( b );
7356 zSign = aSign ^ bSign;
7357 if ( aExp == 0x7FFF ) {
7358 if ( ( aSig0 | aSig1 )
7359 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7360 return propagateFloat128NaN(a, b, status);
158142c2
FB
7361 }
7362 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
7363 return packFloat128( zSign, 0x7FFF, 0, 0 );
7364 }
7365 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7366 if (bSig0 | bSig1) {
7367 return propagateFloat128NaN(a, b, status);
7368 }
158142c2
FB
7369 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7370 invalid:
ff32e16e 7371 float_raise(float_flag_invalid, status);
af39bc8c 7372 return float128_default_nan(status);
158142c2
FB
7373 }
7374 return packFloat128( zSign, 0x7FFF, 0, 0 );
7375 }
7376 if ( aExp == 0 ) {
7377 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7378 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7379 }
7380 if ( bExp == 0 ) {
7381 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7382 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7383 }
7384 zExp = aExp + bExp - 0x4000;
e9321124 7385 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7386 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
7387 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
7388 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
7389 zSig2 |= ( zSig3 != 0 );
e9321124 7390 if (UINT64_C( 0x0002000000000000) <= zSig0 ) {
158142c2
FB
7391 shift128ExtraRightJamming(
7392 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7393 ++zExp;
7394 }
ff32e16e 7395 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7396
7397}
7398
7399/*----------------------------------------------------------------------------
7400| Returns the result of dividing the quadruple-precision floating-point value
7401| `a' by the corresponding value `b'. The operation is performed according to
7402| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7403*----------------------------------------------------------------------------*/
7404
e5a41ffa 7405float128 float128_div(float128 a, float128 b, float_status *status)
158142c2
FB
7406{
7407 flag aSign, bSign, zSign;
f4014512 7408 int32_t aExp, bExp, zExp;
bb98fe42
AF
7409 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
7410 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7411
7412 aSig1 = extractFloat128Frac1( a );
7413 aSig0 = extractFloat128Frac0( a );
7414 aExp = extractFloat128Exp( a );
7415 aSign = extractFloat128Sign( a );
7416 bSig1 = extractFloat128Frac1( b );
7417 bSig0 = extractFloat128Frac0( b );
7418 bExp = extractFloat128Exp( b );
7419 bSign = extractFloat128Sign( b );
7420 zSign = aSign ^ bSign;
7421 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7422 if (aSig0 | aSig1) {
7423 return propagateFloat128NaN(a, b, status);
7424 }
158142c2 7425 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7426 if (bSig0 | bSig1) {
7427 return propagateFloat128NaN(a, b, status);
7428 }
158142c2
FB
7429 goto invalid;
7430 }
7431 return packFloat128( zSign, 0x7FFF, 0, 0 );
7432 }
7433 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7434 if (bSig0 | bSig1) {
7435 return propagateFloat128NaN(a, b, status);
7436 }
158142c2
FB
7437 return packFloat128( zSign, 0, 0, 0 );
7438 }
7439 if ( bExp == 0 ) {
7440 if ( ( bSig0 | bSig1 ) == 0 ) {
7441 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7442 invalid:
ff32e16e 7443 float_raise(float_flag_invalid, status);
af39bc8c 7444 return float128_default_nan(status);
158142c2 7445 }
ff32e16e 7446 float_raise(float_flag_divbyzero, status);
158142c2
FB
7447 return packFloat128( zSign, 0x7FFF, 0, 0 );
7448 }
7449 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7450 }
7451 if ( aExp == 0 ) {
7452 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7453 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7454 }
7455 zExp = aExp - bExp + 0x3FFD;
7456 shortShift128Left(
e9321124 7457 aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 );
158142c2 7458 shortShift128Left(
e9321124 7459 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
7460 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
7461 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
7462 ++zExp;
7463 }
7464 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
7465 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
7466 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 7467 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7468 --zSig0;
7469 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
7470 }
7471 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
7472 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
7473 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
7474 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7475 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7476 --zSig1;
7477 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
7478 }
7479 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7480 }
7481 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 7482 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7483
7484}
7485
7486/*----------------------------------------------------------------------------
7487| Returns the remainder of the quadruple-precision floating-point value `a'
7488| with respect to the corresponding value `b'. The operation is performed
7489| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7490*----------------------------------------------------------------------------*/
7491
e5a41ffa 7492float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 7493{
ed086f3d 7494 flag aSign, zSign;
f4014512 7495 int32_t aExp, bExp, expDiff;
bb98fe42
AF
7496 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7497 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
7498 int64_t sigMean0;
158142c2
FB
7499
7500 aSig1 = extractFloat128Frac1( a );
7501 aSig0 = extractFloat128Frac0( a );
7502 aExp = extractFloat128Exp( a );
7503 aSign = extractFloat128Sign( a );
7504 bSig1 = extractFloat128Frac1( b );
7505 bSig0 = extractFloat128Frac0( b );
7506 bExp = extractFloat128Exp( b );
158142c2
FB
7507 if ( aExp == 0x7FFF ) {
7508 if ( ( aSig0 | aSig1 )
7509 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7510 return propagateFloat128NaN(a, b, status);
158142c2
FB
7511 }
7512 goto invalid;
7513 }
7514 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7515 if (bSig0 | bSig1) {
7516 return propagateFloat128NaN(a, b, status);
7517 }
158142c2
FB
7518 return a;
7519 }
7520 if ( bExp == 0 ) {
7521 if ( ( bSig0 | bSig1 ) == 0 ) {
7522 invalid:
ff32e16e 7523 float_raise(float_flag_invalid, status);
af39bc8c 7524 return float128_default_nan(status);
158142c2
FB
7525 }
7526 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7527 }
7528 if ( aExp == 0 ) {
7529 if ( ( aSig0 | aSig1 ) == 0 ) return a;
7530 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7531 }
7532 expDiff = aExp - bExp;
7533 if ( expDiff < -1 ) return a;
7534 shortShift128Left(
e9321124 7535 aSig0 | UINT64_C(0x0001000000000000),
158142c2
FB
7536 aSig1,
7537 15 - ( expDiff < 0 ),
7538 &aSig0,
7539 &aSig1
7540 );
7541 shortShift128Left(
e9321124 7542 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
7543 q = le128( bSig0, bSig1, aSig0, aSig1 );
7544 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7545 expDiff -= 64;
7546 while ( 0 < expDiff ) {
7547 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7548 q = ( 4 < q ) ? q - 4 : 0;
7549 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7550 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
7551 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
7552 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
7553 expDiff -= 61;
7554 }
7555 if ( -64 < expDiff ) {
7556 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7557 q = ( 4 < q ) ? q - 4 : 0;
7558 q >>= - expDiff;
7559 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7560 expDiff += 52;
7561 if ( expDiff < 0 ) {
7562 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7563 }
7564 else {
7565 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7566 }
7567 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7568 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7569 }
7570 else {
7571 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
7572 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7573 }
7574 do {
7575 alternateASig0 = aSig0;
7576 alternateASig1 = aSig1;
7577 ++q;
7578 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 7579 } while ( 0 <= (int64_t) aSig0 );
158142c2 7580 add128(
bb98fe42 7581 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
7582 if ( ( sigMean0 < 0 )
7583 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
7584 aSig0 = alternateASig0;
7585 aSig1 = alternateASig1;
7586 }
bb98fe42 7587 zSign = ( (int64_t) aSig0 < 0 );
158142c2 7588 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
7589 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
7590 status);
158142c2
FB
7591}
7592
7593/*----------------------------------------------------------------------------
7594| Returns the square root of the quadruple-precision floating-point value `a'.
7595| The operation is performed according to the IEC/IEEE Standard for Binary
7596| Floating-Point Arithmetic.
7597*----------------------------------------------------------------------------*/
7598
e5a41ffa 7599float128 float128_sqrt(float128 a, float_status *status)
158142c2
FB
7600{
7601 flag aSign;
f4014512 7602 int32_t aExp, zExp;
bb98fe42
AF
7603 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7604 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7605
7606 aSig1 = extractFloat128Frac1( a );
7607 aSig0 = extractFloat128Frac0( a );
7608 aExp = extractFloat128Exp( a );
7609 aSign = extractFloat128Sign( a );
7610 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7611 if (aSig0 | aSig1) {
7612 return propagateFloat128NaN(a, a, status);
7613 }
158142c2
FB
7614 if ( ! aSign ) return a;
7615 goto invalid;
7616 }
7617 if ( aSign ) {
7618 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
7619 invalid:
ff32e16e 7620 float_raise(float_flag_invalid, status);
af39bc8c 7621 return float128_default_nan(status);
158142c2
FB
7622 }
7623 if ( aExp == 0 ) {
7624 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
7625 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7626 }
7627 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
e9321124 7628 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7629 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
7630 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
7631 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
7632 doubleZSig0 = zSig0<<1;
7633 mul64To128( zSig0, zSig0, &term0, &term1 );
7634 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 7635 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7636 --zSig0;
7637 doubleZSig0 -= 2;
7638 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
7639 }
7640 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
7641 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7642 if ( zSig1 == 0 ) zSig1 = 1;
7643 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7644 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7645 mul64To128( zSig1, zSig1, &term2, &term3 );
7646 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7647 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7648 --zSig1;
7649 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7650 term3 |= 1;
7651 term2 |= doubleZSig0;
7652 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7653 }
7654 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7655 }
7656 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 7657 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7658
7659}
7660
7661/*----------------------------------------------------------------------------
7662| Returns 1 if the quadruple-precision floating-point value `a' is equal to
b689362d
AJ
7663| the corresponding value `b', and 0 otherwise. The invalid exception is
7664| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
7665| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7666*----------------------------------------------------------------------------*/
7667
e5a41ffa 7668int float128_eq(float128 a, float128 b, float_status *status)
158142c2
FB
7669{
7670
7671 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7672 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7673 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7674 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7675 ) {
ff32e16e 7676 float_raise(float_flag_invalid, status);
158142c2
FB
7677 return 0;
7678 }
7679 return
7680 ( a.low == b.low )
7681 && ( ( a.high == b.high )
7682 || ( ( a.low == 0 )
bb98fe42 7683 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
7684 );
7685
7686}
7687
7688/*----------------------------------------------------------------------------
7689| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
7690| or equal to the corresponding value `b', and 0 otherwise. The invalid
7691| exception is raised if either operand is a NaN. The comparison is performed
7692| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
7693*----------------------------------------------------------------------------*/
7694
e5a41ffa 7695int float128_le(float128 a, float128 b, float_status *status)
158142c2
FB
7696{
7697 flag aSign, bSign;
7698
7699 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7700 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7701 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7702 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7703 ) {
ff32e16e 7704 float_raise(float_flag_invalid, status);
158142c2
FB
7705 return 0;
7706 }
7707 aSign = extractFloat128Sign( a );
7708 bSign = extractFloat128Sign( b );
7709 if ( aSign != bSign ) {
7710 return
7711 aSign
bb98fe42 7712 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7713 == 0 );
7714 }
7715 return
7716 aSign ? le128( b.high, b.low, a.high, a.low )
7717 : le128( a.high, a.low, b.high, b.low );
7718
7719}
7720
7721/*----------------------------------------------------------------------------
7722| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
7723| the corresponding value `b', and 0 otherwise. The invalid exception is
7724| raised if either operand is a NaN. The comparison is performed according
7725| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
7726*----------------------------------------------------------------------------*/
7727
e5a41ffa 7728int float128_lt(float128 a, float128 b, float_status *status)
158142c2
FB
7729{
7730 flag aSign, bSign;
7731
7732 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7733 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7734 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7735 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7736 ) {
ff32e16e 7737 float_raise(float_flag_invalid, status);
158142c2
FB
7738 return 0;
7739 }
7740 aSign = extractFloat128Sign( a );
7741 bSign = extractFloat128Sign( b );
7742 if ( aSign != bSign ) {
7743 return
7744 aSign
bb98fe42 7745 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7746 != 0 );
7747 }
7748 return
7749 aSign ? lt128( b.high, b.low, a.high, a.low )
7750 : lt128( a.high, a.low, b.high, b.low );
7751
7752}
7753
67b7861d
AJ
7754/*----------------------------------------------------------------------------
7755| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
f5a64251
AJ
7756| be compared, and 0 otherwise. The invalid exception is raised if either
7757| operand is a NaN. The comparison is performed according to the IEC/IEEE
7758| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
7759*----------------------------------------------------------------------------*/
7760
e5a41ffa 7761int float128_unordered(float128 a, float128 b, float_status *status)
67b7861d
AJ
7762{
7763 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7764 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7765 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7766 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7767 ) {
ff32e16e 7768 float_raise(float_flag_invalid, status);
67b7861d
AJ
7769 return 1;
7770 }
7771 return 0;
7772}
7773
158142c2
FB
7774/*----------------------------------------------------------------------------
7775| Returns 1 if the quadruple-precision floating-point value `a' is equal to
f5a64251
AJ
7776| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
7777| exception. The comparison is performed according to the IEC/IEEE Standard
7778| for Binary Floating-Point Arithmetic.
158142c2
FB
7779*----------------------------------------------------------------------------*/
7780
e5a41ffa 7781int float128_eq_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7782{
7783
7784 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7785 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7786 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7787 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7788 ) {
af39bc8c
AM
7789 if (float128_is_signaling_nan(a, status)
7790 || float128_is_signaling_nan(b, status)) {
ff32e16e 7791 float_raise(float_flag_invalid, status);
b689362d 7792 }
158142c2
FB
7793 return 0;
7794 }
7795 return
7796 ( a.low == b.low )
7797 && ( ( a.high == b.high )
7798 || ( ( a.low == 0 )
bb98fe42 7799 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
7800 );
7801
7802}
7803
7804/*----------------------------------------------------------------------------
7805| Returns 1 if the quadruple-precision floating-point value `a' is less than
7806| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
7807| cause an exception. Otherwise, the comparison is performed according to the
7808| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7809*----------------------------------------------------------------------------*/
7810
e5a41ffa 7811int float128_le_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7812{
7813 flag aSign, bSign;
7814
7815 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7816 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7817 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7818 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7819 ) {
af39bc8c
AM
7820 if (float128_is_signaling_nan(a, status)
7821 || float128_is_signaling_nan(b, status)) {
ff32e16e 7822 float_raise(float_flag_invalid, status);
158142c2
FB
7823 }
7824 return 0;
7825 }
7826 aSign = extractFloat128Sign( a );
7827 bSign = extractFloat128Sign( b );
7828 if ( aSign != bSign ) {
7829 return
7830 aSign
bb98fe42 7831 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7832 == 0 );
7833 }
7834 return
7835 aSign ? le128( b.high, b.low, a.high, a.low )
7836 : le128( a.high, a.low, b.high, b.low );
7837
7838}
7839
7840/*----------------------------------------------------------------------------
7841| Returns 1 if the quadruple-precision floating-point value `a' is less than
7842| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
7843| exception. Otherwise, the comparison is performed according to the IEC/IEEE
7844| Standard for Binary Floating-Point Arithmetic.
7845*----------------------------------------------------------------------------*/
7846
e5a41ffa 7847int float128_lt_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7848{
7849 flag aSign, bSign;
7850
7851 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7852 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7853 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7854 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7855 ) {
af39bc8c
AM
7856 if (float128_is_signaling_nan(a, status)
7857 || float128_is_signaling_nan(b, status)) {
ff32e16e 7858 float_raise(float_flag_invalid, status);
158142c2
FB
7859 }
7860 return 0;
7861 }
7862 aSign = extractFloat128Sign( a );
7863 bSign = extractFloat128Sign( b );
7864 if ( aSign != bSign ) {
7865 return
7866 aSign
bb98fe42 7867 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7868 != 0 );
7869 }
7870 return
7871 aSign ? lt128( b.high, b.low, a.high, a.low )
7872 : lt128( a.high, a.low, b.high, b.low );
7873
7874}
7875
67b7861d
AJ
7876/*----------------------------------------------------------------------------
7877| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
7878| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
7879| comparison is performed according to the IEC/IEEE Standard for Binary
7880| Floating-Point Arithmetic.
7881*----------------------------------------------------------------------------*/
7882
e5a41ffa 7883int float128_unordered_quiet(float128 a, float128 b, float_status *status)
67b7861d
AJ
7884{
7885 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7886 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7887 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7888 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7889 ) {
af39bc8c
AM
7890 if (float128_is_signaling_nan(a, status)
7891 || float128_is_signaling_nan(b, status)) {
ff32e16e 7892 float_raise(float_flag_invalid, status);
67b7861d
AJ
7893 }
7894 return 1;
7895 }
7896 return 0;
7897}
7898
e5a41ffa
PM
7899static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
7900 int is_quiet, float_status *status)
f6714d36
AJ
7901{
7902 flag aSign, bSign;
7903
d1eb8f2a
AD
7904 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7905 float_raise(float_flag_invalid, status);
7906 return float_relation_unordered;
7907 }
f6714d36
AJ
7908 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7909 ( extractFloatx80Frac( a )<<1 ) ) ||
7910 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7911 ( extractFloatx80Frac( b )<<1 ) )) {
7912 if (!is_quiet ||
af39bc8c
AM
7913 floatx80_is_signaling_nan(a, status) ||
7914 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7915 float_raise(float_flag_invalid, status);
f6714d36
AJ
7916 }
7917 return float_relation_unordered;
7918 }
7919 aSign = extractFloatx80Sign( a );
7920 bSign = extractFloatx80Sign( b );
7921 if ( aSign != bSign ) {
7922
7923 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7924 ( ( a.low | b.low ) == 0 ) ) {
7925 /* zero case */
7926 return float_relation_equal;
7927 } else {
7928 return 1 - (2 * aSign);
7929 }
7930 } else {
7931 if (a.low == b.low && a.high == b.high) {
7932 return float_relation_equal;
7933 } else {
7934 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7935 }
7936 }
7937}
7938
e5a41ffa 7939int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7940{
ff32e16e 7941 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7942}
7943
e5a41ffa 7944int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
f6714d36 7945{
ff32e16e 7946 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7947}
7948
e5a41ffa
PM
7949static inline int float128_compare_internal(float128 a, float128 b,
7950 int is_quiet, float_status *status)
1f587329
BS
7951{
7952 flag aSign, bSign;
7953
7954 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7955 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7956 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7957 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7958 if (!is_quiet ||
af39bc8c
AM
7959 float128_is_signaling_nan(a, status) ||
7960 float128_is_signaling_nan(b, status)) {
ff32e16e 7961 float_raise(float_flag_invalid, status);
1f587329
BS
7962 }
7963 return float_relation_unordered;
7964 }
7965 aSign = extractFloat128Sign( a );
7966 bSign = extractFloat128Sign( b );
7967 if ( aSign != bSign ) {
7968 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7969 /* zero case */
7970 return float_relation_equal;
7971 } else {
7972 return 1 - (2 * aSign);
7973 }
7974 } else {
7975 if (a.low == b.low && a.high == b.high) {
7976 return float_relation_equal;
7977 } else {
7978 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7979 }
7980 }
7981}
7982
e5a41ffa 7983int float128_compare(float128 a, float128 b, float_status *status)
1f587329 7984{
ff32e16e 7985 return float128_compare_internal(a, b, 0, status);
1f587329
BS
7986}
7987
e5a41ffa 7988int float128_compare_quiet(float128 a, float128 b, float_status *status)
1f587329 7989{
ff32e16e 7990 return float128_compare_internal(a, b, 1, status);
1f587329
BS
7991}
7992
e5a41ffa 7993floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb
PB
7994{
7995 flag aSign;
326b9e98 7996 int32_t aExp;
bb98fe42 7997 uint64_t aSig;
9ee6e8bb 7998
d1eb8f2a
AD
7999 if (floatx80_invalid_encoding(a)) {
8000 float_raise(float_flag_invalid, status);
8001 return floatx80_default_nan(status);
8002 }
9ee6e8bb
PB
8003 aSig = extractFloatx80Frac( a );
8004 aExp = extractFloatx80Exp( a );
8005 aSign = extractFloatx80Sign( a );
8006
326b9e98
AJ
8007 if ( aExp == 0x7FFF ) {
8008 if ( aSig<<1 ) {
ff32e16e 8009 return propagateFloatx80NaN(a, a, status);
326b9e98 8010 }
9ee6e8bb
PB
8011 return a;
8012 }
326b9e98 8013
3c85c37f
PM
8014 if (aExp == 0) {
8015 if (aSig == 0) {
8016 return a;
8017 }
8018 aExp++;
8019 }
69397542 8020
326b9e98
AJ
8021 if (n > 0x10000) {
8022 n = 0x10000;
8023 } else if (n < -0x10000) {
8024 n = -0x10000;
8025 }
8026
9ee6e8bb 8027 aExp += n;
a2f2d288
PM
8028 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
8029 aSign, aExp, aSig, 0, status);
9ee6e8bb 8030}
9ee6e8bb 8031
e5a41ffa 8032float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb
PB
8033{
8034 flag aSign;
326b9e98 8035 int32_t aExp;
bb98fe42 8036 uint64_t aSig0, aSig1;
9ee6e8bb
PB
8037
8038 aSig1 = extractFloat128Frac1( a );
8039 aSig0 = extractFloat128Frac0( a );
8040 aExp = extractFloat128Exp( a );
8041 aSign = extractFloat128Sign( a );
8042 if ( aExp == 0x7FFF ) {
326b9e98 8043 if ( aSig0 | aSig1 ) {
ff32e16e 8044 return propagateFloat128NaN(a, a, status);
326b9e98 8045 }
9ee6e8bb
PB
8046 return a;
8047 }
3c85c37f 8048 if (aExp != 0) {
e9321124 8049 aSig0 |= UINT64_C(0x0001000000000000);
3c85c37f 8050 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 8051 return a;
3c85c37f
PM
8052 } else {
8053 aExp++;
8054 }
69397542 8055
326b9e98
AJ
8056 if (n > 0x10000) {
8057 n = 0x10000;
8058 } else if (n < -0x10000) {
8059 n = -0x10000;
8060 }
8061
69397542
PB
8062 aExp += n - 1;
8063 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 8064 , status);
9ee6e8bb
PB
8065
8066}
f6b3b108
EC
8067
8068static void __attribute__((constructor)) softfloat_init(void)
8069{
8070 union_float64 ua, ub, uc, ur;
8071
8072 if (QEMU_NO_HARDFLOAT) {
8073 return;
8074 }
8075 /*
8076 * Test that the host's FMA is not obviously broken. For example,
8077 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
8078 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
8079 */
8080 ua.s = 0x0020000000000001ULL;
8081 ub.s = 0x3ca0000000000000ULL;
8082 uc.s = 0x0020000000000000ULL;
8083 ur.h = fma(ua.h, ub.h, uc.h);
8084 if (ur.s != 0x0020000000000001ULL) {
8085 force_soft_fma = true;
8086 }
8087}