]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
135 s->float_exception_flags |= float_flag_input_denormal; \
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
/*
 * For each (number of inputs, float size) combination, select whether the
 * generated hardfloat functions classify their operands with fpclassify()
 * (value 1) or with the float32/64_* primitives (value 0).
 */
#ifdef __x86_64__
/* Single precision */
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
/* Double precision */
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
/* Single precision */
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
/* Double precision */
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205
/*
 * QEMU_HARDFLOAT_USE_ISINF selects isinf() (1) over
 * float{32,64}_is_infinity (0) when !USE_FP.
 * On x86_64/aarch64, isinf() can yield a ~6% speedup.
 * On power64 however, isinf() reduces fp-bench performance by up to 50%.
 */
#if !defined(__x86_64__) && !defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF 0
#else
# define QEMU_HARDFLOAT_USE_ISINF 1
#endif
217
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.  Hardfloat is also disabled under -ffast-math.
 *
 * QEMU_NO_HARDFLOAT is 1 when hardfloat is disabled and 0 otherwise;
 * QEMU_SOFTFLOAT_ATTR additionally carries noinline when hardfloat is
 * enabled.
 */
#if defined(__FAST_MATH__)
# warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
#endif

#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
342/* Note: @fast_test and @post can be NULL */
343static inline float32
344float32_gen2(float32 xa, float32 xb, float_status *s,
345 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
346 f32_check_fn pre, f32_check_fn post,
347 f32_check_fn fast_test, soft_f32_op2_fn fast_op)
348{
349 union_float32 ua, ub, ur;
350
351 ua.s = xa;
352 ub.s = xb;
353
354 if (unlikely(!can_use_fpu(s))) {
355 goto soft;
356 }
357
358 float32_input_flush2(&ua.s, &ub.s, s);
359 if (unlikely(!pre(ua, ub))) {
360 goto soft;
361 }
362 if (fast_test && fast_test(ua, ub)) {
363 return fast_op(ua.s, ub.s, s);
364 }
365
366 ur.h = hard(ua.h, ub.h);
367 if (unlikely(f32_is_inf(ur))) {
368 s->float_exception_flags |= float_flag_overflow;
369 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
370 if (post == NULL || post(ua, ub)) {
371 goto soft;
372 }
373 }
374 return ur.s;
375
376 soft:
377 return soft(ua.s, ub.s, s);
378}
379
380static inline float64
381float64_gen2(float64 xa, float64 xb, float_status *s,
382 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
383 f64_check_fn pre, f64_check_fn post,
384 f64_check_fn fast_test, soft_f64_op2_fn fast_op)
385{
386 union_float64 ua, ub, ur;
387
388 ua.s = xa;
389 ub.s = xb;
390
391 if (unlikely(!can_use_fpu(s))) {
392 goto soft;
393 }
394
395 float64_input_flush2(&ua.s, &ub.s, s);
396 if (unlikely(!pre(ua, ub))) {
397 goto soft;
398 }
399 if (fast_test && fast_test(ua, ub)) {
400 return fast_op(ua.s, ub.s, s);
401 }
402
403 ur.h = hard(ua.h, ub.h);
404 if (unlikely(f64_is_inf(ur))) {
405 s->float_exception_flags |= float_flag_overflow;
406 } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
407 if (post == NULL || post(ua, ub)) {
408 goto soft;
409 }
410 }
411 return ur.s;
412
413 soft:
414 return soft(ua.s, ub.s, s);
415}
416
d97544c9
AB
417/*----------------------------------------------------------------------------
418| Returns the fraction bits of the single-precision floating-point value `a'.
419*----------------------------------------------------------------------------*/
420
421static inline uint32_t extractFloat32Frac(float32 a)
422{
423 return float32_val(a) & 0x007FFFFF;
424}
425
426/*----------------------------------------------------------------------------
427| Returns the exponent bits of the single-precision floating-point value `a'.
428*----------------------------------------------------------------------------*/
429
430static inline int extractFloat32Exp(float32 a)
431{
432 return (float32_val(a) >> 23) & 0xFF;
433}
434
435/*----------------------------------------------------------------------------
436| Returns the sign bit of the single-precision floating-point value `a'.
437*----------------------------------------------------------------------------*/
438
439static inline flag extractFloat32Sign(float32 a)
440{
441 return float32_val(a) >> 31;
442}
443
444/*----------------------------------------------------------------------------
445| Returns the fraction bits of the double-precision floating-point value `a'.
446*----------------------------------------------------------------------------*/
447
448static inline uint64_t extractFloat64Frac(float64 a)
449{
e9321124 450 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
d97544c9
AB
451}
452
453/*----------------------------------------------------------------------------
454| Returns the exponent bits of the double-precision floating-point value `a'.
455*----------------------------------------------------------------------------*/
456
457static inline int extractFloat64Exp(float64 a)
458{
459 return (float64_val(a) >> 52) & 0x7FF;
460}
461
462/*----------------------------------------------------------------------------
463| Returns the sign bit of the double-precision floating-point value `a'.
464*----------------------------------------------------------------------------*/
465
466static inline flag extractFloat64Sign(float64 a)
467{
468 return float64_val(a) >> 63;
469}
470
a90119b5
AB
/*
 * Classification of a floating point number.  The NaN classes are ordered
 * last, so cls >= float_class_qnan matches any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified = 0,
    float_class_zero         = 1,
    float_class_normal       = 2,
    float_class_inf          = 3,
    float_class_qnan         = 4,  /* all NaNs from here */
    float_class_snan         = 5,
} FloatClass;
484
247d1f21
RH
485/* Simple helpers for checking if, or what kind of, NaN we have */
486static inline __attribute__((unused)) bool is_nan(FloatClass c)
487{
488 return unlikely(c >= float_class_qnan);
489}
490
491static inline __attribute__((unused)) bool is_snan(FloatClass c)
492{
493 return c == float_class_snan;
494}
495
496static inline __attribute__((unused)) bool is_qnan(FloatClass c)
497{
498 return c == float_class_qnan;
499}
500
a90119b5
AB
501/*
502 * Structure holding all of the decomposed parts of a float. The
503 * exponent is unbiased and the fraction is normalized. All
504 * calculations are done with a 64 bit fraction and then rounded as
505 * appropriate for the final format.
506 *
507 * Thanks to the packed FloatClass a decent compiler should be able to
508 * fit the whole structure into registers and avoid using the stack
509 * for parameter passing.
510 */
511
512typedef struct {
513 uint64_t frac;
514 int32_t exp;
515 FloatClass cls;
516 bool sign;
517} FloatParts;
518
/* Bit positions within the 64-bit decomposed fraction. */
#define DECOMPOSED_BINARY_POINT (64 - 2)
#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
#define DECOMPOSED_OVERFLOW_BIT (1ull << (DECOMPOSED_BINARY_POINT + 1))
522
/*
 * All of the relevant parameters for a floating-point format.
 *
 * The first group describes the encoding; the second group is computed
 * from the size of the fraction; the last member is an optional modifier.
 */
typedef struct {
    int exp_size;            /* size of the exponent field */
    int exp_bias;            /* offset applied to the exponent field */
    int exp_max;             /* maximum normalised exponent */
    int frac_size;           /* size of the fraction field */
    int frac_shift;          /* shift to normalise the fraction with
                                DECOMPOSED_BINARY_POINT */
    uint64_t frac_lsb;       /* least significant bit of fraction */
    uint64_t frac_lsbm1;     /* the bit below the least significant bit
                                (for rounding) */
    uint64_t round_mask;     /* mask used for rounding */
    uint64_t roundeven_mask; /* mask used for round-to-even */
    bool arm_althp;          /* handle ARM Alternative Half Precision */
} FloatFmt;
548
/*
 * Expand the FloatFmt designated initializers for a format with an E-bit
 * exponent field and an F-bit fraction field.
 *
 * E and F are parenthesized at every use so that the macro also expands
 * correctly when an argument is an expression rather than a literal.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = (E),                                           \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                         \
    .exp_max        = (1 << (E)) - 1,                                \
    .frac_size      = (F),                                           \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                 \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),       \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1), \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1, \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1
560
561static const FloatFmt float16_params = {
562 FLOAT_PARAMS(5, 10)
563};
564
6fed16b2
AB
565static const FloatFmt float16_params_ahp = {
566 FLOAT_PARAMS(5, 10),
567 .arm_althp = true
568};
569
a90119b5
AB
570static const FloatFmt float32_params = {
571 FLOAT_PARAMS(8, 23)
572};
573
574static const FloatFmt float64_params = {
575 FLOAT_PARAMS(11, 52)
576};
577
6fff2167
AB
578/* Unpack a float to parts, but do not canonicalize. */
579static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
580{
581 const int sign_pos = fmt.frac_size + fmt.exp_size;
582
583 return (FloatParts) {
584 .cls = float_class_unclassified,
585 .sign = extract64(raw, sign_pos, 1),
586 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
587 .frac = extract64(raw, 0, fmt.frac_size),
588 };
589}
590
591static inline FloatParts float16_unpack_raw(float16 f)
592{
593 return unpack_raw(float16_params, f);
594}
595
596static inline FloatParts float32_unpack_raw(float32 f)
597{
598 return unpack_raw(float32_params, f);
599}
600
601static inline FloatParts float64_unpack_raw(float64 f)
602{
603 return unpack_raw(float64_params, f);
604}
605
606/* Pack a float from parts, but do not canonicalize. */
607static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
608{
609 const int sign_pos = fmt.frac_size + fmt.exp_size;
610 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
611 return deposit64(ret, sign_pos, 1, p.sign);
612}
613
614static inline float16 float16_pack_raw(FloatParts p)
615{
616 return make_float16(pack_raw(float16_params, p));
617}
618
619static inline float32 float32_pack_raw(FloatParts p)
620{
621 return make_float32(pack_raw(float32_params, p));
622}
623
624static inline float64 float64_pack_raw(FloatParts p)
625{
626 return make_float64(pack_raw(float64_params, p));
627}
628
0664335a
RH
629/*----------------------------------------------------------------------------
630| Functions and definitions to determine: (1) whether tininess for underflow
631| is detected before or after rounding by default, (2) what (if anything)
632| happens when exceptions are raised, (3) how signaling NaNs are distinguished
633| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
634| are propagated from function inputs to output. These details are target-
635| specific.
636*----------------------------------------------------------------------------*/
00f43279 637#include "softfloat-specialize.inc.c"
0664335a 638
6fff2167 639/* Canonicalize EXP and FRAC, setting CLS. */
f9943c7f
EC
640static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
641 float_status *status)
6fff2167 642{
ca3a3d5a 643 if (part.exp == parm->exp_max && !parm->arm_althp) {
6fff2167
AB
644 if (part.frac == 0) {
645 part.cls = float_class_inf;
646 } else {
94933df0 647 part.frac <<= parm->frac_shift;
298b468e
RH
648 part.cls = (parts_is_snan_frac(part.frac, status)
649 ? float_class_snan : float_class_qnan);
6fff2167
AB
650 }
651 } else if (part.exp == 0) {
652 if (likely(part.frac == 0)) {
653 part.cls = float_class_zero;
654 } else if (status->flush_inputs_to_zero) {
655 float_raise(float_flag_input_denormal, status);
656 part.cls = float_class_zero;
657 part.frac = 0;
658 } else {
659 int shift = clz64(part.frac) - 1;
660 part.cls = float_class_normal;
661 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
662 part.frac <<= shift;
663 }
664 } else {
665 part.cls = float_class_normal;
666 part.exp -= parm->exp_bias;
667 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
668 }
669 return part;
670}
671
672/* Round and uncanonicalize a floating-point number by parts. There
673 * are FRAC_SHIFT bits that may require rounding at the bottom of the
674 * fraction; these bits will be removed. The exponent will be biased
675 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
676 */
677
678static FloatParts round_canonical(FloatParts p, float_status *s,
679 const FloatFmt *parm)
680{
5d64abb3 681 const uint64_t frac_lsb = parm->frac_lsb;
6fff2167
AB
682 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
683 const uint64_t round_mask = parm->round_mask;
684 const uint64_t roundeven_mask = parm->roundeven_mask;
685 const int exp_max = parm->exp_max;
686 const int frac_shift = parm->frac_shift;
687 uint64_t frac, inc;
688 int exp, flags = 0;
689 bool overflow_norm;
690
691 frac = p.frac;
692 exp = p.exp;
693
694 switch (p.cls) {
695 case float_class_normal:
696 switch (s->float_rounding_mode) {
697 case float_round_nearest_even:
698 overflow_norm = false;
699 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
700 break;
701 case float_round_ties_away:
702 overflow_norm = false;
703 inc = frac_lsbm1;
704 break;
705 case float_round_to_zero:
706 overflow_norm = true;
707 inc = 0;
708 break;
709 case float_round_up:
710 inc = p.sign ? 0 : round_mask;
711 overflow_norm = p.sign;
712 break;
713 case float_round_down:
714 inc = p.sign ? round_mask : 0;
715 overflow_norm = !p.sign;
716 break;
5d64abb3
RH
717 case float_round_to_odd:
718 overflow_norm = true;
719 inc = frac & frac_lsb ? 0 : round_mask;
720 break;
6fff2167
AB
721 default:
722 g_assert_not_reached();
723 }
724
725 exp += parm->exp_bias;
726 if (likely(exp > 0)) {
727 if (frac & round_mask) {
728 flags |= float_flag_inexact;
729 frac += inc;
730 if (frac & DECOMPOSED_OVERFLOW_BIT) {
731 frac >>= 1;
732 exp++;
733 }
734 }
735 frac >>= frac_shift;
736
ca3a3d5a
AB
737 if (parm->arm_althp) {
738 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
739 if (unlikely(exp > exp_max)) {
740 /* Overflow. Return the maximum normal. */
741 flags = float_flag_invalid;
742 exp = exp_max;
743 frac = -1;
744 }
745 } else if (unlikely(exp >= exp_max)) {
6fff2167
AB
746 flags |= float_flag_overflow | float_flag_inexact;
747 if (overflow_norm) {
748 exp = exp_max - 1;
749 frac = -1;
750 } else {
751 p.cls = float_class_inf;
752 goto do_inf;
753 }
754 }
755 } else if (s->flush_to_zero) {
756 flags |= float_flag_output_denormal;
757 p.cls = float_class_zero;
758 goto do_zero;
759 } else {
760 bool is_tiny = (s->float_detect_tininess
761 == float_tininess_before_rounding)
762 || (exp < 0)
763 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
764
765 shift64RightJamming(frac, 1 - exp, &frac);
766 if (frac & round_mask) {
767 /* Need to recompute round-to-even. */
5d64abb3
RH
768 switch (s->float_rounding_mode) {
769 case float_round_nearest_even:
6fff2167
AB
770 inc = ((frac & roundeven_mask) != frac_lsbm1
771 ? frac_lsbm1 : 0);
5d64abb3
RH
772 break;
773 case float_round_to_odd:
774 inc = frac & frac_lsb ? 0 : round_mask;
775 break;
6fff2167
AB
776 }
777 flags |= float_flag_inexact;
778 frac += inc;
779 }
780
781 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
782 frac >>= frac_shift;
783
784 if (is_tiny && (flags & float_flag_inexact)) {
785 flags |= float_flag_underflow;
786 }
787 if (exp == 0 && frac == 0) {
788 p.cls = float_class_zero;
789 }
790 }
791 break;
792
793 case float_class_zero:
794 do_zero:
795 exp = 0;
796 frac = 0;
797 break;
798
799 case float_class_inf:
800 do_inf:
ca3a3d5a 801 assert(!parm->arm_althp);
6fff2167
AB
802 exp = exp_max;
803 frac = 0;
804 break;
805
806 case float_class_qnan:
807 case float_class_snan:
ca3a3d5a 808 assert(!parm->arm_althp);
6fff2167 809 exp = exp_max;
94933df0 810 frac >>= parm->frac_shift;
6fff2167
AB
811 break;
812
813 default:
814 g_assert_not_reached();
815 }
816
817 float_raise(flags, s);
818 p.exp = exp;
819 p.frac = frac;
820 return p;
821}
822
6fed16b2
AB
823/* Explicit FloatFmt version */
824static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
825 const FloatFmt *params)
826{
f9943c7f 827 return sf_canonicalize(float16_unpack_raw(f), params, s);
6fed16b2
AB
828}
829
6fff2167
AB
830static FloatParts float16_unpack_canonical(float16 f, float_status *s)
831{
6fed16b2
AB
832 return float16a_unpack_canonical(f, s, &float16_params);
833}
834
835static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
836 const FloatFmt *params)
837{
838 return float16_pack_raw(round_canonical(p, s, params));
6fff2167
AB
839}
840
841static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
842{
6fed16b2 843 return float16a_round_pack_canonical(p, s, &float16_params);
6fff2167
AB
844}
845
846static FloatParts float32_unpack_canonical(float32 f, float_status *s)
847{
f9943c7f 848 return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
6fff2167
AB
849}
850
851static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
852{
0bcfbcbe 853 return float32_pack_raw(round_canonical(p, s, &float32_params));
6fff2167
AB
854}
855
856static FloatParts float64_unpack_canonical(float64 f, float_status *s)
857{
f9943c7f 858 return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
6fff2167
AB
859}
860
861static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
862{
0bcfbcbe 863 return float64_pack_raw(round_canonical(p, s, &float64_params));
6fff2167
AB
864}
865
dbe4d53a
AB
866static FloatParts return_nan(FloatParts a, float_status *s)
867{
868 switch (a.cls) {
869 case float_class_snan:
870 s->float_exception_flags |= float_flag_invalid;
0bcfbcbe 871 a = parts_silence_nan(a, s);
dbe4d53a
AB
872 /* fall through */
873 case float_class_qnan:
874 if (s->default_nan_mode) {
f7e598e2 875 return parts_default_nan(s);
dbe4d53a
AB
876 }
877 break;
878
879 default:
880 g_assert_not_reached();
881 }
882 return a;
883}
884
6fff2167
AB
885static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
886{
887 if (is_snan(a.cls) || is_snan(b.cls)) {
888 s->float_exception_flags |= float_flag_invalid;
889 }
890
891 if (s->default_nan_mode) {
f7e598e2 892 return parts_default_nan(s);
6fff2167 893 } else {
4f251cfd 894 if (pickNaN(a.cls, b.cls,
6fff2167
AB
895 a.frac > b.frac ||
896 (a.frac == b.frac && a.sign < b.sign))) {
897 a = b;
898 }
0bcfbcbe
RH
899 if (is_snan(a.cls)) {
900 return parts_silence_nan(a, s);
901 }
6fff2167
AB
902 }
903 return a;
904}
905
d446830a
AB
906static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
907 bool inf_zero, float_status *s)
908{
1839189b
PM
909 int which;
910
d446830a
AB
911 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
912 s->float_exception_flags |= float_flag_invalid;
913 }
914
3bd2dec1 915 which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
1839189b 916
d446830a 917 if (s->default_nan_mode) {
1839189b
PM
918 /* Note that this check is after pickNaNMulAdd so that function
919 * has an opportunity to set the Invalid flag.
920 */
f7e598e2 921 which = 3;
1839189b 922 }
d446830a 923
1839189b
PM
924 switch (which) {
925 case 0:
926 break;
927 case 1:
928 a = b;
929 break;
930 case 2:
931 a = c;
932 break;
933 case 3:
f7e598e2 934 return parts_default_nan(s);
1839189b
PM
935 default:
936 g_assert_not_reached();
d446830a 937 }
1839189b 938
0bcfbcbe
RH
939 if (is_snan(a.cls)) {
940 return parts_silence_nan(a, s);
941 }
d446830a
AB
942 return a;
943}
944
6fff2167
AB
945/*
946 * Returns the result of adding or subtracting the values of the
947 * floating-point values `a' and `b'. The operation is performed
948 * according to the IEC/IEEE Standard for Binary Floating-Point
949 * Arithmetic.
950 */
951
952static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
953 float_status *s)
954{
955 bool a_sign = a.sign;
956 bool b_sign = b.sign ^ subtract;
957
958 if (a_sign != b_sign) {
959 /* Subtraction */
960
961 if (a.cls == float_class_normal && b.cls == float_class_normal) {
962 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
963 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
964 a.frac = a.frac - b.frac;
965 } else {
966 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
967 a.frac = b.frac - a.frac;
968 a.exp = b.exp;
969 a_sign ^= 1;
970 }
971
972 if (a.frac == 0) {
973 a.cls = float_class_zero;
974 a.sign = s->float_rounding_mode == float_round_down;
975 } else {
976 int shift = clz64(a.frac) - 1;
977 a.frac = a.frac << shift;
978 a.exp = a.exp - shift;
979 a.sign = a_sign;
980 }
981 return a;
982 }
983 if (is_nan(a.cls) || is_nan(b.cls)) {
984 return pick_nan(a, b, s);
985 }
986 if (a.cls == float_class_inf) {
987 if (b.cls == float_class_inf) {
988 float_raise(float_flag_invalid, s);
f7e598e2 989 return parts_default_nan(s);
6fff2167
AB
990 }
991 return a;
992 }
993 if (a.cls == float_class_zero && b.cls == float_class_zero) {
994 a.sign = s->float_rounding_mode == float_round_down;
995 return a;
996 }
997 if (a.cls == float_class_zero || b.cls == float_class_inf) {
998 b.sign = a_sign ^ 1;
999 return b;
1000 }
1001 if (b.cls == float_class_zero) {
1002 return a;
1003 }
1004 } else {
1005 /* Addition */
1006 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1007 if (a.exp > b.exp) {
1008 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
1009 } else if (a.exp < b.exp) {
1010 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
1011 a.exp = b.exp;
1012 }
1013 a.frac += b.frac;
1014 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
64d450a0 1015 shift64RightJamming(a.frac, 1, &a.frac);
6fff2167
AB
1016 a.exp += 1;
1017 }
1018 return a;
1019 }
1020 if (is_nan(a.cls) || is_nan(b.cls)) {
1021 return pick_nan(a, b, s);
1022 }
1023 if (a.cls == float_class_inf || b.cls == float_class_zero) {
1024 return a;
1025 }
1026 if (b.cls == float_class_inf || a.cls == float_class_zero) {
1027 b.sign = b_sign;
1028 return b;
1029 }
1030 }
1031 g_assert_not_reached();
1032}
1033
1034/*
1035 * Returns the result of adding or subtracting the floating-point
1036 * values `a' and `b'. The operation is performed according to the
1037 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1038 */
1039
97ff87c0 1040float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
6fff2167
AB
1041{
1042 FloatParts pa = float16_unpack_canonical(a, status);
1043 FloatParts pb = float16_unpack_canonical(b, status);
1044 FloatParts pr = addsub_floats(pa, pb, false, status);
1045
1046 return float16_round_pack_canonical(pr, status);
1047}
1048
1b615d48
EC
1049float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
1050{
1051 FloatParts pa = float16_unpack_canonical(a, status);
1052 FloatParts pb = float16_unpack_canonical(b, status);
1053 FloatParts pr = addsub_floats(pa, pb, true, status);
1054
1055 return float16_round_pack_canonical(pr, status);
1056}
1057
1058static float32 QEMU_SOFTFLOAT_ATTR
1059soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
6fff2167
AB
1060{
1061 FloatParts pa = float32_unpack_canonical(a, status);
1062 FloatParts pb = float32_unpack_canonical(b, status);
1b615d48 1063 FloatParts pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1064
1065 return float32_round_pack_canonical(pr, status);
1066}
1067
1b615d48
EC
1068static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
1069{
1070 return soft_f32_addsub(a, b, false, status);
1071}
1072
1073static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1074{
1075 return soft_f32_addsub(a, b, true, status);
1076}
1077
1078static float64 QEMU_SOFTFLOAT_ATTR
1079soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
6fff2167
AB
1080{
1081 FloatParts pa = float64_unpack_canonical(a, status);
1082 FloatParts pb = float64_unpack_canonical(b, status);
1b615d48 1083 FloatParts pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1084
1085 return float64_round_pack_canonical(pr, status);
1086}
1087
1b615d48 1088static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1089{
1b615d48
EC
1090 return soft_f64_addsub(a, b, false, status);
1091}
6fff2167 1092
1b615d48
EC
1093static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1094{
1095 return soft_f64_addsub(a, b, true, status);
6fff2167
AB
1096}
1097
/* Host-FPU single-precision addition. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;

    return sum;
}
6fff2167 1102
1b615d48
EC
/* Host-FPU single-precision subtraction. */
static float hard_f32_sub(float a, float b)
{
    float diff = a - b;

    return diff;
}
1107
/* Host-FPU double-precision addition. */
static double hard_f64_add(double a, double b)
{
    double sum = a + b;

    return sum;
}
6fff2167 1112
1b615d48
EC
/* Host-FPU double-precision subtraction. */
static double hard_f64_sub(double a, double b)
{
    double diff = a - b;

    return diff;
}
1117
1118static bool f32_addsub_post(union_float32 a, union_float32 b)
1119{
1120 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1121 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1122 }
1123 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1124}
1125
1126static bool f64_addsub_post(union_float64 a, union_float64 b)
1127{
1128 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1129 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1130 } else {
1131 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1132 }
1133}
1134
1135static float32 float32_addsub(float32 a, float32 b, float_status *s,
1136 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1137{
1138 return float32_gen2(a, b, s, hard, soft,
1139 f32_is_zon2, f32_addsub_post, NULL, NULL);
1140}
1141
1142static float64 float64_addsub(float64 a, float64 b, float_status *s,
1143 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1144{
1145 return float64_gen2(a, b, s, hard, soft,
1146 f64_is_zon2, f64_addsub_post, NULL, NULL);
1147}
1148
1149float32 QEMU_FLATTEN
1150float32_add(float32 a, float32 b, float_status *s)
1151{
1152 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1153}
1154
1155float32 QEMU_FLATTEN
1156float32_sub(float32 a, float32 b, float_status *s)
1157{
1158 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1159}
1160
1161float64 QEMU_FLATTEN
1162float64_add(float64 a, float64 b, float_status *s)
1163{
1164 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1165}
1166
1167float64 QEMU_FLATTEN
1168float64_sub(float64 a, float64 b, float_status *s)
1169{
1170 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1171}
1172
74d707e2
AB
1173/*
1174 * Returns the result of multiplying the floating-point values `a' and
1175 * `b'. The operation is performed according to the IEC/IEEE Standard
1176 * for Binary Floating-Point Arithmetic.
1177 */
1178
1179static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
1180{
1181 bool sign = a.sign ^ b.sign;
1182
1183 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1184 uint64_t hi, lo;
1185 int exp = a.exp + b.exp;
1186
1187 mul64To128(a.frac, b.frac, &hi, &lo);
1188 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1189 if (lo & DECOMPOSED_OVERFLOW_BIT) {
1190 shift64RightJamming(lo, 1, &lo);
1191 exp += 1;
1192 }
1193
1194 /* Re-use a */
1195 a.exp = exp;
1196 a.sign = sign;
1197 a.frac = lo;
1198 return a;
1199 }
1200 /* handle all the NaN cases */
1201 if (is_nan(a.cls) || is_nan(b.cls)) {
1202 return pick_nan(a, b, s);
1203 }
1204 /* Inf * Zero == NaN */
1205 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
1206 (a.cls == float_class_zero && b.cls == float_class_inf)) {
1207 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1208 return parts_default_nan(s);
74d707e2
AB
1209 }
1210 /* Multiply by 0 or Inf */
1211 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1212 a.sign = sign;
1213 return a;
1214 }
1215 if (b.cls == float_class_inf || b.cls == float_class_zero) {
1216 b.sign = sign;
1217 return b;
1218 }
1219 g_assert_not_reached();
1220}
1221
97ff87c0 1222float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2
AB
1223{
1224 FloatParts pa = float16_unpack_canonical(a, status);
1225 FloatParts pb = float16_unpack_canonical(b, status);
1226 FloatParts pr = mul_floats(pa, pb, status);
1227
1228 return float16_round_pack_canonical(pr, status);
1229}
1230
2dfabc86
EC
1231static float32 QEMU_SOFTFLOAT_ATTR
1232soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2
AB
1233{
1234 FloatParts pa = float32_unpack_canonical(a, status);
1235 FloatParts pb = float32_unpack_canonical(b, status);
1236 FloatParts pr = mul_floats(pa, pb, status);
1237
1238 return float32_round_pack_canonical(pr, status);
1239}
1240
2dfabc86
EC
1241static float64 QEMU_SOFTFLOAT_ATTR
1242soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2
AB
1243{
1244 FloatParts pa = float64_unpack_canonical(a, status);
1245 FloatParts pb = float64_unpack_canonical(b, status);
1246 FloatParts pr = mul_floats(pa, pb, status);
1247
1248 return float64_round_pack_canonical(pr, status);
1249}
1250
2dfabc86
EC
/* Host-FPU single-precision multiplication. */
static float hard_f32_mul(float a, float b)
{
    float product = a * b;

    return product;
}
1255
/* Host-FPU double-precision multiplication. */
static double hard_f64_mul(double a, double b)
{
    double product = a * b;

    return product;
}
1260
1261static bool f32_mul_fast_test(union_float32 a, union_float32 b)
1262{
1263 return float32_is_zero(a.s) || float32_is_zero(b.s);
1264}
1265
1266static bool f64_mul_fast_test(union_float64 a, union_float64 b)
1267{
1268 return float64_is_zero(a.s) || float64_is_zero(b.s);
1269}
1270
1271static float32 f32_mul_fast_op(float32 a, float32 b, float_status *s)
1272{
1273 bool signbit = float32_is_neg(a) ^ float32_is_neg(b);
1274
1275 return float32_set_sign(float32_zero, signbit);
1276}
1277
1278static float64 f64_mul_fast_op(float64 a, float64 b, float_status *s)
1279{
1280 bool signbit = float64_is_neg(a) ^ float64_is_neg(b);
1281
1282 return float64_set_sign(float64_zero, signbit);
1283}
1284
1285float32 QEMU_FLATTEN
1286float32_mul(float32 a, float32 b, float_status *s)
1287{
1288 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
1289 f32_is_zon2, NULL, f32_mul_fast_test, f32_mul_fast_op);
1290}
1291
1292float64 QEMU_FLATTEN
1293float64_mul(float64 a, float64 b, float_status *s)
1294{
1295 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
1296 f64_is_zon2, NULL, f64_mul_fast_test, f64_mul_fast_op);
1297}
1298
d446830a
AB
1299/*
1300 * Returns the result of multiplying the floating-point values `a' and
1301 * `b' then adding 'c', with no intermediate rounding step after the
1302 * multiplication. The operation is performed according to the
1303 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
1304 * The flags argument allows the caller to select negation of the
1305 * addend, the intermediate product, or the final result. (The
1306 * difference between this and having the caller do a separate
1307 * negation is that negating externally will flip the sign bit on
1308 * NaNs.)
1309 */
1310
1311static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
1312 int flags, float_status *s)
1313{
1314 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
1315 ((1 << float_class_inf) | (1 << float_class_zero));
1316 bool p_sign;
1317 bool sign_flip = flags & float_muladd_negate_result;
1318 FloatClass p_class;
1319 uint64_t hi, lo;
1320 int p_exp;
1321
1322 /* It is implementation-defined whether the cases of (0,inf,qnan)
1323 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
1324 * they return if they do), so we have to hand this information
1325 * off to the target-specific pick-a-NaN routine.
1326 */
1327 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
1328 return pick_nan_muladd(a, b, c, inf_zero, s);
1329 }
1330
1331 if (inf_zero) {
1332 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1333 return parts_default_nan(s);
d446830a
AB
1334 }
1335
1336 if (flags & float_muladd_negate_c) {
1337 c.sign ^= 1;
1338 }
1339
1340 p_sign = a.sign ^ b.sign;
1341
1342 if (flags & float_muladd_negate_product) {
1343 p_sign ^= 1;
1344 }
1345
1346 if (a.cls == float_class_inf || b.cls == float_class_inf) {
1347 p_class = float_class_inf;
1348 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
1349 p_class = float_class_zero;
1350 } else {
1351 p_class = float_class_normal;
1352 }
1353
1354 if (c.cls == float_class_inf) {
1355 if (p_class == float_class_inf && p_sign != c.sign) {
1356 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1357 return parts_default_nan(s);
d446830a
AB
1358 } else {
1359 a.cls = float_class_inf;
1360 a.sign = c.sign ^ sign_flip;
f7e598e2 1361 return a;
d446830a 1362 }
d446830a
AB
1363 }
1364
1365 if (p_class == float_class_inf) {
1366 a.cls = float_class_inf;
1367 a.sign = p_sign ^ sign_flip;
1368 return a;
1369 }
1370
1371 if (p_class == float_class_zero) {
1372 if (c.cls == float_class_zero) {
1373 if (p_sign != c.sign) {
1374 p_sign = s->float_rounding_mode == float_round_down;
1375 }
1376 c.sign = p_sign;
1377 } else if (flags & float_muladd_halve_result) {
1378 c.exp -= 1;
1379 }
1380 c.sign ^= sign_flip;
1381 return c;
1382 }
1383
1384 /* a & b should be normals now... */
1385 assert(a.cls == float_class_normal &&
1386 b.cls == float_class_normal);
1387
1388 p_exp = a.exp + b.exp;
1389
1390 /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
1391 * result.
1392 */
1393 mul64To128(a.frac, b.frac, &hi, &lo);
1394 /* binary point now at bit 124 */
1395
1396 /* check for overflow */
1397 if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
1398 shift128RightJamming(hi, lo, 1, &hi, &lo);
1399 p_exp += 1;
1400 }
1401
1402 /* + add/sub */
1403 if (c.cls == float_class_zero) {
1404 /* move binary point back to 62 */
1405 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1406 } else {
1407 int exp_diff = p_exp - c.exp;
1408 if (p_sign == c.sign) {
1409 /* Addition */
1410 if (exp_diff <= 0) {
1411 shift128RightJamming(hi, lo,
1412 DECOMPOSED_BINARY_POINT - exp_diff,
1413 &hi, &lo);
1414 lo += c.frac;
1415 p_exp = c.exp;
1416 } else {
1417 uint64_t c_hi, c_lo;
1418 /* shift c to the same binary point as the product (124) */
1419 c_hi = c.frac >> 2;
1420 c_lo = 0;
1421 shift128RightJamming(c_hi, c_lo,
1422 exp_diff,
1423 &c_hi, &c_lo);
1424 add128(hi, lo, c_hi, c_lo, &hi, &lo);
1425 /* move binary point back to 62 */
1426 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1427 }
1428
1429 if (lo & DECOMPOSED_OVERFLOW_BIT) {
1430 shift64RightJamming(lo, 1, &lo);
1431 p_exp += 1;
1432 }
1433
1434 } else {
1435 /* Subtraction */
1436 uint64_t c_hi, c_lo;
1437 /* make C binary point match product at bit 124 */
1438 c_hi = c.frac >> 2;
1439 c_lo = 0;
1440
1441 if (exp_diff <= 0) {
1442 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1443 if (exp_diff == 0
1444 &&
1445 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1446 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1447 } else {
1448 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1449 p_sign ^= 1;
1450 p_exp = c.exp;
1451 }
1452 } else {
1453 shift128RightJamming(c_hi, c_lo,
1454 exp_diff,
1455 &c_hi, &c_lo);
1456 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1457 }
1458
1459 if (hi == 0 && lo == 0) {
1460 a.cls = float_class_zero;
1461 a.sign = s->float_rounding_mode == float_round_down;
1462 a.sign ^= sign_flip;
1463 return a;
1464 } else {
1465 int shift;
1466 if (hi != 0) {
1467 shift = clz64(hi);
1468 } else {
1469 shift = clz64(lo) + 64;
1470 }
1471 /* Normalizing to a binary point of 124 is the
1472 correct adjust for the exponent. However since we're
1473 shifting, we might as well put the binary point back
1474 at 62 where we really want it. Therefore shift as
1475 if we're leaving 1 bit at the top of the word, but
1476 adjust the exponent as if we're leaving 3 bits. */
1477 shift -= 1;
1478 if (shift >= 64) {
1479 lo = lo << (shift - 64);
1480 } else {
1481 hi = (hi << shift) | (lo >> (64 - shift));
1482 lo = hi | ((lo << shift) != 0);
1483 }
1484 p_exp -= shift - 2;
1485 }
1486 }
1487 }
1488
1489 if (flags & float_muladd_halve_result) {
1490 p_exp -= 1;
1491 }
1492
1493 /* finally prepare our result */
1494 a.cls = float_class_normal;
1495 a.sign = p_sign ^ sign_flip;
1496 a.exp = p_exp;
1497 a.frac = lo;
1498
1499 return a;
1500}
1501
97ff87c0 1502float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
d446830a
AB
1503 int flags, float_status *status)
1504{
1505 FloatParts pa = float16_unpack_canonical(a, status);
1506 FloatParts pb = float16_unpack_canonical(b, status);
1507 FloatParts pc = float16_unpack_canonical(c, status);
1508 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1509
1510 return float16_round_pack_canonical(pr, status);
1511}
1512
ccf770ba
EC
1513static float32 QEMU_SOFTFLOAT_ATTR
1514soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1515 float_status *status)
d446830a
AB
1516{
1517 FloatParts pa = float32_unpack_canonical(a, status);
1518 FloatParts pb = float32_unpack_canonical(b, status);
1519 FloatParts pc = float32_unpack_canonical(c, status);
1520 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1521
1522 return float32_round_pack_canonical(pr, status);
1523}
1524
ccf770ba
EC
1525static float64 QEMU_SOFTFLOAT_ATTR
1526soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1527 float_status *status)
d446830a
AB
1528{
1529 FloatParts pa = float64_unpack_canonical(a, status);
1530 FloatParts pb = float64_unpack_canonical(b, status);
1531 FloatParts pc = float64_unpack_canonical(c, status);
1532 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1533
1534 return float64_round_pack_canonical(pr, status);
1535}
1536
f6b3b108
EC
/*
 * When true, float32_muladd() and float64_muladd() skip the host FMA
 * and take the software path.
 * NOTE(review): the code that sets this flag is outside this excerpt.
 */
1537static bool force_soft_fma;
1538
ccf770ba
EC
1539float32 QEMU_FLATTEN
1540float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1541{
1542 union_float32 ua, ub, uc, ur;
1543
1544 ua.s = xa;
1545 ub.s = xb;
1546 uc.s = xc;
1547
1548 if (unlikely(!can_use_fpu(s))) {
1549 goto soft;
1550 }
1551 if (unlikely(flags & float_muladd_halve_result)) {
1552 goto soft;
1553 }
1554
1555 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1556 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1557 goto soft;
1558 }
f6b3b108
EC
1559
1560 if (unlikely(force_soft_fma)) {
1561 goto soft;
1562 }
1563
ccf770ba
EC
1564 /*
1565 * When (a || b) == 0, there's no need to check for under/over flow,
1566 * since we know the addend is (normal || 0) and the product is 0.
1567 */
1568 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1569 union_float32 up;
1570 bool prod_sign;
1571
1572 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1573 prod_sign ^= !!(flags & float_muladd_negate_product);
1574 up.s = float32_set_sign(float32_zero, prod_sign);
1575
1576 if (flags & float_muladd_negate_c) {
1577 uc.h = -uc.h;
1578 }
1579 ur.h = up.h + uc.h;
1580 } else {
896f51fb
KC
1581 union_float32 ua_orig = ua;
1582 union_float32 uc_orig = uc;
1583
ccf770ba
EC
1584 if (flags & float_muladd_negate_product) {
1585 ua.h = -ua.h;
1586 }
1587 if (flags & float_muladd_negate_c) {
1588 uc.h = -uc.h;
1589 }
1590
1591 ur.h = fmaf(ua.h, ub.h, uc.h);
1592
1593 if (unlikely(f32_is_inf(ur))) {
1594 s->float_exception_flags |= float_flag_overflow;
1595 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
1596 ua = ua_orig;
1597 uc = uc_orig;
ccf770ba
EC
1598 goto soft;
1599 }
1600 }
1601 if (flags & float_muladd_negate_result) {
1602 return float32_chs(ur.s);
1603 }
1604 return ur.s;
1605
1606 soft:
1607 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1608}
1609
1610float64 QEMU_FLATTEN
1611float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1612{
1613 union_float64 ua, ub, uc, ur;
1614
1615 ua.s = xa;
1616 ub.s = xb;
1617 uc.s = xc;
1618
1619 if (unlikely(!can_use_fpu(s))) {
1620 goto soft;
1621 }
1622 if (unlikely(flags & float_muladd_halve_result)) {
1623 goto soft;
1624 }
1625
1626 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1627 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1628 goto soft;
1629 }
f6b3b108
EC
1630
1631 if (unlikely(force_soft_fma)) {
1632 goto soft;
1633 }
1634
ccf770ba
EC
1635 /*
1636 * When (a || b) == 0, there's no need to check for under/over flow,
1637 * since we know the addend is (normal || 0) and the product is 0.
1638 */
1639 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1640 union_float64 up;
1641 bool prod_sign;
1642
1643 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1644 prod_sign ^= !!(flags & float_muladd_negate_product);
1645 up.s = float64_set_sign(float64_zero, prod_sign);
1646
1647 if (flags & float_muladd_negate_c) {
1648 uc.h = -uc.h;
1649 }
1650 ur.h = up.h + uc.h;
1651 } else {
896f51fb
KC
1652 union_float64 ua_orig = ua;
1653 union_float64 uc_orig = uc;
1654
ccf770ba
EC
1655 if (flags & float_muladd_negate_product) {
1656 ua.h = -ua.h;
1657 }
1658 if (flags & float_muladd_negate_c) {
1659 uc.h = -uc.h;
1660 }
1661
1662 ur.h = fma(ua.h, ub.h, uc.h);
1663
1664 if (unlikely(f64_is_inf(ur))) {
1665 s->float_exception_flags |= float_flag_overflow;
1666 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
1667 ua = ua_orig;
1668 uc = uc_orig;
ccf770ba
EC
1669 goto soft;
1670 }
1671 }
1672 if (flags & float_muladd_negate_result) {
1673 return float64_chs(ur.s);
1674 }
1675 return ur.s;
1676
1677 soft:
1678 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1679}
1680
cf07323d
AB
1681/*
1682 * Returns the result of dividing the floating-point value `a' by the
1683 * corresponding value `b'. The operation is performed according to
1684 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1685 */
1686
1687static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1688{
1689 bool sign = a.sign ^ b.sign;
1690
1691 if (a.cls == float_class_normal && b.cls == float_class_normal) {
5dfbc9e4 1692 uint64_t n0, n1, q, r;
cf07323d 1693 int exp = a.exp - b.exp;
5dfbc9e4
RH
1694
1695 /*
1696 * We want a 2*N / N-bit division to produce exactly an N-bit
1697 * result, so that we do not lose any precision and so that we
1698 * do not have to renormalize afterward. If A.frac < B.frac,
1699 * then division would produce an (N-1)-bit result; shift A left
1700 * by one to produce the an N-bit result, and decrement the
1701 * exponent to match.
1702 *
1703 * The udiv_qrnnd algorithm that we're using requires normalization,
1704 * i.e. the msb of the denominator must be set. Since we know that
1705 * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
1706 * by one (more), and the remainder must be shifted right by one.
1707 */
cf07323d
AB
1708 if (a.frac < b.frac) {
1709 exp -= 1;
5dfbc9e4 1710 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
cf07323d 1711 } else {
5dfbc9e4 1712 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
cf07323d 1713 }
5dfbc9e4
RH
1714 q = udiv_qrnnd(&r, n1, n0, b.frac << 1);
1715
1716 /*
1717 * Set lsb if there is a remainder, to set inexact.
1718 * As mentioned above, to find the actual value of the remainder we
1719 * would need to shift right, but (1) we are only concerned about
1720 * non-zero-ness, and (2) the remainder will always be even because
1721 * both inputs to the division primitive are even.
1722 */
1723 a.frac = q | (r != 0);
cf07323d
AB
1724 a.sign = sign;
1725 a.exp = exp;
1726 return a;
1727 }
1728 /* handle all the NaN cases */
1729 if (is_nan(a.cls) || is_nan(b.cls)) {
1730 return pick_nan(a, b, s);
1731 }
1732 /* 0/0 or Inf/Inf */
1733 if (a.cls == b.cls
1734 &&
1735 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1736 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1737 return parts_default_nan(s);
cf07323d 1738 }
9cb4e398
AB
1739 /* Inf / x or 0 / x */
1740 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1741 a.sign = sign;
1742 return a;
1743 }
cf07323d
AB
1744 /* Div 0 => Inf */
1745 if (b.cls == float_class_zero) {
1746 s->float_exception_flags |= float_flag_divbyzero;
1747 a.cls = float_class_inf;
1748 a.sign = sign;
1749 return a;
1750 }
cf07323d
AB
1751 /* Div by Inf */
1752 if (b.cls == float_class_inf) {
1753 a.cls = float_class_zero;
1754 a.sign = sign;
1755 return a;
1756 }
1757 g_assert_not_reached();
1758}
1759
1760float16 float16_div(float16 a, float16 b, float_status *status)
1761{
1762 FloatParts pa = float16_unpack_canonical(a, status);
1763 FloatParts pb = float16_unpack_canonical(b, status);
1764 FloatParts pr = div_floats(pa, pb, status);
1765
1766 return float16_round_pack_canonical(pr, status);
1767}
1768
4a629561
EC
1769static float32 QEMU_SOFTFLOAT_ATTR
1770soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d
AB
1771{
1772 FloatParts pa = float32_unpack_canonical(a, status);
1773 FloatParts pb = float32_unpack_canonical(b, status);
1774 FloatParts pr = div_floats(pa, pb, status);
1775
1776 return float32_round_pack_canonical(pr, status);
1777}
1778
4a629561
EC
1779static float64 QEMU_SOFTFLOAT_ATTR
1780soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d
AB
1781{
1782 FloatParts pa = float64_unpack_canonical(a, status);
1783 FloatParts pb = float64_unpack_canonical(b, status);
1784 FloatParts pr = div_floats(pa, pb, status);
1785
1786 return float64_round_pack_canonical(pr, status);
1787}
1788
4a629561
EC
/* Host-FPU single-precision division. */
static float hard_f32_div(float a, float b)
{
    float quotient = a / b;

    return quotient;
}
1793
/* Host-FPU double-precision division. */
static double hard_f64_div(double a, double b)
{
    double quotient = a / b;

    return quotient;
}
1798
1799static bool f32_div_pre(union_float32 a, union_float32 b)
1800{
1801 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1802 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1803 fpclassify(b.h) == FP_NORMAL;
1804 }
1805 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1806}
1807
1808static bool f64_div_pre(union_float64 a, union_float64 b)
1809{
1810 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1811 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1812 fpclassify(b.h) == FP_NORMAL;
1813 }
1814 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1815}
1816
1817static bool f32_div_post(union_float32 a, union_float32 b)
1818{
1819 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1820 return fpclassify(a.h) != FP_ZERO;
1821 }
1822 return !float32_is_zero(a.s);
1823}
1824
1825static bool f64_div_post(union_float64 a, union_float64 b)
1826{
1827 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1828 return fpclassify(a.h) != FP_ZERO;
1829 }
1830 return !float64_is_zero(a.s);
1831}
1832
1833float32 QEMU_FLATTEN
1834float32_div(float32 a, float32 b, float_status *s)
1835{
1836 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
1837 f32_div_pre, f32_div_post, NULL, NULL);
1838}
1839
1840float64 QEMU_FLATTEN
1841float64_div(float64 a, float64 b, float_status *s)
1842{
1843 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
1844 f64_div_pre, f64_div_post, NULL, NULL);
1845}
1846
6fed16b2
AB
1847/*
1848 * Float to Float conversions
1849 *
1850 * Returns the result of converting one float format to another. The
1851 * conversion is performed according to the IEC/IEEE Standard for
1852 * Binary Floating-Point Arithmetic.
1853 *
1854 * The float_to_float helper only needs to take care of raising
1855 * invalid exceptions and handling the conversion on NaNs.
1856 */
1857
1858static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1859 float_status *s)
1860{
1861 if (dstf->arm_althp) {
1862 switch (a.cls) {
1863 case float_class_qnan:
1864 case float_class_snan:
1865 /* There is no NaN in the destination format. Raise Invalid
1866 * and return a zero with the sign of the input NaN.
1867 */
1868 s->float_exception_flags |= float_flag_invalid;
1869 a.cls = float_class_zero;
1870 a.frac = 0;
1871 a.exp = 0;
1872 break;
1873
1874 case float_class_inf:
1875 /* There is no Inf in the destination format. Raise Invalid
1876 * and return the maximum normal with the correct sign.
1877 */
1878 s->float_exception_flags |= float_flag_invalid;
1879 a.cls = float_class_normal;
1880 a.exp = dstf->exp_max;
1881 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1882 break;
1883
1884 default:
1885 break;
1886 }
1887 } else if (is_nan(a.cls)) {
1888 if (is_snan(a.cls)) {
1889 s->float_exception_flags |= float_flag_invalid;
1890 a = parts_silence_nan(a, s);
1891 }
1892 if (s->default_nan_mode) {
1893 return parts_default_nan(s);
1894 }
1895 }
1896 return a;
1897}
1898
1899float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1900{
1901 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1902 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1903 FloatParts pr = float_to_float(p, &float32_params, s);
1904 return float32_round_pack_canonical(pr, s);
1905}
1906
1907float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1908{
1909 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1910 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1911 FloatParts pr = float_to_float(p, &float64_params, s);
1912 return float64_round_pack_canonical(pr, s);
1913}
1914
1915float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1916{
1917 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1918 FloatParts p = float32_unpack_canonical(a, s);
1919 FloatParts pr = float_to_float(p, fmt16, s);
1920 return float16a_round_pack_canonical(pr, s, fmt16);
1921}
1922
21381dcf
MK
1923static float64 QEMU_SOFTFLOAT_ATTR
1924soft_float32_to_float64(float32 a, float_status *s)
6fed16b2
AB
1925{
1926 FloatParts p = float32_unpack_canonical(a, s);
1927 FloatParts pr = float_to_float(p, &float64_params, s);
1928 return float64_round_pack_canonical(pr, s);
1929}
1930
21381dcf
MK
1931float64 float32_to_float64(float32 a, float_status *s)
1932{
1933 if (likely(float32_is_normal(a))) {
1934 /* Widening conversion can never produce inexact results. */
1935 union_float32 uf;
1936 union_float64 ud;
1937 uf.s = a;
1938 ud.h = uf.h;
1939 return ud.s;
1940 } else if (float32_is_zero(a)) {
1941 return float64_set_sign(float64_zero, float32_is_neg(a));
1942 } else {
1943 return soft_float32_to_float64(a, s);
1944 }
1945}
1946
6fed16b2
AB
1947float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1948{
1949 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1950 FloatParts p = float64_unpack_canonical(a, s);
1951 FloatParts pr = float_to_float(p, fmt16, s);
1952 return float16a_round_pack_canonical(pr, s, fmt16);
1953}
1954
1955float32 float64_to_float32(float64 a, float_status *s)
1956{
1957 FloatParts p = float64_unpack_canonical(a, s);
1958 FloatParts pr = float_to_float(p, &float32_params, s);
1959 return float32_round_pack_canonical(pr, s);
1960}
1961
dbe4d53a
AB
1962/*
1963 * Rounds the floating-point value `a' to an integer, and returns the
1964 * result as a floating-point value. The operation is performed
1965 * according to the IEC/IEEE Standard for Binary Floating-Point
1966 * Arithmetic.
1967 */
1968
2f6c74be
RH
1969static FloatParts round_to_int(FloatParts a, int rmode,
1970 int scale, float_status *s)
dbe4d53a 1971{
2f6c74be
RH
1972 switch (a.cls) {
1973 case float_class_qnan:
1974 case float_class_snan:
dbe4d53a 1975 return return_nan(a, s);
dbe4d53a 1976
dbe4d53a
AB
1977 case float_class_zero:
1978 case float_class_inf:
dbe4d53a
AB
1979 /* already "integral" */
1980 break;
2f6c74be 1981
dbe4d53a 1982 case float_class_normal:
2f6c74be
RH
1983 scale = MIN(MAX(scale, -0x10000), 0x10000);
1984 a.exp += scale;
1985
dbe4d53a
AB
1986 if (a.exp >= DECOMPOSED_BINARY_POINT) {
1987 /* already integral */
1988 break;
1989 }
1990 if (a.exp < 0) {
1991 bool one;
1992 /* all fractional */
1993 s->float_exception_flags |= float_flag_inexact;
2f6c74be 1994 switch (rmode) {
dbe4d53a
AB
1995 case float_round_nearest_even:
1996 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
1997 break;
1998 case float_round_ties_away:
1999 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
2000 break;
2001 case float_round_to_zero:
2002 one = false;
2003 break;
2004 case float_round_up:
2005 one = !a.sign;
2006 break;
2007 case float_round_down:
2008 one = a.sign;
2009 break;
5d64abb3
RH
2010 case float_round_to_odd:
2011 one = true;
2012 break;
dbe4d53a
AB
2013 default:
2014 g_assert_not_reached();
2015 }
2016
2017 if (one) {
2018 a.frac = DECOMPOSED_IMPLICIT_BIT;
2019 a.exp = 0;
2020 } else {
2021 a.cls = float_class_zero;
2022 }
2023 } else {
2024 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
2025 uint64_t frac_lsbm1 = frac_lsb >> 1;
2026 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
2027 uint64_t rnd_mask = rnd_even_mask >> 1;
2028 uint64_t inc;
2029
2f6c74be 2030 switch (rmode) {
dbe4d53a
AB
2031 case float_round_nearest_even:
2032 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
2033 break;
2034 case float_round_ties_away:
2035 inc = frac_lsbm1;
2036 break;
2037 case float_round_to_zero:
2038 inc = 0;
2039 break;
2040 case float_round_up:
2041 inc = a.sign ? 0 : rnd_mask;
2042 break;
2043 case float_round_down:
2044 inc = a.sign ? rnd_mask : 0;
2045 break;
5d64abb3
RH
2046 case float_round_to_odd:
2047 inc = a.frac & frac_lsb ? 0 : rnd_mask;
2048 break;
dbe4d53a
AB
2049 default:
2050 g_assert_not_reached();
2051 }
2052
2053 if (a.frac & rnd_mask) {
2054 s->float_exception_flags |= float_flag_inexact;
2055 a.frac += inc;
2056 a.frac &= ~rnd_mask;
2057 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
2058 a.frac >>= 1;
2059 a.exp++;
2060 }
2061 }
2062 }
2063 break;
2064 default:
2065 g_assert_not_reached();
2066 }
2067 return a;
2068}
2069
2070float16 float16_round_to_int(float16 a, float_status *s)
2071{
2072 FloatParts pa = float16_unpack_canonical(a, s);
2f6c74be 2073 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2074 return float16_round_pack_canonical(pr, s);
2075}
2076
2077float32 float32_round_to_int(float32 a, float_status *s)
2078{
2079 FloatParts pa = float32_unpack_canonical(a, s);
2f6c74be 2080 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2081 return float32_round_pack_canonical(pr, s);
2082}
2083
2084float64 float64_round_to_int(float64 a, float_status *s)
2085{
2086 FloatParts pa = float64_unpack_canonical(a, s);
2f6c74be 2087 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2088 return float64_round_pack_canonical(pr, s);
2089}
2090
ab52f973
AB
2091/*
2092 * Returns the result of converting the floating-point value `a' to
2093 * the two's complement integer format. The conversion is performed
2094 * according to the IEC/IEEE Standard for Binary Floating-Point
2095 * Arithmetic---which means in particular that the conversion is
2096 * rounded according to the current rounding mode. If `a' is a NaN,
2097 * the largest positive integer is returned. Otherwise, if the
2098 * conversion overflows, the largest integer with the same sign as `a'
2099 * is returned.
2100*/
2101
2f6c74be 2102static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale,
ab52f973
AB
2103 int64_t min, int64_t max,
2104 float_status *s)
2105{
2106 uint64_t r;
2107 int orig_flags = get_float_exception_flags(s);
2f6c74be 2108 FloatParts p = round_to_int(in, rmode, scale, s);
ab52f973
AB
2109
2110 switch (p.cls) {
2111 case float_class_snan:
2112 case float_class_qnan:
801bc563 2113 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2114 return max;
2115 case float_class_inf:
801bc563 2116 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2117 return p.sign ? min : max;
2118 case float_class_zero:
2119 return 0;
2120 case float_class_normal:
2121 if (p.exp < DECOMPOSED_BINARY_POINT) {
2122 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2123 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2124 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2125 } else {
2126 r = UINT64_MAX;
2127 }
2128 if (p.sign) {
33358375 2129 if (r <= -(uint64_t) min) {
ab52f973
AB
2130 return -r;
2131 } else {
2132 s->float_exception_flags = orig_flags | float_flag_invalid;
2133 return min;
2134 }
2135 } else {
33358375 2136 if (r <= max) {
ab52f973
AB
2137 return r;
2138 } else {
2139 s->float_exception_flags = orig_flags | float_flag_invalid;
2140 return max;
2141 }
2142 }
2143 default:
2144 g_assert_not_reached();
2145 }
2146}
2147
2f6c74be
RH
2148int16_t float16_to_int16_scalbn(float16 a, int rmode, int scale,
2149 float_status *s)
2150{
2151 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2152 rmode, scale, INT16_MIN, INT16_MAX, s);
2153}
2154
2155int32_t float16_to_int32_scalbn(float16 a, int rmode, int scale,
2156 float_status *s)
2157{
2158 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2159 rmode, scale, INT32_MIN, INT32_MAX, s);
2160}
2161
2162int64_t float16_to_int64_scalbn(float16 a, int rmode, int scale,
2163 float_status *s)
2164{
2165 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2166 rmode, scale, INT64_MIN, INT64_MAX, s);
2167}
2168
2169int16_t float32_to_int16_scalbn(float32 a, int rmode, int scale,
2170 float_status *s)
2171{
2172 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2173 rmode, scale, INT16_MIN, INT16_MAX, s);
2174}
2175
2176int32_t float32_to_int32_scalbn(float32 a, int rmode, int scale,
2177 float_status *s)
2178{
2179 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2180 rmode, scale, INT32_MIN, INT32_MAX, s);
2181}
2182
2183int64_t float32_to_int64_scalbn(float32 a, int rmode, int scale,
2184 float_status *s)
2185{
2186 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2187 rmode, scale, INT64_MIN, INT64_MAX, s);
2188}
2189
2190int16_t float64_to_int16_scalbn(float64 a, int rmode, int scale,
2191 float_status *s)
2192{
2193 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2194 rmode, scale, INT16_MIN, INT16_MAX, s);
2195}
2196
2197int32_t float64_to_int32_scalbn(float64 a, int rmode, int scale,
2198 float_status *s)
2199{
2200 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2201 rmode, scale, INT32_MIN, INT32_MAX, s);
2202}
2203
2204int64_t float64_to_int64_scalbn(float64 a, int rmode, int scale,
2205 float_status *s)
2206{
2207 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2208 rmode, scale, INT64_MIN, INT64_MAX, s);
2209}
2210
2211int16_t float16_to_int16(float16 a, float_status *s)
2212{
2213 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2214}
2215
2216int32_t float16_to_int32(float16 a, float_status *s)
2217{
2218 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2219}
2220
2221int64_t float16_to_int64(float16 a, float_status *s)
2222{
2223 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2224}
2225
2226int16_t float32_to_int16(float32 a, float_status *s)
2227{
2228 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2229}
2230
2231int32_t float32_to_int32(float32 a, float_status *s)
2232{
2233 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2234}
2235
2236int64_t float32_to_int64(float32 a, float_status *s)
2237{
2238 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2239}
2240
2241int16_t float64_to_int16(float64 a, float_status *s)
2242{
2243 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2244}
2245
2246int32_t float64_to_int32(float64 a, float_status *s)
2247{
2248 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2249}
2250
2251int64_t float64_to_int64(float64 a, float_status *s)
2252{
2253 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2254}
2255
2256int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2257{
2258 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2259}
2260
2261int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2262{
2263 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2264}
2265
2266int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2267{
2268 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2269}
2270
2f6c74be
RH
2271int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2272{
2273 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2274}
ab52f973 2275
2f6c74be
RH
2276int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2277{
2278 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2279}
2280
2281int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2282{
2283 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2284}
2285
2286int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2287{
2288 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2289}
ab52f973 2290
2f6c74be
RH
2291int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2292{
2293 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2294}
ab52f973 2295
2f6c74be
RH
2296int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2297{
2298 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2299}
ab52f973
AB
2300
2301/*
2302 * Returns the result of converting the floating-point value `a' to
2303 * the unsigned integer format. The conversion is performed according
2304 * to the IEC/IEEE Standard for Binary Floating-Point
2305 * Arithmetic---which means in particular that the conversion is
2306 * rounded according to the current rounding mode. If `a' is a NaN,
2307 * the largest unsigned integer is returned. Otherwise, if the
2308 * conversion overflows, the largest unsigned integer is returned. If
2309 * `a' is negative, the result is rounded and zero is returned;
2310 * values that do not round to zero will raise the invalid exception
2311 * flag.
2312 */
2313
2f6c74be
RH
2314static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale,
2315 uint64_t max, float_status *s)
ab52f973
AB
2316{
2317 int orig_flags = get_float_exception_flags(s);
2f6c74be
RH
2318 FloatParts p = round_to_int(in, rmode, scale, s);
2319 uint64_t r;
ab52f973
AB
2320
2321 switch (p.cls) {
2322 case float_class_snan:
2323 case float_class_qnan:
2324 s->float_exception_flags = orig_flags | float_flag_invalid;
2325 return max;
2326 case float_class_inf:
801bc563 2327 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2328 return p.sign ? 0 : max;
2329 case float_class_zero:
2330 return 0;
2331 case float_class_normal:
ab52f973
AB
2332 if (p.sign) {
2333 s->float_exception_flags = orig_flags | float_flag_invalid;
2334 return 0;
2335 }
2336
2337 if (p.exp < DECOMPOSED_BINARY_POINT) {
2338 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2339 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2340 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2341 } else {
2342 s->float_exception_flags = orig_flags | float_flag_invalid;
2343 return max;
2344 }
2345
2346 /* For uint64 this will never trip, but if p.exp is too large
2347 * to shift a decomposed fraction we shall have exited via the
2348 * 3rd leg above.
2349 */
2350 if (r > max) {
2351 s->float_exception_flags = orig_flags | float_flag_invalid;
2352 return max;
ab52f973 2353 }
2f6c74be 2354 return r;
ab52f973
AB
2355 default:
2356 g_assert_not_reached();
2357 }
2358}
2359
2f6c74be
RH
2360uint16_t float16_to_uint16_scalbn(float16 a, int rmode, int scale,
2361 float_status *s)
2362{
2363 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2364 rmode, scale, UINT16_MAX, s);
2365}
2366
2367uint32_t float16_to_uint32_scalbn(float16 a, int rmode, int scale,
2368 float_status *s)
2369{
2370 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2371 rmode, scale, UINT32_MAX, s);
2372}
2373
2374uint64_t float16_to_uint64_scalbn(float16 a, int rmode, int scale,
2375 float_status *s)
2376{
2377 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2378 rmode, scale, UINT64_MAX, s);
2379}
2380
2381uint16_t float32_to_uint16_scalbn(float32 a, int rmode, int scale,
2382 float_status *s)
2383{
2384 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2385 rmode, scale, UINT16_MAX, s);
2386}
2387
2388uint32_t float32_to_uint32_scalbn(float32 a, int rmode, int scale,
2389 float_status *s)
2390{
2391 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2392 rmode, scale, UINT32_MAX, s);
2393}
2394
2395uint64_t float32_to_uint64_scalbn(float32 a, int rmode, int scale,
2396 float_status *s)
2397{
2398 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2399 rmode, scale, UINT64_MAX, s);
2400}
2401
2402uint16_t float64_to_uint16_scalbn(float64 a, int rmode, int scale,
2403 float_status *s)
2404{
2405 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2406 rmode, scale, UINT16_MAX, s);
2407}
2408
2409uint32_t float64_to_uint32_scalbn(float64 a, int rmode, int scale,
2410 float_status *s)
2411{
2412 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2413 rmode, scale, UINT32_MAX, s);
2414}
2415
2416uint64_t float64_to_uint64_scalbn(float64 a, int rmode, int scale,
2417 float_status *s)
2418{
2419 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2420 rmode, scale, UINT64_MAX, s);
2421}
2422
2423uint16_t float16_to_uint16(float16 a, float_status *s)
2424{
2425 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2426}
2427
2428uint32_t float16_to_uint32(float16 a, float_status *s)
2429{
2430 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2431}
2432
2433uint64_t float16_to_uint64(float16 a, float_status *s)
2434{
2435 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2436}
2437
2438uint16_t float32_to_uint16(float32 a, float_status *s)
2439{
2440 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2441}
2442
2443uint32_t float32_to_uint32(float32 a, float_status *s)
2444{
2445 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2446}
2447
2448uint64_t float32_to_uint64(float32 a, float_status *s)
2449{
2450 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2451}
2452
2453uint16_t float64_to_uint16(float64 a, float_status *s)
2454{
2455 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2456}
2457
2458uint32_t float64_to_uint32(float64 a, float_status *s)
2459{
2460 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2461}
2462
2463uint64_t float64_to_uint64(float64 a, float_status *s)
2464{
2465 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2466}
2467
2468uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2469{
2470 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2471}
2472
2473uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2474{
2475 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2476}
2477
2478uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2479{
2480 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2481}
2482
2483uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2484{
2485 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2486}
2487
2488uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2489{
2490 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2491}
2492
2493uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2494{
2495 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2496}
2497
2498uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2499{
2500 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2501}
2502
2503uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2504{
2505 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2506}
2507
2508uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2509{
2510 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2511}
ab52f973 2512
c02e1fb8
AB
2513/*
2514 * Integer to float conversions
2515 *
2516 * Returns the result of converting the two's complement integer `a'
2517 * to the floating-point format. The conversion is performed according
2518 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2519 */
2520
2abdfe24 2521static FloatParts int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 2522{
2abdfe24
RH
2523 FloatParts r = { .sign = false };
2524
c02e1fb8
AB
2525 if (a == 0) {
2526 r.cls = float_class_zero;
c02e1fb8 2527 } else {
2abdfe24
RH
2528 uint64_t f = a;
2529 int shift;
2530
2531 r.cls = float_class_normal;
c02e1fb8 2532 if (a < 0) {
2abdfe24 2533 f = -f;
c02e1fb8 2534 r.sign = true;
c02e1fb8 2535 }
2abdfe24
RH
2536 shift = clz64(f) - 1;
2537 scale = MIN(MAX(scale, -0x10000), 0x10000);
2538
2539 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2540 r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
c02e1fb8
AB
2541 }
2542
2543 return r;
2544}
2545
2abdfe24 2546float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2547{
2abdfe24 2548 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2549 return float16_round_pack_canonical(pa, status);
2550}
2551
2abdfe24
RH
2552float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2553{
2554 return int64_to_float16_scalbn(a, scale, status);
2555}
2556
2557float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2558{
2559 return int64_to_float16_scalbn(a, scale, status);
2560}
2561
2562float16 int64_to_float16(int64_t a, float_status *status)
2563{
2564 return int64_to_float16_scalbn(a, 0, status);
2565}
2566
c02e1fb8
AB
2567float16 int32_to_float16(int32_t a, float_status *status)
2568{
2abdfe24 2569 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2570}
2571
2572float16 int16_to_float16(int16_t a, float_status *status)
2573{
2abdfe24 2574 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2575}
2576
2abdfe24 2577float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2578{
2abdfe24 2579 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2580 return float32_round_pack_canonical(pa, status);
2581}
2582
2abdfe24
RH
2583float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
2584{
2585 return int64_to_float32_scalbn(a, scale, status);
2586}
2587
2588float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
2589{
2590 return int64_to_float32_scalbn(a, scale, status);
2591}
2592
2593float32 int64_to_float32(int64_t a, float_status *status)
2594{
2595 return int64_to_float32_scalbn(a, 0, status);
2596}
2597
c02e1fb8
AB
2598float32 int32_to_float32(int32_t a, float_status *status)
2599{
2abdfe24 2600 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2601}
2602
2603float32 int16_to_float32(int16_t a, float_status *status)
2604{
2abdfe24 2605 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2606}
2607
2abdfe24 2608float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2609{
2abdfe24 2610 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2611 return float64_round_pack_canonical(pa, status);
2612}
2613
2abdfe24
RH
2614float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
2615{
2616 return int64_to_float64_scalbn(a, scale, status);
2617}
2618
2619float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
2620{
2621 return int64_to_float64_scalbn(a, scale, status);
2622}
2623
2624float64 int64_to_float64(int64_t a, float_status *status)
2625{
2626 return int64_to_float64_scalbn(a, 0, status);
2627}
2628
c02e1fb8
AB
2629float64 int32_to_float64(int32_t a, float_status *status)
2630{
2abdfe24 2631 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2632}
2633
2634float64 int16_to_float64(int16_t a, float_status *status)
2635{
2abdfe24 2636 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2637}
2638
2639
2640/*
2641 * Unsigned Integer to float conversions
2642 *
2643 * Returns the result of converting the unsigned integer `a' to the
2644 * floating-point format. The conversion is performed according to the
2645 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2646 */
2647
2abdfe24 2648static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 2649{
2abdfe24 2650 FloatParts r = { .sign = false };
c02e1fb8
AB
2651
2652 if (a == 0) {
2653 r.cls = float_class_zero;
2654 } else {
2abdfe24 2655 scale = MIN(MAX(scale, -0x10000), 0x10000);
c02e1fb8 2656 r.cls = float_class_normal;
2abdfe24
RH
2657 if ((int64_t)a < 0) {
2658 r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
2659 shift64RightJamming(a, 1, &a);
c02e1fb8
AB
2660 r.frac = a;
2661 } else {
2abdfe24
RH
2662 int shift = clz64(a) - 1;
2663 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2664 r.frac = a << shift;
c02e1fb8
AB
2665 }
2666 }
2667
2668 return r;
2669}
2670
2abdfe24 2671float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2672{
2abdfe24 2673 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2674 return float16_round_pack_canonical(pa, status);
2675}
2676
2abdfe24
RH
2677float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
2678{
2679 return uint64_to_float16_scalbn(a, scale, status);
2680}
2681
2682float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
2683{
2684 return uint64_to_float16_scalbn(a, scale, status);
2685}
2686
2687float16 uint64_to_float16(uint64_t a, float_status *status)
2688{
2689 return uint64_to_float16_scalbn(a, 0, status);
2690}
2691
c02e1fb8
AB
2692float16 uint32_to_float16(uint32_t a, float_status *status)
2693{
2abdfe24 2694 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2695}
2696
2697float16 uint16_to_float16(uint16_t a, float_status *status)
2698{
2abdfe24 2699 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2700}
2701
2abdfe24 2702float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2703{
2abdfe24 2704 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2705 return float32_round_pack_canonical(pa, status);
2706}
2707
2abdfe24
RH
2708float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
2709{
2710 return uint64_to_float32_scalbn(a, scale, status);
2711}
2712
2713float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
2714{
2715 return uint64_to_float32_scalbn(a, scale, status);
2716}
2717
2718float32 uint64_to_float32(uint64_t a, float_status *status)
2719{
2720 return uint64_to_float32_scalbn(a, 0, status);
2721}
2722
c02e1fb8
AB
2723float32 uint32_to_float32(uint32_t a, float_status *status)
2724{
2abdfe24 2725 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2726}
2727
2728float32 uint16_to_float32(uint16_t a, float_status *status)
2729{
2abdfe24 2730 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2731}
2732
2abdfe24 2733float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2734{
2abdfe24 2735 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2736 return float64_round_pack_canonical(pa, status);
2737}
2738
2abdfe24
RH
2739float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
2740{
2741 return uint64_to_float64_scalbn(a, scale, status);
2742}
2743
2744float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
2745{
2746 return uint64_to_float64_scalbn(a, scale, status);
2747}
2748
2749float64 uint64_to_float64(uint64_t a, float_status *status)
2750{
2751 return uint64_to_float64_scalbn(a, 0, status);
2752}
2753
c02e1fb8
AB
2754float64 uint32_to_float64(uint32_t a, float_status *status)
2755{
2abdfe24 2756 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2757}
2758
2759float64 uint16_to_float64(uint16_t a, float_status *status)
2760{
2abdfe24 2761 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2762}
2763
89360067
AB
2764/* Float Min/Max */
2765/* min() and max() functions. These can't be implemented as
2766 * 'compare and pick one input' because that would mishandle
2767 * NaNs and +0 vs -0.
2768 *
2769 * minnum() and maxnum() functions. These are similar to the min()
2770 * and max() functions but if one of the arguments is a QNaN and
2771 * the other is numerical then the numerical argument is returned.
2772 * SNaNs will get quietened before being returned.
2773 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
2774 * and maxNum() operations. min() and max() are the typical min/max
2775 * semantics provided by many CPUs which predate that specification.
2776 *
2777 * minnummag() and maxnummag() functions correspond to minNumMag()
2778 * and maxNumMag() from the IEEE-754 2008.
2779 */
2780static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
2781 bool ieee, bool ismag, float_status *s)
2782{
2783 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
2784 if (ieee) {
2785 /* Takes two floating-point values `a' and `b', one of
2786 * which is a NaN, and returns the appropriate NaN
2787 * result. If either `a' or `b' is a signaling NaN,
2788 * the invalid exception is raised.
2789 */
2790 if (is_snan(a.cls) || is_snan(b.cls)) {
2791 return pick_nan(a, b, s);
2792 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
2793 return b;
2794 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
2795 return a;
2796 }
2797 }
2798 return pick_nan(a, b, s);
2799 } else {
2800 int a_exp, b_exp;
89360067
AB
2801
2802 switch (a.cls) {
2803 case float_class_normal:
2804 a_exp = a.exp;
2805 break;
2806 case float_class_inf:
2807 a_exp = INT_MAX;
2808 break;
2809 case float_class_zero:
2810 a_exp = INT_MIN;
2811 break;
2812 default:
2813 g_assert_not_reached();
2814 break;
2815 }
2816 switch (b.cls) {
2817 case float_class_normal:
2818 b_exp = b.exp;
2819 break;
2820 case float_class_inf:
2821 b_exp = INT_MAX;
2822 break;
2823 case float_class_zero:
2824 b_exp = INT_MIN;
2825 break;
2826 default:
2827 g_assert_not_reached();
2828 break;
2829 }
2830
6245327a
EC
2831 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
2832 bool a_less = a_exp < b_exp;
2833 if (a_exp == b_exp) {
2834 a_less = a.frac < b.frac;
2835 }
2836 return a_less ^ ismin ? b : a;
89360067
AB
2837 }
2838
6245327a 2839 if (a.sign == b.sign) {
89360067
AB
2840 bool a_less = a_exp < b_exp;
2841 if (a_exp == b_exp) {
2842 a_less = a.frac < b.frac;
2843 }
6245327a 2844 return a.sign ^ a_less ^ ismin ? b : a;
89360067 2845 } else {
6245327a 2846 return a.sign ^ ismin ? b : a;
89360067
AB
2847 }
2848 }
2849}
2850
2851#define MINMAX(sz, name, ismin, isiee, ismag) \
2852float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
2853 float_status *s) \
2854{ \
2855 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2856 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2857 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
2858 \
2859 return float ## sz ## _round_pack_canonical(pr, s); \
2860}
2861
2862MINMAX(16, min, true, false, false)
2863MINMAX(16, minnum, true, true, false)
2864MINMAX(16, minnummag, true, true, true)
2865MINMAX(16, max, false, false, false)
2866MINMAX(16, maxnum, false, true, false)
2867MINMAX(16, maxnummag, false, true, true)
2868
2869MINMAX(32, min, true, false, false)
2870MINMAX(32, minnum, true, true, false)
2871MINMAX(32, minnummag, true, true, true)
2872MINMAX(32, max, false, false, false)
2873MINMAX(32, maxnum, false, true, false)
2874MINMAX(32, maxnummag, false, true, true)
2875
2876MINMAX(64, min, true, false, false)
2877MINMAX(64, minnum, true, true, false)
2878MINMAX(64, minnummag, true, true, true)
2879MINMAX(64, max, false, false, false)
2880MINMAX(64, maxnum, false, true, false)
2881MINMAX(64, maxnummag, false, true, true)
2882
2883#undef MINMAX
2884
0c4c9092
AB
2885/* Floating point compare */
2886static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
2887 float_status *s)
2888{
2889 if (is_nan(a.cls) || is_nan(b.cls)) {
2890 if (!is_quiet ||
2891 a.cls == float_class_snan ||
2892 b.cls == float_class_snan) {
2893 s->float_exception_flags |= float_flag_invalid;
2894 }
2895 return float_relation_unordered;
2896 }
2897
2898 if (a.cls == float_class_zero) {
2899 if (b.cls == float_class_zero) {
2900 return float_relation_equal;
2901 }
2902 return b.sign ? float_relation_greater : float_relation_less;
2903 } else if (b.cls == float_class_zero) {
2904 return a.sign ? float_relation_less : float_relation_greater;
2905 }
2906
2907 /* The only really important thing about infinity is its sign. If
2908 * both are infinities the sign marks the smallest of the two.
2909 */
2910 if (a.cls == float_class_inf) {
2911 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
2912 return float_relation_equal;
2913 }
2914 return a.sign ? float_relation_less : float_relation_greater;
2915 } else if (b.cls == float_class_inf) {
2916 return b.sign ? float_relation_greater : float_relation_less;
2917 }
2918
2919 if (a.sign != b.sign) {
2920 return a.sign ? float_relation_less : float_relation_greater;
2921 }
2922
2923 if (a.exp == b.exp) {
2924 if (a.frac == b.frac) {
2925 return float_relation_equal;
2926 }
2927 if (a.sign) {
2928 return a.frac > b.frac ?
2929 float_relation_less : float_relation_greater;
2930 } else {
2931 return a.frac > b.frac ?
2932 float_relation_greater : float_relation_less;
2933 }
2934 } else {
2935 if (a.sign) {
2936 return a.exp > b.exp ? float_relation_less : float_relation_greater;
2937 } else {
2938 return a.exp > b.exp ? float_relation_greater : float_relation_less;
2939 }
2940 }
2941}
2942
d9fe9db9
EC
2943#define COMPARE(name, attr, sz) \
2944static int attr \
2945name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
0c4c9092
AB
2946{ \
2947 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2948 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
d9fe9db9 2949 return compare_floats(pa, pb, is_quiet, s); \
0c4c9092
AB
2950}
2951
d9fe9db9
EC
2952COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
2953COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
2954COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
0c4c9092
AB
2955
2956#undef COMPARE
2957
d9fe9db9
EC
2958int float16_compare(float16 a, float16 b, float_status *s)
2959{
2960 return soft_f16_compare(a, b, false, s);
2961}
2962
2963int float16_compare_quiet(float16 a, float16 b, float_status *s)
2964{
2965 return soft_f16_compare(a, b, true, s);
2966}
2967
2968static int QEMU_FLATTEN
2969f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
2970{
2971 union_float32 ua, ub;
2972
2973 ua.s = xa;
2974 ub.s = xb;
2975
2976 if (QEMU_NO_HARDFLOAT) {
2977 goto soft;
2978 }
2979
2980 float32_input_flush2(&ua.s, &ub.s, s);
2981 if (isgreaterequal(ua.h, ub.h)) {
2982 if (isgreater(ua.h, ub.h)) {
2983 return float_relation_greater;
2984 }
2985 return float_relation_equal;
2986 }
2987 if (likely(isless(ua.h, ub.h))) {
2988 return float_relation_less;
2989 }
2990 /* The only condition remaining is unordered.
2991 * Fall through to set flags.
2992 */
2993 soft:
2994 return soft_f32_compare(ua.s, ub.s, is_quiet, s);
2995}
2996
2997int float32_compare(float32 a, float32 b, float_status *s)
2998{
2999 return f32_compare(a, b, false, s);
3000}
3001
3002int float32_compare_quiet(float32 a, float32 b, float_status *s)
3003{
3004 return f32_compare(a, b, true, s);
3005}
3006
3007static int QEMU_FLATTEN
3008f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3009{
3010 union_float64 ua, ub;
3011
3012 ua.s = xa;
3013 ub.s = xb;
3014
3015 if (QEMU_NO_HARDFLOAT) {
3016 goto soft;
3017 }
3018
3019 float64_input_flush2(&ua.s, &ub.s, s);
3020 if (isgreaterequal(ua.h, ub.h)) {
3021 if (isgreater(ua.h, ub.h)) {
3022 return float_relation_greater;
3023 }
3024 return float_relation_equal;
3025 }
3026 if (likely(isless(ua.h, ub.h))) {
3027 return float_relation_less;
3028 }
3029 /* The only condition remaining is unordered.
3030 * Fall through to set flags.
3031 */
3032 soft:
3033 return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3034}
3035
3036int float64_compare(float64 a, float64 b, float_status *s)
3037{
3038 return f64_compare(a, b, false, s);
3039}
3040
3041int float64_compare_quiet(float64 a, float64 b, float_status *s)
3042{
3043 return f64_compare(a, b, true, s);
3044}
3045
0bfc9f19
AB
3046/* Multiply A by 2 raised to the power N. */
3047static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
3048{
3049 if (unlikely(is_nan(a.cls))) {
3050 return return_nan(a, s);
3051 }
3052 if (a.cls == float_class_normal) {
ce8d4082
RH
3053 /* The largest float type (even though not supported by FloatParts)
3054 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3055 * still allows rounding to infinity, without allowing overflow
3056 * within the int32_t that backs FloatParts.exp.
3057 */
3058 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
3059 a.exp += n;
3060 }
3061 return a;
3062}
3063
3064float16 float16_scalbn(float16 a, int n, float_status *status)
3065{
3066 FloatParts pa = float16_unpack_canonical(a, status);
3067 FloatParts pr = scalbn_decomposed(pa, n, status);
3068 return float16_round_pack_canonical(pr, status);
3069}
3070
3071float32 float32_scalbn(float32 a, int n, float_status *status)
3072{
3073 FloatParts pa = float32_unpack_canonical(a, status);
3074 FloatParts pr = scalbn_decomposed(pa, n, status);
3075 return float32_round_pack_canonical(pr, status);
3076}
3077
3078float64 float64_scalbn(float64 a, int n, float_status *status)
3079{
3080 FloatParts pa = float64_unpack_canonical(a, status);
3081 FloatParts pr = scalbn_decomposed(pa, n, status);
3082 return float64_round_pack_canonical(pr, status);
3083}
3084
c13bb2da
AB
/*
 * Square Root
 *
 * The old softfloat code did an approximation step before zeroing in
 * on the final result. However, for simplicity we just compute the
 * square root by iterating down from the implicit bit to enough extra
 * bits to ensure we get a correctly rounded result.
 *
 * This does mean, however, that the calculation is slower than before,
 * especially for 64 bit floats.
 */
3096
3097static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
3098{
3099 uint64_t a_frac, r_frac, s_frac;
3100 int bit, last_bit;
3101
3102 if (is_nan(a.cls)) {
3103 return return_nan(a, s);
3104 }
3105 if (a.cls == float_class_zero) {
3106 return a; /* sqrt(+-0) = +-0 */
3107 }
3108 if (a.sign) {
3109 s->float_exception_flags |= float_flag_invalid;
f7e598e2 3110 return parts_default_nan(s);
c13bb2da
AB
3111 }
3112 if (a.cls == float_class_inf) {
3113 return a; /* sqrt(+inf) = +inf */
3114 }
3115
3116 assert(a.cls == float_class_normal);
3117
3118 /* We need two overflow bits at the top. Adding room for that is a
3119 * right shift. If the exponent is odd, we can discard the low bit
3120 * by multiplying the fraction by 2; that's a left shift. Combine
3121 * those and we shift right if the exponent is even.
3122 */
3123 a_frac = a.frac;
3124 if (!(a.exp & 1)) {
3125 a_frac >>= 1;
3126 }
3127 a.exp >>= 1;
3128
3129 /* Bit-by-bit computation of sqrt. */
3130 r_frac = 0;
3131 s_frac = 0;
3132
3133 /* Iterate from implicit bit down to the 3 extra bits to compute a
3134 * properly rounded result. Remember we've inserted one more bit
3135 * at the top, so these positions are one less.
3136 */
3137 bit = DECOMPOSED_BINARY_POINT - 1;
3138 last_bit = MAX(p->frac_shift - 4, 0);
3139 do {
3140 uint64_t q = 1ULL << bit;
3141 uint64_t t_frac = s_frac + q;
3142 if (t_frac <= a_frac) {
3143 s_frac = t_frac + q;
3144 a_frac -= t_frac;
3145 r_frac += q;
3146 }
3147 a_frac <<= 1;
3148 } while (--bit >= last_bit);
3149
3150 /* Undo the right shift done above. If there is any remaining
3151 * fraction, the result is inexact. Set the sticky bit.
3152 */
3153 a.frac = (r_frac << 1) + (a_frac != 0);
3154
3155 return a;
3156}
3157
97ff87c0 3158float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da
AB
3159{
3160 FloatParts pa = float16_unpack_canonical(a, status);
3161 FloatParts pr = sqrt_float(pa, status, &float16_params);
3162 return float16_round_pack_canonical(pr, status);
3163}
3164
f131bae8
EC
3165static float32 QEMU_SOFTFLOAT_ATTR
3166soft_f32_sqrt(float32 a, float_status *status)
c13bb2da
AB
3167{
3168 FloatParts pa = float32_unpack_canonical(a, status);
3169 FloatParts pr = sqrt_float(pa, status, &float32_params);
3170 return float32_round_pack_canonical(pr, status);
3171}
3172
f131bae8
EC
3173static float64 QEMU_SOFTFLOAT_ATTR
3174soft_f64_sqrt(float64 a, float_status *status)
c13bb2da
AB
3175{
3176 FloatParts pa = float64_unpack_canonical(a, status);
3177 FloatParts pr = sqrt_float(pa, status, &float64_params);
3178 return float64_round_pack_canonical(pr, status);
3179}
3180
f131bae8
EC
3181float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3182{
3183 union_float32 ua, ur;
3184
3185 ua.s = xa;
3186 if (unlikely(!can_use_fpu(s))) {
3187 goto soft;
3188 }
3189
3190 float32_input_flush1(&ua.s, s);
3191 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3192 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3193 fpclassify(ua.h) == FP_ZERO) ||
3194 signbit(ua.h))) {
3195 goto soft;
3196 }
3197 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3198 float32_is_neg(ua.s))) {
3199 goto soft;
3200 }
3201 ur.h = sqrtf(ua.h);
3202 return ur.s;
3203
3204 soft:
3205 return soft_f32_sqrt(ua.s, s);
3206}
3207
3208float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3209{
3210 union_float64 ua, ur;
3211
3212 ua.s = xa;
3213 if (unlikely(!can_use_fpu(s))) {
3214 goto soft;
3215 }
3216
3217 float64_input_flush1(&ua.s, s);
3218 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3219 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3220 fpclassify(ua.h) == FP_ZERO) ||
3221 signbit(ua.h))) {
3222 goto soft;
3223 }
3224 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3225 float64_is_neg(ua.s))) {
3226 goto soft;
3227 }
3228 ur.h = sqrt(ua.h);
3229 return ur.s;
3230
3231 soft:
3232 return soft_f64_sqrt(ua.s, s);
3233}
3234
0218a16e
RH
3235/*----------------------------------------------------------------------------
3236| The pattern for a default generated NaN.
3237*----------------------------------------------------------------------------*/
3238
3239float16 float16_default_nan(float_status *status)
3240{
3241 FloatParts p = parts_default_nan(status);
3242 p.frac >>= float16_params.frac_shift;
3243 return float16_pack_raw(p);
3244}
3245
3246float32 float32_default_nan(float_status *status)
3247{
3248 FloatParts p = parts_default_nan(status);
3249 p.frac >>= float32_params.frac_shift;
3250 return float32_pack_raw(p);
3251}
3252
3253float64 float64_default_nan(float_status *status)
3254{
3255 FloatParts p = parts_default_nan(status);
3256 p.frac >>= float64_params.frac_shift;
3257 return float64_pack_raw(p);
3258}
3259
3260float128 float128_default_nan(float_status *status)
3261{
3262 FloatParts p = parts_default_nan(status);
3263 float128 r;
3264
3265 /* Extrapolate from the choices made by parts_default_nan to fill
3266 * in the quad-floating format. If the low bit is set, assume we
3267 * want to set all non-snan bits.
3268 */
3269 r.low = -(p.frac & 1);
3270 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
e9321124 3271 r.high |= UINT64_C(0x7FFF000000000000);
0218a16e
RH
3272 r.high |= (uint64_t)p.sign << 63;
3273
3274 return r;
3275}
c13bb2da 3276
158142c2 3277/*----------------------------------------------------------------------------
377ed926
RH
3278| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3279*----------------------------------------------------------------------------*/
3280
3281float16 float16_silence_nan(float16 a, float_status *status)
3282{
3283 FloatParts p = float16_unpack_raw(a);
3284 p.frac <<= float16_params.frac_shift;
3285 p = parts_silence_nan(p, status);
3286 p.frac >>= float16_params.frac_shift;
3287 return float16_pack_raw(p);
3288}
3289
3290float32 float32_silence_nan(float32 a, float_status *status)
3291{
3292 FloatParts p = float32_unpack_raw(a);
3293 p.frac <<= float32_params.frac_shift;
3294 p = parts_silence_nan(p, status);
3295 p.frac >>= float32_params.frac_shift;
3296 return float32_pack_raw(p);
3297}
3298
3299float64 float64_silence_nan(float64 a, float_status *status)
3300{
3301 FloatParts p = float64_unpack_raw(a);
3302 p.frac <<= float64_params.frac_shift;
3303 p = parts_silence_nan(p, status);
3304 p.frac >>= float64_params.frac_shift;
3305 return float64_pack_raw(p);
3306}
3307
e6b405fe
AB
3308
3309/*----------------------------------------------------------------------------
3310| If `a' is denormal and we are in flush-to-zero mode then set the
3311| input-denormal exception and return zero. Otherwise just return the value.
3312*----------------------------------------------------------------------------*/
3313
3314static bool parts_squash_denormal(FloatParts p, float_status *status)
3315{
3316 if (p.exp == 0 && p.frac != 0) {
3317 float_raise(float_flag_input_denormal, status);
3318 return true;
3319 }
3320
3321 return false;
3322}
3323
3324float16 float16_squash_input_denormal(float16 a, float_status *status)
3325{
3326 if (status->flush_inputs_to_zero) {
3327 FloatParts p = float16_unpack_raw(a);
3328 if (parts_squash_denormal(p, status)) {
3329 return float16_set_sign(float16_zero, p.sign);
3330 }
3331 }
3332 return a;
3333}
3334
3335float32 float32_squash_input_denormal(float32 a, float_status *status)
3336{
3337 if (status->flush_inputs_to_zero) {
3338 FloatParts p = float32_unpack_raw(a);
3339 if (parts_squash_denormal(p, status)) {
3340 return float32_set_sign(float32_zero, p.sign);
3341 }
3342 }
3343 return a;
3344}
3345
3346float64 float64_squash_input_denormal(float64 a, float_status *status)
3347{
3348 if (status->flush_inputs_to_zero) {
3349 FloatParts p = float64_unpack_raw(a);
3350 if (parts_squash_denormal(p, status)) {
3351 return float64_set_sign(float64_zero, p.sign);
3352 }
3353 }
3354 return a;
3355}
3356
377ed926 3357/*----------------------------------------------------------------------------
158142c2
FB
3358| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3359| and 7, and returns the properly rounded 32-bit integer corresponding to the
3360| input. If `zSign' is 1, the input is negated before being converted to an
3361| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3362| is simply rounded to an integer, with the inexact exception raised if the
3363| input cannot be represented exactly as an integer. However, if the fixed-
3364| point input is too large, the invalid exception is raised and the largest
3365| positive or negative integer is returned.
3366*----------------------------------------------------------------------------*/
3367
f4014512 3368static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
158142c2 3369{
8f506c70 3370 int8_t roundingMode;
158142c2 3371 flag roundNearestEven;
8f506c70 3372 int8_t roundIncrement, roundBits;
760e1416 3373 int32_t z;
158142c2 3374
a2f2d288 3375 roundingMode = status->float_rounding_mode;
158142c2 3376 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3377 switch (roundingMode) {
3378 case float_round_nearest_even:
f9288a76 3379 case float_round_ties_away:
dc355b76
PM
3380 roundIncrement = 0x40;
3381 break;
3382 case float_round_to_zero:
3383 roundIncrement = 0;
3384 break;
3385 case float_round_up:
3386 roundIncrement = zSign ? 0 : 0x7f;
3387 break;
3388 case float_round_down:
3389 roundIncrement = zSign ? 0x7f : 0;
3390 break;
5d64abb3
RH
3391 case float_round_to_odd:
3392 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
3393 break;
dc355b76
PM
3394 default:
3395 abort();
158142c2
FB
3396 }
3397 roundBits = absZ & 0x7F;
3398 absZ = ( absZ + roundIncrement )>>7;
3399 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
3400 z = absZ;
3401 if ( zSign ) z = - z;
3402 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 3403 float_raise(float_flag_invalid, status);
2c217da0 3404 return zSign ? INT32_MIN : INT32_MAX;
158142c2 3405 }
a2f2d288
PM
3406 if (roundBits) {
3407 status->float_exception_flags |= float_flag_inexact;
3408 }
158142c2
FB
3409 return z;
3410
3411}
3412
3413/*----------------------------------------------------------------------------
3414| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3415| `absZ1', with binary point between bits 63 and 64 (between the input words),
3416| and returns the properly rounded 64-bit integer corresponding to the input.
3417| If `zSign' is 1, the input is negated before being converted to an integer.
3418| Ordinarily, the fixed-point input is simply rounded to an integer, with
3419| the inexact exception raised if the input cannot be represented exactly as
3420| an integer. However, if the fixed-point input is too large, the invalid
3421| exception is raised and the largest positive or negative integer is
3422| returned.
3423*----------------------------------------------------------------------------*/
3424
f42c2224 3425static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 3426 float_status *status)
158142c2 3427{
8f506c70 3428 int8_t roundingMode;
158142c2 3429 flag roundNearestEven, increment;
760e1416 3430 int64_t z;
158142c2 3431
a2f2d288 3432 roundingMode = status->float_rounding_mode;
158142c2 3433 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3434 switch (roundingMode) {
3435 case float_round_nearest_even:
f9288a76 3436 case float_round_ties_away:
dc355b76
PM
3437 increment = ((int64_t) absZ1 < 0);
3438 break;
3439 case float_round_to_zero:
3440 increment = 0;
3441 break;
3442 case float_round_up:
3443 increment = !zSign && absZ1;
3444 break;
3445 case float_round_down:
3446 increment = zSign && absZ1;
3447 break;
5d64abb3
RH
3448 case float_round_to_odd:
3449 increment = !(absZ0 & 1) && absZ1;
3450 break;
dc355b76
PM
3451 default:
3452 abort();
158142c2
FB
3453 }
3454 if ( increment ) {
3455 ++absZ0;
3456 if ( absZ0 == 0 ) goto overflow;
bb98fe42 3457 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
3458 }
3459 z = absZ0;
3460 if ( zSign ) z = - z;
3461 if ( z && ( ( z < 0 ) ^ zSign ) ) {
3462 overflow:
ff32e16e 3463 float_raise(float_flag_invalid, status);
2c217da0 3464 return zSign ? INT64_MIN : INT64_MAX;
158142c2 3465 }
a2f2d288
PM
3466 if (absZ1) {
3467 status->float_exception_flags |= float_flag_inexact;
3468 }
158142c2
FB
3469 return z;
3470
3471}
3472
fb3ea83a
TM
3473/*----------------------------------------------------------------------------
3474| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3475| `absZ1', with binary point between bits 63 and 64 (between the input words),
3476| and returns the properly rounded 64-bit unsigned integer corresponding to the
3477| input. Ordinarily, the fixed-point input is simply rounded to an integer,
3478| with the inexact exception raised if the input cannot be represented exactly
3479| as an integer. However, if the fixed-point input is too large, the invalid
3480| exception is raised and the largest unsigned integer is returned.
3481*----------------------------------------------------------------------------*/
3482
f42c2224 3483static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
e5a41ffa 3484 uint64_t absZ1, float_status *status)
fb3ea83a 3485{
8f506c70 3486 int8_t roundingMode;
fb3ea83a
TM
3487 flag roundNearestEven, increment;
3488
a2f2d288 3489 roundingMode = status->float_rounding_mode;
fb3ea83a 3490 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
3491 switch (roundingMode) {
3492 case float_round_nearest_even:
f9288a76 3493 case float_round_ties_away:
dc355b76
PM
3494 increment = ((int64_t)absZ1 < 0);
3495 break;
3496 case float_round_to_zero:
3497 increment = 0;
3498 break;
3499 case float_round_up:
3500 increment = !zSign && absZ1;
3501 break;
3502 case float_round_down:
3503 increment = zSign && absZ1;
3504 break;
5d64abb3
RH
3505 case float_round_to_odd:
3506 increment = !(absZ0 & 1) && absZ1;
3507 break;
dc355b76
PM
3508 default:
3509 abort();
fb3ea83a
TM
3510 }
3511 if (increment) {
3512 ++absZ0;
3513 if (absZ0 == 0) {
ff32e16e 3514 float_raise(float_flag_invalid, status);
2c217da0 3515 return UINT64_MAX;
fb3ea83a
TM
3516 }
3517 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
3518 }
3519
3520 if (zSign && absZ0) {
ff32e16e 3521 float_raise(float_flag_invalid, status);
fb3ea83a
TM
3522 return 0;
3523 }
3524
3525 if (absZ1) {
a2f2d288 3526 status->float_exception_flags |= float_flag_inexact;
fb3ea83a
TM
3527 }
3528 return absZ0;
3529}
3530
158142c2
FB
3531/*----------------------------------------------------------------------------
3532| Normalizes the subnormal single-precision floating-point value represented
3533| by the denormalized significand `aSig'. The normalized exponent and
3534| significand are stored at the locations pointed to by `zExpPtr' and
3535| `zSigPtr', respectively.
3536*----------------------------------------------------------------------------*/
3537
static void
normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* Left shift that brings the leading one of aSig up to bit 23. */
    const int8_t shift = clz32(aSig) - 8;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
3548
158142c2
FB
3549/*----------------------------------------------------------------------------
3550| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3551| and significand `zSig', and returns the proper single-precision floating-
3552| point value corresponding to the abstract input. Ordinarily, the abstract
3553| value is simply rounded and packed into the single-precision format, with
3554| the inexact exception raised if the abstract input cannot be represented
3555| exactly. However, if the abstract value is too large, the overflow and
3556| inexact exceptions are raised and an infinity or maximal finite value is
3557| returned. If the abstract value is too small, the input value is rounded to
3558| a subnormal number, and the underflow and inexact exceptions are raised if
3559| the abstract input cannot be represented exactly as a subnormal single-
3560| precision floating-point number.
3561| The input significand `zSig' has its binary point between bits 30
3562| and 29, which is 7 bits to the left of the usual location. This shifted
3563| significand must be normalized or smaller. If `zSig' is not normalized,
3564| `zExp' must be 0; in that case, the result returned is a subnormal number,
3565| and it must not require rounding. In the usual case that `zSig' is
3566| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3567| The handling of underflow and overflow follows the IEC/IEEE Standard for
3568| Binary Floating-Point Arithmetic.
3569*----------------------------------------------------------------------------*/
3570
0c48262d 3571static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 3572 float_status *status)
158142c2 3573{
8f506c70 3574 int8_t roundingMode;
158142c2 3575 flag roundNearestEven;
8f506c70 3576 int8_t roundIncrement, roundBits;
158142c2
FB
3577 flag isTiny;
3578
a2f2d288 3579 roundingMode = status->float_rounding_mode;
158142c2 3580 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3581 switch (roundingMode) {
3582 case float_round_nearest_even:
f9288a76 3583 case float_round_ties_away:
dc355b76
PM
3584 roundIncrement = 0x40;
3585 break;
3586 case float_round_to_zero:
3587 roundIncrement = 0;
3588 break;
3589 case float_round_up:
3590 roundIncrement = zSign ? 0 : 0x7f;
3591 break;
3592 case float_round_down:
3593 roundIncrement = zSign ? 0x7f : 0;
3594 break;
5d64abb3
RH
3595 case float_round_to_odd:
3596 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
3597 break;
dc355b76
PM
3598 default:
3599 abort();
3600 break;
158142c2
FB
3601 }
3602 roundBits = zSig & 0x7F;
bb98fe42 3603 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
3604 if ( ( 0xFD < zExp )
3605 || ( ( zExp == 0xFD )
bb98fe42 3606 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 3607 ) {
5d64abb3
RH
3608 bool overflow_to_inf = roundingMode != float_round_to_odd &&
3609 roundIncrement != 0;
ff32e16e 3610 float_raise(float_flag_overflow | float_flag_inexact, status);
5d64abb3 3611 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
158142c2
FB
3612 }
3613 if ( zExp < 0 ) {
a2f2d288 3614 if (status->flush_to_zero) {
ff32e16e 3615 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3616 return packFloat32(zSign, 0, 0);
3617 }
158142c2 3618 isTiny =
a2f2d288
PM
3619 (status->float_detect_tininess
3620 == float_tininess_before_rounding)
158142c2
FB
3621 || ( zExp < -1 )
3622 || ( zSig + roundIncrement < 0x80000000 );
3623 shift32RightJamming( zSig, - zExp, &zSig );
3624 zExp = 0;
3625 roundBits = zSig & 0x7F;
ff32e16e
PM
3626 if (isTiny && roundBits) {
3627 float_raise(float_flag_underflow, status);
3628 }
5d64abb3
RH
3629 if (roundingMode == float_round_to_odd) {
3630 /*
3631 * For round-to-odd case, the roundIncrement depends on
3632 * zSig which just changed.
3633 */
3634 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
3635 }
158142c2
FB
3636 }
3637 }
a2f2d288
PM
3638 if (roundBits) {
3639 status->float_exception_flags |= float_flag_inexact;
3640 }
158142c2
FB
3641 zSig = ( zSig + roundIncrement )>>7;
3642 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
3643 if ( zSig == 0 ) zExp = 0;
3644 return packFloat32( zSign, zExp, zSig );
3645
3646}
3647
3648/*----------------------------------------------------------------------------
3649| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3650| and significand `zSig', and returns the proper single-precision floating-
3651| point value corresponding to the abstract input. This routine is just like
3652| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
3653| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
3654| floating-point exponent.
3655*----------------------------------------------------------------------------*/
3656
3657static float32
0c48262d 3658 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 3659 float_status *status)
158142c2 3660{
8f506c70 3661 int8_t shiftCount;
158142c2 3662
0019d5c3 3663 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
3664 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
3665 status);
158142c2
FB
3666
3667}
3668
158142c2
FB
3669/*----------------------------------------------------------------------------
3670| Normalizes the subnormal double-precision floating-point value represented
3671| by the denormalized significand `aSig'. The normalized exponent and
3672| significand are stored at the locations pointed to by `zExpPtr' and
3673| `zSigPtr', respectively.
3674*----------------------------------------------------------------------------*/
3675
static void
normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    /* Left shift that brings the leading one of aSig up to bit 52. */
    const int8_t shift = clz64(aSig) - 11;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
3686
3687/*----------------------------------------------------------------------------
3688| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
3689| double-precision floating-point value, returning the result. After being
3690| shifted into the proper positions, the three fields are simply added
3691| together to form the result. This means that any integer portion of `zSig'
3692| will be added into the exponent. Since a properly normalized significand
3693| will have an integer portion equal to 1, the `zExp' input should be 1 less
3694| than the desired result exponent whenever `zSig' is a complete, normalized
3695| significand.
3696*----------------------------------------------------------------------------*/
3697
0c48262d 3698static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
158142c2
FB
3699{
3700
f090c9d4 3701 return make_float64(
bb98fe42 3702 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
3703
3704}
3705
3706/*----------------------------------------------------------------------------
3707| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3708| and significand `zSig', and returns the proper double-precision floating-
3709| point value corresponding to the abstract input. Ordinarily, the abstract
3710| value is simply rounded and packed into the double-precision format, with
3711| the inexact exception raised if the abstract input cannot be represented
3712| exactly. However, if the abstract value is too large, the overflow and
3713| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
3714| returned. If the abstract value is too small, the input value is rounded to
3715| a subnormal number, and the underflow and inexact exceptions are raised if
3716| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
3717| precision floating-point number.
3718| The input significand `zSig' has its binary point between bits 62
3719| and 61, which is 10 bits to the left of the usual location. This shifted
3720| significand must be normalized or smaller. If `zSig' is not normalized,
3721| `zExp' must be 0; in that case, the result returned is a subnormal number,
3722| and it must not require rounding. In the usual case that `zSig' is
3723| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3724| The handling of underflow and overflow follows the IEC/IEEE Standard for
3725| Binary Floating-Point Arithmetic.
3726*----------------------------------------------------------------------------*/
3727
0c48262d 3728static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 3729 float_status *status)
158142c2 3730{
8f506c70 3731 int8_t roundingMode;
158142c2 3732 flag roundNearestEven;
0c48262d 3733 int roundIncrement, roundBits;
158142c2
FB
3734 flag isTiny;
3735
a2f2d288 3736 roundingMode = status->float_rounding_mode;
158142c2 3737 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3738 switch (roundingMode) {
3739 case float_round_nearest_even:
f9288a76 3740 case float_round_ties_away:
dc355b76
PM
3741 roundIncrement = 0x200;
3742 break;
3743 case float_round_to_zero:
3744 roundIncrement = 0;
3745 break;
3746 case float_round_up:
3747 roundIncrement = zSign ? 0 : 0x3ff;
3748 break;
3749 case float_round_down:
3750 roundIncrement = zSign ? 0x3ff : 0;
3751 break;
9ee6f678
BR
3752 case float_round_to_odd:
3753 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
3754 break;
dc355b76
PM
3755 default:
3756 abort();
158142c2
FB
3757 }
3758 roundBits = zSig & 0x3FF;
bb98fe42 3759 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
3760 if ( ( 0x7FD < zExp )
3761 || ( ( zExp == 0x7FD )
bb98fe42 3762 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 3763 ) {
9ee6f678
BR
3764 bool overflow_to_inf = roundingMode != float_round_to_odd &&
3765 roundIncrement != 0;
ff32e16e 3766 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 3767 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
3768 }
3769 if ( zExp < 0 ) {
a2f2d288 3770 if (status->flush_to_zero) {
ff32e16e 3771 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3772 return packFloat64(zSign, 0, 0);
3773 }
158142c2 3774 isTiny =
a2f2d288
PM
3775 (status->float_detect_tininess
3776 == float_tininess_before_rounding)
158142c2 3777 || ( zExp < -1 )
e9321124 3778 || ( zSig + roundIncrement < UINT64_C(0x8000000000000000) );
158142c2
FB
3779 shift64RightJamming( zSig, - zExp, &zSig );
3780 zExp = 0;
3781 roundBits = zSig & 0x3FF;
ff32e16e
PM
3782 if (isTiny && roundBits) {
3783 float_raise(float_flag_underflow, status);
3784 }
9ee6f678
BR
3785 if (roundingMode == float_round_to_odd) {
3786 /*
3787 * For round-to-odd case, the roundIncrement depends on
3788 * zSig which just changed.
3789 */
3790 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
3791 }
158142c2
FB
3792 }
3793 }
a2f2d288
PM
3794 if (roundBits) {
3795 status->float_exception_flags |= float_flag_inexact;
3796 }
158142c2
FB
3797 zSig = ( zSig + roundIncrement )>>10;
3798 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
3799 if ( zSig == 0 ) zExp = 0;
3800 return packFloat64( zSign, zExp, zSig );
3801
3802}
3803
3804/*----------------------------------------------------------------------------
3805| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3806| and significand `zSig', and returns the proper double-precision floating-
3807| point value corresponding to the abstract input. This routine is just like
3808| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
3809| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
3810| floating-point exponent.
3811*----------------------------------------------------------------------------*/
3812
3813static float64
0c48262d 3814 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 3815 float_status *status)
158142c2 3816{
8f506c70 3817 int8_t shiftCount;
158142c2 3818
0019d5c3 3819 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
3820 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
3821 status);
158142c2
FB
3822
3823}
3824
158142c2
FB
3825/*----------------------------------------------------------------------------
3826| Normalizes the subnormal extended double-precision floating-point value
3827| represented by the denormalized significand `aSig'. The normalized exponent
3828| and significand are stored at the locations pointed to by `zExpPtr' and
3829| `zSigPtr', respectively.
3830*----------------------------------------------------------------------------*/
3831
88857aca
LV
void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* Left shift that brings the leading one of aSig up to bit 63. */
    const int8_t shift = clz64(aSig);

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
3841
3842/*----------------------------------------------------------------------------
3843| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3844| and extended significand formed by the concatenation of `zSig0' and `zSig1',
3845| and returns the proper extended double-precision floating-point value
3846| corresponding to the abstract input. Ordinarily, the abstract value is
3847| rounded and packed into the extended double-precision format, with the
3848| inexact exception raised if the abstract input cannot be represented
3849| exactly. However, if the abstract value is too large, the overflow and
3850| inexact exceptions are raised and an infinity or maximal finite value is
3851| returned. If the abstract value is too small, the input value is rounded to
3852| a subnormal number, and the underflow and inexact exceptions are raised if
3853| the abstract input cannot be represented exactly as a subnormal extended
3854| double-precision floating-point number.
3855| If `roundingPrecision' is 32 or 64, the result is rounded to the same
3856| number of bits as single or double precision, respectively. Otherwise, the
3857| result is rounded to the full precision of the extended double-precision
3858| format.
3859| The input significand must be normalized or smaller. If the input
3860| significand is not normalized, `zExp' must be 0; in that case, the result
3861| returned is a subnormal number, and it must not require rounding. The
3862| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
3863| Floating-Point Arithmetic.
3864*----------------------------------------------------------------------------*/
3865
88857aca
LV
3866floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
3867 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
3868 float_status *status)
158142c2 3869{
8f506c70 3870 int8_t roundingMode;
158142c2 3871 flag roundNearestEven, increment, isTiny;
f42c2224 3872 int64_t roundIncrement, roundMask, roundBits;
158142c2 3873
a2f2d288 3874 roundingMode = status->float_rounding_mode;
158142c2
FB
3875 roundNearestEven = ( roundingMode == float_round_nearest_even );
3876 if ( roundingPrecision == 80 ) goto precision80;
3877 if ( roundingPrecision == 64 ) {
e9321124
AB
3878 roundIncrement = UINT64_C(0x0000000000000400);
3879 roundMask = UINT64_C(0x00000000000007FF);
158142c2
FB
3880 }
3881 else if ( roundingPrecision == 32 ) {
e9321124
AB
3882 roundIncrement = UINT64_C(0x0000008000000000);
3883 roundMask = UINT64_C(0x000000FFFFFFFFFF);
158142c2
FB
3884 }
3885 else {
3886 goto precision80;
3887 }
3888 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
3889 switch (roundingMode) {
3890 case float_round_nearest_even:
f9288a76 3891 case float_round_ties_away:
dc355b76
PM
3892 break;
3893 case float_round_to_zero:
3894 roundIncrement = 0;
3895 break;
3896 case float_round_up:
3897 roundIncrement = zSign ? 0 : roundMask;
3898 break;
3899 case float_round_down:
3900 roundIncrement = zSign ? roundMask : 0;
3901 break;
3902 default:
3903 abort();
158142c2
FB
3904 }
3905 roundBits = zSig0 & roundMask;
bb98fe42 3906 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
3907 if ( ( 0x7FFE < zExp )
3908 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
3909 ) {
3910 goto overflow;
3911 }
3912 if ( zExp <= 0 ) {
a2f2d288 3913 if (status->flush_to_zero) {
ff32e16e 3914 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3915 return packFloatx80(zSign, 0, 0);
3916 }
158142c2 3917 isTiny =
a2f2d288
PM
3918 (status->float_detect_tininess
3919 == float_tininess_before_rounding)
158142c2
FB
3920 || ( zExp < 0 )
3921 || ( zSig0 <= zSig0 + roundIncrement );
3922 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
3923 zExp = 0;
3924 roundBits = zSig0 & roundMask;
ff32e16e
PM
3925 if (isTiny && roundBits) {
3926 float_raise(float_flag_underflow, status);
3927 }
a2f2d288
PM
3928 if (roundBits) {
3929 status->float_exception_flags |= float_flag_inexact;
3930 }
158142c2 3931 zSig0 += roundIncrement;
bb98fe42 3932 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
3933 roundIncrement = roundMask + 1;
3934 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3935 roundMask |= roundIncrement;
3936 }
3937 zSig0 &= ~ roundMask;
3938 return packFloatx80( zSign, zExp, zSig0 );
3939 }
3940 }
a2f2d288
PM
3941 if (roundBits) {
3942 status->float_exception_flags |= float_flag_inexact;
3943 }
158142c2
FB
3944 zSig0 += roundIncrement;
3945 if ( zSig0 < roundIncrement ) {
3946 ++zExp;
e9321124 3947 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
3948 }
3949 roundIncrement = roundMask + 1;
3950 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3951 roundMask |= roundIncrement;
3952 }
3953 zSig0 &= ~ roundMask;
3954 if ( zSig0 == 0 ) zExp = 0;
3955 return packFloatx80( zSign, zExp, zSig0 );
3956 precision80:
dc355b76
PM
3957 switch (roundingMode) {
3958 case float_round_nearest_even:
f9288a76 3959 case float_round_ties_away:
dc355b76
PM
3960 increment = ((int64_t)zSig1 < 0);
3961 break;
3962 case float_round_to_zero:
3963 increment = 0;
3964 break;
3965 case float_round_up:
3966 increment = !zSign && zSig1;
3967 break;
3968 case float_round_down:
3969 increment = zSign && zSig1;
3970 break;
3971 default:
3972 abort();
158142c2 3973 }
bb98fe42 3974 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
3975 if ( ( 0x7FFE < zExp )
3976 || ( ( zExp == 0x7FFE )
e9321124 3977 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
3978 && increment
3979 )
3980 ) {
3981 roundMask = 0;
3982 overflow:
ff32e16e 3983 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
3984 if ( ( roundingMode == float_round_to_zero )
3985 || ( zSign && ( roundingMode == float_round_up ) )
3986 || ( ! zSign && ( roundingMode == float_round_down ) )
3987 ) {
3988 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
3989 }
0f605c88
LV
3990 return packFloatx80(zSign,
3991 floatx80_infinity_high,
3992 floatx80_infinity_low);
158142c2
FB
3993 }
3994 if ( zExp <= 0 ) {
3995 isTiny =
a2f2d288
PM
3996 (status->float_detect_tininess
3997 == float_tininess_before_rounding)
158142c2
FB
3998 || ( zExp < 0 )
3999 || ! increment
e9321124 4000 || ( zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF) );
158142c2
FB
4001 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4002 zExp = 0;
ff32e16e
PM
4003 if (isTiny && zSig1) {
4004 float_raise(float_flag_underflow, status);
4005 }
a2f2d288
PM
4006 if (zSig1) {
4007 status->float_exception_flags |= float_flag_inexact;
4008 }
dc355b76
PM
4009 switch (roundingMode) {
4010 case float_round_nearest_even:
f9288a76 4011 case float_round_ties_away:
dc355b76
PM
4012 increment = ((int64_t)zSig1 < 0);
4013 break;
4014 case float_round_to_zero:
4015 increment = 0;
4016 break;
4017 case float_round_up:
4018 increment = !zSign && zSig1;
4019 break;
4020 case float_round_down:
4021 increment = zSign && zSig1;
4022 break;
4023 default:
4024 abort();
158142c2
FB
4025 }
4026 if ( increment ) {
4027 ++zSig0;
4028 zSig0 &=
bb98fe42
AF
4029 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
4030 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4031 }
4032 return packFloatx80( zSign, zExp, zSig0 );
4033 }
4034 }
a2f2d288
PM
4035 if (zSig1) {
4036 status->float_exception_flags |= float_flag_inexact;
4037 }
158142c2
FB
4038 if ( increment ) {
4039 ++zSig0;
4040 if ( zSig0 == 0 ) {
4041 ++zExp;
e9321124 4042 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4043 }
4044 else {
bb98fe42 4045 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
4046 }
4047 }
4048 else {
4049 if ( zSig0 == 0 ) zExp = 0;
4050 }
4051 return packFloatx80( zSign, zExp, zSig0 );
4052
4053}
4054
4055/*----------------------------------------------------------------------------
4056| Takes an abstract floating-point value having sign `zSign', exponent
4057| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4058| and returns the proper extended double-precision floating-point value
4059| corresponding to the abstract input. This routine is just like
4060| `roundAndPackFloatx80' except that the input significand does not have to be
4061| normalized.
4062*----------------------------------------------------------------------------*/
4063
88857aca
LV
4064floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4065 flag zSign, int32_t zExp,
4066 uint64_t zSig0, uint64_t zSig1,
4067 float_status *status)
158142c2 4068{
8f506c70 4069 int8_t shiftCount;
158142c2
FB
4070
4071 if ( zSig0 == 0 ) {
4072 zSig0 = zSig1;
4073 zSig1 = 0;
4074 zExp -= 64;
4075 }
0019d5c3 4076 shiftCount = clz64(zSig0);
158142c2
FB
4077 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4078 zExp -= shiftCount;
ff32e16e
PM
4079 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4080 zSig0, zSig1, status);
158142c2
FB
4081
4082}
4083
158142c2
FB
4084/*----------------------------------------------------------------------------
4085| Returns the least-significant 64 fraction bits of the quadruple-precision
4086| floating-point value `a'.
4087*----------------------------------------------------------------------------*/
4088
a49db98d 4089static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4090{
4091
4092 return a.low;
4093
4094}
4095
4096/*----------------------------------------------------------------------------
4097| Returns the most-significant 48 fraction bits of the quadruple-precision
4098| floating-point value `a'.
4099*----------------------------------------------------------------------------*/
4100
a49db98d 4101static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4102{
4103
e9321124 4104 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
158142c2
FB
4105
4106}
4107
4108/*----------------------------------------------------------------------------
4109| Returns the exponent bits of the quadruple-precision floating-point value
4110| `a'.
4111*----------------------------------------------------------------------------*/
4112
f4014512 4113static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4114{
4115
4116 return ( a.high>>48 ) & 0x7FFF;
4117
4118}
4119
4120/*----------------------------------------------------------------------------
4121| Returns the sign bit of the quadruple-precision floating-point value `a'.
4122*----------------------------------------------------------------------------*/
4123
a49db98d 4124static inline flag extractFloat128Sign( float128 a )
158142c2
FB
4125{
4126
4127 return a.high>>63;
4128
4129}
4130
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision floating-point value
| represented by the denormalized significand formed by the concatenation of
| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
| significand are stored at the location pointed to by `zSig0Ptr', and the
| least significant 64 bits of the normalized significand are stored at the
| location pointed to by `zSig1Ptr'.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat128Subnormal(
     uint64_t aSig0,
     uint64_t aSig1,
     int32_t *zExpPtr,
     uint64_t *zSig0Ptr,
     uint64_t *zSig1Ptr
 )
{
    int8_t shift;

    if (aSig0 != 0) {
        /* Leading one is in the high word: move it to bit 48. */
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /*
     * High word is empty: the low word supplies every significant bit.
     * `shift' is the distance from its leading one to bit 48 of the high
     * word, on top of the implicit 64-bit word move.
     */
    shift = clz64(aSig1) - 15;
    if (shift >= 0) {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        /* Leading one is above bit 48: the value straddles both words. */
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
4171
4172/*----------------------------------------------------------------------------
4173| Packs the sign `zSign', the exponent `zExp', and the significand formed
4174| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4175| floating-point value, returning the result. After being shifted into the
4176| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4177| added together to form the most significant 32 bits of the result. This
4178| means that any integer portion of `zSig0' will be added into the exponent.
4179| Since a properly normalized significand will have an integer portion equal
4180| to 1, the `zExp' input should be 1 less than the desired result exponent
4181| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4182| significand.
4183*----------------------------------------------------------------------------*/
4184
a49db98d 4185static inline float128
f4014512 4186 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
158142c2
FB
4187{
4188 float128 z;
4189
4190 z.low = zSig1;
bb98fe42 4191 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
158142c2
FB
4192 return z;
4193
4194}
4195
4196/*----------------------------------------------------------------------------
4197| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4198| and extended significand formed by the concatenation of `zSig0', `zSig1',
4199| and `zSig2', and returns the proper quadruple-precision floating-point value
4200| corresponding to the abstract input. Ordinarily, the abstract value is
4201| simply rounded and packed into the quadruple-precision format, with the
4202| inexact exception raised if the abstract input cannot be represented
4203| exactly. However, if the abstract value is too large, the overflow and
4204| inexact exceptions are raised and an infinity or maximal finite value is
4205| returned. If the abstract value is too small, the input value is rounded to
4206| a subnormal number, and the underflow and inexact exceptions are raised if
4207| the abstract input cannot be represented exactly as a subnormal quadruple-
4208| precision floating-point number.
4209| The input significand must be normalized or smaller. If the input
4210| significand is not normalized, `zExp' must be 0; in that case, the result
4211| returned is a subnormal number, and it must not require rounding. In the
4212| usual case that the input significand is normalized, `zExp' must be 1 less
4213| than the ``true'' floating-point exponent. The handling of underflow and
4214| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4215*----------------------------------------------------------------------------*/
4216
f4014512 4217static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
4218 uint64_t zSig0, uint64_t zSig1,
4219 uint64_t zSig2, float_status *status)
158142c2 4220{
8f506c70 4221 int8_t roundingMode;
158142c2
FB
4222 flag roundNearestEven, increment, isTiny;
4223
a2f2d288 4224 roundingMode = status->float_rounding_mode;
158142c2 4225 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4226 switch (roundingMode) {
4227 case float_round_nearest_even:
f9288a76 4228 case float_round_ties_away:
dc355b76
PM
4229 increment = ((int64_t)zSig2 < 0);
4230 break;
4231 case float_round_to_zero:
4232 increment = 0;
4233 break;
4234 case float_round_up:
4235 increment = !zSign && zSig2;
4236 break;
4237 case float_round_down:
4238 increment = zSign && zSig2;
4239 break;
9ee6f678
BR
4240 case float_round_to_odd:
4241 increment = !(zSig1 & 0x1) && zSig2;
4242 break;
dc355b76
PM
4243 default:
4244 abort();
158142c2 4245 }
bb98fe42 4246 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
4247 if ( ( 0x7FFD < zExp )
4248 || ( ( zExp == 0x7FFD )
4249 && eq128(
e9321124
AB
4250 UINT64_C(0x0001FFFFFFFFFFFF),
4251 UINT64_C(0xFFFFFFFFFFFFFFFF),
158142c2
FB
4252 zSig0,
4253 zSig1
4254 )
4255 && increment
4256 )
4257 ) {
ff32e16e 4258 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4259 if ( ( roundingMode == float_round_to_zero )
4260 || ( zSign && ( roundingMode == float_round_up ) )
4261 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4262 || (roundingMode == float_round_to_odd)
158142c2
FB
4263 ) {
4264 return
4265 packFloat128(
4266 zSign,
4267 0x7FFE,
e9321124
AB
4268 UINT64_C(0x0000FFFFFFFFFFFF),
4269 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
4270 );
4271 }
4272 return packFloat128( zSign, 0x7FFF, 0, 0 );
4273 }
4274 if ( zExp < 0 ) {
a2f2d288 4275 if (status->flush_to_zero) {
ff32e16e 4276 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4277 return packFloat128(zSign, 0, 0, 0);
4278 }
158142c2 4279 isTiny =
a2f2d288
PM
4280 (status->float_detect_tininess
4281 == float_tininess_before_rounding)
158142c2
FB
4282 || ( zExp < -1 )
4283 || ! increment
4284 || lt128(
4285 zSig0,
4286 zSig1,
e9321124
AB
4287 UINT64_C(0x0001FFFFFFFFFFFF),
4288 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
4289 );
4290 shift128ExtraRightJamming(
4291 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4292 zExp = 0;
ff32e16e
PM
4293 if (isTiny && zSig2) {
4294 float_raise(float_flag_underflow, status);
4295 }
dc355b76
PM
4296 switch (roundingMode) {
4297 case float_round_nearest_even:
f9288a76 4298 case float_round_ties_away:
dc355b76
PM
4299 increment = ((int64_t)zSig2 < 0);
4300 break;
4301 case float_round_to_zero:
4302 increment = 0;
4303 break;
4304 case float_round_up:
4305 increment = !zSign && zSig2;
4306 break;
4307 case float_round_down:
4308 increment = zSign && zSig2;
4309 break;
9ee6f678
BR
4310 case float_round_to_odd:
4311 increment = !(zSig1 & 0x1) && zSig2;
4312 break;
dc355b76
PM
4313 default:
4314 abort();
158142c2
FB
4315 }
4316 }
4317 }
a2f2d288
PM
4318 if (zSig2) {
4319 status->float_exception_flags |= float_flag_inexact;
4320 }
158142c2
FB
4321 if ( increment ) {
4322 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
4323 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
4324 }
4325 else {
4326 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4327 }
4328 return packFloat128( zSign, zExp, zSig0, zSig1 );
4329
4330}
4331
4332/*----------------------------------------------------------------------------
4333| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4334| and significand formed by the concatenation of `zSig0' and `zSig1', and
4335| returns the proper quadruple-precision floating-point value corresponding
4336| to the abstract input. This routine is just like `roundAndPackFloat128'
4337| except that the input significand has fewer bits and does not have to be
4338| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4339| point exponent.
4340*----------------------------------------------------------------------------*/
4341
f4014512 4342static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
4343 uint64_t zSig0, uint64_t zSig1,
4344 float_status *status)
158142c2 4345{
8f506c70 4346 int8_t shiftCount;
bb98fe42 4347 uint64_t zSig2;
158142c2
FB
4348
4349 if ( zSig0 == 0 ) {
4350 zSig0 = zSig1;
4351 zSig1 = 0;
4352 zExp -= 64;
4353 }
0019d5c3 4354 shiftCount = clz64(zSig0) - 15;
158142c2
FB
4355 if ( 0 <= shiftCount ) {
4356 zSig2 = 0;
4357 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4358 }
4359 else {
4360 shift128ExtraRightJamming(
4361 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4362 }
4363 zExp -= shiftCount;
ff32e16e 4364 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
4365
4366}
4367
158142c2 4368
158142c2
FB
4369/*----------------------------------------------------------------------------
4370| Returns the result of converting the 32-bit two's complement integer `a'
4371| to the extended double-precision floating-point format. The conversion
4372| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4373| Arithmetic.
4374*----------------------------------------------------------------------------*/
4375
e5a41ffa 4376floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2
FB
4377{
4378 flag zSign;
3a87d009 4379 uint32_t absA;
8f506c70 4380 int8_t shiftCount;
bb98fe42 4381 uint64_t zSig;
158142c2
FB
4382
4383 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4384 zSign = ( a < 0 );
4385 absA = zSign ? - a : a;
0019d5c3 4386 shiftCount = clz32(absA) + 32;
158142c2
FB
4387 zSig = absA;
4388 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4389
4390}
4391
158142c2
FB
4392/*----------------------------------------------------------------------------
4393| Returns the result of converting the 32-bit two's complement integer `a' to
4394| the quadruple-precision floating-point format. The conversion is performed
4395| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4396*----------------------------------------------------------------------------*/
4397
e5a41ffa 4398float128 int32_to_float128(int32_t a, float_status *status)
158142c2
FB
4399{
4400 flag zSign;
3a87d009 4401 uint32_t absA;
8f506c70 4402 int8_t shiftCount;
bb98fe42 4403 uint64_t zSig0;
158142c2
FB
4404
4405 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4406 zSign = ( a < 0 );
4407 absA = zSign ? - a : a;
0019d5c3 4408 shiftCount = clz32(absA) + 17;
158142c2
FB
4409 zSig0 = absA;
4410 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
4411
4412}
4413
158142c2
FB
4414/*----------------------------------------------------------------------------
4415| Returns the result of converting the 64-bit two's complement integer `a'
4416| to the extended double-precision floating-point format. The conversion
4417| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4418| Arithmetic.
4419*----------------------------------------------------------------------------*/
4420
e5a41ffa 4421floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2
FB
4422{
4423 flag zSign;
182f42fd 4424 uint64_t absA;
8f506c70 4425 int8_t shiftCount;
158142c2
FB
4426
4427 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4428 zSign = ( a < 0 );
4429 absA = zSign ? - a : a;
0019d5c3 4430 shiftCount = clz64(absA);
158142c2
FB
4431 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4432
4433}
4434
158142c2
FB
4435/*----------------------------------------------------------------------------
4436| Returns the result of converting the 64-bit two's complement integer `a' to
4437| the quadruple-precision floating-point format. The conversion is performed
4438| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4439*----------------------------------------------------------------------------*/
4440
e5a41ffa 4441float128 int64_to_float128(int64_t a, float_status *status)
158142c2
FB
4442{
4443 flag zSign;
182f42fd 4444 uint64_t absA;
8f506c70 4445 int8_t shiftCount;
f4014512 4446 int32_t zExp;
bb98fe42 4447 uint64_t zSig0, zSig1;
158142c2
FB
4448
4449 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4450 zSign = ( a < 0 );
4451 absA = zSign ? - a : a;
0019d5c3 4452 shiftCount = clz64(absA) + 49;
158142c2
FB
4453 zExp = 0x406E - shiftCount;
4454 if ( 64 <= shiftCount ) {
4455 zSig1 = 0;
4456 zSig0 = absA;
4457 shiftCount -= 64;
4458 }
4459 else {
4460 zSig1 = absA;
4461 zSig0 = 0;
4462 }
4463 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4464 return packFloat128( zSign, zExp, zSig0, zSig1 );
4465
4466}
4467
6bb8e0f1
PM
4468/*----------------------------------------------------------------------------
4469| Returns the result of converting the 64-bit unsigned integer `a'
4470| to the quadruple-precision floating-point format. The conversion is performed
4471| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4472*----------------------------------------------------------------------------*/
4473
e5a41ffa 4474float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
4475{
4476 if (a == 0) {
4477 return float128_zero;
4478 }
6603d506 4479 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
4480}
4481
158142c2
FB
4482/*----------------------------------------------------------------------------
4483| Returns the result of converting the single-precision floating-point value
4484| `a' to the extended double-precision floating-point format. The conversion
4485| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4486| Arithmetic.
4487*----------------------------------------------------------------------------*/
4488
e5a41ffa 4489floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2
FB
4490{
4491 flag aSign;
0c48262d 4492 int aExp;
bb98fe42 4493 uint32_t aSig;
158142c2 4494
ff32e16e 4495 a = float32_squash_input_denormal(a, status);
158142c2
FB
4496 aSig = extractFloat32Frac( a );
4497 aExp = extractFloat32Exp( a );
4498 aSign = extractFloat32Sign( a );
4499 if ( aExp == 0xFF ) {
ff32e16e 4500 if (aSig) {
7537c2b4
JM
4501 floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
4502 status);
4503 return floatx80_silence_nan(res, status);
ff32e16e 4504 }
0f605c88
LV
4505 return packFloatx80(aSign,
4506 floatx80_infinity_high,
4507 floatx80_infinity_low);
158142c2
FB
4508 }
4509 if ( aExp == 0 ) {
4510 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4511 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4512 }
4513 aSig |= 0x00800000;
bb98fe42 4514 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
4515
4516}
4517
158142c2
FB
4518/*----------------------------------------------------------------------------
4519| Returns the result of converting the single-precision floating-point value
4520| `a' to the double-precision floating-point format. The conversion is
4521| performed according to the IEC/IEEE Standard for Binary Floating-Point
4522| Arithmetic.
4523*----------------------------------------------------------------------------*/
4524
e5a41ffa 4525float128 float32_to_float128(float32 a, float_status *status)
158142c2
FB
4526{
4527 flag aSign;
0c48262d 4528 int aExp;
bb98fe42 4529 uint32_t aSig;
158142c2 4530
ff32e16e 4531 a = float32_squash_input_denormal(a, status);
158142c2
FB
4532 aSig = extractFloat32Frac( a );
4533 aExp = extractFloat32Exp( a );
4534 aSign = extractFloat32Sign( a );
4535 if ( aExp == 0xFF ) {
ff32e16e
PM
4536 if (aSig) {
4537 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
4538 }
158142c2
FB
4539 return packFloat128( aSign, 0x7FFF, 0, 0 );
4540 }
4541 if ( aExp == 0 ) {
4542 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4543 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4544 --aExp;
4545 }
bb98fe42 4546 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
4547
4548}
4549
158142c2
FB
4550/*----------------------------------------------------------------------------
4551| Returns the remainder of the single-precision floating-point value `a'
4552| with respect to the corresponding value `b'. The operation is performed
4553| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4554*----------------------------------------------------------------------------*/
4555
e5a41ffa 4556float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 4557{
ed086f3d 4558 flag aSign, zSign;
0c48262d 4559 int aExp, bExp, expDiff;
bb98fe42
AF
4560 uint32_t aSig, bSig;
4561 uint32_t q;
4562 uint64_t aSig64, bSig64, q64;
4563 uint32_t alternateASig;
4564 int32_t sigMean;
ff32e16e
PM
4565 a = float32_squash_input_denormal(a, status);
4566 b = float32_squash_input_denormal(b, status);
158142c2
FB
4567
4568 aSig = extractFloat32Frac( a );
4569 aExp = extractFloat32Exp( a );
4570 aSign = extractFloat32Sign( a );
4571 bSig = extractFloat32Frac( b );
4572 bExp = extractFloat32Exp( b );
158142c2
FB
4573 if ( aExp == 0xFF ) {
4574 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 4575 return propagateFloat32NaN(a, b, status);
158142c2 4576 }
ff32e16e 4577 float_raise(float_flag_invalid, status);
af39bc8c 4578 return float32_default_nan(status);
158142c2
FB
4579 }
4580 if ( bExp == 0xFF ) {
ff32e16e
PM
4581 if (bSig) {
4582 return propagateFloat32NaN(a, b, status);
4583 }
158142c2
FB
4584 return a;
4585 }
4586 if ( bExp == 0 ) {
4587 if ( bSig == 0 ) {
ff32e16e 4588 float_raise(float_flag_invalid, status);
af39bc8c 4589 return float32_default_nan(status);
158142c2
FB
4590 }
4591 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
4592 }
4593 if ( aExp == 0 ) {
4594 if ( aSig == 0 ) return a;
4595 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4596 }
4597 expDiff = aExp - bExp;
4598 aSig |= 0x00800000;
4599 bSig |= 0x00800000;
4600 if ( expDiff < 32 ) {
4601 aSig <<= 8;
4602 bSig <<= 8;
4603 if ( expDiff < 0 ) {
4604 if ( expDiff < -1 ) return a;
4605 aSig >>= 1;
4606 }
4607 q = ( bSig <= aSig );
4608 if ( q ) aSig -= bSig;
4609 if ( 0 < expDiff ) {
bb98fe42 4610 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
4611 q >>= 32 - expDiff;
4612 bSig >>= 2;
4613 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4614 }
4615 else {
4616 aSig >>= 2;
4617 bSig >>= 2;
4618 }
4619 }
4620 else {
4621 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
4622 aSig64 = ( (uint64_t) aSig )<<40;
4623 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
4624 expDiff -= 64;
4625 while ( 0 < expDiff ) {
4626 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
4627 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
4628 aSig64 = - ( ( bSig * q64 )<<38 );
4629 expDiff -= 62;
4630 }
4631 expDiff += 64;
4632 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
4633 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
4634 q = q64>>( 64 - expDiff );
4635 bSig <<= 6;
4636 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
4637 }
4638 do {
4639 alternateASig = aSig;
4640 ++q;
4641 aSig -= bSig;
bb98fe42 4642 } while ( 0 <= (int32_t) aSig );
158142c2
FB
4643 sigMean = aSig + alternateASig;
4644 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4645 aSig = alternateASig;
4646 }
bb98fe42 4647 zSign = ( (int32_t) aSig < 0 );
158142c2 4648 if ( zSign ) aSig = - aSig;
ff32e16e 4649 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
4650}
4651
369be8f6 4652
158142c2 4653
8229c991
AJ
4654/*----------------------------------------------------------------------------
4655| Returns the binary exponential of the single-precision floating-point value
4656| `a'. The operation is performed according to the IEC/IEEE Standard for
4657| Binary Floating-Point Arithmetic.
4658|
4659| Uses the following identities:
4660|
4661| 1. -------------------------------------------------------------------------
4662| x x*ln(2)
4663| 2 = e
4664|
4665| 2. -------------------------------------------------------------------------
4666| 2 3 4 5 n
4667| x x x x x x x
4668| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
4669| 1! 2! 3! 4! 5! n!
4670*----------------------------------------------------------------------------*/
4671
4672static const float64 float32_exp2_coefficients[15] =
4673{
d5138cf4
PM
4674 const_float64( 0x3ff0000000000000ll ), /* 1 */
4675 const_float64( 0x3fe0000000000000ll ), /* 2 */
4676 const_float64( 0x3fc5555555555555ll ), /* 3 */
4677 const_float64( 0x3fa5555555555555ll ), /* 4 */
4678 const_float64( 0x3f81111111111111ll ), /* 5 */
4679 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
4680 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
4681 const_float64( 0x3efa01a01a01a01all ), /* 8 */
4682 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
4683 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
4684 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
4685 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
4686 const_float64( 0x3de6124613a86d09ll ), /* 13 */
4687 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
4688 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
4689};
4690
e5a41ffa 4691float32 float32_exp2(float32 a, float_status *status)
8229c991
AJ
4692{
4693 flag aSign;
0c48262d 4694 int aExp;
bb98fe42 4695 uint32_t aSig;
8229c991
AJ
4696 float64 r, x, xn;
4697 int i;
ff32e16e 4698 a = float32_squash_input_denormal(a, status);
8229c991
AJ
4699
4700 aSig = extractFloat32Frac( a );
4701 aExp = extractFloat32Exp( a );
4702 aSign = extractFloat32Sign( a );
4703
4704 if ( aExp == 0xFF) {
ff32e16e
PM
4705 if (aSig) {
4706 return propagateFloat32NaN(a, float32_zero, status);
4707 }
8229c991
AJ
4708 return (aSign) ? float32_zero : a;
4709 }
4710 if (aExp == 0) {
4711 if (aSig == 0) return float32_one;
4712 }
4713
ff32e16e 4714 float_raise(float_flag_inexact, status);
8229c991
AJ
4715
4716 /* ******************************* */
4717 /* using float64 for approximation */
4718 /* ******************************* */
ff32e16e
PM
4719 x = float32_to_float64(a, status);
4720 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
4721
4722 xn = x;
4723 r = float64_one;
4724 for (i = 0 ; i < 15 ; i++) {
4725 float64 f;
4726
ff32e16e
PM
4727 f = float64_mul(xn, float32_exp2_coefficients[i], status);
4728 r = float64_add(r, f, status);
8229c991 4729
ff32e16e 4730 xn = float64_mul(xn, x, status);
8229c991
AJ
4731 }
4732
4733 return float64_to_float32(r, status);
4734}
4735
374dfc33
AJ
4736/*----------------------------------------------------------------------------
4737| Returns the binary log of the single-precision floating-point value `a'.
4738| The operation is performed according to the IEC/IEEE Standard for Binary
4739| Floating-Point Arithmetic.
4740*----------------------------------------------------------------------------*/
e5a41ffa 4741float32 float32_log2(float32 a, float_status *status)
374dfc33
AJ
4742{
4743 flag aSign, zSign;
0c48262d 4744 int aExp;
bb98fe42 4745 uint32_t aSig, zSig, i;
374dfc33 4746
ff32e16e 4747 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
4748 aSig = extractFloat32Frac( a );
4749 aExp = extractFloat32Exp( a );
4750 aSign = extractFloat32Sign( a );
4751
4752 if ( aExp == 0 ) {
4753 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
4754 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4755 }
4756 if ( aSign ) {
ff32e16e 4757 float_raise(float_flag_invalid, status);
af39bc8c 4758 return float32_default_nan(status);
374dfc33
AJ
4759 }
4760 if ( aExp == 0xFF ) {
ff32e16e
PM
4761 if (aSig) {
4762 return propagateFloat32NaN(a, float32_zero, status);
4763 }
374dfc33
AJ
4764 return a;
4765 }
4766
4767 aExp -= 0x7F;
4768 aSig |= 0x00800000;
4769 zSign = aExp < 0;
4770 zSig = aExp << 23;
4771
4772 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 4773 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
4774 if ( aSig & 0x01000000 ) {
4775 aSig >>= 1;
4776 zSig |= i;
4777 }
4778 }
4779
4780 if ( zSign )
4781 zSig = -zSig;
4782
ff32e16e 4783 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
4784}
4785
158142c2
FB
4786/*----------------------------------------------------------------------------
4787| Returns 1 if the single-precision floating-point value `a' is equal to
b689362d
AJ
4788| the corresponding value `b', and 0 otherwise. The invalid exception is
4789| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
4790| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4791*----------------------------------------------------------------------------*/
4792
e5a41ffa 4793int float32_eq(float32 a, float32 b, float_status *status)
158142c2 4794{
b689362d 4795 uint32_t av, bv;
ff32e16e
PM
4796 a = float32_squash_input_denormal(a, status);
4797 b = float32_squash_input_denormal(b, status);
158142c2
FB
4798
4799 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4800 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4801 ) {
ff32e16e 4802 float_raise(float_flag_invalid, status);
158142c2
FB
4803 return 0;
4804 }
b689362d
AJ
4805 av = float32_val(a);
4806 bv = float32_val(b);
4807 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4808}
4809
4810/*----------------------------------------------------------------------------
4811| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
4812| or equal to the corresponding value `b', and 0 otherwise. The invalid
4813| exception is raised if either operand is a NaN. The comparison is performed
4814| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4815*----------------------------------------------------------------------------*/
4816
e5a41ffa 4817int float32_le(float32 a, float32 b, float_status *status)
158142c2
FB
4818{
4819 flag aSign, bSign;
bb98fe42 4820 uint32_t av, bv;
ff32e16e
PM
4821 a = float32_squash_input_denormal(a, status);
4822 b = float32_squash_input_denormal(b, status);
158142c2
FB
4823
4824 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4825 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4826 ) {
ff32e16e 4827 float_raise(float_flag_invalid, status);
158142c2
FB
4828 return 0;
4829 }
4830 aSign = extractFloat32Sign( a );
4831 bSign = extractFloat32Sign( b );
f090c9d4
PB
4832 av = float32_val(a);
4833 bv = float32_val(b);
bb98fe42 4834 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4835 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4836
4837}
4838
4839/*----------------------------------------------------------------------------
4840| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
4841| the corresponding value `b', and 0 otherwise. The invalid exception is
4842| raised if either operand is a NaN. The comparison is performed according
4843| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4844*----------------------------------------------------------------------------*/
4845
e5a41ffa 4846int float32_lt(float32 a, float32 b, float_status *status)
158142c2
FB
4847{
4848 flag aSign, bSign;
bb98fe42 4849 uint32_t av, bv;
ff32e16e
PM
4850 a = float32_squash_input_denormal(a, status);
4851 b = float32_squash_input_denormal(b, status);
158142c2
FB
4852
4853 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4854 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4855 ) {
ff32e16e 4856 float_raise(float_flag_invalid, status);
158142c2
FB
4857 return 0;
4858 }
4859 aSign = extractFloat32Sign( a );
4860 bSign = extractFloat32Sign( b );
f090c9d4
PB
4861 av = float32_val(a);
4862 bv = float32_val(b);
bb98fe42 4863 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4864 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4865
4866}
4867
67b7861d
AJ
4868/*----------------------------------------------------------------------------
4869| Returns 1 if the single-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4870| be compared, and 0 otherwise. The invalid exception is raised if either
4871| operand is a NaN. The comparison is performed according to the IEC/IEEE
4872| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4873*----------------------------------------------------------------------------*/
4874
e5a41ffa 4875int float32_unordered(float32 a, float32 b, float_status *status)
67b7861d 4876{
ff32e16e
PM
4877 a = float32_squash_input_denormal(a, status);
4878 b = float32_squash_input_denormal(b, status);
67b7861d
AJ
4879
4880 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4881 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4882 ) {
ff32e16e 4883 float_raise(float_flag_invalid, status);
67b7861d
AJ
4884 return 1;
4885 }
4886 return 0;
4887}
b689362d 4888
158142c2
FB
4889/*----------------------------------------------------------------------------
4890| Returns 1 if the single-precision floating-point value `a' is equal to
f5a64251
AJ
4891| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4892| exception. The comparison is performed according to the IEC/IEEE Standard
4893| for Binary Floating-Point Arithmetic.
158142c2
FB
4894*----------------------------------------------------------------------------*/
4895
e5a41ffa 4896int float32_eq_quiet(float32 a, float32 b, float_status *status)
158142c2 4897{
ff32e16e
PM
4898 a = float32_squash_input_denormal(a, status);
4899 b = float32_squash_input_denormal(b, status);
158142c2
FB
4900
4901 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4902 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4903 ) {
af39bc8c
AM
4904 if (float32_is_signaling_nan(a, status)
4905 || float32_is_signaling_nan(b, status)) {
ff32e16e 4906 float_raise(float_flag_invalid, status);
b689362d 4907 }
158142c2
FB
4908 return 0;
4909 }
b689362d
AJ
4910 return ( float32_val(a) == float32_val(b) ) ||
4911 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
158142c2
FB
4912}
4913
4914/*----------------------------------------------------------------------------
4915| Returns 1 if the single-precision floating-point value `a' is less than or
4916| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4917| cause an exception. Otherwise, the comparison is performed according to the
4918| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4919*----------------------------------------------------------------------------*/
4920
e5a41ffa 4921int float32_le_quiet(float32 a, float32 b, float_status *status)
158142c2
FB
4922{
4923 flag aSign, bSign;
bb98fe42 4924 uint32_t av, bv;
ff32e16e
PM
4925 a = float32_squash_input_denormal(a, status);
4926 b = float32_squash_input_denormal(b, status);
158142c2
FB
4927
4928 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4929 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4930 ) {
af39bc8c
AM
4931 if (float32_is_signaling_nan(a, status)
4932 || float32_is_signaling_nan(b, status)) {
ff32e16e 4933 float_raise(float_flag_invalid, status);
158142c2
FB
4934 }
4935 return 0;
4936 }
4937 aSign = extractFloat32Sign( a );
4938 bSign = extractFloat32Sign( b );
f090c9d4
PB
4939 av = float32_val(a);
4940 bv = float32_val(b);
bb98fe42 4941 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4942 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4943
4944}
4945
4946/*----------------------------------------------------------------------------
4947| Returns 1 if the single-precision floating-point value `a' is less than
4948| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4949| exception. Otherwise, the comparison is performed according to the IEC/IEEE
ab52f973 4950| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4951*----------------------------------------------------------------------------*/
4952
ab52f973 4953int float32_lt_quiet(float32 a, float32 b, float_status *status)
158142c2 4954{
ab52f973
AB
4955 flag aSign, bSign;
4956 uint32_t av, bv;
4957 a = float32_squash_input_denormal(a, status);
4958 b = float32_squash_input_denormal(b, status);
158142c2 4959
ab52f973
AB
4960 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4961 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4962 ) {
4963 if (float32_is_signaling_nan(a, status)
4964 || float32_is_signaling_nan(b, status)) {
ff32e16e 4965 float_raise(float_flag_invalid, status);
158142c2 4966 }
ab52f973 4967 return 0;
158142c2 4968 }
ab52f973
AB
4969 aSign = extractFloat32Sign( a );
4970 bSign = extractFloat32Sign( b );
4971 av = float32_val(a);
4972 bv = float32_val(b);
4973 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
4974 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4975
4976}
4977
4978/*----------------------------------------------------------------------------
ab52f973
AB
4979| Returns 1 if the single-precision floating-point values `a' and `b' cannot
4980| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4981| comparison is performed according to the IEC/IEEE Standard for Binary
4982| Floating-Point Arithmetic.
158142c2
FB
4983*----------------------------------------------------------------------------*/
4984
ab52f973 4985int float32_unordered_quiet(float32 a, float32 b, float_status *status)
158142c2 4986{
ab52f973
AB
4987 a = float32_squash_input_denormal(a, status);
4988 b = float32_squash_input_denormal(b, status);
158142c2 4989
ab52f973
AB
4990 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4991 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4992 ) {
4993 if (float32_is_signaling_nan(a, status)
4994 || float32_is_signaling_nan(b, status)) {
4995 float_raise(float_flag_invalid, status);
158142c2 4996 }
ab52f973 4997 return 1;
158142c2 4998 }
ab52f973 4999 return 0;
158142c2
FB
5000}
5001
158142c2
FB
5002/*----------------------------------------------------------------------------
5003| Returns the result of converting the double-precision floating-point value
5004| `a' to the extended double-precision floating-point format. The conversion
5005| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5006| Arithmetic.
5007*----------------------------------------------------------------------------*/
5008
e5a41ffa 5009floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2
FB
5010{
5011 flag aSign;
0c48262d 5012 int aExp;
bb98fe42 5013 uint64_t aSig;
158142c2 5014
ff32e16e 5015 a = float64_squash_input_denormal(a, status);
158142c2
FB
5016 aSig = extractFloat64Frac( a );
5017 aExp = extractFloat64Exp( a );
5018 aSign = extractFloat64Sign( a );
5019 if ( aExp == 0x7FF ) {
ff32e16e 5020 if (aSig) {
7537c2b4
JM
5021 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5022 status);
5023 return floatx80_silence_nan(res, status);
ff32e16e 5024 }
0f605c88
LV
5025 return packFloatx80(aSign,
5026 floatx80_infinity_high,
5027 floatx80_infinity_low);
158142c2
FB
5028 }
5029 if ( aExp == 0 ) {
5030 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5031 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5032 }
5033 return
5034 packFloatx80(
e9321124 5035 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
158142c2
FB
5036
5037}
5038
158142c2
FB
5039/*----------------------------------------------------------------------------
5040| Returns the result of converting the double-precision floating-point value
5041| `a' to the quadruple-precision floating-point format. The conversion is
5042| performed according to the IEC/IEEE Standard for Binary Floating-Point
5043| Arithmetic.
5044*----------------------------------------------------------------------------*/
5045
e5a41ffa 5046float128 float64_to_float128(float64 a, float_status *status)
158142c2
FB
5047{
5048 flag aSign;
0c48262d 5049 int aExp;
bb98fe42 5050 uint64_t aSig, zSig0, zSig1;
158142c2 5051
ff32e16e 5052 a = float64_squash_input_denormal(a, status);
158142c2
FB
5053 aSig = extractFloat64Frac( a );
5054 aExp = extractFloat64Exp( a );
5055 aSign = extractFloat64Sign( a );
5056 if ( aExp == 0x7FF ) {
ff32e16e
PM
5057 if (aSig) {
5058 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5059 }
158142c2
FB
5060 return packFloat128( aSign, 0x7FFF, 0, 0 );
5061 }
5062 if ( aExp == 0 ) {
5063 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5064 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5065 --aExp;
5066 }
5067 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
5068 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
5069
5070}
5071
158142c2
FB
5072
5073/*----------------------------------------------------------------------------
5074| Returns the remainder of the double-precision floating-point value `a'
5075| with respect to the corresponding value `b'. The operation is performed
5076| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5077*----------------------------------------------------------------------------*/
5078
e5a41ffa 5079float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5080{
ed086f3d 5081 flag aSign, zSign;
0c48262d 5082 int aExp, bExp, expDiff;
bb98fe42
AF
5083 uint64_t aSig, bSig;
5084 uint64_t q, alternateASig;
5085 int64_t sigMean;
158142c2 5086
ff32e16e
PM
5087 a = float64_squash_input_denormal(a, status);
5088 b = float64_squash_input_denormal(b, status);
158142c2
FB
5089 aSig = extractFloat64Frac( a );
5090 aExp = extractFloat64Exp( a );
5091 aSign = extractFloat64Sign( a );
5092 bSig = extractFloat64Frac( b );
5093 bExp = extractFloat64Exp( b );
158142c2
FB
5094 if ( aExp == 0x7FF ) {
5095 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5096 return propagateFloat64NaN(a, b, status);
158142c2 5097 }
ff32e16e 5098 float_raise(float_flag_invalid, status);
af39bc8c 5099 return float64_default_nan(status);
158142c2
FB
5100 }
5101 if ( bExp == 0x7FF ) {
ff32e16e
PM
5102 if (bSig) {
5103 return propagateFloat64NaN(a, b, status);
5104 }
158142c2
FB
5105 return a;
5106 }
5107 if ( bExp == 0 ) {
5108 if ( bSig == 0 ) {
ff32e16e 5109 float_raise(float_flag_invalid, status);
af39bc8c 5110 return float64_default_nan(status);
158142c2
FB
5111 }
5112 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5113 }
5114 if ( aExp == 0 ) {
5115 if ( aSig == 0 ) return a;
5116 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5117 }
5118 expDiff = aExp - bExp;
e9321124
AB
5119 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5120 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
158142c2
FB
5121 if ( expDiff < 0 ) {
5122 if ( expDiff < -1 ) return a;
5123 aSig >>= 1;
5124 }
5125 q = ( bSig <= aSig );
5126 if ( q ) aSig -= bSig;
5127 expDiff -= 64;
5128 while ( 0 < expDiff ) {
5129 q = estimateDiv128To64( aSig, 0, bSig );
5130 q = ( 2 < q ) ? q - 2 : 0;
5131 aSig = - ( ( bSig>>2 ) * q );
5132 expDiff -= 62;
5133 }
5134 expDiff += 64;
5135 if ( 0 < expDiff ) {
5136 q = estimateDiv128To64( aSig, 0, bSig );
5137 q = ( 2 < q ) ? q - 2 : 0;
5138 q >>= 64 - expDiff;
5139 bSig >>= 2;
5140 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5141 }
5142 else {
5143 aSig >>= 2;
5144 bSig >>= 2;
5145 }
5146 do {
5147 alternateASig = aSig;
5148 ++q;
5149 aSig -= bSig;
bb98fe42 5150 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5151 sigMean = aSig + alternateASig;
5152 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5153 aSig = alternateASig;
5154 }
bb98fe42 5155 zSign = ( (int64_t) aSig < 0 );
158142c2 5156 if ( zSign ) aSig = - aSig;
ff32e16e 5157 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5158
5159}
5160
374dfc33
AJ
5161/*----------------------------------------------------------------------------
5162| Returns the binary log of the double-precision floating-point value `a'.
5163| The operation is performed according to the IEC/IEEE Standard for Binary
5164| Floating-Point Arithmetic.
5165*----------------------------------------------------------------------------*/
e5a41ffa 5166float64 float64_log2(float64 a, float_status *status)
374dfc33
AJ
5167{
5168 flag aSign, zSign;
0c48262d 5169 int aExp;
bb98fe42 5170 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5171 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5172
5173 aSig = extractFloat64Frac( a );
5174 aExp = extractFloat64Exp( a );
5175 aSign = extractFloat64Sign( a );
5176
5177 if ( aExp == 0 ) {
5178 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5179 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5180 }
5181 if ( aSign ) {
ff32e16e 5182 float_raise(float_flag_invalid, status);
af39bc8c 5183 return float64_default_nan(status);
374dfc33
AJ
5184 }
5185 if ( aExp == 0x7FF ) {
ff32e16e
PM
5186 if (aSig) {
5187 return propagateFloat64NaN(a, float64_zero, status);
5188 }
374dfc33
AJ
5189 return a;
5190 }
5191
5192 aExp -= 0x3FF;
e9321124 5193 aSig |= UINT64_C(0x0010000000000000);
374dfc33 5194 zSign = aExp < 0;
bb98fe42 5195 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5196 for (i = 1LL << 51; i > 0; i >>= 1) {
5197 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5198 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
e9321124 5199 if ( aSig & UINT64_C(0x0020000000000000) ) {
374dfc33
AJ
5200 aSig >>= 1;
5201 zSig |= i;
5202 }
5203 }
5204
5205 if ( zSign )
5206 zSig = -zSig;
ff32e16e 5207 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5208}
5209
158142c2
FB
5210/*----------------------------------------------------------------------------
5211| Returns 1 if the double-precision floating-point value `a' is equal to the
b689362d
AJ
5212| corresponding value `b', and 0 otherwise. The invalid exception is raised
5213| if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
5214| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5215*----------------------------------------------------------------------------*/
5216
e5a41ffa 5217int float64_eq(float64 a, float64 b, float_status *status)
158142c2 5218{
bb98fe42 5219 uint64_t av, bv;
ff32e16e
PM
5220 a = float64_squash_input_denormal(a, status);
5221 b = float64_squash_input_denormal(b, status);
158142c2
FB
5222
5223 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5224 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5225 ) {
ff32e16e 5226 float_raise(float_flag_invalid, status);
158142c2
FB
5227 return 0;
5228 }
f090c9d4 5229 av = float64_val(a);
a1b91bb4 5230 bv = float64_val(b);
bb98fe42 5231 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
5232
5233}
5234
5235/*----------------------------------------------------------------------------
5236| Returns 1 if the double-precision floating-point value `a' is less than or
f5a64251
AJ
5237| equal to the corresponding value `b', and 0 otherwise. The invalid
5238| exception is raised if either operand is a NaN. The comparison is performed
5239| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5240*----------------------------------------------------------------------------*/
5241
e5a41ffa 5242int float64_le(float64 a, float64 b, float_status *status)
158142c2
FB
5243{
5244 flag aSign, bSign;
bb98fe42 5245 uint64_t av, bv;
ff32e16e
PM
5246 a = float64_squash_input_denormal(a, status);
5247 b = float64_squash_input_denormal(b, status);
158142c2
FB
5248
5249 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5250 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5251 ) {
ff32e16e 5252 float_raise(float_flag_invalid, status);
158142c2
FB
5253 return 0;
5254 }
5255 aSign = extractFloat64Sign( a );
5256 bSign = extractFloat64Sign( b );
f090c9d4 5257 av = float64_val(a);
a1b91bb4 5258 bv = float64_val(b);
bb98fe42 5259 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 5260 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
5261
5262}
5263
5264/*----------------------------------------------------------------------------
5265| Returns 1 if the double-precision floating-point value `a' is less than
f5a64251
AJ
5266| the corresponding value `b', and 0 otherwise. The invalid exception is
5267| raised if either operand is a NaN. The comparison is performed according
5268| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5269*----------------------------------------------------------------------------*/
5270
e5a41ffa 5271int float64_lt(float64 a, float64 b, float_status *status)
158142c2
FB
5272{
5273 flag aSign, bSign;
bb98fe42 5274 uint64_t av, bv;
158142c2 5275
ff32e16e
PM
5276 a = float64_squash_input_denormal(a, status);
5277 b = float64_squash_input_denormal(b, status);
158142c2
FB
5278 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5279 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5280 ) {
ff32e16e 5281 float_raise(float_flag_invalid, status);
158142c2
FB
5282 return 0;
5283 }
5284 aSign = extractFloat64Sign( a );
5285 bSign = extractFloat64Sign( b );
f090c9d4 5286 av = float64_val(a);
a1b91bb4 5287 bv = float64_val(b);
bb98fe42 5288 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 5289 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
5290
5291}
5292
67b7861d
AJ
5293/*----------------------------------------------------------------------------
5294| Returns 1 if the double-precision floating-point values `a' and `b' cannot
f5a64251
AJ
5295| be compared, and 0 otherwise. The invalid exception is raised if either
5296| operand is a NaN. The comparison is performed according to the IEC/IEEE
5297| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
5298*----------------------------------------------------------------------------*/
5299
e5a41ffa 5300int float64_unordered(float64 a, float64 b, float_status *status)
67b7861d 5301{
ff32e16e
PM
5302 a = float64_squash_input_denormal(a, status);
5303 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
5304
5305 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5306 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5307 ) {
ff32e16e 5308 float_raise(float_flag_invalid, status);
67b7861d
AJ
5309 return 1;
5310 }
5311 return 0;
5312}
5313
158142c2
FB
5314/*----------------------------------------------------------------------------
5315| Returns 1 if the double-precision floating-point value `a' is equal to the
f5a64251
AJ
5316| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
5317| exception.The comparison is performed according to the IEC/IEEE Standard
5318| for Binary Floating-Point Arithmetic.
158142c2
FB
5319*----------------------------------------------------------------------------*/
5320
e5a41ffa 5321int float64_eq_quiet(float64 a, float64 b, float_status *status)
158142c2 5322{
bb98fe42 5323 uint64_t av, bv;
ff32e16e
PM
5324 a = float64_squash_input_denormal(a, status);
5325 b = float64_squash_input_denormal(b, status);
158142c2
FB
5326
5327 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5328 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5329 ) {
af39bc8c
AM
5330 if (float64_is_signaling_nan(a, status)
5331 || float64_is_signaling_nan(b, status)) {
ff32e16e 5332 float_raise(float_flag_invalid, status);
b689362d 5333 }
158142c2
FB
5334 return 0;
5335 }
f090c9d4 5336 av = float64_val(a);
a1b91bb4 5337 bv = float64_val(b);
bb98fe42 5338 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
5339
5340}
5341
5342/*----------------------------------------------------------------------------
5343| Returns 1 if the double-precision floating-point value `a' is less than or
5344| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5345| cause an exception. Otherwise, the comparison is performed according to the
5346| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5347*----------------------------------------------------------------------------*/
5348
e5a41ffa 5349int float64_le_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
5350{
5351 flag aSign, bSign;
bb98fe42 5352 uint64_t av, bv;
ff32e16e
PM
5353 a = float64_squash_input_denormal(a, status);
5354 b = float64_squash_input_denormal(b, status);
158142c2
FB
5355
5356 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5357 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5358 ) {
af39bc8c
AM
5359 if (float64_is_signaling_nan(a, status)
5360 || float64_is_signaling_nan(b, status)) {
ff32e16e 5361 float_raise(float_flag_invalid, status);
158142c2
FB
5362 }
5363 return 0;
5364 }
5365 aSign = extractFloat64Sign( a );
5366 bSign = extractFloat64Sign( b );
f090c9d4 5367 av = float64_val(a);
a1b91bb4 5368 bv = float64_val(b);
bb98fe42 5369 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 5370 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
5371
5372}
5373
5374/*----------------------------------------------------------------------------
5375| Returns 1 if the double-precision floating-point value `a' is less than
5376| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
5377| exception. Otherwise, the comparison is performed according to the IEC/IEEE
5378| Standard for Binary Floating-Point Arithmetic.
5379*----------------------------------------------------------------------------*/
5380
e5a41ffa 5381int float64_lt_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
5382{
5383 flag aSign, bSign;
bb98fe42 5384 uint64_t av, bv;
ff32e16e
PM
5385 a = float64_squash_input_denormal(a, status);
5386 b = float64_squash_input_denormal(b, status);
158142c2
FB
5387
5388 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5389 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5390 ) {
af39bc8c
AM
5391 if (float64_is_signaling_nan(a, status)
5392 || float64_is_signaling_nan(b, status)) {
ff32e16e 5393 float_raise(float_flag_invalid, status);
158142c2
FB
5394 }
5395 return 0;
5396 }
5397 aSign = extractFloat64Sign( a );
5398 bSign = extractFloat64Sign( b );
f090c9d4 5399 av = float64_val(a);
a1b91bb4 5400 bv = float64_val(b);
bb98fe42 5401 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 5402 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
5403
5404}
5405
67b7861d
AJ
5406/*----------------------------------------------------------------------------
5407| Returns 1 if the double-precision floating-point values `a' and `b' cannot
5408| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
5409| comparison is performed according to the IEC/IEEE Standard for Binary
5410| Floating-Point Arithmetic.
5411*----------------------------------------------------------------------------*/
5412
e5a41ffa 5413int float64_unordered_quiet(float64 a, float64 b, float_status *status)
67b7861d 5414{
ff32e16e
PM
5415 a = float64_squash_input_denormal(a, status);
5416 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
5417
5418 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
5419 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
5420 ) {
af39bc8c
AM
5421 if (float64_is_signaling_nan(a, status)
5422 || float64_is_signaling_nan(b, status)) {
ff32e16e 5423 float_raise(float_flag_invalid, status);
67b7861d
AJ
5424 }
5425 return 1;
5426 }
5427 return 0;
5428}
5429
158142c2
FB
5430/*----------------------------------------------------------------------------
5431| Returns the result of converting the extended double-precision floating-
5432| point value `a' to the 32-bit two's complement integer format. The
5433| conversion is performed according to the IEC/IEEE Standard for Binary
5434| Floating-Point Arithmetic---which means in particular that the conversion
5435| is rounded according to the current rounding mode. If `a' is a NaN, the
5436| largest positive integer is returned. Otherwise, if the conversion
5437| overflows, the largest integer with the same sign as `a' is returned.
5438*----------------------------------------------------------------------------*/
5439
f4014512 5440int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2
FB
5441{
5442 flag aSign;
f4014512 5443 int32_t aExp, shiftCount;
bb98fe42 5444 uint64_t aSig;
158142c2 5445
d1eb8f2a
AD
5446 if (floatx80_invalid_encoding(a)) {
5447 float_raise(float_flag_invalid, status);
5448 return 1 << 31;
5449 }
158142c2
FB
5450 aSig = extractFloatx80Frac( a );
5451 aExp = extractFloatx80Exp( a );
5452 aSign = extractFloatx80Sign( a );
bb98fe42 5453 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5454 shiftCount = 0x4037 - aExp;
5455 if ( shiftCount <= 0 ) shiftCount = 1;
5456 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5457 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5458
5459}
5460
5461/*----------------------------------------------------------------------------
5462| Returns the result of converting the extended double-precision floating-
5463| point value `a' to the 32-bit two's complement integer format. The
5464| conversion is performed according to the IEC/IEEE Standard for Binary
5465| Floating-Point Arithmetic, except that the conversion is always rounded
5466| toward zero. If `a' is a NaN, the largest positive integer is returned.
5467| Otherwise, if the conversion overflows, the largest integer with the same
5468| sign as `a' is returned.
5469*----------------------------------------------------------------------------*/
5470
f4014512 5471int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
5472{
5473 flag aSign;
f4014512 5474 int32_t aExp, shiftCount;
bb98fe42 5475 uint64_t aSig, savedASig;
b3a6a2e0 5476 int32_t z;
158142c2 5477
d1eb8f2a
AD
5478 if (floatx80_invalid_encoding(a)) {
5479 float_raise(float_flag_invalid, status);
5480 return 1 << 31;
5481 }
158142c2
FB
5482 aSig = extractFloatx80Frac( a );
5483 aExp = extractFloatx80Exp( a );
5484 aSign = extractFloatx80Sign( a );
5485 if ( 0x401E < aExp ) {
bb98fe42 5486 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5487 goto invalid;
5488 }
5489 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5490 if (aExp || aSig) {
5491 status->float_exception_flags |= float_flag_inexact;
5492 }
158142c2
FB
5493 return 0;
5494 }
5495 shiftCount = 0x403E - aExp;
5496 savedASig = aSig;
5497 aSig >>= shiftCount;
5498 z = aSig;
5499 if ( aSign ) z = - z;
5500 if ( ( z < 0 ) ^ aSign ) {
5501 invalid:
ff32e16e 5502 float_raise(float_flag_invalid, status);
bb98fe42 5503 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5504 }
5505 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 5506 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5507 }
5508 return z;
5509
5510}
5511
5512/*----------------------------------------------------------------------------
5513| Returns the result of converting the extended double-precision floating-
5514| point value `a' to the 64-bit two's complement integer format. The
5515| conversion is performed according to the IEC/IEEE Standard for Binary
5516| Floating-Point Arithmetic---which means in particular that the conversion
5517| is rounded according to the current rounding mode. If `a' is a NaN,
5518| the largest positive integer is returned. Otherwise, if the conversion
5519| overflows, the largest integer with the same sign as `a' is returned.
5520*----------------------------------------------------------------------------*/
5521
f42c2224 5522int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2
FB
5523{
5524 flag aSign;
f4014512 5525 int32_t aExp, shiftCount;
bb98fe42 5526 uint64_t aSig, aSigExtra;
158142c2 5527
d1eb8f2a
AD
5528 if (floatx80_invalid_encoding(a)) {
5529 float_raise(float_flag_invalid, status);
5530 return 1ULL << 63;
5531 }
158142c2
FB
5532 aSig = extractFloatx80Frac( a );
5533 aExp = extractFloatx80Exp( a );
5534 aSign = extractFloatx80Sign( a );
5535 shiftCount = 0x403E - aExp;
5536 if ( shiftCount <= 0 ) {
5537 if ( shiftCount ) {
ff32e16e 5538 float_raise(float_flag_invalid, status);
0f605c88 5539 if (!aSign || floatx80_is_any_nan(a)) {
2c217da0 5540 return INT64_MAX;
158142c2 5541 }
2c217da0 5542 return INT64_MIN;
158142c2
FB
5543 }
5544 aSigExtra = 0;
5545 }
5546 else {
5547 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5548 }
ff32e16e 5549 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5550
5551}
5552
5553/*----------------------------------------------------------------------------
5554| Returns the result of converting the extended double-precision floating-
5555| point value `a' to the 64-bit two's complement integer format. The
5556| conversion is performed according to the IEC/IEEE Standard for Binary
5557| Floating-Point Arithmetic, except that the conversion is always rounded
5558| toward zero. If `a' is a NaN, the largest positive integer is returned.
5559| Otherwise, if the conversion overflows, the largest integer with the same
5560| sign as `a' is returned.
5561*----------------------------------------------------------------------------*/
5562
f42c2224 5563int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
5564{
5565 flag aSign;
f4014512 5566 int32_t aExp, shiftCount;
bb98fe42 5567 uint64_t aSig;
f42c2224 5568 int64_t z;
158142c2 5569
d1eb8f2a
AD
5570 if (floatx80_invalid_encoding(a)) {
5571 float_raise(float_flag_invalid, status);
5572 return 1ULL << 63;
5573 }
158142c2
FB
5574 aSig = extractFloatx80Frac( a );
5575 aExp = extractFloatx80Exp( a );
5576 aSign = extractFloatx80Sign( a );
5577 shiftCount = aExp - 0x403E;
5578 if ( 0 <= shiftCount ) {
e9321124 5579 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
158142c2 5580 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5581 float_raise(float_flag_invalid, status);
158142c2 5582 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
2c217da0 5583 return INT64_MAX;
158142c2
FB
5584 }
5585 }
2c217da0 5586 return INT64_MIN;
158142c2
FB
5587 }
5588 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5589 if (aExp | aSig) {
5590 status->float_exception_flags |= float_flag_inexact;
5591 }
158142c2
FB
5592 return 0;
5593 }
5594 z = aSig>>( - shiftCount );
bb98fe42 5595 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 5596 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5597 }
5598 if ( aSign ) z = - z;
5599 return z;
5600
5601}
5602
5603/*----------------------------------------------------------------------------
5604| Returns the result of converting the extended double-precision floating-
5605| point value `a' to the single-precision floating-point format. The
5606| conversion is performed according to the IEC/IEEE Standard for Binary
5607| Floating-Point Arithmetic.
5608*----------------------------------------------------------------------------*/
5609
e5a41ffa 5610float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2
FB
5611{
5612 flag aSign;
f4014512 5613 int32_t aExp;
bb98fe42 5614 uint64_t aSig;
158142c2 5615
d1eb8f2a
AD
5616 if (floatx80_invalid_encoding(a)) {
5617 float_raise(float_flag_invalid, status);
5618 return float32_default_nan(status);
5619 }
158142c2
FB
5620 aSig = extractFloatx80Frac( a );
5621 aExp = extractFloatx80Exp( a );
5622 aSign = extractFloatx80Sign( a );
5623 if ( aExp == 0x7FFF ) {
bb98fe42 5624 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5625 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5626 status);
5627 return float32_silence_nan(res, status);
158142c2
FB
5628 }
5629 return packFloat32( aSign, 0xFF, 0 );
5630 }
5631 shift64RightJamming( aSig, 33, &aSig );
5632 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5633 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5634
5635}
5636
5637/*----------------------------------------------------------------------------
5638| Returns the result of converting the extended double-precision floating-
5639| point value `a' to the double-precision floating-point format. The
5640| conversion is performed according to the IEC/IEEE Standard for Binary
5641| Floating-Point Arithmetic.
5642*----------------------------------------------------------------------------*/
5643
e5a41ffa 5644float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2
FB
5645{
5646 flag aSign;
f4014512 5647 int32_t aExp;
bb98fe42 5648 uint64_t aSig, zSig;
158142c2 5649
d1eb8f2a
AD
5650 if (floatx80_invalid_encoding(a)) {
5651 float_raise(float_flag_invalid, status);
5652 return float64_default_nan(status);
5653 }
158142c2
FB
5654 aSig = extractFloatx80Frac( a );
5655 aExp = extractFloatx80Exp( a );
5656 aSign = extractFloatx80Sign( a );
5657 if ( aExp == 0x7FFF ) {
bb98fe42 5658 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5659 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5660 status);
5661 return float64_silence_nan(res, status);
158142c2
FB
5662 }
5663 return packFloat64( aSign, 0x7FF, 0 );
5664 }
5665 shift64RightJamming( aSig, 1, &zSig );
5666 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5667 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5668
5669}
5670
158142c2
FB
5671/*----------------------------------------------------------------------------
5672| Returns the result of converting the extended double-precision floating-
5673| point value `a' to the quadruple-precision floating-point format. The
5674| conversion is performed according to the IEC/IEEE Standard for Binary
5675| Floating-Point Arithmetic.
5676*----------------------------------------------------------------------------*/
5677
e5a41ffa 5678float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2
FB
5679{
5680 flag aSign;
0c48262d 5681 int aExp;
bb98fe42 5682 uint64_t aSig, zSig0, zSig1;
158142c2 5683
d1eb8f2a
AD
5684 if (floatx80_invalid_encoding(a)) {
5685 float_raise(float_flag_invalid, status);
5686 return float128_default_nan(status);
5687 }
158142c2
FB
5688 aSig = extractFloatx80Frac( a );
5689 aExp = extractFloatx80Exp( a );
5690 aSign = extractFloatx80Sign( a );
bb98fe42 5691 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5692 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5693 status);
5694 return float128_silence_nan(res, status);
158142c2
FB
5695 }
5696 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5697 return packFloat128( aSign, aExp, zSig0, zSig1 );
5698
5699}
5700
0f721292
LV
5701/*----------------------------------------------------------------------------
5702| Rounds the extended double-precision floating-point value `a'
5703| to the precision provided by floatx80_rounding_precision and returns the
5704| result as an extended double-precision floating-point value.
5705| The operation is performed according to the IEC/IEEE Standard for Binary
5706| Floating-Point Arithmetic.
5707*----------------------------------------------------------------------------*/
5708
5709floatx80 floatx80_round(floatx80 a, float_status *status)
5710{
5711 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5712 extractFloatx80Sign(a),
5713 extractFloatx80Exp(a),
5714 extractFloatx80Frac(a), 0, status);
5715}
5716
158142c2
FB
5717/*----------------------------------------------------------------------------
5718| Rounds the extended double-precision floating-point value `a' to an integer,
5719| and returns the result as an extended double-precision floating-point
5720| value. The operation is performed according to the IEC/IEEE Standard for
5721| Binary Floating-Point Arithmetic.
5722*----------------------------------------------------------------------------*/
5723
e5a41ffa 5724floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2
FB
5725{
5726 flag aSign;
f4014512 5727 int32_t aExp;
bb98fe42 5728 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5729 floatx80 z;
5730
d1eb8f2a
AD
5731 if (floatx80_invalid_encoding(a)) {
5732 float_raise(float_flag_invalid, status);
5733 return floatx80_default_nan(status);
5734 }
158142c2
FB
5735 aExp = extractFloatx80Exp( a );
5736 if ( 0x403E <= aExp ) {
bb98fe42 5737 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5738 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5739 }
5740 return a;
5741 }
5742 if ( aExp < 0x3FFF ) {
5743 if ( ( aExp == 0 )
9ecaf5cc 5744 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
158142c2
FB
5745 return a;
5746 }
a2f2d288 5747 status->float_exception_flags |= float_flag_inexact;
158142c2 5748 aSign = extractFloatx80Sign( a );
a2f2d288 5749 switch (status->float_rounding_mode) {
158142c2 5750 case float_round_nearest_even:
bb98fe42 5751 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5752 ) {
5753 return
e9321124 5754 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5755 }
5756 break;
f9288a76
PM
5757 case float_round_ties_away:
5758 if (aExp == 0x3FFE) {
e9321124 5759 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
f9288a76
PM
5760 }
5761 break;
158142c2
FB
5762 case float_round_down:
5763 return
5764 aSign ?
e9321124 5765 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
158142c2
FB
5766 : packFloatx80( 0, 0, 0 );
5767 case float_round_up:
5768 return
5769 aSign ? packFloatx80( 1, 0, 0 )
e9321124 5770 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5771 }
5772 return packFloatx80( aSign, 0, 0 );
5773 }
5774 lastBitMask = 1;
5775 lastBitMask <<= 0x403E - aExp;
5776 roundBitsMask = lastBitMask - 1;
5777 z = a;
a2f2d288 5778 switch (status->float_rounding_mode) {
dc355b76 5779 case float_round_nearest_even:
158142c2 5780 z.low += lastBitMask>>1;
dc355b76
PM
5781 if ((z.low & roundBitsMask) == 0) {
5782 z.low &= ~lastBitMask;
5783 }
5784 break;
f9288a76
PM
5785 case float_round_ties_away:
5786 z.low += lastBitMask >> 1;
5787 break;
dc355b76
PM
5788 case float_round_to_zero:
5789 break;
5790 case float_round_up:
5791 if (!extractFloatx80Sign(z)) {
5792 z.low += roundBitsMask;
5793 }
5794 break;
5795 case float_round_down:
5796 if (extractFloatx80Sign(z)) {
158142c2
FB
5797 z.low += roundBitsMask;
5798 }
dc355b76
PM
5799 break;
5800 default:
5801 abort();
158142c2
FB
5802 }
5803 z.low &= ~ roundBitsMask;
5804 if ( z.low == 0 ) {
5805 ++z.high;
e9321124 5806 z.low = UINT64_C(0x8000000000000000);
158142c2 5807 }
a2f2d288
PM
5808 if (z.low != a.low) {
5809 status->float_exception_flags |= float_flag_inexact;
5810 }
158142c2
FB
5811 return z;
5812
5813}
5814
5815/*----------------------------------------------------------------------------
5816| Returns the result of adding the absolute values of the extended double-
5817| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5818| negated before being returned. `zSign' is ignored if the result is a NaN.
5819| The addition is performed according to the IEC/IEEE Standard for Binary
5820| Floating-Point Arithmetic.
5821*----------------------------------------------------------------------------*/
5822
e5a41ffa
PM
5823static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5824 float_status *status)
158142c2 5825{
f4014512 5826 int32_t aExp, bExp, zExp;
bb98fe42 5827 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5828 int32_t expDiff;
158142c2
FB
5829
5830 aSig = extractFloatx80Frac( a );
5831 aExp = extractFloatx80Exp( a );
5832 bSig = extractFloatx80Frac( b );
5833 bExp = extractFloatx80Exp( b );
5834 expDiff = aExp - bExp;
5835 if ( 0 < expDiff ) {
5836 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5837 if ((uint64_t)(aSig << 1)) {
5838 return propagateFloatx80NaN(a, b, status);
5839 }
158142c2
FB
5840 return a;
5841 }
5842 if ( bExp == 0 ) --expDiff;
5843 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5844 zExp = aExp;
5845 }
5846 else if ( expDiff < 0 ) {
5847 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5848 if ((uint64_t)(bSig << 1)) {
5849 return propagateFloatx80NaN(a, b, status);
5850 }
0f605c88
LV
5851 return packFloatx80(zSign,
5852 floatx80_infinity_high,
5853 floatx80_infinity_low);
158142c2
FB
5854 }
5855 if ( aExp == 0 ) ++expDiff;
5856 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5857 zExp = bExp;
5858 }
5859 else {
5860 if ( aExp == 0x7FFF ) {
bb98fe42 5861 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5862 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5863 }
5864 return a;
5865 }
5866 zSig1 = 0;
5867 zSig0 = aSig + bSig;
5868 if ( aExp == 0 ) {
41602807
JM
5869 if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
5870 /* At least one of the values is a pseudo-denormal,
5871 * and there is a carry out of the result. */
5872 zExp = 1;
5873 goto shiftRight1;
5874 }
2f311075
RH
5875 if (zSig0 == 0) {
5876 return packFloatx80(zSign, 0, 0);
5877 }
158142c2
FB
5878 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5879 goto roundAndPack;
5880 }
5881 zExp = aExp;
5882 goto shiftRight1;
5883 }
5884 zSig0 = aSig + bSig;
bb98fe42 5885 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5886 shiftRight1:
5887 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
e9321124 5888 zSig0 |= UINT64_C(0x8000000000000000);
158142c2
FB
5889 ++zExp;
5890 roundAndPack:
a2f2d288 5891 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5892 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5893}
5894
5895/*----------------------------------------------------------------------------
5896| Returns the result of subtracting the absolute values of the extended
5897| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5898| difference is negated before being returned. `zSign' is ignored if the
5899| result is a NaN. The subtraction is performed according to the IEC/IEEE
5900| Standard for Binary Floating-Point Arithmetic.
5901*----------------------------------------------------------------------------*/
5902
e5a41ffa
PM
5903static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5904 float_status *status)
158142c2 5905{
f4014512 5906 int32_t aExp, bExp, zExp;
bb98fe42 5907 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5908 int32_t expDiff;
158142c2
FB
5909
5910 aSig = extractFloatx80Frac( a );
5911 aExp = extractFloatx80Exp( a );
5912 bSig = extractFloatx80Frac( b );
5913 bExp = extractFloatx80Exp( b );
5914 expDiff = aExp - bExp;
5915 if ( 0 < expDiff ) goto aExpBigger;
5916 if ( expDiff < 0 ) goto bExpBigger;
5917 if ( aExp == 0x7FFF ) {
bb98fe42 5918 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5919 return propagateFloatx80NaN(a, b, status);
158142c2 5920 }
ff32e16e 5921 float_raise(float_flag_invalid, status);
af39bc8c 5922 return floatx80_default_nan(status);
158142c2
FB
5923 }
5924 if ( aExp == 0 ) {
5925 aExp = 1;
5926 bExp = 1;
5927 }
5928 zSig1 = 0;
5929 if ( bSig < aSig ) goto aBigger;
5930 if ( aSig < bSig ) goto bBigger;
a2f2d288 5931 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5932 bExpBigger:
5933 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5934 if ((uint64_t)(bSig << 1)) {
5935 return propagateFloatx80NaN(a, b, status);
5936 }
0f605c88
LV
5937 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5938 floatx80_infinity_low);
158142c2
FB
5939 }
5940 if ( aExp == 0 ) ++expDiff;
5941 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5942 bBigger:
5943 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5944 zExp = bExp;
5945 zSign ^= 1;
5946 goto normalizeRoundAndPack;
5947 aExpBigger:
5948 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5949 if ((uint64_t)(aSig << 1)) {
5950 return propagateFloatx80NaN(a, b, status);
5951 }
158142c2
FB
5952 return a;
5953 }
5954 if ( bExp == 0 ) --expDiff;
5955 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5956 aBigger:
5957 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5958 zExp = aExp;
5959 normalizeRoundAndPack:
a2f2d288 5960 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5961 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5962}
5963
5964/*----------------------------------------------------------------------------
5965| Returns the result of adding the extended double-precision floating-point
5966| values `a' and `b'. The operation is performed according to the IEC/IEEE
5967| Standard for Binary Floating-Point Arithmetic.
5968*----------------------------------------------------------------------------*/
5969
e5a41ffa 5970floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5971{
5972 flag aSign, bSign;
5973
d1eb8f2a
AD
5974 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5975 float_raise(float_flag_invalid, status);
5976 return floatx80_default_nan(status);
5977 }
158142c2
FB
5978 aSign = extractFloatx80Sign( a );
5979 bSign = extractFloatx80Sign( b );
5980 if ( aSign == bSign ) {
ff32e16e 5981 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5982 }
5983 else {
ff32e16e 5984 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5985 }
5986
5987}
5988
5989/*----------------------------------------------------------------------------
5990| Returns the result of subtracting the extended double-precision floating-
5991| point values `a' and `b'. The operation is performed according to the
5992| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5993*----------------------------------------------------------------------------*/
5994
e5a41ffa 5995floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5996{
5997 flag aSign, bSign;
5998
d1eb8f2a
AD
5999 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6000 float_raise(float_flag_invalid, status);
6001 return floatx80_default_nan(status);
6002 }
158142c2
FB
6003 aSign = extractFloatx80Sign( a );
6004 bSign = extractFloatx80Sign( b );
6005 if ( aSign == bSign ) {
ff32e16e 6006 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6007 }
6008 else {
ff32e16e 6009 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
6010 }
6011
6012}
6013
6014/*----------------------------------------------------------------------------
6015| Returns the result of multiplying the extended double-precision floating-
6016| point values `a' and `b'. The operation is performed according to the
6017| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6018*----------------------------------------------------------------------------*/
6019
e5a41ffa 6020floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6021{
6022 flag aSign, bSign, zSign;
f4014512 6023 int32_t aExp, bExp, zExp;
bb98fe42 6024 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 6025
d1eb8f2a
AD
6026 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6027 float_raise(float_flag_invalid, status);
6028 return floatx80_default_nan(status);
6029 }
158142c2
FB
6030 aSig = extractFloatx80Frac( a );
6031 aExp = extractFloatx80Exp( a );
6032 aSign = extractFloatx80Sign( a );
6033 bSig = extractFloatx80Frac( b );
6034 bExp = extractFloatx80Exp( b );
6035 bSign = extractFloatx80Sign( b );
6036 zSign = aSign ^ bSign;
6037 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6038 if ( (uint64_t) ( aSig<<1 )
6039 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6040 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6041 }
6042 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
6043 return packFloatx80(zSign, floatx80_infinity_high,
6044 floatx80_infinity_low);
158142c2
FB
6045 }
6046 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6047 if ((uint64_t)(bSig << 1)) {
6048 return propagateFloatx80NaN(a, b, status);
6049 }
158142c2
FB
6050 if ( ( aExp | aSig ) == 0 ) {
6051 invalid:
ff32e16e 6052 float_raise(float_flag_invalid, status);
af39bc8c 6053 return floatx80_default_nan(status);
158142c2 6054 }
0f605c88
LV
6055 return packFloatx80(zSign, floatx80_infinity_high,
6056 floatx80_infinity_low);
158142c2
FB
6057 }
6058 if ( aExp == 0 ) {
6059 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6060 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6061 }
6062 if ( bExp == 0 ) {
6063 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6064 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6065 }
6066 zExp = aExp + bExp - 0x3FFE;
6067 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 6068 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
6069 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6070 --zExp;
6071 }
a2f2d288 6072 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6073 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6074}
6075
6076/*----------------------------------------------------------------------------
6077| Returns the result of dividing the extended double-precision floating-point
6078| value `a' by the corresponding value `b'. The operation is performed
6079| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6080*----------------------------------------------------------------------------*/
6081
e5a41ffa 6082floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6083{
6084 flag aSign, bSign, zSign;
f4014512 6085 int32_t aExp, bExp, zExp;
bb98fe42
AF
6086 uint64_t aSig, bSig, zSig0, zSig1;
6087 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6088
d1eb8f2a
AD
6089 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6090 float_raise(float_flag_invalid, status);
6091 return floatx80_default_nan(status);
6092 }
158142c2
FB
6093 aSig = extractFloatx80Frac( a );
6094 aExp = extractFloatx80Exp( a );
6095 aSign = extractFloatx80Sign( a );
6096 bSig = extractFloatx80Frac( b );
6097 bExp = extractFloatx80Exp( b );
6098 bSign = extractFloatx80Sign( b );
6099 zSign = aSign ^ bSign;
6100 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6101 if ((uint64_t)(aSig << 1)) {
6102 return propagateFloatx80NaN(a, b, status);
6103 }
158142c2 6104 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6105 if ((uint64_t)(bSig << 1)) {
6106 return propagateFloatx80NaN(a, b, status);
6107 }
158142c2
FB
6108 goto invalid;
6109 }
0f605c88
LV
6110 return packFloatx80(zSign, floatx80_infinity_high,
6111 floatx80_infinity_low);
158142c2
FB
6112 }
6113 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6114 if ((uint64_t)(bSig << 1)) {
6115 return propagateFloatx80NaN(a, b, status);
6116 }
158142c2
FB
6117 return packFloatx80( zSign, 0, 0 );
6118 }
6119 if ( bExp == 0 ) {
6120 if ( bSig == 0 ) {
6121 if ( ( aExp | aSig ) == 0 ) {
6122 invalid:
ff32e16e 6123 float_raise(float_flag_invalid, status);
af39bc8c 6124 return floatx80_default_nan(status);
158142c2 6125 }
ff32e16e 6126 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6127 return packFloatx80(zSign, floatx80_infinity_high,
6128 floatx80_infinity_low);
158142c2
FB
6129 }
6130 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6131 }
6132 if ( aExp == 0 ) {
6133 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6134 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6135 }
6136 zExp = aExp - bExp + 0x3FFE;
6137 rem1 = 0;
6138 if ( bSig <= aSig ) {
6139 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6140 ++zExp;
6141 }
6142 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6143 mul64To128( bSig, zSig0, &term0, &term1 );
6144 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 6145 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6146 --zSig0;
6147 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6148 }
6149 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 6150 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6151 mul64To128( bSig, zSig1, &term1, &term2 );
6152 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6153 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6154 --zSig1;
6155 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6156 }
6157 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6158 }
a2f2d288 6159 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6160 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6161}
6162
6163/*----------------------------------------------------------------------------
6164| Returns the remainder of the extended double-precision floating-point value
6165| `a' with respect to the corresponding value `b'. The operation is performed
6166| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6167*----------------------------------------------------------------------------*/
6168
e5a41ffa 6169floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
158142c2 6170{
ed086f3d 6171 flag aSign, zSign;
f4014512 6172 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6173 uint64_t aSig0, aSig1, bSig;
6174 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6175
d1eb8f2a
AD
6176 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6177 float_raise(float_flag_invalid, status);
6178 return floatx80_default_nan(status);
6179 }
158142c2
FB
6180 aSig0 = extractFloatx80Frac( a );
6181 aExp = extractFloatx80Exp( a );
6182 aSign = extractFloatx80Sign( a );
6183 bSig = extractFloatx80Frac( b );
6184 bExp = extractFloatx80Exp( b );
158142c2 6185 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6186 if ( (uint64_t) ( aSig0<<1 )
6187 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6188 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6189 }
6190 goto invalid;
6191 }
6192 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6193 if ((uint64_t)(bSig << 1)) {
6194 return propagateFloatx80NaN(a, b, status);
6195 }
158142c2
FB
6196 return a;
6197 }
6198 if ( bExp == 0 ) {
6199 if ( bSig == 0 ) {
6200 invalid:
ff32e16e 6201 float_raise(float_flag_invalid, status);
af39bc8c 6202 return floatx80_default_nan(status);
158142c2
FB
6203 }
6204 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6205 }
6206 if ( aExp == 0 ) {
bb98fe42 6207 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
158142c2
FB
6208 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6209 }
e9321124 6210 bSig |= UINT64_C(0x8000000000000000);
158142c2
FB
6211 zSign = aSign;
6212 expDiff = aExp - bExp;
6213 aSig1 = 0;
6214 if ( expDiff < 0 ) {
6215 if ( expDiff < -1 ) return a;
6216 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6217 expDiff = 0;
6218 }
6219 q = ( bSig <= aSig0 );
6220 if ( q ) aSig0 -= bSig;
6221 expDiff -= 64;
6222 while ( 0 < expDiff ) {
6223 q = estimateDiv128To64( aSig0, aSig1, bSig );
6224 q = ( 2 < q ) ? q - 2 : 0;
6225 mul64To128( bSig, q, &term0, &term1 );
6226 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6227 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6228 expDiff -= 62;
6229 }
6230 expDiff += 64;
6231 if ( 0 < expDiff ) {
6232 q = estimateDiv128To64( aSig0, aSig1, bSig );
6233 q = ( 2 < q ) ? q - 2 : 0;
6234 q >>= 64 - expDiff;
6235 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6236 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6237 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6238 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6239 ++q;
6240 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6241 }
6242 }
6243 else {
6244 term1 = 0;
6245 term0 = bSig;
6246 }
6247 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6248 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6249 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6250 && ( q & 1 ) )
6251 ) {
6252 aSig0 = alternateASig0;
6253 aSig1 = alternateASig1;
6254 zSign = ! zSign;
6255 }
6256 return
6257 normalizeRoundAndPackFloatx80(
ff32e16e 6258 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6259
6260}
6261
6262/*----------------------------------------------------------------------------
6263| Returns the square root of the extended double-precision floating-point
6264| value `a'. The operation is performed according to the IEC/IEEE Standard
6265| for Binary Floating-Point Arithmetic.
6266*----------------------------------------------------------------------------*/
6267
e5a41ffa 6268floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2
FB
6269{
6270 flag aSign;
f4014512 6271 int32_t aExp, zExp;
bb98fe42
AF
6272 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6273 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6274
d1eb8f2a
AD
6275 if (floatx80_invalid_encoding(a)) {
6276 float_raise(float_flag_invalid, status);
6277 return floatx80_default_nan(status);
6278 }
158142c2
FB
6279 aSig0 = extractFloatx80Frac( a );
6280 aExp = extractFloatx80Exp( a );
6281 aSign = extractFloatx80Sign( a );
6282 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6283 if ((uint64_t)(aSig0 << 1)) {
6284 return propagateFloatx80NaN(a, a, status);
6285 }
158142c2
FB
6286 if ( ! aSign ) return a;
6287 goto invalid;
6288 }
6289 if ( aSign ) {
6290 if ( ( aExp | aSig0 ) == 0 ) return a;
6291 invalid:
ff32e16e 6292 float_raise(float_flag_invalid, status);
af39bc8c 6293 return floatx80_default_nan(status);
158142c2
FB
6294 }
6295 if ( aExp == 0 ) {
6296 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6297 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6298 }
6299 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6300 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6301 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6302 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6303 doubleZSig0 = zSig0<<1;
6304 mul64To128( zSig0, zSig0, &term0, &term1 );
6305 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6306 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6307 --zSig0;
6308 doubleZSig0 -= 2;
6309 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6310 }
6311 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
e9321124 6312 if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
158142c2
FB
6313 if ( zSig1 == 0 ) zSig1 = 1;
6314 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6315 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6316 mul64To128( zSig1, zSig1, &term2, &term3 );
6317 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6318 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6319 --zSig1;
6320 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6321 term3 |= 1;
6322 term2 |= doubleZSig0;
6323 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6324 }
6325 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6326 }
6327 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6328 zSig0 |= doubleZSig0;
a2f2d288
PM
6329 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6330 0, zExp, zSig0, zSig1, status);
158142c2
FB
6331}
6332
6333/*----------------------------------------------------------------------------
b689362d
AJ
6334| Returns 1 if the extended double-precision floating-point value `a' is equal
6335| to the corresponding value `b', and 0 otherwise. The invalid exception is
6336| raised if either operand is a NaN. Otherwise, the comparison is performed
6337| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6338*----------------------------------------------------------------------------*/
6339
e5a41ffa 6340int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6341{
6342
d1eb8f2a
AD
6343 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6344 || (extractFloatx80Exp(a) == 0x7FFF
6345 && (uint64_t) (extractFloatx80Frac(a) << 1))
6346 || (extractFloatx80Exp(b) == 0x7FFF
6347 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6348 ) {
ff32e16e 6349 float_raise(float_flag_invalid, status);
158142c2
FB
6350 return 0;
6351 }
6352 return
6353 ( a.low == b.low )
6354 && ( ( a.high == b.high )
6355 || ( ( a.low == 0 )
bb98fe42 6356 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6357 );
6358
6359}
6360
6361/*----------------------------------------------------------------------------
6362| Returns 1 if the extended double-precision floating-point value `a' is
6363| less than or equal to the corresponding value `b', and 0 otherwise. The
f5a64251
AJ
6364| invalid exception is raised if either operand is a NaN. The comparison is
6365| performed according to the IEC/IEEE Standard for Binary Floating-Point
6366| Arithmetic.
158142c2
FB
6367*----------------------------------------------------------------------------*/
6368
e5a41ffa 6369int floatx80_le(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6370{
6371 flag aSign, bSign;
6372
d1eb8f2a
AD
6373 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6374 || (extractFloatx80Exp(a) == 0x7FFF
6375 && (uint64_t) (extractFloatx80Frac(a) << 1))
6376 || (extractFloatx80Exp(b) == 0x7FFF
6377 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6378 ) {
ff32e16e 6379 float_raise(float_flag_invalid, status);
158142c2
FB
6380 return 0;
6381 }
6382 aSign = extractFloatx80Sign( a );
6383 bSign = extractFloatx80Sign( b );
6384 if ( aSign != bSign ) {
6385 return
6386 aSign
bb98fe42 6387 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6388 == 0 );
6389 }
6390 return
6391 aSign ? le128( b.high, b.low, a.high, a.low )
6392 : le128( a.high, a.low, b.high, b.low );
6393
6394}
6395
6396/*----------------------------------------------------------------------------
6397| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
6398| less than the corresponding value `b', and 0 otherwise. The invalid
6399| exception is raised if either operand is a NaN. The comparison is performed
6400| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6401*----------------------------------------------------------------------------*/
6402
e5a41ffa 6403int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6404{
6405 flag aSign, bSign;
6406
d1eb8f2a
AD
6407 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6408 || (extractFloatx80Exp(a) == 0x7FFF
6409 && (uint64_t) (extractFloatx80Frac(a) << 1))
6410 || (extractFloatx80Exp(b) == 0x7FFF
6411 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 6412 ) {
ff32e16e 6413 float_raise(float_flag_invalid, status);
158142c2
FB
6414 return 0;
6415 }
6416 aSign = extractFloatx80Sign( a );
6417 bSign = extractFloatx80Sign( b );
6418 if ( aSign != bSign ) {
6419 return
6420 aSign
bb98fe42 6421 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6422 != 0 );
6423 }
6424 return
6425 aSign ? lt128( b.high, b.low, a.high, a.low )
6426 : lt128( a.high, a.low, b.high, b.low );
6427
6428}
6429
67b7861d
AJ
6430/*----------------------------------------------------------------------------
6431| Returns 1 if the extended double-precision floating-point values `a' and `b'
f5a64251
AJ
6432| cannot be compared, and 0 otherwise. The invalid exception is raised if
6433| either operand is a NaN. The comparison is performed according to the
6434| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
67b7861d 6435*----------------------------------------------------------------------------*/
e5a41ffa 6436int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
67b7861d 6437{
d1eb8f2a
AD
6438 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
6439 || (extractFloatx80Exp(a) == 0x7FFF
6440 && (uint64_t) (extractFloatx80Frac(a) << 1))
6441 || (extractFloatx80Exp(b) == 0x7FFF
6442 && (uint64_t) (extractFloatx80Frac(b) << 1))
67b7861d 6443 ) {
ff32e16e 6444 float_raise(float_flag_invalid, status);
67b7861d
AJ
6445 return 1;
6446 }
6447 return 0;
6448}
6449
158142c2 6450/*----------------------------------------------------------------------------
b689362d 6451| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
6452| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6453| cause an exception. The comparison is performed according to the IEC/IEEE
6454| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6455*----------------------------------------------------------------------------*/
6456
e5a41ffa 6457int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6458{
6459
d1eb8f2a
AD
6460 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6461 float_raise(float_flag_invalid, status);
6462 return 0;
6463 }
158142c2 6464 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6465 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6466 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6467 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6468 ) {
af39bc8c
AM
6469 if (floatx80_is_signaling_nan(a, status)
6470 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6471 float_raise(float_flag_invalid, status);
b689362d 6472 }
158142c2
FB
6473 return 0;
6474 }
6475 return
6476 ( a.low == b.low )
6477 && ( ( a.high == b.high )
6478 || ( ( a.low == 0 )
bb98fe42 6479 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6480 );
6481
6482}
6483
6484/*----------------------------------------------------------------------------
6485| Returns 1 if the extended double-precision floating-point value `a' is less
6486| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
6487| do not cause an exception. Otherwise, the comparison is performed according
6488| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6489*----------------------------------------------------------------------------*/
6490
e5a41ffa 6491int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6492{
6493 flag aSign, bSign;
6494
d1eb8f2a
AD
6495 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6496 float_raise(float_flag_invalid, status);
6497 return 0;
6498 }
158142c2 6499 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6500 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6501 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6502 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6503 ) {
af39bc8c
AM
6504 if (floatx80_is_signaling_nan(a, status)
6505 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6506 float_raise(float_flag_invalid, status);
158142c2
FB
6507 }
6508 return 0;
6509 }
6510 aSign = extractFloatx80Sign( a );
6511 bSign = extractFloatx80Sign( b );
6512 if ( aSign != bSign ) {
6513 return
6514 aSign
bb98fe42 6515 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6516 == 0 );
6517 }
6518 return
6519 aSign ? le128( b.high, b.low, a.high, a.low )
6520 : le128( a.high, a.low, b.high, b.low );
6521
6522}
6523
6524/*----------------------------------------------------------------------------
6525| Returns 1 if the extended double-precision floating-point value `a' is less
6526| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
6527| an exception. Otherwise, the comparison is performed according to the
6528| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6529*----------------------------------------------------------------------------*/
6530
e5a41ffa 6531int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
6532{
6533 flag aSign, bSign;
6534
d1eb8f2a
AD
6535 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6536 float_raise(float_flag_invalid, status);
6537 return 0;
6538 }
158142c2 6539 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 6540 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 6541 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 6542 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 6543 ) {
af39bc8c
AM
6544 if (floatx80_is_signaling_nan(a, status)
6545 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6546 float_raise(float_flag_invalid, status);
158142c2
FB
6547 }
6548 return 0;
6549 }
6550 aSign = extractFloatx80Sign( a );
6551 bSign = extractFloatx80Sign( b );
6552 if ( aSign != bSign ) {
6553 return
6554 aSign
bb98fe42 6555 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6556 != 0 );
6557 }
6558 return
6559 aSign ? lt128( b.high, b.low, a.high, a.low )
6560 : lt128( a.high, a.low, b.high, b.low );
6561
6562}
6563
67b7861d
AJ
6564/*----------------------------------------------------------------------------
6565| Returns 1 if the extended double-precision floating-point values `a' and `b'
6566| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
6567| The comparison is performed according to the IEC/IEEE Standard for Binary
6568| Floating-Point Arithmetic.
6569*----------------------------------------------------------------------------*/
e5a41ffa 6570int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
67b7861d 6571{
d1eb8f2a
AD
6572 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6573 float_raise(float_flag_invalid, status);
6574 return 1;
6575 }
67b7861d
AJ
6576 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
6577 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
6578 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
6579 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
6580 ) {
af39bc8c
AM
6581 if (floatx80_is_signaling_nan(a, status)
6582 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 6583 float_raise(float_flag_invalid, status);
67b7861d
AJ
6584 }
6585 return 1;
6586 }
6587 return 0;
6588}
6589
158142c2
FB
6590/*----------------------------------------------------------------------------
6591| Returns the result of converting the quadruple-precision floating-point
6592| value `a' to the 32-bit two's complement integer format. The conversion
6593| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6594| Arithmetic---which means in particular that the conversion is rounded
6595| according to the current rounding mode. If `a' is a NaN, the largest
6596| positive integer is returned. Otherwise, if the conversion overflows, the
6597| largest integer with the same sign as `a' is returned.
6598*----------------------------------------------------------------------------*/
6599
f4014512 6600int32_t float128_to_int32(float128 a, float_status *status)
158142c2
FB
6601{
6602 flag aSign;
f4014512 6603 int32_t aExp, shiftCount;
bb98fe42 6604 uint64_t aSig0, aSig1;
158142c2
FB
6605
6606 aSig1 = extractFloat128Frac1( a );
6607 aSig0 = extractFloat128Frac0( a );
6608 aExp = extractFloat128Exp( a );
6609 aSign = extractFloat128Sign( a );
6610 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
e9321124 6611 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6612 aSig0 |= ( aSig1 != 0 );
6613 shiftCount = 0x4028 - aExp;
6614 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 6615 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
6616
6617}
6618
6619/*----------------------------------------------------------------------------
6620| Returns the result of converting the quadruple-precision floating-point
6621| value `a' to the 32-bit two's complement integer format. The conversion
6622| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6623| Arithmetic, except that the conversion is always rounded toward zero. If
6624| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
6625| conversion overflows, the largest integer with the same sign as `a' is
6626| returned.
6627*----------------------------------------------------------------------------*/
6628
f4014512 6629int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2
FB
6630{
6631 flag aSign;
f4014512 6632 int32_t aExp, shiftCount;
bb98fe42 6633 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 6634 int32_t z;
158142c2
FB
6635
6636 aSig1 = extractFloat128Frac1( a );
6637 aSig0 = extractFloat128Frac0( a );
6638 aExp = extractFloat128Exp( a );
6639 aSign = extractFloat128Sign( a );
6640 aSig0 |= ( aSig1 != 0 );
6641 if ( 0x401E < aExp ) {
6642 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6643 goto invalid;
6644 }
6645 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
6646 if (aExp || aSig0) {
6647 status->float_exception_flags |= float_flag_inexact;
6648 }
158142c2
FB
6649 return 0;
6650 }
e9321124 6651 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6652 shiftCount = 0x402F - aExp;
6653 savedASig = aSig0;
6654 aSig0 >>= shiftCount;
6655 z = aSig0;
6656 if ( aSign ) z = - z;
6657 if ( ( z < 0 ) ^ aSign ) {
6658 invalid:
ff32e16e 6659 float_raise(float_flag_invalid, status);
2c217da0 6660 return aSign ? INT32_MIN : INT32_MAX;
158142c2
FB
6661 }
6662 if ( ( aSig0<<shiftCount ) != savedASig ) {
a2f2d288 6663 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6664 }
6665 return z;
6666
6667}
6668
6669/*----------------------------------------------------------------------------
6670| Returns the result of converting the quadruple-precision floating-point
6671| value `a' to the 64-bit two's complement integer format. The conversion
6672| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6673| Arithmetic---which means in particular that the conversion is rounded
6674| according to the current rounding mode. If `a' is a NaN, the largest
6675| positive integer is returned. Otherwise, if the conversion overflows, the
6676| largest integer with the same sign as `a' is returned.
6677*----------------------------------------------------------------------------*/
6678
f42c2224 6679int64_t float128_to_int64(float128 a, float_status *status)
158142c2
FB
6680{
6681 flag aSign;
f4014512 6682 int32_t aExp, shiftCount;
bb98fe42 6683 uint64_t aSig0, aSig1;
158142c2
FB
6684
6685 aSig1 = extractFloat128Frac1( a );
6686 aSig0 = extractFloat128Frac0( a );
6687 aExp = extractFloat128Exp( a );
6688 aSign = extractFloat128Sign( a );
e9321124 6689 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6690 shiftCount = 0x402F - aExp;
6691 if ( shiftCount <= 0 ) {
6692 if ( 0x403E < aExp ) {
ff32e16e 6693 float_raise(float_flag_invalid, status);
158142c2
FB
6694 if ( ! aSign
6695 || ( ( aExp == 0x7FFF )
e9321124 6696 && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
158142c2
FB
6697 )
6698 ) {
2c217da0 6699 return INT64_MAX;
158142c2 6700 }
2c217da0 6701 return INT64_MIN;
158142c2
FB
6702 }
6703 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6704 }
6705 else {
6706 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6707 }
ff32e16e 6708 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
6709
6710}
6711
6712/*----------------------------------------------------------------------------
6713| Returns the result of converting the quadruple-precision floating-point
6714| value `a' to the 64-bit two's complement integer format. The conversion
6715| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6716| Arithmetic, except that the conversion is always rounded toward zero.
6717| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
6718| the conversion overflows, the largest integer with the same sign as `a' is
6719| returned.
6720*----------------------------------------------------------------------------*/
6721
f42c2224 6722int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2
FB
6723{
6724 flag aSign;
f4014512 6725 int32_t aExp, shiftCount;
bb98fe42 6726 uint64_t aSig0, aSig1;
f42c2224 6727 int64_t z;
158142c2
FB
6728
6729 aSig1 = extractFloat128Frac1( a );
6730 aSig0 = extractFloat128Frac0( a );
6731 aExp = extractFloat128Exp( a );
6732 aSign = extractFloat128Sign( a );
e9321124 6733 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6734 shiftCount = aExp - 0x402F;
6735 if ( 0 < shiftCount ) {
6736 if ( 0x403E <= aExp ) {
e9321124
AB
6737 aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
6738 if ( ( a.high == UINT64_C(0xC03E000000000000) )
6739 && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
a2f2d288
PM
6740 if (aSig1) {
6741 status->float_exception_flags |= float_flag_inexact;
6742 }
158142c2
FB
6743 }
6744 else {
ff32e16e 6745 float_raise(float_flag_invalid, status);
158142c2 6746 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
2c217da0 6747 return INT64_MAX;
158142c2
FB
6748 }
6749 }
2c217da0 6750 return INT64_MIN;
158142c2
FB
6751 }
6752 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 6753 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
a2f2d288 6754 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6755 }
6756 }
6757 else {
6758 if ( aExp < 0x3FFF ) {
6759 if ( aExp | aSig0 | aSig1 ) {
a2f2d288 6760 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6761 }
6762 return 0;
6763 }
6764 z = aSig0>>( - shiftCount );
6765 if ( aSig1
bb98fe42 6766 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
a2f2d288 6767 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
6768 }
6769 }
6770 if ( aSign ) z = - z;
6771 return z;
6772
6773}
6774
2e6d8568
BR
6775/*----------------------------------------------------------------------------
6776| Returns the result of converting the quadruple-precision floating-point value
6777| `a' to the 64-bit unsigned integer format. The conversion is
6778| performed according to the IEC/IEEE Standard for Binary Floating-Point
6779| Arithmetic---which means in particular that the conversion is rounded
6780| according to the current rounding mode. If `a' is a NaN, the largest
6781| positive integer is returned. If the conversion overflows, the
6782| largest unsigned integer is returned. If 'a' is negative, the value is
6783| rounded and zero is returned; negative values that do not round to zero
6784| will raise the inexact exception.
6785*----------------------------------------------------------------------------*/
6786
6787uint64_t float128_to_uint64(float128 a, float_status *status)
6788{
6789 flag aSign;
6790 int aExp;
6791 int shiftCount;
6792 uint64_t aSig0, aSig1;
6793
6794 aSig0 = extractFloat128Frac0(a);
6795 aSig1 = extractFloat128Frac1(a);
6796 aExp = extractFloat128Exp(a);
6797 aSign = extractFloat128Sign(a);
6798 if (aSign && (aExp > 0x3FFE)) {
6799 float_raise(float_flag_invalid, status);
6800 if (float128_is_any_nan(a)) {
2c217da0 6801 return UINT64_MAX;
2e6d8568
BR
6802 } else {
6803 return 0;
6804 }
6805 }
6806 if (aExp) {
2c217da0 6807 aSig0 |= UINT64_C(0x0001000000000000);
2e6d8568
BR
6808 }
6809 shiftCount = 0x402F - aExp;
6810 if (shiftCount <= 0) {
6811 if (0x403E < aExp) {
6812 float_raise(float_flag_invalid, status);
2c217da0 6813 return UINT64_MAX;
2e6d8568
BR
6814 }
6815 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6816 } else {
6817 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6818 }
6819 return roundAndPackUint64(aSign, aSig0, aSig1, status);
6820}
6821
6822uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6823{
6824 uint64_t v;
6825 signed char current_rounding_mode = status->float_rounding_mode;
6826
6827 set_float_rounding_mode(float_round_to_zero, status);
6828 v = float128_to_uint64(a, status);
6829 set_float_rounding_mode(current_rounding_mode, status);
6830
6831 return v;
6832}
6833
158142c2
FB
6834/*----------------------------------------------------------------------------
6835| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
6836| value `a' to the 32-bit unsigned integer format. The conversion
6837| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6838| Arithmetic except that the conversion is always rounded toward zero.
6839| If `a' is a NaN, the largest positive integer is returned. Otherwise,
6840| if the conversion overflows, the largest unsigned integer is returned.
6841| If 'a' is negative, the value is rounded and zero is returned; negative
6842| values that do not round to zero will raise the inexact exception.
6843*----------------------------------------------------------------------------*/
6844
6845uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6846{
6847 uint64_t v;
6848 uint32_t res;
6849 int old_exc_flags = get_float_exception_flags(status);
6850
6851 v = float128_to_uint64_round_to_zero(a, status);
6852 if (v > 0xffffffff) {
6853 res = 0xffffffff;
6854 } else {
6855 return v;
6856 }
6857 set_float_exception_flags(old_exc_flags, status);
e45de992
DH
6858 float_raise(float_flag_invalid, status);
6859 return res;
6860}
6861
6862/*----------------------------------------------------------------------------
6863| Returns the result of converting the quadruple-precision floating-point value
6864| `a' to the 32-bit unsigned integer format. The conversion is
6865| performed according to the IEC/IEEE Standard for Binary Floating-Point
6866| Arithmetic---which means in particular that the conversion is rounded
6867| according to the current rounding mode. If `a' is a NaN, the largest
6868| positive integer is returned. If the conversion overflows, the
6869| largest unsigned integer is returned. If 'a' is negative, the value is
6870| rounded and zero is returned; negative values that do not round to zero
6871| will raise the inexact exception.
6872*----------------------------------------------------------------------------*/
6873
6874uint32_t float128_to_uint32(float128 a, float_status *status)
6875{
6876 uint64_t v;
6877 uint32_t res;
6878 int old_exc_flags = get_float_exception_flags(status);
6879
6880 v = float128_to_uint64(a, status);
6881 if (v > 0xffffffff) {
6882 res = 0xffffffff;
6883 } else {
6884 return v;
6885 }
6886 set_float_exception_flags(old_exc_flags, status);
fd425037
BR
6887 float_raise(float_flag_invalid, status);
6888 return res;
6889}
6890
6891/*----------------------------------------------------------------------------
6892| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
6893| value `a' to the single-precision floating-point format. The conversion
6894| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6895| Arithmetic.
6896*----------------------------------------------------------------------------*/
6897
e5a41ffa 6898float32 float128_to_float32(float128 a, float_status *status)
158142c2
FB
6899{
6900 flag aSign;
f4014512 6901 int32_t aExp;
bb98fe42
AF
6902 uint64_t aSig0, aSig1;
6903 uint32_t zSig;
158142c2
FB
6904
6905 aSig1 = extractFloat128Frac1( a );
6906 aSig0 = extractFloat128Frac0( a );
6907 aExp = extractFloat128Exp( a );
6908 aSign = extractFloat128Sign( a );
6909 if ( aExp == 0x7FFF ) {
6910 if ( aSig0 | aSig1 ) {
ff32e16e 6911 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
6912 }
6913 return packFloat32( aSign, 0xFF, 0 );
6914 }
6915 aSig0 |= ( aSig1 != 0 );
6916 shift64RightJamming( aSig0, 18, &aSig0 );
6917 zSig = aSig0;
6918 if ( aExp || zSig ) {
6919 zSig |= 0x40000000;
6920 aExp -= 0x3F81;
6921 }
ff32e16e 6922 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
6923
6924}
6925
6926/*----------------------------------------------------------------------------
6927| Returns the result of converting the quadruple-precision floating-point
6928| value `a' to the double-precision floating-point format. The conversion
6929| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6930| Arithmetic.
6931*----------------------------------------------------------------------------*/
6932
e5a41ffa 6933float64 float128_to_float64(float128 a, float_status *status)
158142c2
FB
6934{
6935 flag aSign;
f4014512 6936 int32_t aExp;
bb98fe42 6937 uint64_t aSig0, aSig1;
158142c2
FB
6938
6939 aSig1 = extractFloat128Frac1( a );
6940 aSig0 = extractFloat128Frac0( a );
6941 aExp = extractFloat128Exp( a );
6942 aSign = extractFloat128Sign( a );
6943 if ( aExp == 0x7FFF ) {
6944 if ( aSig0 | aSig1 ) {
ff32e16e 6945 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
6946 }
6947 return packFloat64( aSign, 0x7FF, 0 );
6948 }
6949 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6950 aSig0 |= ( aSig1 != 0 );
6951 if ( aExp || aSig0 ) {
e9321124 6952 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
6953 aExp -= 0x3C01;
6954 }
ff32e16e 6955 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
6956
6957}
6958
158142c2
FB
6959/*----------------------------------------------------------------------------
6960| Returns the result of converting the quadruple-precision floating-point
6961| value `a' to the extended double-precision floating-point format. The
6962| conversion is performed according to the IEC/IEEE Standard for Binary
6963| Floating-Point Arithmetic.
6964*----------------------------------------------------------------------------*/
6965
e5a41ffa 6966floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2
FB
6967{
6968 flag aSign;
f4014512 6969 int32_t aExp;
bb98fe42 6970 uint64_t aSig0, aSig1;
158142c2
FB
6971
6972 aSig1 = extractFloat128Frac1( a );
6973 aSig0 = extractFloat128Frac0( a );
6974 aExp = extractFloat128Exp( a );
6975 aSign = extractFloat128Sign( a );
6976 if ( aExp == 0x7FFF ) {
6977 if ( aSig0 | aSig1 ) {
7537c2b4
JM
6978 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6979 status);
6980 return floatx80_silence_nan(res, status);
158142c2 6981 }
0f605c88
LV
6982 return packFloatx80(aSign, floatx80_infinity_high,
6983 floatx80_infinity_low);
158142c2
FB
6984 }
6985 if ( aExp == 0 ) {
6986 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6987 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6988 }
6989 else {
e9321124 6990 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6991 }
6992 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6993 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6994
6995}
6996
158142c2
FB
6997/*----------------------------------------------------------------------------
6998| Rounds the quadruple-precision floating-point value `a' to an integer, and
6999| returns the result as a quadruple-precision floating-point value. The
7000| operation is performed according to the IEC/IEEE Standard for Binary
7001| Floating-Point Arithmetic.
7002*----------------------------------------------------------------------------*/
7003
e5a41ffa 7004float128 float128_round_to_int(float128 a, float_status *status)
158142c2
FB
7005{
7006 flag aSign;
f4014512 7007 int32_t aExp;
bb98fe42 7008 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
7009 float128 z;
7010
7011 aExp = extractFloat128Exp( a );
7012 if ( 0x402F <= aExp ) {
7013 if ( 0x406F <= aExp ) {
7014 if ( ( aExp == 0x7FFF )
7015 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
7016 ) {
ff32e16e 7017 return propagateFloat128NaN(a, a, status);
158142c2
FB
7018 }
7019 return a;
7020 }
7021 lastBitMask = 1;
7022 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
7023 roundBitsMask = lastBitMask - 1;
7024 z = a;
a2f2d288 7025 switch (status->float_rounding_mode) {
dc355b76 7026 case float_round_nearest_even:
158142c2
FB
7027 if ( lastBitMask ) {
7028 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
7029 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
7030 }
7031 else {
bb98fe42 7032 if ( (int64_t) z.low < 0 ) {
158142c2 7033 ++z.high;
bb98fe42 7034 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
7035 }
7036 }
dc355b76 7037 break;
f9288a76
PM
7038 case float_round_ties_away:
7039 if (lastBitMask) {
7040 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
7041 } else {
7042 if ((int64_t) z.low < 0) {
7043 ++z.high;
7044 }
7045 }
7046 break;
dc355b76
PM
7047 case float_round_to_zero:
7048 break;
7049 case float_round_up:
7050 if (!extractFloat128Sign(z)) {
7051 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7052 }
7053 break;
7054 case float_round_down:
7055 if (extractFloat128Sign(z)) {
7056 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 7057 }
dc355b76 7058 break;
5d64abb3
RH
7059 case float_round_to_odd:
7060 /*
7061 * Note that if lastBitMask == 0, the last bit is the lsb
7062 * of high, and roundBitsMask == -1.
7063 */
7064 if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
7065 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
7066 }
7067 break;
dc355b76
PM
7068 default:
7069 abort();
158142c2
FB
7070 }
7071 z.low &= ~ roundBitsMask;
7072 }
7073 else {
7074 if ( aExp < 0x3FFF ) {
bb98fe42 7075 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
a2f2d288 7076 status->float_exception_flags |= float_flag_inexact;
158142c2 7077 aSign = extractFloat128Sign( a );
a2f2d288 7078 switch (status->float_rounding_mode) {
5d64abb3 7079 case float_round_nearest_even:
158142c2
FB
7080 if ( ( aExp == 0x3FFE )
7081 && ( extractFloat128Frac0( a )
7082 | extractFloat128Frac1( a ) )
7083 ) {
7084 return packFloat128( aSign, 0x3FFF, 0, 0 );
7085 }
7086 break;
f9288a76
PM
7087 case float_round_ties_away:
7088 if (aExp == 0x3FFE) {
7089 return packFloat128(aSign, 0x3FFF, 0, 0);
7090 }
7091 break;
5d64abb3 7092 case float_round_down:
158142c2
FB
7093 return
7094 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
7095 : packFloat128( 0, 0, 0, 0 );
5d64abb3 7096 case float_round_up:
158142c2
FB
7097 return
7098 aSign ? packFloat128( 1, 0, 0, 0 )
7099 : packFloat128( 0, 0x3FFF, 0, 0 );
5d64abb3
RH
7100
7101 case float_round_to_odd:
7102 return packFloat128(aSign, 0x3FFF, 0, 0);
158142c2
FB
7103 }
7104 return packFloat128( aSign, 0, 0, 0 );
7105 }
7106 lastBitMask = 1;
7107 lastBitMask <<= 0x402F - aExp;
7108 roundBitsMask = lastBitMask - 1;
7109 z.low = 0;
7110 z.high = a.high;
a2f2d288 7111 switch (status->float_rounding_mode) {
dc355b76 7112 case float_round_nearest_even:
158142c2
FB
7113 z.high += lastBitMask>>1;
7114 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
7115 z.high &= ~ lastBitMask;
7116 }
dc355b76 7117 break;
f9288a76
PM
7118 case float_round_ties_away:
7119 z.high += lastBitMask>>1;
7120 break;
dc355b76
PM
7121 case float_round_to_zero:
7122 break;
7123 case float_round_up:
7124 if (!extractFloat128Sign(z)) {
158142c2
FB
7125 z.high |= ( a.low != 0 );
7126 z.high += roundBitsMask;
7127 }
dc355b76
PM
7128 break;
7129 case float_round_down:
7130 if (extractFloat128Sign(z)) {
7131 z.high |= (a.low != 0);
7132 z.high += roundBitsMask;
7133 }
7134 break;
5d64abb3
RH
7135 case float_round_to_odd:
7136 if ((z.high & lastBitMask) == 0) {
7137 z.high |= (a.low != 0);
7138 z.high += roundBitsMask;
7139 }
7140 break;
dc355b76
PM
7141 default:
7142 abort();
158142c2
FB
7143 }
7144 z.high &= ~ roundBitsMask;
7145 }
7146 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
a2f2d288 7147 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
7148 }
7149 return z;
7150
7151}
7152
7153/*----------------------------------------------------------------------------
7154| Returns the result of adding the absolute values of the quadruple-precision
7155| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
7156| before being returned. `zSign' is ignored if the result is a NaN.
7157| The addition is performed according to the IEC/IEEE Standard for Binary
7158| Floating-Point Arithmetic.
7159*----------------------------------------------------------------------------*/
7160
e5a41ffa
PM
7161static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
7162 float_status *status)
158142c2 7163{
f4014512 7164 int32_t aExp, bExp, zExp;
bb98fe42 7165 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 7166 int32_t expDiff;
158142c2
FB
7167
7168 aSig1 = extractFloat128Frac1( a );
7169 aSig0 = extractFloat128Frac0( a );
7170 aExp = extractFloat128Exp( a );
7171 bSig1 = extractFloat128Frac1( b );
7172 bSig0 = extractFloat128Frac0( b );
7173 bExp = extractFloat128Exp( b );
7174 expDiff = aExp - bExp;
7175 if ( 0 < expDiff ) {
7176 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7177 if (aSig0 | aSig1) {
7178 return propagateFloat128NaN(a, b, status);
7179 }
158142c2
FB
7180 return a;
7181 }
7182 if ( bExp == 0 ) {
7183 --expDiff;
7184 }
7185 else {
e9321124 7186 bSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7187 }
7188 shift128ExtraRightJamming(
7189 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
7190 zExp = aExp;
7191 }
7192 else if ( expDiff < 0 ) {
7193 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7194 if (bSig0 | bSig1) {
7195 return propagateFloat128NaN(a, b, status);
7196 }
158142c2
FB
7197 return packFloat128( zSign, 0x7FFF, 0, 0 );
7198 }
7199 if ( aExp == 0 ) {
7200 ++expDiff;
7201 }
7202 else {
e9321124 7203 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7204 }
7205 shift128ExtraRightJamming(
7206 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
7207 zExp = bExp;
7208 }
7209 else {
7210 if ( aExp == 0x7FFF ) {
7211 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 7212 return propagateFloat128NaN(a, b, status);
158142c2
FB
7213 }
7214 return a;
7215 }
7216 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 7217 if ( aExp == 0 ) {
a2f2d288 7218 if (status->flush_to_zero) {
e6afc87f 7219 if (zSig0 | zSig1) {
ff32e16e 7220 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
7221 }
7222 return packFloat128(zSign, 0, 0, 0);
7223 }
fe76d976
PB
7224 return packFloat128( zSign, 0, zSig0, zSig1 );
7225 }
158142c2 7226 zSig2 = 0;
e9321124 7227 zSig0 |= UINT64_C(0x0002000000000000);
158142c2
FB
7228 zExp = aExp;
7229 goto shiftRight1;
7230 }
e9321124 7231 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7232 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7233 --zExp;
e9321124 7234 if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack;
158142c2
FB
7235 ++zExp;
7236 shiftRight1:
7237 shift128ExtraRightJamming(
7238 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7239 roundAndPack:
ff32e16e 7240 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7241
7242}
7243
7244/*----------------------------------------------------------------------------
7245| Returns the result of subtracting the absolute values of the quadruple-
7246| precision floating-point values `a' and `b'. If `zSign' is 1, the
7247| difference is negated before being returned. `zSign' is ignored if the
7248| result is a NaN. The subtraction is performed according to the IEC/IEEE
7249| Standard for Binary Floating-Point Arithmetic.
7250*----------------------------------------------------------------------------*/
7251
e5a41ffa
PM
7252static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
7253 float_status *status)
158142c2 7254{
f4014512 7255 int32_t aExp, bExp, zExp;
bb98fe42 7256 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 7257 int32_t expDiff;
158142c2
FB
7258
7259 aSig1 = extractFloat128Frac1( a );
7260 aSig0 = extractFloat128Frac0( a );
7261 aExp = extractFloat128Exp( a );
7262 bSig1 = extractFloat128Frac1( b );
7263 bSig0 = extractFloat128Frac0( b );
7264 bExp = extractFloat128Exp( b );
7265 expDiff = aExp - bExp;
7266 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
7267 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
7268 if ( 0 < expDiff ) goto aExpBigger;
7269 if ( expDiff < 0 ) goto bExpBigger;
7270 if ( aExp == 0x7FFF ) {
7271 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 7272 return propagateFloat128NaN(a, b, status);
158142c2 7273 }
ff32e16e 7274 float_raise(float_flag_invalid, status);
af39bc8c 7275 return float128_default_nan(status);
158142c2
FB
7276 }
7277 if ( aExp == 0 ) {
7278 aExp = 1;
7279 bExp = 1;
7280 }
7281 if ( bSig0 < aSig0 ) goto aBigger;
7282 if ( aSig0 < bSig0 ) goto bBigger;
7283 if ( bSig1 < aSig1 ) goto aBigger;
7284 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
7285 return packFloat128(status->float_rounding_mode == float_round_down,
7286 0, 0, 0);
158142c2
FB
7287 bExpBigger:
7288 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7289 if (bSig0 | bSig1) {
7290 return propagateFloat128NaN(a, b, status);
7291 }
158142c2
FB
7292 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
7293 }
7294 if ( aExp == 0 ) {
7295 ++expDiff;
7296 }
7297 else {
e9321124 7298 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7299 }
7300 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
e9321124 7301 bSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7302 bBigger:
7303 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
7304 zExp = bExp;
7305 zSign ^= 1;
7306 goto normalizeRoundAndPack;
7307 aExpBigger:
7308 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7309 if (aSig0 | aSig1) {
7310 return propagateFloat128NaN(a, b, status);
7311 }
158142c2
FB
7312 return a;
7313 }
7314 if ( bExp == 0 ) {
7315 --expDiff;
7316 }
7317 else {
e9321124 7318 bSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7319 }
7320 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
e9321124 7321 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7322 aBigger:
7323 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7324 zExp = aExp;
7325 normalizeRoundAndPack:
7326 --zExp;
ff32e16e
PM
7327 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
7328 status);
158142c2
FB
7329
7330}
7331
7332/*----------------------------------------------------------------------------
7333| Returns the result of adding the quadruple-precision floating-point values
7334| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
7335| for Binary Floating-Point Arithmetic.
7336*----------------------------------------------------------------------------*/
7337
e5a41ffa 7338float128 float128_add(float128 a, float128 b, float_status *status)
158142c2
FB
7339{
7340 flag aSign, bSign;
7341
7342 aSign = extractFloat128Sign( a );
7343 bSign = extractFloat128Sign( b );
7344 if ( aSign == bSign ) {
ff32e16e 7345 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7346 }
7347 else {
ff32e16e 7348 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7349 }
7350
7351}
7352
7353/*----------------------------------------------------------------------------
7354| Returns the result of subtracting the quadruple-precision floating-point
7355| values `a' and `b'. The operation is performed according to the IEC/IEEE
7356| Standard for Binary Floating-Point Arithmetic.
7357*----------------------------------------------------------------------------*/
7358
e5a41ffa 7359float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2
FB
7360{
7361 flag aSign, bSign;
7362
7363 aSign = extractFloat128Sign( a );
7364 bSign = extractFloat128Sign( b );
7365 if ( aSign == bSign ) {
ff32e16e 7366 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7367 }
7368 else {
ff32e16e 7369 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7370 }
7371
7372}
7373
7374/*----------------------------------------------------------------------------
7375| Returns the result of multiplying the quadruple-precision floating-point
7376| values `a' and `b'. The operation is performed according to the IEC/IEEE
7377| Standard for Binary Floating-Point Arithmetic.
7378*----------------------------------------------------------------------------*/
7379
e5a41ffa 7380float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2
FB
7381{
7382 flag aSign, bSign, zSign;
f4014512 7383 int32_t aExp, bExp, zExp;
bb98fe42 7384 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
7385
7386 aSig1 = extractFloat128Frac1( a );
7387 aSig0 = extractFloat128Frac0( a );
7388 aExp = extractFloat128Exp( a );
7389 aSign = extractFloat128Sign( a );
7390 bSig1 = extractFloat128Frac1( b );
7391 bSig0 = extractFloat128Frac0( b );
7392 bExp = extractFloat128Exp( b );
7393 bSign = extractFloat128Sign( b );
7394 zSign = aSign ^ bSign;
7395 if ( aExp == 0x7FFF ) {
7396 if ( ( aSig0 | aSig1 )
7397 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7398 return propagateFloat128NaN(a, b, status);
158142c2
FB
7399 }
7400 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
7401 return packFloat128( zSign, 0x7FFF, 0, 0 );
7402 }
7403 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7404 if (bSig0 | bSig1) {
7405 return propagateFloat128NaN(a, b, status);
7406 }
158142c2
FB
7407 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7408 invalid:
ff32e16e 7409 float_raise(float_flag_invalid, status);
af39bc8c 7410 return float128_default_nan(status);
158142c2
FB
7411 }
7412 return packFloat128( zSign, 0x7FFF, 0, 0 );
7413 }
7414 if ( aExp == 0 ) {
7415 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7416 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7417 }
7418 if ( bExp == 0 ) {
7419 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7420 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7421 }
7422 zExp = aExp + bExp - 0x4000;
e9321124 7423 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7424 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
7425 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
7426 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
7427 zSig2 |= ( zSig3 != 0 );
e9321124 7428 if (UINT64_C( 0x0002000000000000) <= zSig0 ) {
158142c2
FB
7429 shift128ExtraRightJamming(
7430 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7431 ++zExp;
7432 }
ff32e16e 7433 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7434
7435}
7436
7437/*----------------------------------------------------------------------------
7438| Returns the result of dividing the quadruple-precision floating-point value
7439| `a' by the corresponding value `b'. The operation is performed according to
7440| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7441*----------------------------------------------------------------------------*/
7442
e5a41ffa 7443float128 float128_div(float128 a, float128 b, float_status *status)
158142c2
FB
7444{
7445 flag aSign, bSign, zSign;
f4014512 7446 int32_t aExp, bExp, zExp;
bb98fe42
AF
7447 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
7448 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7449
7450 aSig1 = extractFloat128Frac1( a );
7451 aSig0 = extractFloat128Frac0( a );
7452 aExp = extractFloat128Exp( a );
7453 aSign = extractFloat128Sign( a );
7454 bSig1 = extractFloat128Frac1( b );
7455 bSig0 = extractFloat128Frac0( b );
7456 bExp = extractFloat128Exp( b );
7457 bSign = extractFloat128Sign( b );
7458 zSign = aSign ^ bSign;
7459 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7460 if (aSig0 | aSig1) {
7461 return propagateFloat128NaN(a, b, status);
7462 }
158142c2 7463 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7464 if (bSig0 | bSig1) {
7465 return propagateFloat128NaN(a, b, status);
7466 }
158142c2
FB
7467 goto invalid;
7468 }
7469 return packFloat128( zSign, 0x7FFF, 0, 0 );
7470 }
7471 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7472 if (bSig0 | bSig1) {
7473 return propagateFloat128NaN(a, b, status);
7474 }
158142c2
FB
7475 return packFloat128( zSign, 0, 0, 0 );
7476 }
7477 if ( bExp == 0 ) {
7478 if ( ( bSig0 | bSig1 ) == 0 ) {
7479 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7480 invalid:
ff32e16e 7481 float_raise(float_flag_invalid, status);
af39bc8c 7482 return float128_default_nan(status);
158142c2 7483 }
ff32e16e 7484 float_raise(float_flag_divbyzero, status);
158142c2
FB
7485 return packFloat128( zSign, 0x7FFF, 0, 0 );
7486 }
7487 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7488 }
7489 if ( aExp == 0 ) {
7490 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7491 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7492 }
7493 zExp = aExp - bExp + 0x3FFD;
7494 shortShift128Left(
e9321124 7495 aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 );
158142c2 7496 shortShift128Left(
e9321124 7497 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
7498 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
7499 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
7500 ++zExp;
7501 }
7502 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
7503 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
7504 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 7505 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7506 --zSig0;
7507 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
7508 }
7509 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
7510 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
7511 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
7512 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7513 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7514 --zSig1;
7515 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
7516 }
7517 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7518 }
7519 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 7520 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7521
7522}
7523
7524/*----------------------------------------------------------------------------
7525| Returns the remainder of the quadruple-precision floating-point value `a'
7526| with respect to the corresponding value `b'. The operation is performed
7527| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7528*----------------------------------------------------------------------------*/
7529
e5a41ffa 7530float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 7531{
ed086f3d 7532 flag aSign, zSign;
f4014512 7533 int32_t aExp, bExp, expDiff;
bb98fe42
AF
7534 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7535 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
7536 int64_t sigMean0;
158142c2
FB
7537
7538 aSig1 = extractFloat128Frac1( a );
7539 aSig0 = extractFloat128Frac0( a );
7540 aExp = extractFloat128Exp( a );
7541 aSign = extractFloat128Sign( a );
7542 bSig1 = extractFloat128Frac1( b );
7543 bSig0 = extractFloat128Frac0( b );
7544 bExp = extractFloat128Exp( b );
158142c2
FB
7545 if ( aExp == 0x7FFF ) {
7546 if ( ( aSig0 | aSig1 )
7547 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7548 return propagateFloat128NaN(a, b, status);
158142c2
FB
7549 }
7550 goto invalid;
7551 }
7552 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7553 if (bSig0 | bSig1) {
7554 return propagateFloat128NaN(a, b, status);
7555 }
158142c2
FB
7556 return a;
7557 }
7558 if ( bExp == 0 ) {
7559 if ( ( bSig0 | bSig1 ) == 0 ) {
7560 invalid:
ff32e16e 7561 float_raise(float_flag_invalid, status);
af39bc8c 7562 return float128_default_nan(status);
158142c2
FB
7563 }
7564 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7565 }
7566 if ( aExp == 0 ) {
7567 if ( ( aSig0 | aSig1 ) == 0 ) return a;
7568 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7569 }
7570 expDiff = aExp - bExp;
7571 if ( expDiff < -1 ) return a;
7572 shortShift128Left(
e9321124 7573 aSig0 | UINT64_C(0x0001000000000000),
158142c2
FB
7574 aSig1,
7575 15 - ( expDiff < 0 ),
7576 &aSig0,
7577 &aSig1
7578 );
7579 shortShift128Left(
e9321124 7580 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
7581 q = le128( bSig0, bSig1, aSig0, aSig1 );
7582 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7583 expDiff -= 64;
7584 while ( 0 < expDiff ) {
7585 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7586 q = ( 4 < q ) ? q - 4 : 0;
7587 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7588 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
7589 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
7590 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
7591 expDiff -= 61;
7592 }
7593 if ( -64 < expDiff ) {
7594 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7595 q = ( 4 < q ) ? q - 4 : 0;
7596 q >>= - expDiff;
7597 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7598 expDiff += 52;
7599 if ( expDiff < 0 ) {
7600 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7601 }
7602 else {
7603 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7604 }
7605 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7606 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7607 }
7608 else {
7609 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
7610 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7611 }
7612 do {
7613 alternateASig0 = aSig0;
7614 alternateASig1 = aSig1;
7615 ++q;
7616 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 7617 } while ( 0 <= (int64_t) aSig0 );
158142c2 7618 add128(
bb98fe42 7619 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
7620 if ( ( sigMean0 < 0 )
7621 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
7622 aSig0 = alternateASig0;
7623 aSig1 = alternateASig1;
7624 }
bb98fe42 7625 zSign = ( (int64_t) aSig0 < 0 );
158142c2 7626 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
7627 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
7628 status);
158142c2
FB
7629}
7630
7631/*----------------------------------------------------------------------------
7632| Returns the square root of the quadruple-precision floating-point value `a'.
7633| The operation is performed according to the IEC/IEEE Standard for Binary
7634| Floating-Point Arithmetic.
7635*----------------------------------------------------------------------------*/
7636
e5a41ffa 7637float128 float128_sqrt(float128 a, float_status *status)
158142c2
FB
7638{
7639 flag aSign;
f4014512 7640 int32_t aExp, zExp;
bb98fe42
AF
7641 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7642 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7643
7644 aSig1 = extractFloat128Frac1( a );
7645 aSig0 = extractFloat128Frac0( a );
7646 aExp = extractFloat128Exp( a );
7647 aSign = extractFloat128Sign( a );
7648 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7649 if (aSig0 | aSig1) {
7650 return propagateFloat128NaN(a, a, status);
7651 }
158142c2
FB
7652 if ( ! aSign ) return a;
7653 goto invalid;
7654 }
7655 if ( aSign ) {
7656 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
7657 invalid:
ff32e16e 7658 float_raise(float_flag_invalid, status);
af39bc8c 7659 return float128_default_nan(status);
158142c2
FB
7660 }
7661 if ( aExp == 0 ) {
7662 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
7663 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7664 }
7665 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
e9321124 7666 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7667 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
7668 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
7669 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
7670 doubleZSig0 = zSig0<<1;
7671 mul64To128( zSig0, zSig0, &term0, &term1 );
7672 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 7673 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7674 --zSig0;
7675 doubleZSig0 -= 2;
7676 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
7677 }
7678 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
7679 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7680 if ( zSig1 == 0 ) zSig1 = 1;
7681 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7682 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7683 mul64To128( zSig1, zSig1, &term2, &term3 );
7684 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7685 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7686 --zSig1;
7687 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7688 term3 |= 1;
7689 term2 |= doubleZSig0;
7690 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7691 }
7692 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7693 }
7694 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 7695 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7696
7697}
7698
7699/*----------------------------------------------------------------------------
7700| Returns 1 if the quadruple-precision floating-point value `a' is equal to
b689362d
AJ
7701| the corresponding value `b', and 0 otherwise. The invalid exception is
7702| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
7703| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7704*----------------------------------------------------------------------------*/
7705
e5a41ffa 7706int float128_eq(float128 a, float128 b, float_status *status)
158142c2
FB
7707{
7708
7709 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7710 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7711 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7712 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7713 ) {
ff32e16e 7714 float_raise(float_flag_invalid, status);
158142c2
FB
7715 return 0;
7716 }
7717 return
7718 ( a.low == b.low )
7719 && ( ( a.high == b.high )
7720 || ( ( a.low == 0 )
bb98fe42 7721 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
7722 );
7723
7724}
7725
7726/*----------------------------------------------------------------------------
7727| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
7728| or equal to the corresponding value `b', and 0 otherwise. The invalid
7729| exception is raised if either operand is a NaN. The comparison is performed
7730| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
7731*----------------------------------------------------------------------------*/
7732
e5a41ffa 7733int float128_le(float128 a, float128 b, float_status *status)
158142c2
FB
7734{
7735 flag aSign, bSign;
7736
7737 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7738 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7739 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7740 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7741 ) {
ff32e16e 7742 float_raise(float_flag_invalid, status);
158142c2
FB
7743 return 0;
7744 }
7745 aSign = extractFloat128Sign( a );
7746 bSign = extractFloat128Sign( b );
7747 if ( aSign != bSign ) {
7748 return
7749 aSign
bb98fe42 7750 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7751 == 0 );
7752 }
7753 return
7754 aSign ? le128( b.high, b.low, a.high, a.low )
7755 : le128( a.high, a.low, b.high, b.low );
7756
7757}
7758
7759/*----------------------------------------------------------------------------
7760| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
7761| the corresponding value `b', and 0 otherwise. The invalid exception is
7762| raised if either operand is a NaN. The comparison is performed according
7763| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
7764*----------------------------------------------------------------------------*/
7765
e5a41ffa 7766int float128_lt(float128 a, float128 b, float_status *status)
158142c2
FB
7767{
7768 flag aSign, bSign;
7769
7770 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7771 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7772 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7773 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7774 ) {
ff32e16e 7775 float_raise(float_flag_invalid, status);
158142c2
FB
7776 return 0;
7777 }
7778 aSign = extractFloat128Sign( a );
7779 bSign = extractFloat128Sign( b );
7780 if ( aSign != bSign ) {
7781 return
7782 aSign
bb98fe42 7783 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7784 != 0 );
7785 }
7786 return
7787 aSign ? lt128( b.high, b.low, a.high, a.low )
7788 : lt128( a.high, a.low, b.high, b.low );
7789
7790}
7791
67b7861d
AJ
7792/*----------------------------------------------------------------------------
7793| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
f5a64251
AJ
7794| be compared, and 0 otherwise. The invalid exception is raised if either
7795| operand is a NaN. The comparison is performed according to the IEC/IEEE
7796| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
7797*----------------------------------------------------------------------------*/
7798
e5a41ffa 7799int float128_unordered(float128 a, float128 b, float_status *status)
67b7861d
AJ
7800{
7801 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7802 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7803 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7804 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7805 ) {
ff32e16e 7806 float_raise(float_flag_invalid, status);
67b7861d
AJ
7807 return 1;
7808 }
7809 return 0;
7810}
7811
158142c2
FB
7812/*----------------------------------------------------------------------------
7813| Returns 1 if the quadruple-precision floating-point value `a' is equal to
f5a64251
AJ
7814| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
7815| exception. The comparison is performed according to the IEC/IEEE Standard
7816| for Binary Floating-Point Arithmetic.
158142c2
FB
7817*----------------------------------------------------------------------------*/
7818
e5a41ffa 7819int float128_eq_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7820{
7821
7822 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7823 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7824 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7825 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7826 ) {
af39bc8c
AM
7827 if (float128_is_signaling_nan(a, status)
7828 || float128_is_signaling_nan(b, status)) {
ff32e16e 7829 float_raise(float_flag_invalid, status);
b689362d 7830 }
158142c2
FB
7831 return 0;
7832 }
7833 return
7834 ( a.low == b.low )
7835 && ( ( a.high == b.high )
7836 || ( ( a.low == 0 )
bb98fe42 7837 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
7838 );
7839
7840}
7841
7842/*----------------------------------------------------------------------------
7843| Returns 1 if the quadruple-precision floating-point value `a' is less than
7844| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
7845| cause an exception. Otherwise, the comparison is performed according to the
7846| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7847*----------------------------------------------------------------------------*/
7848
e5a41ffa 7849int float128_le_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7850{
7851 flag aSign, bSign;
7852
7853 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7854 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7855 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7856 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7857 ) {
af39bc8c
AM
7858 if (float128_is_signaling_nan(a, status)
7859 || float128_is_signaling_nan(b, status)) {
ff32e16e 7860 float_raise(float_flag_invalid, status);
158142c2
FB
7861 }
7862 return 0;
7863 }
7864 aSign = extractFloat128Sign( a );
7865 bSign = extractFloat128Sign( b );
7866 if ( aSign != bSign ) {
7867 return
7868 aSign
bb98fe42 7869 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7870 == 0 );
7871 }
7872 return
7873 aSign ? le128( b.high, b.low, a.high, a.low )
7874 : le128( a.high, a.low, b.high, b.low );
7875
7876}
7877
7878/*----------------------------------------------------------------------------
7879| Returns 1 if the quadruple-precision floating-point value `a' is less than
7880| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
7881| exception. Otherwise, the comparison is performed according to the IEC/IEEE
7882| Standard for Binary Floating-Point Arithmetic.
7883*----------------------------------------------------------------------------*/
7884
e5a41ffa 7885int float128_lt_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
7886{
7887 flag aSign, bSign;
7888
7889 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7890 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7891 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7892 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7893 ) {
af39bc8c
AM
7894 if (float128_is_signaling_nan(a, status)
7895 || float128_is_signaling_nan(b, status)) {
ff32e16e 7896 float_raise(float_flag_invalid, status);
158142c2
FB
7897 }
7898 return 0;
7899 }
7900 aSign = extractFloat128Sign( a );
7901 bSign = extractFloat128Sign( b );
7902 if ( aSign != bSign ) {
7903 return
7904 aSign
bb98fe42 7905 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
7906 != 0 );
7907 }
7908 return
7909 aSign ? lt128( b.high, b.low, a.high, a.low )
7910 : lt128( a.high, a.low, b.high, b.low );
7911
7912}
7913
67b7861d
AJ
7914/*----------------------------------------------------------------------------
7915| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
7916| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
7917| comparison is performed according to the IEC/IEEE Standard for Binary
7918| Floating-Point Arithmetic.
7919*----------------------------------------------------------------------------*/
7920
e5a41ffa 7921int float128_unordered_quiet(float128 a, float128 b, float_status *status)
67b7861d
AJ
7922{
7923 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7924 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7925 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7926 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7927 ) {
af39bc8c
AM
7928 if (float128_is_signaling_nan(a, status)
7929 || float128_is_signaling_nan(b, status)) {
ff32e16e 7930 float_raise(float_flag_invalid, status);
67b7861d
AJ
7931 }
7932 return 1;
7933 }
7934 return 0;
7935}
7936
e5a41ffa
PM
7937static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
7938 int is_quiet, float_status *status)
f6714d36
AJ
7939{
7940 flag aSign, bSign;
7941
d1eb8f2a
AD
7942 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7943 float_raise(float_flag_invalid, status);
7944 return float_relation_unordered;
7945 }
f6714d36
AJ
7946 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7947 ( extractFloatx80Frac( a )<<1 ) ) ||
7948 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7949 ( extractFloatx80Frac( b )<<1 ) )) {
7950 if (!is_quiet ||
af39bc8c
AM
7951 floatx80_is_signaling_nan(a, status) ||
7952 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7953 float_raise(float_flag_invalid, status);
f6714d36
AJ
7954 }
7955 return float_relation_unordered;
7956 }
7957 aSign = extractFloatx80Sign( a );
7958 bSign = extractFloatx80Sign( b );
7959 if ( aSign != bSign ) {
7960
7961 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7962 ( ( a.low | b.low ) == 0 ) ) {
7963 /* zero case */
7964 return float_relation_equal;
7965 } else {
7966 return 1 - (2 * aSign);
7967 }
7968 } else {
be53fa78
JM
7969 /* Normalize pseudo-denormals before comparison. */
7970 if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
7971 ++a.high;
7972 }
7973 if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
7974 ++b.high;
7975 }
f6714d36
AJ
7976 if (a.low == b.low && a.high == b.high) {
7977 return float_relation_equal;
7978 } else {
7979 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7980 }
7981 }
7982}
7983
e5a41ffa 7984int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7985{
ff32e16e 7986 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7987}
7988
e5a41ffa 7989int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
f6714d36 7990{
ff32e16e 7991 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7992}
7993
e5a41ffa
PM
7994static inline int float128_compare_internal(float128 a, float128 b,
7995 int is_quiet, float_status *status)
1f587329
BS
7996{
7997 flag aSign, bSign;
7998
7999 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
8000 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
8001 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
8002 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
8003 if (!is_quiet ||
af39bc8c
AM
8004 float128_is_signaling_nan(a, status) ||
8005 float128_is_signaling_nan(b, status)) {
ff32e16e 8006 float_raise(float_flag_invalid, status);
1f587329
BS
8007 }
8008 return float_relation_unordered;
8009 }
8010 aSign = extractFloat128Sign( a );
8011 bSign = extractFloat128Sign( b );
8012 if ( aSign != bSign ) {
8013 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
8014 /* zero case */
8015 return float_relation_equal;
8016 } else {
8017 return 1 - (2 * aSign);
8018 }
8019 } else {
8020 if (a.low == b.low && a.high == b.high) {
8021 return float_relation_equal;
8022 } else {
8023 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
8024 }
8025 }
8026}
8027
e5a41ffa 8028int float128_compare(float128 a, float128 b, float_status *status)
1f587329 8029{
ff32e16e 8030 return float128_compare_internal(a, b, 0, status);
1f587329
BS
8031}
8032
e5a41ffa 8033int float128_compare_quiet(float128 a, float128 b, float_status *status)
1f587329 8034{
ff32e16e 8035 return float128_compare_internal(a, b, 1, status);
1f587329
BS
8036}
8037
e5a41ffa 8038floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb
PB
8039{
8040 flag aSign;
326b9e98 8041 int32_t aExp;
bb98fe42 8042 uint64_t aSig;
9ee6e8bb 8043
d1eb8f2a
AD
8044 if (floatx80_invalid_encoding(a)) {
8045 float_raise(float_flag_invalid, status);
8046 return floatx80_default_nan(status);
8047 }
9ee6e8bb
PB
8048 aSig = extractFloatx80Frac( a );
8049 aExp = extractFloatx80Exp( a );
8050 aSign = extractFloatx80Sign( a );
8051
326b9e98
AJ
8052 if ( aExp == 0x7FFF ) {
8053 if ( aSig<<1 ) {
ff32e16e 8054 return propagateFloatx80NaN(a, a, status);
326b9e98 8055 }
9ee6e8bb
PB
8056 return a;
8057 }
326b9e98 8058
3c85c37f
PM
8059 if (aExp == 0) {
8060 if (aSig == 0) {
8061 return a;
8062 }
8063 aExp++;
8064 }
69397542 8065
326b9e98
AJ
8066 if (n > 0x10000) {
8067 n = 0x10000;
8068 } else if (n < -0x10000) {
8069 n = -0x10000;
8070 }
8071
9ee6e8bb 8072 aExp += n;
a2f2d288
PM
8073 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
8074 aSign, aExp, aSig, 0, status);
9ee6e8bb 8075}
9ee6e8bb 8076
e5a41ffa 8077float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb
PB
8078{
8079 flag aSign;
326b9e98 8080 int32_t aExp;
bb98fe42 8081 uint64_t aSig0, aSig1;
9ee6e8bb
PB
8082
8083 aSig1 = extractFloat128Frac1( a );
8084 aSig0 = extractFloat128Frac0( a );
8085 aExp = extractFloat128Exp( a );
8086 aSign = extractFloat128Sign( a );
8087 if ( aExp == 0x7FFF ) {
326b9e98 8088 if ( aSig0 | aSig1 ) {
ff32e16e 8089 return propagateFloat128NaN(a, a, status);
326b9e98 8090 }
9ee6e8bb
PB
8091 return a;
8092 }
3c85c37f 8093 if (aExp != 0) {
e9321124 8094 aSig0 |= UINT64_C(0x0001000000000000);
3c85c37f 8095 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 8096 return a;
3c85c37f
PM
8097 } else {
8098 aExp++;
8099 }
69397542 8100
326b9e98
AJ
8101 if (n > 0x10000) {
8102 n = 0x10000;
8103 } else if (n < -0x10000) {
8104 n = -0x10000;
8105 }
8106
69397542
PB
8107 aExp += n - 1;
8108 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 8109 , status);
9ee6e8bb
PB
8110
8111}
f6b3b108
EC
8112
8113static void __attribute__((constructor)) softfloat_init(void)
8114{
8115 union_float64 ua, ub, uc, ur;
8116
8117 if (QEMU_NO_HARDFLOAT) {
8118 return;
8119 }
8120 /*
8121 * Test that the host's FMA is not obviously broken. For example,
8122 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
8123 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
8124 */
8125 ua.s = 0x0020000000000001ULL;
8126 ub.s = 0x3ca0000000000000ULL;
8127 uc.s = 0x0020000000000000ULL;
8128 ur.h = fma(ua.h, ub.h, uc.h);
8129 if (ur.s != 0x0020000000000001ULL) {
8130 force_soft_fma = true;
8131 }
8132}