]> git.proxmox.com Git - mirror_qemu.git/blame - fpu/softfloat.c
softfloat: Tidy a * b + inf return
[mirror_qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
a94b7839 86#include <math.h>
6fff2167 87#include "qemu/bitops.h"
6b4c305c 88#include "fpu/softfloat.h"
158142c2 89
dc355b76 90/* We only need stdlib for abort() */
dc355b76 91
158142c2
FB
92/*----------------------------------------------------------------------------
93| Primitive arithmetic functions, including multi-word arithmetic, and
94| division and square root approximations. (Can be specialized to target if
95| desired.)
96*----------------------------------------------------------------------------*/
88857aca 97#include "fpu/softfloat-macros.h"
158142c2 98
a94b7839
EC
99/*
100 * Hardfloat
101 *
102 * Fast emulation of guest FP instructions is challenging for two reasons.
103 * First, FP instruction semantics are similar but not identical, particularly
104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
105 * exception flags is not trivial: reading the host's flags register with a
106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107 * and trapping on every FP exception is not fast nor pleasant to work with.
108 *
109 * We address these challenges by leveraging the host FPU for a subset of the
110 * operations. To do this we expand on the idea presented in this paper:
111 *
112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114 *
115 * The idea is thus to leverage the host FPU to (1) compute FP operations
116 * and (2) identify whether FP exceptions occurred while avoiding
117 * expensive exception flag register accesses.
118 *
119 * An important optimization shown in the paper is that given that exception
120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
121 * This is particularly useful for the inexact flag, which is very frequently
122 * raised in floating-point workloads.
123 *
124 * We optimize the code further by deferring to soft-fp whenever FP exception
125 * detection might get hairy. Two examples: (1) when at least one operand is
126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127 * and the result is < the minimum normal.
128 */
129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130 static inline void name(soft_t *a, float_status *s) \
131 { \
132 if (unlikely(soft_t ## _is_denormal(*a))) { \
133 *a = soft_t ## _set_sign(soft_t ## _zero, \
134 soft_t ## _is_neg(*a)); \
d82f3b2d 135 float_raise(float_flag_input_denormal, s); \
a94b7839
EC
136 } \
137 }
138
139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141#undef GEN_INPUT_FLUSH__NOCHECK
142
143#define GEN_INPUT_FLUSH1(name, soft_t) \
144 static inline void name(soft_t *a, float_status *s) \
145 { \
146 if (likely(!s->flush_inputs_to_zero)) { \
147 return; \
148 } \
149 soft_t ## _input_flush__nocheck(a, s); \
150 }
151
152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154#undef GEN_INPUT_FLUSH1
155
156#define GEN_INPUT_FLUSH2(name, soft_t) \
157 static inline void name(soft_t *a, soft_t *b, float_status *s) \
158 { \
159 if (likely(!s->flush_inputs_to_zero)) { \
160 return; \
161 } \
162 soft_t ## _input_flush__nocheck(a, s); \
163 soft_t ## _input_flush__nocheck(b, s); \
164 }
165
166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168#undef GEN_INPUT_FLUSH2
169
170#define GEN_INPUT_FLUSH3(name, soft_t) \
171 static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
172 { \
173 if (likely(!s->flush_inputs_to_zero)) { \
174 return; \
175 } \
176 soft_t ## _input_flush__nocheck(a, s); \
177 soft_t ## _input_flush__nocheck(b, s); \
178 soft_t ## _input_flush__nocheck(c, s); \
179 }
180
181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183#undef GEN_INPUT_FLUSH3
184
/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 *
 * As configured here, only the 64-bit variants use fpclassify(), and only
 * when building for an x86_64 host; every other combination is 0.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205
/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 * Hence isinf() is enabled only for x86_64 and aarch64 hosts.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif
217
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 *
 * Hardfloat is likewise disabled (with a build-time warning) when compiling
 * with -ffast-math.  QEMU_SOFTFLOAT_ATTR additionally carries noinline only
 * when hardfloat is enabled.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234
235static inline bool can_use_fpu(const float_status *s)
236{
237 if (QEMU_NO_HARDFLOAT) {
238 return false;
239 }
240 return likely(s->float_exception_flags & float_flag_inexact &&
241 s->float_rounding_mode == float_round_nearest_even);
242}
243
244/*
245 * Hardfloat generation functions. Each operation can have two flavors:
246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247 * most condition checks, or native ones (e.g. fpclassify).
248 *
249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
250 * compiler to propagate constants and inline everything into the callers.
251 *
252 * We only generate functions for operations with two inputs, since only
253 * these are common enough to justify consolidating them into common code.
254 */
255
256typedef union {
257 float32 s;
258 float h;
259} union_float32;
260
261typedef union {
262 float64 s;
263 double h;
264} union_float64;
265
266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
271typedef float (*hard_f32_op2_fn)(float a, float b);
272typedef double (*hard_f64_op2_fn)(double a, double b);
273
274/* 2-input is-zero-or-normal */
275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276{
277 if (QEMU_HARDFLOAT_2F32_USE_FP) {
278 /*
279 * Not using a temp variable for consecutive fpclassify calls ends up
280 * generating faster code.
281 */
282 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
283 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
284 }
285 return float32_is_zero_or_normal(a.s) &&
286 float32_is_zero_or_normal(b.s);
287}
288
289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290{
291 if (QEMU_HARDFLOAT_2F64_USE_FP) {
292 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
293 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
294 }
295 return float64_is_zero_or_normal(a.s) &&
296 float64_is_zero_or_normal(b.s);
297}
298
299/* 3-input is-zero-or-normal */
300static inline
301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302{
303 if (QEMU_HARDFLOAT_3F32_USE_FP) {
304 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
305 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
306 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
307 }
308 return float32_is_zero_or_normal(a.s) &&
309 float32_is_zero_or_normal(b.s) &&
310 float32_is_zero_or_normal(c.s);
311}
312
313static inline
314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315{
316 if (QEMU_HARDFLOAT_3F64_USE_FP) {
317 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
318 (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
319 (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
320 }
321 return float64_is_zero_or_normal(a.s) &&
322 float64_is_zero_or_normal(b.s) &&
323 float64_is_zero_or_normal(c.s);
324}
325
326static inline bool f32_is_inf(union_float32 a)
327{
328 if (QEMU_HARDFLOAT_USE_ISINF) {
329 return isinf(a.h);
330 }
331 return float32_is_infinity(a.s);
332}
333
334static inline bool f64_is_inf(union_float64 a)
335{
336 if (QEMU_HARDFLOAT_USE_ISINF) {
337 return isinf(a.h);
338 }
339 return float64_is_infinity(a.s);
340}
341
a94b7839
EC
342static inline float32
343float32_gen2(float32 xa, float32 xb, float_status *s,
344 hard_f32_op2_fn hard, soft_f32_op2_fn soft,
b240c9c4 345 f32_check_fn pre, f32_check_fn post)
a94b7839
EC
346{
347 union_float32 ua, ub, ur;
348
349 ua.s = xa;
350 ub.s = xb;
351
352 if (unlikely(!can_use_fpu(s))) {
353 goto soft;
354 }
355
356 float32_input_flush2(&ua.s, &ub.s, s);
357 if (unlikely(!pre(ua, ub))) {
358 goto soft;
359 }
a94b7839
EC
360
361 ur.h = hard(ua.h, ub.h);
362 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 363 float_raise(float_flag_overflow, s);
b240c9c4
RH
364 } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
365 goto soft;
a94b7839
EC
366 }
367 return ur.s;
368
369 soft:
370 return soft(ua.s, ub.s, s);
371}
372
373static inline float64
374float64_gen2(float64 xa, float64 xb, float_status *s,
375 hard_f64_op2_fn hard, soft_f64_op2_fn soft,
b240c9c4 376 f64_check_fn pre, f64_check_fn post)
a94b7839
EC
377{
378 union_float64 ua, ub, ur;
379
380 ua.s = xa;
381 ub.s = xb;
382
383 if (unlikely(!can_use_fpu(s))) {
384 goto soft;
385 }
386
387 float64_input_flush2(&ua.s, &ub.s, s);
388 if (unlikely(!pre(ua, ub))) {
389 goto soft;
390 }
a94b7839
EC
391
392 ur.h = hard(ua.h, ub.h);
393 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 394 float_raise(float_flag_overflow, s);
b240c9c4
RH
395 } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
396 goto soft;
a94b7839
EC
397 }
398 return ur.s;
399
400 soft:
401 return soft(ua.s, ub.s, s);
402}
403
d97544c9
AB
404/*----------------------------------------------------------------------------
405| Returns the fraction bits of the single-precision floating-point value `a'.
406*----------------------------------------------------------------------------*/
407
408static inline uint32_t extractFloat32Frac(float32 a)
409{
410 return float32_val(a) & 0x007FFFFF;
411}
412
413/*----------------------------------------------------------------------------
414| Returns the exponent bits of the single-precision floating-point value `a'.
415*----------------------------------------------------------------------------*/
416
417static inline int extractFloat32Exp(float32 a)
418{
419 return (float32_val(a) >> 23) & 0xFF;
420}
421
422/*----------------------------------------------------------------------------
423| Returns the sign bit of the single-precision floating-point value `a'.
424*----------------------------------------------------------------------------*/
425
c120391c 426static inline bool extractFloat32Sign(float32 a)
d97544c9
AB
427{
428 return float32_val(a) >> 31;
429}
430
431/*----------------------------------------------------------------------------
432| Returns the fraction bits of the double-precision floating-point value `a'.
433*----------------------------------------------------------------------------*/
434
435static inline uint64_t extractFloat64Frac(float64 a)
436{
e9321124 437 return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
d97544c9
AB
438}
439
440/*----------------------------------------------------------------------------
441| Returns the exponent bits of the double-precision floating-point value `a'.
442*----------------------------------------------------------------------------*/
443
444static inline int extractFloat64Exp(float64 a)
445{
446 return (float64_val(a) >> 52) & 0x7FF;
447}
448
449/*----------------------------------------------------------------------------
450| Returns the sign bit of the double-precision floating-point value `a'.
451*----------------------------------------------------------------------------*/
452
c120391c 453static inline bool extractFloat64Sign(float64 a)
d97544c9
AB
454{
455 return float64_val(a) >> 63;
456}
457
a90119b5
AB
/*
 * Classification of a decomposed floating point number.
 *
 * The enumerator order matters: both NaN classes come last, so
 * cls >= float_class_qnan is true for any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,   /* raw parts, not yet canonicalized */
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,           /* all NaNs from here */
    float_class_snan,
} FloatClass;
471
247d1f21
RH
472/* Simple helpers for checking if, or what kind of, NaN we have */
473static inline __attribute__((unused)) bool is_nan(FloatClass c)
474{
475 return unlikely(c >= float_class_qnan);
476}
477
478static inline __attribute__((unused)) bool is_snan(FloatClass c)
479{
480 return c == float_class_snan;
481}
482
483static inline __attribute__((unused)) bool is_qnan(FloatClass c)
484{
485 return c == float_class_qnan;
486}
487
a90119b5
AB
488/*
489 * Structure holding all of the decomposed parts of a float. The
490 * exponent is unbiased and the fraction is normalized. All
491 * calculations are done with a 64 bit fraction and then rounded as
492 * appropriate for the final format.
493 *
494 * Thanks to the packed FloatClass a decent compiler should be able to
495 * fit the whole structure into registers and avoid using the stack
496 * for parameter passing.
497 */
498
499typedef struct {
500 uint64_t frac;
501 int32_t exp;
502 FloatClass cls;
503 bool sign;
504} FloatParts;
505
/* Bit position of the binary point (and implicit bit) in FloatParts.frac. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;

/*
 * Expand the FloatFmt fields for a format with an E-bit exponent and an
 * F-bit fraction.  Both arguments are parenthesized at every use so that
 * expression arguments (e.g. FLOAT_PARAMS(4 + 1, 8 + 2)) expand correctly.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = (E),                                           \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                         \
    .exp_max        = (1 << (E)) - 1,                                \
    .frac_size      = (F),                                           \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                 \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),       \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1), \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1, \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1

/* IEEE half precision: 5 exponent bits, 10 fraction bits. */
static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

/* ARM alternative half precision: same field sizes, arm_althp set. */
static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

/* bfloat16: 8 exponent bits, 7 fraction bits. */
static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

/* IEEE single precision: 8 exponent bits, 23 fraction bits. */
static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

/* IEEE double precision: 11 exponent bits, 52 fraction bits. */
static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};
567
6fff2167
AB
568/* Unpack a float to parts, but do not canonicalize. */
569static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
570{
571 const int sign_pos = fmt.frac_size + fmt.exp_size;
572
573 return (FloatParts) {
574 .cls = float_class_unclassified,
575 .sign = extract64(raw, sign_pos, 1),
576 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
577 .frac = extract64(raw, 0, fmt.frac_size),
578 };
579}
580
581static inline FloatParts float16_unpack_raw(float16 f)
582{
583 return unpack_raw(float16_params, f);
584}
585
8282310d
LZ
586static inline FloatParts bfloat16_unpack_raw(bfloat16 f)
587{
588 return unpack_raw(bfloat16_params, f);
589}
590
6fff2167
AB
591static inline FloatParts float32_unpack_raw(float32 f)
592{
593 return unpack_raw(float32_params, f);
594}
595
596static inline FloatParts float64_unpack_raw(float64 f)
597{
598 return unpack_raw(float64_params, f);
599}
600
601/* Pack a float from parts, but do not canonicalize. */
602static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
603{
604 const int sign_pos = fmt.frac_size + fmt.exp_size;
605 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
606 return deposit64(ret, sign_pos, 1, p.sign);
607}
608
609static inline float16 float16_pack_raw(FloatParts p)
610{
611 return make_float16(pack_raw(float16_params, p));
612}
613
8282310d
LZ
614static inline bfloat16 bfloat16_pack_raw(FloatParts p)
615{
616 return pack_raw(bfloat16_params, p);
617}
618
6fff2167
AB
619static inline float32 float32_pack_raw(FloatParts p)
620{
621 return make_float32(pack_raw(float32_params, p));
622}
623
624static inline float64 float64_pack_raw(FloatParts p)
625{
626 return make_float64(pack_raw(float64_params, p));
627}
628
0664335a
RH
629/*----------------------------------------------------------------------------
630| Functions and definitions to determine: (1) whether tininess for underflow
631| is detected before or after rounding by default, (2) what (if anything)
632| happens when exceptions are raised, (3) how signaling NaNs are distinguished
633| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
634| are propagated from function inputs to output. These details are target-
635| specific.
636*----------------------------------------------------------------------------*/
139c1837 637#include "softfloat-specialize.c.inc"
0664335a 638
6fff2167 639/* Canonicalize EXP and FRAC, setting CLS. */
f9943c7f
EC
640static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
641 float_status *status)
6fff2167 642{
ca3a3d5a 643 if (part.exp == parm->exp_max && !parm->arm_althp) {
6fff2167
AB
644 if (part.frac == 0) {
645 part.cls = float_class_inf;
646 } else {
94933df0 647 part.frac <<= parm->frac_shift;
298b468e
RH
648 part.cls = (parts_is_snan_frac(part.frac, status)
649 ? float_class_snan : float_class_qnan);
6fff2167
AB
650 }
651 } else if (part.exp == 0) {
652 if (likely(part.frac == 0)) {
653 part.cls = float_class_zero;
654 } else if (status->flush_inputs_to_zero) {
655 float_raise(float_flag_input_denormal, status);
656 part.cls = float_class_zero;
657 part.frac = 0;
658 } else {
e99c4373 659 int shift = clz64(part.frac);
6fff2167
AB
660 part.cls = float_class_normal;
661 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
662 part.frac <<= shift;
663 }
664 } else {
665 part.cls = float_class_normal;
666 part.exp -= parm->exp_bias;
667 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
668 }
669 return part;
670}
671
672/* Round and uncanonicalize a floating-point number by parts. There
673 * are FRAC_SHIFT bits that may require rounding at the bottom of the
674 * fraction; these bits will be removed. The exponent will be biased
675 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
676 */
677
678static FloatParts round_canonical(FloatParts p, float_status *s,
679 const FloatFmt *parm)
680{
5d64abb3 681 const uint64_t frac_lsb = parm->frac_lsb;
6fff2167
AB
682 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
683 const uint64_t round_mask = parm->round_mask;
684 const uint64_t roundeven_mask = parm->roundeven_mask;
685 const int exp_max = parm->exp_max;
686 const int frac_shift = parm->frac_shift;
687 uint64_t frac, inc;
688 int exp, flags = 0;
689 bool overflow_norm;
690
691 frac = p.frac;
692 exp = p.exp;
693
694 switch (p.cls) {
695 case float_class_normal:
696 switch (s->float_rounding_mode) {
697 case float_round_nearest_even:
698 overflow_norm = false;
699 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
700 break;
701 case float_round_ties_away:
702 overflow_norm = false;
703 inc = frac_lsbm1;
704 break;
705 case float_round_to_zero:
706 overflow_norm = true;
707 inc = 0;
708 break;
709 case float_round_up:
710 inc = p.sign ? 0 : round_mask;
711 overflow_norm = p.sign;
712 break;
713 case float_round_down:
714 inc = p.sign ? round_mask : 0;
715 overflow_norm = !p.sign;
716 break;
5d64abb3
RH
717 case float_round_to_odd:
718 overflow_norm = true;
719 inc = frac & frac_lsb ? 0 : round_mask;
720 break;
6fff2167
AB
721 default:
722 g_assert_not_reached();
723 }
724
725 exp += parm->exp_bias;
726 if (likely(exp > 0)) {
727 if (frac & round_mask) {
728 flags |= float_flag_inexact;
e99c4373
RH
729 if (uadd64_overflow(frac, inc, &frac)) {
730 frac = (frac >> 1) | DECOMPOSED_IMPLICIT_BIT;
6fff2167
AB
731 exp++;
732 }
733 }
734 frac >>= frac_shift;
735
ca3a3d5a
AB
736 if (parm->arm_althp) {
737 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
738 if (unlikely(exp > exp_max)) {
739 /* Overflow. Return the maximum normal. */
740 flags = float_flag_invalid;
741 exp = exp_max;
742 frac = -1;
743 }
744 } else if (unlikely(exp >= exp_max)) {
6fff2167
AB
745 flags |= float_flag_overflow | float_flag_inexact;
746 if (overflow_norm) {
747 exp = exp_max - 1;
748 frac = -1;
749 } else {
750 p.cls = float_class_inf;
751 goto do_inf;
752 }
753 }
754 } else if (s->flush_to_zero) {
755 flags |= float_flag_output_denormal;
756 p.cls = float_class_zero;
757 goto do_zero;
758 } else {
e99c4373
RH
759 bool is_tiny = s->tininess_before_rounding || (exp < 0);
760
761 if (!is_tiny) {
762 uint64_t discard;
763 is_tiny = !uadd64_overflow(frac, inc, &discard);
764 }
6fff2167
AB
765
766 shift64RightJamming(frac, 1 - exp, &frac);
767 if (frac & round_mask) {
768 /* Need to recompute round-to-even. */
5d64abb3
RH
769 switch (s->float_rounding_mode) {
770 case float_round_nearest_even:
6fff2167
AB
771 inc = ((frac & roundeven_mask) != frac_lsbm1
772 ? frac_lsbm1 : 0);
5d64abb3
RH
773 break;
774 case float_round_to_odd:
775 inc = frac & frac_lsb ? 0 : round_mask;
776 break;
3dede407
RH
777 default:
778 break;
6fff2167
AB
779 }
780 flags |= float_flag_inexact;
781 frac += inc;
782 }
783
784 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
785 frac >>= frac_shift;
786
787 if (is_tiny && (flags & float_flag_inexact)) {
788 flags |= float_flag_underflow;
789 }
790 if (exp == 0 && frac == 0) {
791 p.cls = float_class_zero;
792 }
793 }
794 break;
795
796 case float_class_zero:
797 do_zero:
798 exp = 0;
799 frac = 0;
800 break;
801
802 case float_class_inf:
803 do_inf:
ca3a3d5a 804 assert(!parm->arm_althp);
6fff2167
AB
805 exp = exp_max;
806 frac = 0;
807 break;
808
809 case float_class_qnan:
810 case float_class_snan:
ca3a3d5a 811 assert(!parm->arm_althp);
6fff2167 812 exp = exp_max;
94933df0 813 frac >>= parm->frac_shift;
6fff2167
AB
814 break;
815
816 default:
817 g_assert_not_reached();
818 }
819
820 float_raise(flags, s);
821 p.exp = exp;
822 p.frac = frac;
823 return p;
824}
825
6fed16b2
AB
826/* Explicit FloatFmt version */
827static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
828 const FloatFmt *params)
829{
f9943c7f 830 return sf_canonicalize(float16_unpack_raw(f), params, s);
6fed16b2
AB
831}
832
6fff2167
AB
833static FloatParts float16_unpack_canonical(float16 f, float_status *s)
834{
6fed16b2
AB
835 return float16a_unpack_canonical(f, s, &float16_params);
836}
837
8282310d
LZ
838static FloatParts bfloat16_unpack_canonical(bfloat16 f, float_status *s)
839{
840 return sf_canonicalize(bfloat16_unpack_raw(f), &bfloat16_params, s);
841}
842
6fed16b2
AB
843static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
844 const FloatFmt *params)
845{
846 return float16_pack_raw(round_canonical(p, s, params));
6fff2167
AB
847}
848
849static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
850{
6fed16b2 851 return float16a_round_pack_canonical(p, s, &float16_params);
6fff2167
AB
852}
853
8282310d
LZ
854static bfloat16 bfloat16_round_pack_canonical(FloatParts p, float_status *s)
855{
856 return bfloat16_pack_raw(round_canonical(p, s, &bfloat16_params));
857}
858
6fff2167
AB
859static FloatParts float32_unpack_canonical(float32 f, float_status *s)
860{
f9943c7f 861 return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
6fff2167
AB
862}
863
864static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
865{
0bcfbcbe 866 return float32_pack_raw(round_canonical(p, s, &float32_params));
6fff2167
AB
867}
868
869static FloatParts float64_unpack_canonical(float64 f, float_status *s)
870{
f9943c7f 871 return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
6fff2167
AB
872}
873
874static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
875{
0bcfbcbe 876 return float64_pack_raw(round_canonical(p, s, &float64_params));
6fff2167
AB
877}
878
dbe4d53a
AB
879static FloatParts return_nan(FloatParts a, float_status *s)
880{
881 switch (a.cls) {
882 case float_class_snan:
d82f3b2d 883 float_raise(float_flag_invalid, s);
0bcfbcbe 884 a = parts_silence_nan(a, s);
dbe4d53a
AB
885 /* fall through */
886 case float_class_qnan:
887 if (s->default_nan_mode) {
f7e598e2 888 return parts_default_nan(s);
dbe4d53a
AB
889 }
890 break;
891
892 default:
893 g_assert_not_reached();
894 }
895 return a;
896}
897
6fff2167
AB
898static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
899{
900 if (is_snan(a.cls) || is_snan(b.cls)) {
d82f3b2d 901 float_raise(float_flag_invalid, s);
6fff2167
AB
902 }
903
904 if (s->default_nan_mode) {
f7e598e2 905 return parts_default_nan(s);
6fff2167 906 } else {
4f251cfd 907 if (pickNaN(a.cls, b.cls,
6fff2167 908 a.frac > b.frac ||
913602e3 909 (a.frac == b.frac && a.sign < b.sign), s)) {
6fff2167
AB
910 a = b;
911 }
0bcfbcbe
RH
912 if (is_snan(a.cls)) {
913 return parts_silence_nan(a, s);
914 }
6fff2167
AB
915 }
916 return a;
917}
918
d446830a
AB
919static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
920 bool inf_zero, float_status *s)
921{
1839189b
PM
922 int which;
923
d446830a 924 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
d82f3b2d 925 float_raise(float_flag_invalid, s);
d446830a
AB
926 }
927
3bd2dec1 928 which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
1839189b 929
d446830a 930 if (s->default_nan_mode) {
1839189b
PM
931 /* Note that this check is after pickNaNMulAdd so that function
932 * has an opportunity to set the Invalid flag.
933 */
f7e598e2 934 which = 3;
1839189b 935 }
d446830a 936
1839189b
PM
937 switch (which) {
938 case 0:
939 break;
940 case 1:
941 a = b;
942 break;
943 case 2:
944 a = c;
945 break;
946 case 3:
f7e598e2 947 return parts_default_nan(s);
1839189b
PM
948 default:
949 g_assert_not_reached();
d446830a 950 }
1839189b 951
0bcfbcbe
RH
952 if (is_snan(a.cls)) {
953 return parts_silence_nan(a, s);
954 }
d446830a
AB
955 return a;
956}
957
6fff2167
AB
958/*
959 * Returns the result of adding or subtracting the values of the
960 * floating-point values `a' and `b'. The operation is performed
961 * according to the IEC/IEEE Standard for Binary Floating-Point
962 * Arithmetic.
963 */
964
965static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
966 float_status *s)
967{
968 bool a_sign = a.sign;
969 bool b_sign = b.sign ^ subtract;
970
971 if (a_sign != b_sign) {
972 /* Subtraction */
973
974 if (a.cls == float_class_normal && b.cls == float_class_normal) {
975 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
976 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
977 a.frac = a.frac - b.frac;
978 } else {
979 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
980 a.frac = b.frac - a.frac;
981 a.exp = b.exp;
982 a_sign ^= 1;
983 }
984
985 if (a.frac == 0) {
986 a.cls = float_class_zero;
987 a.sign = s->float_rounding_mode == float_round_down;
988 } else {
e99c4373 989 int shift = clz64(a.frac);
6fff2167
AB
990 a.frac = a.frac << shift;
991 a.exp = a.exp - shift;
992 a.sign = a_sign;
993 }
994 return a;
995 }
996 if (is_nan(a.cls) || is_nan(b.cls)) {
997 return pick_nan(a, b, s);
998 }
999 if (a.cls == float_class_inf) {
1000 if (b.cls == float_class_inf) {
1001 float_raise(float_flag_invalid, s);
f7e598e2 1002 return parts_default_nan(s);
6fff2167
AB
1003 }
1004 return a;
1005 }
1006 if (a.cls == float_class_zero && b.cls == float_class_zero) {
1007 a.sign = s->float_rounding_mode == float_round_down;
1008 return a;
1009 }
1010 if (a.cls == float_class_zero || b.cls == float_class_inf) {
1011 b.sign = a_sign ^ 1;
1012 return b;
1013 }
1014 if (b.cls == float_class_zero) {
1015 return a;
1016 }
1017 } else {
1018 /* Addition */
1019 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1020 if (a.exp > b.exp) {
1021 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
1022 } else if (a.exp < b.exp) {
1023 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
1024 a.exp = b.exp;
1025 }
e99c4373
RH
1026
1027 if (uadd64_overflow(a.frac, b.frac, &a.frac)) {
64d450a0 1028 shift64RightJamming(a.frac, 1, &a.frac);
e99c4373 1029 a.frac |= DECOMPOSED_IMPLICIT_BIT;
6fff2167
AB
1030 a.exp += 1;
1031 }
1032 return a;
1033 }
1034 if (is_nan(a.cls) || is_nan(b.cls)) {
1035 return pick_nan(a, b, s);
1036 }
1037 if (a.cls == float_class_inf || b.cls == float_class_zero) {
1038 return a;
1039 }
1040 if (b.cls == float_class_inf || a.cls == float_class_zero) {
1041 b.sign = b_sign;
1042 return b;
1043 }
1044 }
1045 g_assert_not_reached();
1046}
1047
1048/*
1049 * Returns the result of adding or subtracting the floating-point
1050 * values `a' and `b'. The operation is performed according to the
1051 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1052 */
1053
97ff87c0 1054float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
6fff2167
AB
1055{
1056 FloatParts pa = float16_unpack_canonical(a, status);
1057 FloatParts pb = float16_unpack_canonical(b, status);
1058 FloatParts pr = addsub_floats(pa, pb, false, status);
1059
1060 return float16_round_pack_canonical(pr, status);
1061}
1062
1b615d48
EC
1063float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
1064{
1065 FloatParts pa = float16_unpack_canonical(a, status);
1066 FloatParts pb = float16_unpack_canonical(b, status);
1067 FloatParts pr = addsub_floats(pa, pb, true, status);
1068
1069 return float16_round_pack_canonical(pr, status);
1070}
1071
1072static float32 QEMU_SOFTFLOAT_ATTR
1073soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
6fff2167
AB
1074{
1075 FloatParts pa = float32_unpack_canonical(a, status);
1076 FloatParts pb = float32_unpack_canonical(b, status);
1b615d48 1077 FloatParts pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1078
1079 return float32_round_pack_canonical(pr, status);
1080}
1081
1b615d48
EC
1082static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
1083{
1084 return soft_f32_addsub(a, b, false, status);
1085}
1086
1087static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1088{
1089 return soft_f32_addsub(a, b, true, status);
1090}
1091
1092static float64 QEMU_SOFTFLOAT_ATTR
1093soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
6fff2167
AB
1094{
1095 FloatParts pa = float64_unpack_canonical(a, status);
1096 FloatParts pb = float64_unpack_canonical(b, status);
1b615d48 1097 FloatParts pr = addsub_floats(pa, pb, subtract, status);
6fff2167
AB
1098
1099 return float64_round_pack_canonical(pr, status);
1100}
1101
1b615d48 1102static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
6fff2167 1103{
1b615d48
EC
1104 return soft_f64_addsub(a, b, false, status);
1105}
6fff2167 1106
1b615d48
EC
1107static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1108{
1109 return soft_f64_addsub(a, b, true, status);
6fff2167
AB
1110}
1111
/* Host single-precision addition. */
static float hard_f32_add(float a, float b)
{
    float sum = a + b;

    return sum;
}
6fff2167 1116
1b615d48
EC
/* Host single-precision subtraction. */
static float hard_f32_sub(float a, float b)
{
    float diff = a - b;

    return diff;
}
1121
/* Host double-precision addition. */
static double hard_f64_add(double a, double b)
{
    double sum = a + b;

    return sum;
}
6fff2167 1126
1b615d48
EC
/* Host double-precision subtraction. */
static double hard_f64_sub(double a, double b)
{
    double diff = a - b;

    return diff;
}
1131
b240c9c4 1132static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1b615d48
EC
1133{
1134 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1135 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1136 }
1137 return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1138}
1139
b240c9c4 1140static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1b615d48
EC
1141{
1142 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1143 return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1144 } else {
1145 return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1146 }
1147}
1148
1149static float32 float32_addsub(float32 a, float32 b, float_status *s,
1150 hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1151{
1152 return float32_gen2(a, b, s, hard, soft,
b240c9c4 1153 f32_is_zon2, f32_addsubmul_post);
1b615d48
EC
1154}
1155
1156static float64 float64_addsub(float64 a, float64 b, float_status *s,
1157 hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1158{
1159 return float64_gen2(a, b, s, hard, soft,
b240c9c4 1160 f64_is_zon2, f64_addsubmul_post);
1b615d48
EC
1161}
1162
1163float32 QEMU_FLATTEN
1164float32_add(float32 a, float32 b, float_status *s)
1165{
1166 return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1167}
1168
1169float32 QEMU_FLATTEN
1170float32_sub(float32 a, float32 b, float_status *s)
1171{
1172 return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1173}
1174
1175float64 QEMU_FLATTEN
1176float64_add(float64 a, float64 b, float_status *s)
1177{
1178 return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1179}
1180
1181float64 QEMU_FLATTEN
1182float64_sub(float64 a, float64 b, float_status *s)
1183{
1184 return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
6fff2167
AB
1185}
1186
8282310d
LZ
1187/*
1188 * Returns the result of adding or subtracting the bfloat16
1189 * values `a' and `b'.
1190 */
1191bfloat16 QEMU_FLATTEN bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1192{
1193 FloatParts pa = bfloat16_unpack_canonical(a, status);
1194 FloatParts pb = bfloat16_unpack_canonical(b, status);
1195 FloatParts pr = addsub_floats(pa, pb, false, status);
1196
1197 return bfloat16_round_pack_canonical(pr, status);
1198}
1199
1200bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1201{
1202 FloatParts pa = bfloat16_unpack_canonical(a, status);
1203 FloatParts pb = bfloat16_unpack_canonical(b, status);
1204 FloatParts pr = addsub_floats(pa, pb, true, status);
1205
1206 return bfloat16_round_pack_canonical(pr, status);
1207}
1208
74d707e2
AB
1209/*
1210 * Returns the result of multiplying the floating-point values `a' and
1211 * `b'. The operation is performed according to the IEC/IEEE Standard
1212 * for Binary Floating-Point Arithmetic.
1213 */
1214
1215static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
1216{
1217 bool sign = a.sign ^ b.sign;
1218
1219 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1220 uint64_t hi, lo;
1221 int exp = a.exp + b.exp;
1222
1223 mul64To128(a.frac, b.frac, &hi, &lo);
e99c4373 1224 if (hi & DECOMPOSED_IMPLICIT_BIT) {
74d707e2 1225 exp += 1;
e99c4373
RH
1226 } else {
1227 hi <<= 1;
74d707e2 1228 }
e99c4373 1229 hi |= (lo != 0);
74d707e2
AB
1230
1231 /* Re-use a */
1232 a.exp = exp;
1233 a.sign = sign;
e99c4373 1234 a.frac = hi;
74d707e2
AB
1235 return a;
1236 }
1237 /* handle all the NaN cases */
1238 if (is_nan(a.cls) || is_nan(b.cls)) {
1239 return pick_nan(a, b, s);
1240 }
1241 /* Inf * Zero == NaN */
1242 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
1243 (a.cls == float_class_zero && b.cls == float_class_inf)) {
d82f3b2d 1244 float_raise(float_flag_invalid, s);
f7e598e2 1245 return parts_default_nan(s);
74d707e2
AB
1246 }
1247 /* Multiply by 0 or Inf */
1248 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1249 a.sign = sign;
1250 return a;
1251 }
1252 if (b.cls == float_class_inf || b.cls == float_class_zero) {
1253 b.sign = sign;
1254 return b;
1255 }
1256 g_assert_not_reached();
1257}
1258
97ff87c0 1259float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
74d707e2
AB
1260{
1261 FloatParts pa = float16_unpack_canonical(a, status);
1262 FloatParts pb = float16_unpack_canonical(b, status);
1263 FloatParts pr = mul_floats(pa, pb, status);
1264
1265 return float16_round_pack_canonical(pr, status);
1266}
1267
2dfabc86
EC
1268static float32 QEMU_SOFTFLOAT_ATTR
1269soft_f32_mul(float32 a, float32 b, float_status *status)
74d707e2
AB
1270{
1271 FloatParts pa = float32_unpack_canonical(a, status);
1272 FloatParts pb = float32_unpack_canonical(b, status);
1273 FloatParts pr = mul_floats(pa, pb, status);
1274
1275 return float32_round_pack_canonical(pr, status);
1276}
1277
2dfabc86
EC
1278static float64 QEMU_SOFTFLOAT_ATTR
1279soft_f64_mul(float64 a, float64 b, float_status *status)
74d707e2
AB
1280{
1281 FloatParts pa = float64_unpack_canonical(a, status);
1282 FloatParts pb = float64_unpack_canonical(b, status);
1283 FloatParts pr = mul_floats(pa, pb, status);
1284
1285 return float64_round_pack_canonical(pr, status);
1286}
1287
2dfabc86
EC
/* Host single-precision multiplication. */
static float hard_f32_mul(float a, float b)
{
    float prod = a * b;

    return prod;
}
1292
/* Host double-precision multiplication. */
static double hard_f64_mul(double a, double b)
{
    double prod = a * b;

    return prod;
}
1297
2dfabc86
EC
1298float32 QEMU_FLATTEN
1299float32_mul(float32 a, float32 b, float_status *s)
1300{
1301 return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
b240c9c4 1302 f32_is_zon2, f32_addsubmul_post);
2dfabc86
EC
1303}
1304
1305float64 QEMU_FLATTEN
1306float64_mul(float64 a, float64 b, float_status *s)
1307{
1308 return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
b240c9c4 1309 f64_is_zon2, f64_addsubmul_post);
2dfabc86
EC
1310}
1311
8282310d
LZ
1312/*
1313 * Returns the result of multiplying the bfloat16
1314 * values `a' and `b'.
1315 */
1316
1317bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1318{
1319 FloatParts pa = bfloat16_unpack_canonical(a, status);
1320 FloatParts pb = bfloat16_unpack_canonical(b, status);
1321 FloatParts pr = mul_floats(pa, pb, status);
1322
1323 return bfloat16_round_pack_canonical(pr, status);
1324}
1325
d446830a
AB
1326/*
1327 * Returns the result of multiplying the floating-point values `a' and
1328 * `b' then adding 'c', with no intermediate rounding step after the
1329 * multiplication. The operation is performed according to the
1330 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
1331 * The flags argument allows the caller to select negation of the
1332 * addend, the intermediate product, or the final result. (The
1333 * difference between this and having the caller do a separate
1334 * negation is that negating externally will flip the sign bit on
1335 * NaNs.)
1336 */
1337
1338static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
1339 int flags, float_status *s)
1340{
1341 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
1342 ((1 << float_class_inf) | (1 << float_class_zero));
1343 bool p_sign;
1344 bool sign_flip = flags & float_muladd_negate_result;
1345 FloatClass p_class;
1346 uint64_t hi, lo;
1347 int p_exp;
1348
1349 /* It is implementation-defined whether the cases of (0,inf,qnan)
1350 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
1351 * they return if they do), so we have to hand this information
1352 * off to the target-specific pick-a-NaN routine.
1353 */
1354 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
1355 return pick_nan_muladd(a, b, c, inf_zero, s);
1356 }
1357
1358 if (inf_zero) {
d82f3b2d 1359 float_raise(float_flag_invalid, s);
d446830a 1360 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1361 return parts_default_nan(s);
d446830a
AB
1362 }
1363
1364 if (flags & float_muladd_negate_c) {
1365 c.sign ^= 1;
1366 }
1367
1368 p_sign = a.sign ^ b.sign;
1369
1370 if (flags & float_muladd_negate_product) {
1371 p_sign ^= 1;
1372 }
1373
1374 if (a.cls == float_class_inf || b.cls == float_class_inf) {
1375 p_class = float_class_inf;
1376 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
1377 p_class = float_class_zero;
1378 } else {
1379 p_class = float_class_normal;
1380 }
1381
1382 if (c.cls == float_class_inf) {
1383 if (p_class == float_class_inf && p_sign != c.sign) {
d82f3b2d 1384 float_raise(float_flag_invalid, s);
f7e598e2 1385 return parts_default_nan(s);
d446830a 1386 } else {
9793c1e2
RH
1387 c.sign ^= sign_flip;
1388 return c;
d446830a 1389 }
d446830a
AB
1390 }
1391
1392 if (p_class == float_class_inf) {
1393 a.cls = float_class_inf;
1394 a.sign = p_sign ^ sign_flip;
1395 return a;
1396 }
1397
1398 if (p_class == float_class_zero) {
1399 if (c.cls == float_class_zero) {
1400 if (p_sign != c.sign) {
1401 p_sign = s->float_rounding_mode == float_round_down;
1402 }
1403 c.sign = p_sign;
1404 } else if (flags & float_muladd_halve_result) {
1405 c.exp -= 1;
1406 }
1407 c.sign ^= sign_flip;
1408 return c;
1409 }
1410
1411 /* a & b should be normals now... */
1412 assert(a.cls == float_class_normal &&
1413 b.cls == float_class_normal);
1414
1415 p_exp = a.exp + b.exp;
1416
d446830a 1417 mul64To128(a.frac, b.frac, &hi, &lo);
d446830a 1418
e99c4373
RH
1419 /* Renormalize to the msb. */
1420 if (hi & DECOMPOSED_IMPLICIT_BIT) {
d446830a 1421 p_exp += 1;
e99c4373
RH
1422 } else {
1423 shortShift128Left(hi, lo, 1, &hi, &lo);
d446830a
AB
1424 }
1425
1426 /* + add/sub */
e99c4373 1427 if (c.cls != float_class_zero) {
d446830a
AB
1428 int exp_diff = p_exp - c.exp;
1429 if (p_sign == c.sign) {
1430 /* Addition */
1431 if (exp_diff <= 0) {
e99c4373 1432 shift64RightJamming(hi, -exp_diff, &hi);
d446830a 1433 p_exp = c.exp;
e99c4373
RH
1434 if (uadd64_overflow(hi, c.frac, &hi)) {
1435 shift64RightJamming(hi, 1, &hi);
1436 hi |= DECOMPOSED_IMPLICIT_BIT;
1437 p_exp += 1;
1438 }
d446830a 1439 } else {
e99c4373
RH
1440 uint64_t c_hi, c_lo, over;
1441 shift128RightJamming(c.frac, 0, exp_diff, &c_hi, &c_lo);
1442 add192(0, hi, lo, 0, c_hi, c_lo, &over, &hi, &lo);
1443 if (over) {
1444 shift64RightJamming(hi, 1, &hi);
1445 hi |= DECOMPOSED_IMPLICIT_BIT;
1446 p_exp += 1;
1447 }
d446830a 1448 }
d446830a
AB
1449 } else {
1450 /* Subtraction */
e99c4373 1451 uint64_t c_hi = c.frac, c_lo = 0;
d446830a
AB
1452
1453 if (exp_diff <= 0) {
1454 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1455 if (exp_diff == 0
1456 &&
1457 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1458 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1459 } else {
1460 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1461 p_sign ^= 1;
1462 p_exp = c.exp;
1463 }
1464 } else {
1465 shift128RightJamming(c_hi, c_lo,
1466 exp_diff,
1467 &c_hi, &c_lo);
1468 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1469 }
1470
1471 if (hi == 0 && lo == 0) {
1472 a.cls = float_class_zero;
1473 a.sign = s->float_rounding_mode == float_round_down;
1474 a.sign ^= sign_flip;
1475 return a;
1476 } else {
1477 int shift;
1478 if (hi != 0) {
1479 shift = clz64(hi);
1480 } else {
1481 shift = clz64(lo) + 64;
1482 }
1483 /* Normalizing to a binary point of 124 is the
1484 correct adjust for the exponent. However since we're
1485 shifting, we might as well put the binary point back
e99c4373 1486 at 63 where we really want it. Therefore shift as
d446830a
AB
1487 if we're leaving 1 bit at the top of the word, but
1488 adjust the exponent as if we're leaving 3 bits. */
e99c4373
RH
1489 shift128Left(hi, lo, shift, &hi, &lo);
1490 p_exp -= shift;
d446830a
AB
1491 }
1492 }
1493 }
e99c4373 1494 hi |= (lo != 0);
d446830a
AB
1495
1496 if (flags & float_muladd_halve_result) {
1497 p_exp -= 1;
1498 }
1499
1500 /* finally prepare our result */
1501 a.cls = float_class_normal;
1502 a.sign = p_sign ^ sign_flip;
1503 a.exp = p_exp;
e99c4373 1504 a.frac = hi;
d446830a
AB
1505
1506 return a;
1507}
1508
97ff87c0 1509float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
d446830a
AB
1510 int flags, float_status *status)
1511{
1512 FloatParts pa = float16_unpack_canonical(a, status);
1513 FloatParts pb = float16_unpack_canonical(b, status);
1514 FloatParts pc = float16_unpack_canonical(c, status);
1515 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1516
1517 return float16_round_pack_canonical(pr, status);
1518}
1519
ccf770ba
EC
1520static float32 QEMU_SOFTFLOAT_ATTR
1521soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1522 float_status *status)
d446830a
AB
1523{
1524 FloatParts pa = float32_unpack_canonical(a, status);
1525 FloatParts pb = float32_unpack_canonical(b, status);
1526 FloatParts pc = float32_unpack_canonical(c, status);
1527 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1528
1529 return float32_round_pack_canonical(pr, status);
1530}
1531
ccf770ba
EC
1532static float64 QEMU_SOFTFLOAT_ATTR
1533soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1534 float_status *status)
d446830a
AB
1535{
1536 FloatParts pa = float64_unpack_canonical(a, status);
1537 FloatParts pb = float64_unpack_canonical(b, status);
1538 FloatParts pc = float64_unpack_canonical(c, status);
1539 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1540
1541 return float64_round_pack_canonical(pr, status);
1542}
1543
f6b3b108
EC
/*
 * When true, float32_muladd() and float64_muladd() skip the host
 * fmaf()/fma() path and always use the software implementation.
 * NOTE(review): where this is set is not visible in this part of the file.
 */
static bool force_soft_fma;
1545
ccf770ba
EC
1546float32 QEMU_FLATTEN
1547float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1548{
1549 union_float32 ua, ub, uc, ur;
1550
1551 ua.s = xa;
1552 ub.s = xb;
1553 uc.s = xc;
1554
1555 if (unlikely(!can_use_fpu(s))) {
1556 goto soft;
1557 }
1558 if (unlikely(flags & float_muladd_halve_result)) {
1559 goto soft;
1560 }
1561
1562 float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1563 if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1564 goto soft;
1565 }
f6b3b108
EC
1566
1567 if (unlikely(force_soft_fma)) {
1568 goto soft;
1569 }
1570
ccf770ba
EC
1571 /*
1572 * When (a || b) == 0, there's no need to check for under/over flow,
1573 * since we know the addend is (normal || 0) and the product is 0.
1574 */
1575 if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1576 union_float32 up;
1577 bool prod_sign;
1578
1579 prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1580 prod_sign ^= !!(flags & float_muladd_negate_product);
1581 up.s = float32_set_sign(float32_zero, prod_sign);
1582
1583 if (flags & float_muladd_negate_c) {
1584 uc.h = -uc.h;
1585 }
1586 ur.h = up.h + uc.h;
1587 } else {
896f51fb
KC
1588 union_float32 ua_orig = ua;
1589 union_float32 uc_orig = uc;
1590
ccf770ba
EC
1591 if (flags & float_muladd_negate_product) {
1592 ua.h = -ua.h;
1593 }
1594 if (flags & float_muladd_negate_c) {
1595 uc.h = -uc.h;
1596 }
1597
1598 ur.h = fmaf(ua.h, ub.h, uc.h);
1599
1600 if (unlikely(f32_is_inf(ur))) {
d82f3b2d 1601 float_raise(float_flag_overflow, s);
ccf770ba 1602 } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
896f51fb
KC
1603 ua = ua_orig;
1604 uc = uc_orig;
ccf770ba
EC
1605 goto soft;
1606 }
1607 }
1608 if (flags & float_muladd_negate_result) {
1609 return float32_chs(ur.s);
1610 }
1611 return ur.s;
1612
1613 soft:
1614 return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1615}
1616
1617float64 QEMU_FLATTEN
1618float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1619{
1620 union_float64 ua, ub, uc, ur;
1621
1622 ua.s = xa;
1623 ub.s = xb;
1624 uc.s = xc;
1625
1626 if (unlikely(!can_use_fpu(s))) {
1627 goto soft;
1628 }
1629 if (unlikely(flags & float_muladd_halve_result)) {
1630 goto soft;
1631 }
1632
1633 float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1634 if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1635 goto soft;
1636 }
f6b3b108
EC
1637
1638 if (unlikely(force_soft_fma)) {
1639 goto soft;
1640 }
1641
ccf770ba
EC
1642 /*
1643 * When (a || b) == 0, there's no need to check for under/over flow,
1644 * since we know the addend is (normal || 0) and the product is 0.
1645 */
1646 if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1647 union_float64 up;
1648 bool prod_sign;
1649
1650 prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1651 prod_sign ^= !!(flags & float_muladd_negate_product);
1652 up.s = float64_set_sign(float64_zero, prod_sign);
1653
1654 if (flags & float_muladd_negate_c) {
1655 uc.h = -uc.h;
1656 }
1657 ur.h = up.h + uc.h;
1658 } else {
896f51fb
KC
1659 union_float64 ua_orig = ua;
1660 union_float64 uc_orig = uc;
1661
ccf770ba
EC
1662 if (flags & float_muladd_negate_product) {
1663 ua.h = -ua.h;
1664 }
1665 if (flags & float_muladd_negate_c) {
1666 uc.h = -uc.h;
1667 }
1668
1669 ur.h = fma(ua.h, ub.h, uc.h);
1670
1671 if (unlikely(f64_is_inf(ur))) {
d82f3b2d 1672 float_raise(float_flag_overflow, s);
ccf770ba 1673 } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
896f51fb
KC
1674 ua = ua_orig;
1675 uc = uc_orig;
ccf770ba
EC
1676 goto soft;
1677 }
1678 }
1679 if (flags & float_muladd_negate_result) {
1680 return float64_chs(ur.s);
1681 }
1682 return ur.s;
1683
1684 soft:
1685 return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1686}
1687
8282310d
LZ
1688/*
1689 * Returns the result of multiplying the bfloat16 values `a'
1690 * and `b' then adding 'c', with no intermediate rounding step after the
1691 * multiplication.
1692 */
1693
1694bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1695 int flags, float_status *status)
1696{
1697 FloatParts pa = bfloat16_unpack_canonical(a, status);
1698 FloatParts pb = bfloat16_unpack_canonical(b, status);
1699 FloatParts pc = bfloat16_unpack_canonical(c, status);
1700 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1701
1702 return bfloat16_round_pack_canonical(pr, status);
1703}
1704
cf07323d
AB
1705/*
1706 * Returns the result of dividing the floating-point value `a' by the
1707 * corresponding value `b'. The operation is performed according to
1708 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1709 */
1710
1711static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1712{
1713 bool sign = a.sign ^ b.sign;
1714
1715 if (a.cls == float_class_normal && b.cls == float_class_normal) {
5dfbc9e4 1716 uint64_t n0, n1, q, r;
cf07323d 1717 int exp = a.exp - b.exp;
5dfbc9e4
RH
1718
1719 /*
1720 * We want a 2*N / N-bit division to produce exactly an N-bit
1721 * result, so that we do not lose any precision and so that we
1722 * do not have to renormalize afterward. If A.frac < B.frac,
1723 * then division would produce an (N-1)-bit result; shift A left
1724 * by one to produce the an N-bit result, and decrement the
1725 * exponent to match.
1726 *
1727 * The udiv_qrnnd algorithm that we're using requires normalization,
e99c4373 1728 * i.e. the msb of the denominator must be set, which is already true.
5dfbc9e4 1729 */
cf07323d
AB
1730 if (a.frac < b.frac) {
1731 exp -= 1;
5dfbc9e4 1732 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
e99c4373
RH
1733 } else {
1734 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT, &n1, &n0);
cf07323d 1735 }
e99c4373 1736 q = udiv_qrnnd(&r, n1, n0, b.frac);
5dfbc9e4 1737
e99c4373 1738 /* Set lsb if there is a remainder, to set inexact. */
5dfbc9e4 1739 a.frac = q | (r != 0);
cf07323d
AB
1740 a.sign = sign;
1741 a.exp = exp;
1742 return a;
1743 }
1744 /* handle all the NaN cases */
1745 if (is_nan(a.cls) || is_nan(b.cls)) {
1746 return pick_nan(a, b, s);
1747 }
1748 /* 0/0 or Inf/Inf */
1749 if (a.cls == b.cls
1750 &&
1751 (a.cls == float_class_inf || a.cls == float_class_zero)) {
d82f3b2d 1752 float_raise(float_flag_invalid, s);
f7e598e2 1753 return parts_default_nan(s);
cf07323d 1754 }
9cb4e398
AB
1755 /* Inf / x or 0 / x */
1756 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1757 a.sign = sign;
1758 return a;
1759 }
cf07323d
AB
1760 /* Div 0 => Inf */
1761 if (b.cls == float_class_zero) {
d82f3b2d 1762 float_raise(float_flag_divbyzero, s);
cf07323d
AB
1763 a.cls = float_class_inf;
1764 a.sign = sign;
1765 return a;
1766 }
cf07323d
AB
1767 /* Div by Inf */
1768 if (b.cls == float_class_inf) {
1769 a.cls = float_class_zero;
1770 a.sign = sign;
1771 return a;
1772 }
1773 g_assert_not_reached();
1774}
1775
1776float16 float16_div(float16 a, float16 b, float_status *status)
1777{
1778 FloatParts pa = float16_unpack_canonical(a, status);
1779 FloatParts pb = float16_unpack_canonical(b, status);
1780 FloatParts pr = div_floats(pa, pb, status);
1781
1782 return float16_round_pack_canonical(pr, status);
1783}
1784
4a629561
EC
1785static float32 QEMU_SOFTFLOAT_ATTR
1786soft_f32_div(float32 a, float32 b, float_status *status)
cf07323d
AB
1787{
1788 FloatParts pa = float32_unpack_canonical(a, status);
1789 FloatParts pb = float32_unpack_canonical(b, status);
1790 FloatParts pr = div_floats(pa, pb, status);
1791
1792 return float32_round_pack_canonical(pr, status);
1793}
1794
4a629561
EC
1795static float64 QEMU_SOFTFLOAT_ATTR
1796soft_f64_div(float64 a, float64 b, float_status *status)
cf07323d
AB
1797{
1798 FloatParts pa = float64_unpack_canonical(a, status);
1799 FloatParts pb = float64_unpack_canonical(b, status);
1800 FloatParts pr = div_floats(pa, pb, status);
1801
1802 return float64_round_pack_canonical(pr, status);
1803}
1804
4a629561
EC
/* Host single-precision division. */
static float hard_f32_div(float a, float b)
{
    float quot = a / b;

    return quot;
}
1809
/* Host double-precision division. */
static double hard_f64_div(double a, double b)
{
    double quot = a / b;

    return quot;
}
1814
1815static bool f32_div_pre(union_float32 a, union_float32 b)
1816{
1817 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1818 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1819 fpclassify(b.h) == FP_NORMAL;
1820 }
1821 return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1822}
1823
1824static bool f64_div_pre(union_float64 a, union_float64 b)
1825{
1826 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1827 return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1828 fpclassify(b.h) == FP_NORMAL;
1829 }
1830 return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1831}
1832
1833static bool f32_div_post(union_float32 a, union_float32 b)
1834{
1835 if (QEMU_HARDFLOAT_2F32_USE_FP) {
1836 return fpclassify(a.h) != FP_ZERO;
1837 }
1838 return !float32_is_zero(a.s);
1839}
1840
1841static bool f64_div_post(union_float64 a, union_float64 b)
1842{
1843 if (QEMU_HARDFLOAT_2F64_USE_FP) {
1844 return fpclassify(a.h) != FP_ZERO;
1845 }
1846 return !float64_is_zero(a.s);
1847}
1848
1849float32 QEMU_FLATTEN
1850float32_div(float32 a, float32 b, float_status *s)
1851{
1852 return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
b240c9c4 1853 f32_div_pre, f32_div_post);
4a629561
EC
1854}
1855
1856float64 QEMU_FLATTEN
1857float64_div(float64 a, float64 b, float_status *s)
1858{
1859 return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
b240c9c4 1860 f64_div_pre, f64_div_post);
4a629561
EC
1861}
1862
8282310d
LZ
1863/*
1864 * Returns the result of dividing the bfloat16
1865 * value `a' by the corresponding value `b'.
1866 */
1867
1868bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
1869{
1870 FloatParts pa = bfloat16_unpack_canonical(a, status);
1871 FloatParts pb = bfloat16_unpack_canonical(b, status);
1872 FloatParts pr = div_floats(pa, pb, status);
1873
1874 return bfloat16_round_pack_canonical(pr, status);
1875}
1876
6fed16b2
AB
1877/*
1878 * Float to Float conversions
1879 *
1880 * Returns the result of converting one float format to another. The
1881 * conversion is performed according to the IEC/IEEE Standard for
1882 * Binary Floating-Point Arithmetic.
1883 *
1884 * The float_to_float helper only needs to take care of raising
1885 * invalid exceptions and handling the conversion on NaNs.
1886 */
1887
1888static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1889 float_status *s)
1890{
1891 if (dstf->arm_althp) {
1892 switch (a.cls) {
1893 case float_class_qnan:
1894 case float_class_snan:
1895 /* There is no NaN in the destination format. Raise Invalid
1896 * and return a zero with the sign of the input NaN.
1897 */
d82f3b2d 1898 float_raise(float_flag_invalid, s);
6fed16b2
AB
1899 a.cls = float_class_zero;
1900 a.frac = 0;
1901 a.exp = 0;
1902 break;
1903
1904 case float_class_inf:
1905 /* There is no Inf in the destination format. Raise Invalid
1906 * and return the maximum normal with the correct sign.
1907 */
d82f3b2d 1908 float_raise(float_flag_invalid, s);
6fed16b2
AB
1909 a.cls = float_class_normal;
1910 a.exp = dstf->exp_max;
1911 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1912 break;
1913
1914 default:
1915 break;
1916 }
1917 } else if (is_nan(a.cls)) {
1918 if (is_snan(a.cls)) {
d82f3b2d 1919 float_raise(float_flag_invalid, s);
6fed16b2
AB
1920 a = parts_silence_nan(a, s);
1921 }
1922 if (s->default_nan_mode) {
1923 return parts_default_nan(s);
1924 }
1925 }
1926 return a;
1927}
1928
1929float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1930{
1931 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1932 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1933 FloatParts pr = float_to_float(p, &float32_params, s);
1934 return float32_round_pack_canonical(pr, s);
1935}
1936
1937float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1938{
1939 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1940 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1941 FloatParts pr = float_to_float(p, &float64_params, s);
1942 return float64_round_pack_canonical(pr, s);
1943}
1944
1945float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1946{
1947 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1948 FloatParts p = float32_unpack_canonical(a, s);
1949 FloatParts pr = float_to_float(p, fmt16, s);
1950 return float16a_round_pack_canonical(pr, s, fmt16);
1951}
1952
21381dcf
MK
1953static float64 QEMU_SOFTFLOAT_ATTR
1954soft_float32_to_float64(float32 a, float_status *s)
6fed16b2
AB
1955{
1956 FloatParts p = float32_unpack_canonical(a, s);
1957 FloatParts pr = float_to_float(p, &float64_params, s);
1958 return float64_round_pack_canonical(pr, s);
1959}
1960
21381dcf
MK
1961float64 float32_to_float64(float32 a, float_status *s)
1962{
1963 if (likely(float32_is_normal(a))) {
1964 /* Widening conversion can never produce inexact results. */
1965 union_float32 uf;
1966 union_float64 ud;
1967 uf.s = a;
1968 ud.h = uf.h;
1969 return ud.s;
1970 } else if (float32_is_zero(a)) {
1971 return float64_set_sign(float64_zero, float32_is_neg(a));
1972 } else {
1973 return soft_float32_to_float64(a, s);
1974 }
1975}
1976
6fed16b2
AB
1977float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1978{
1979 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1980 FloatParts p = float64_unpack_canonical(a, s);
1981 FloatParts pr = float_to_float(p, fmt16, s);
1982 return float16a_round_pack_canonical(pr, s, fmt16);
1983}
1984
1985float32 float64_to_float32(float64 a, float_status *s)
1986{
1987 FloatParts p = float64_unpack_canonical(a, s);
1988 FloatParts pr = float_to_float(p, &float32_params, s);
1989 return float32_round_pack_canonical(pr, s);
1990}
1991
34f0c0a9
LZ
1992float32 bfloat16_to_float32(bfloat16 a, float_status *s)
1993{
1994 FloatParts p = bfloat16_unpack_canonical(a, s);
1995 FloatParts pr = float_to_float(p, &float32_params, s);
1996 return float32_round_pack_canonical(pr, s);
1997}
1998
1999float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2000{
2001 FloatParts p = bfloat16_unpack_canonical(a, s);
2002 FloatParts pr = float_to_float(p, &float64_params, s);
2003 return float64_round_pack_canonical(pr, s);
2004}
2005
2006bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2007{
2008 FloatParts p = float32_unpack_canonical(a, s);
2009 FloatParts pr = float_to_float(p, &bfloat16_params, s);
2010 return bfloat16_round_pack_canonical(pr, s);
2011}
2012
2013bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2014{
2015 FloatParts p = float64_unpack_canonical(a, s);
2016 FloatParts pr = float_to_float(p, &bfloat16_params, s);
2017 return bfloat16_round_pack_canonical(pr, s);
2018}
2019
dbe4d53a
AB
2020/*
2021 * Rounds the floating-point value `a' to an integer, and returns the
2022 * result as a floating-point value. The operation is performed
2023 * according to the IEC/IEEE Standard for Binary Floating-Point
2024 * Arithmetic.
2025 */
2026
3dede407 2027static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode,
2f6c74be 2028 int scale, float_status *s)
dbe4d53a 2029{
2f6c74be
RH
2030 switch (a.cls) {
2031 case float_class_qnan:
2032 case float_class_snan:
dbe4d53a 2033 return return_nan(a, s);
dbe4d53a 2034
dbe4d53a
AB
2035 case float_class_zero:
2036 case float_class_inf:
dbe4d53a
AB
2037 /* already "integral" */
2038 break;
2f6c74be 2039
dbe4d53a 2040 case float_class_normal:
2f6c74be
RH
2041 scale = MIN(MAX(scale, -0x10000), 0x10000);
2042 a.exp += scale;
2043
dbe4d53a
AB
2044 if (a.exp >= DECOMPOSED_BINARY_POINT) {
2045 /* already integral */
2046 break;
2047 }
2048 if (a.exp < 0) {
2049 bool one;
2050 /* all fractional */
d82f3b2d 2051 float_raise(float_flag_inexact, s);
2f6c74be 2052 switch (rmode) {
dbe4d53a
AB
2053 case float_round_nearest_even:
2054 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
2055 break;
2056 case float_round_ties_away:
2057 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
2058 break;
2059 case float_round_to_zero:
2060 one = false;
2061 break;
2062 case float_round_up:
2063 one = !a.sign;
2064 break;
2065 case float_round_down:
2066 one = a.sign;
2067 break;
5d64abb3
RH
2068 case float_round_to_odd:
2069 one = true;
2070 break;
dbe4d53a
AB
2071 default:
2072 g_assert_not_reached();
2073 }
2074
2075 if (one) {
2076 a.frac = DECOMPOSED_IMPLICIT_BIT;
2077 a.exp = 0;
2078 } else {
2079 a.cls = float_class_zero;
2080 }
2081 } else {
2082 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
2083 uint64_t frac_lsbm1 = frac_lsb >> 1;
2084 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
2085 uint64_t rnd_mask = rnd_even_mask >> 1;
2086 uint64_t inc;
2087
2f6c74be 2088 switch (rmode) {
dbe4d53a
AB
2089 case float_round_nearest_even:
2090 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
2091 break;
2092 case float_round_ties_away:
2093 inc = frac_lsbm1;
2094 break;
2095 case float_round_to_zero:
2096 inc = 0;
2097 break;
2098 case float_round_up:
2099 inc = a.sign ? 0 : rnd_mask;
2100 break;
2101 case float_round_down:
2102 inc = a.sign ? rnd_mask : 0;
2103 break;
5d64abb3
RH
2104 case float_round_to_odd:
2105 inc = a.frac & frac_lsb ? 0 : rnd_mask;
2106 break;
dbe4d53a
AB
2107 default:
2108 g_assert_not_reached();
2109 }
2110
2111 if (a.frac & rnd_mask) {
d82f3b2d 2112 float_raise(float_flag_inexact, s);
e99c4373 2113 if (uadd64_overflow(a.frac, inc, &a.frac)) {
dbe4d53a 2114 a.frac >>= 1;
e99c4373 2115 a.frac |= DECOMPOSED_IMPLICIT_BIT;
dbe4d53a
AB
2116 a.exp++;
2117 }
e99c4373 2118 a.frac &= ~rnd_mask;
dbe4d53a
AB
2119 }
2120 }
2121 break;
2122 default:
2123 g_assert_not_reached();
2124 }
2125 return a;
2126}
2127
2128float16 float16_round_to_int(float16 a, float_status *s)
2129{
2130 FloatParts pa = float16_unpack_canonical(a, s);
2f6c74be 2131 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2132 return float16_round_pack_canonical(pr, s);
2133}
2134
2135float32 float32_round_to_int(float32 a, float_status *s)
2136{
2137 FloatParts pa = float32_unpack_canonical(a, s);
2f6c74be 2138 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2139 return float32_round_pack_canonical(pr, s);
2140}
2141
2142float64 float64_round_to_int(float64 a, float_status *s)
2143{
2144 FloatParts pa = float64_unpack_canonical(a, s);
2f6c74be 2145 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
dbe4d53a
AB
2146 return float64_round_pack_canonical(pr, s);
2147}
2148
34f0c0a9
LZ
2149/*
2150 * Rounds the bfloat16 value `a' to an integer, and returns the
2151 * result as a bfloat16 value.
2152 */
2153
2154bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2155{
2156 FloatParts pa = bfloat16_unpack_canonical(a, s);
2157 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2158 return bfloat16_round_pack_canonical(pr, s);
2159}
2160
ab52f973
AB
2161/*
2162 * Returns the result of converting the floating-point value `a' to
2163 * the two's complement integer format. The conversion is performed
2164 * according to the IEC/IEEE Standard for Binary Floating-Point
2165 * Arithmetic---which means in particular that the conversion is
2166 * rounded according to the current rounding mode. If `a' is a NaN,
2167 * the largest positive integer is returned. Otherwise, if the
2168 * conversion overflows, the largest integer with the same sign as `a'
2169 * is returned.
2170*/
2171
3dede407
RH
2172static int64_t round_to_int_and_pack(FloatParts in, FloatRoundMode rmode,
2173 int scale, int64_t min, int64_t max,
ab52f973
AB
2174 float_status *s)
2175{
2176 uint64_t r;
2177 int orig_flags = get_float_exception_flags(s);
2f6c74be 2178 FloatParts p = round_to_int(in, rmode, scale, s);
ab52f973
AB
2179
2180 switch (p.cls) {
2181 case float_class_snan:
2182 case float_class_qnan:
801bc563 2183 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2184 return max;
2185 case float_class_inf:
801bc563 2186 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2187 return p.sign ? min : max;
2188 case float_class_zero:
2189 return 0;
2190 case float_class_normal:
e99c4373 2191 if (p.exp <= DECOMPOSED_BINARY_POINT) {
ab52f973 2192 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
ab52f973
AB
2193 } else {
2194 r = UINT64_MAX;
2195 }
2196 if (p.sign) {
33358375 2197 if (r <= -(uint64_t) min) {
ab52f973
AB
2198 return -r;
2199 } else {
2200 s->float_exception_flags = orig_flags | float_flag_invalid;
2201 return min;
2202 }
2203 } else {
33358375 2204 if (r <= max) {
ab52f973
AB
2205 return r;
2206 } else {
2207 s->float_exception_flags = orig_flags | float_flag_invalid;
2208 return max;
2209 }
2210 }
2211 default:
2212 g_assert_not_reached();
2213 }
2214}
2215
0d93d8ec
FC
2216int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2217 float_status *s)
2218{
2219 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2220 rmode, scale, INT8_MIN, INT8_MAX, s);
2221}
2222
3dede407 2223int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2224 float_status *s)
2225{
2226 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2227 rmode, scale, INT16_MIN, INT16_MAX, s);
2228}
2229
3dede407 2230int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2231 float_status *s)
2232{
2233 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2234 rmode, scale, INT32_MIN, INT32_MAX, s);
2235}
2236
3dede407 2237int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2238 float_status *s)
2239{
2240 return round_to_int_and_pack(float16_unpack_canonical(a, s),
2241 rmode, scale, INT64_MIN, INT64_MAX, s);
2242}
2243
3dede407 2244int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2245 float_status *s)
2246{
2247 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2248 rmode, scale, INT16_MIN, INT16_MAX, s);
2249}
2250
3dede407 2251int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2252 float_status *s)
2253{
2254 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2255 rmode, scale, INT32_MIN, INT32_MAX, s);
2256}
2257
3dede407 2258int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2259 float_status *s)
2260{
2261 return round_to_int_and_pack(float32_unpack_canonical(a, s),
2262 rmode, scale, INT64_MIN, INT64_MAX, s);
2263}
2264
3dede407 2265int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2266 float_status *s)
2267{
2268 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2269 rmode, scale, INT16_MIN, INT16_MAX, s);
2270}
2271
3dede407 2272int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2273 float_status *s)
2274{
2275 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2276 rmode, scale, INT32_MIN, INT32_MAX, s);
2277}
2278
3dede407 2279int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2280 float_status *s)
2281{
2282 return round_to_int_and_pack(float64_unpack_canonical(a, s),
2283 rmode, scale, INT64_MIN, INT64_MAX, s);
2284}
2285
0d93d8ec
FC
2286int8_t float16_to_int8(float16 a, float_status *s)
2287{
2288 return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2289}
2290
2f6c74be
RH
2291int16_t float16_to_int16(float16 a, float_status *s)
2292{
2293 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2294}
2295
2296int32_t float16_to_int32(float16 a, float_status *s)
2297{
2298 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2299}
2300
2301int64_t float16_to_int64(float16 a, float_status *s)
2302{
2303 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2304}
2305
2306int16_t float32_to_int16(float32 a, float_status *s)
2307{
2308 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2309}
2310
2311int32_t float32_to_int32(float32 a, float_status *s)
2312{
2313 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2314}
2315
2316int64_t float32_to_int64(float32 a, float_status *s)
2317{
2318 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2319}
2320
2321int16_t float64_to_int16(float64 a, float_status *s)
2322{
2323 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2324}
2325
2326int32_t float64_to_int32(float64 a, float_status *s)
2327{
2328 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2329}
2330
2331int64_t float64_to_int64(float64 a, float_status *s)
2332{
2333 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2334}
2335
2336int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2337{
2338 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2339}
2340
2341int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2342{
2343 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2344}
2345
2346int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2347{
2348 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
ab52f973
AB
2349}
2350
2f6c74be
RH
2351int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2352{
2353 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2354}
ab52f973 2355
2f6c74be
RH
2356int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2357{
2358 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2359}
2360
2361int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2362{
2363 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2364}
2365
2366int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2367{
2368 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2369}
ab52f973 2370
2f6c74be
RH
2371int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2372{
2373 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2374}
ab52f973 2375
2f6c74be
RH
2376int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2377{
2378 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2379}
ab52f973 2380
34f0c0a9
LZ
2381/*
2382 * Returns the result of converting the floating-point value `a' to
2383 * the two's complement integer format.
2384 */
2385
2386int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2387 float_status *s)
2388{
2389 return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
2390 rmode, scale, INT16_MIN, INT16_MAX, s);
2391}
2392
2393int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2394 float_status *s)
2395{
2396 return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
2397 rmode, scale, INT32_MIN, INT32_MAX, s);
2398}
2399
2400int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2401 float_status *s)
2402{
2403 return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
2404 rmode, scale, INT64_MIN, INT64_MAX, s);
2405}
2406
2407int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2408{
2409 return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2410}
2411
2412int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2413{
2414 return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2415}
2416
2417int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2418{
2419 return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2420}
2421
2422int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2423{
2424 return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2425}
2426
2427int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2428{
2429 return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2430}
2431
2432int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2433{
2434 return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2435}
2436
ab52f973
AB
2437/*
2438 * Returns the result of converting the floating-point value `a' to
2439 * the unsigned integer format. The conversion is performed according
2440 * to the IEC/IEEE Standard for Binary Floating-Point
2441 * Arithmetic---which means in particular that the conversion is
2442 * rounded according to the current rounding mode. If `a' is a NaN,
2443 * the largest unsigned integer is returned. Otherwise, if the
2444 * conversion overflows, the largest unsigned integer is returned. If
2445 * the 'a' is negative, the result is rounded and zero is returned;
2446 * values that do not round to zero will raise the inexact exception
2447 * flag.
2448 */
2449
3dede407
RH
2450static uint64_t round_to_uint_and_pack(FloatParts in, FloatRoundMode rmode,
2451 int scale, uint64_t max,
2452 float_status *s)
ab52f973
AB
2453{
2454 int orig_flags = get_float_exception_flags(s);
2f6c74be
RH
2455 FloatParts p = round_to_int(in, rmode, scale, s);
2456 uint64_t r;
ab52f973
AB
2457
2458 switch (p.cls) {
2459 case float_class_snan:
2460 case float_class_qnan:
2461 s->float_exception_flags = orig_flags | float_flag_invalid;
2462 return max;
2463 case float_class_inf:
801bc563 2464 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
2465 return p.sign ? 0 : max;
2466 case float_class_zero:
2467 return 0;
2468 case float_class_normal:
ab52f973
AB
2469 if (p.sign) {
2470 s->float_exception_flags = orig_flags | float_flag_invalid;
2471 return 0;
2472 }
2473
e99c4373 2474 if (p.exp <= DECOMPOSED_BINARY_POINT) {
ab52f973 2475 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
ab52f973
AB
2476 } else {
2477 s->float_exception_flags = orig_flags | float_flag_invalid;
2478 return max;
2479 }
2480
2481 /* For uint64 this will never trip, but if p.exp is too large
2482 * to shift a decomposed fraction we shall have exited via the
2483 * 3rd leg above.
2484 */
2485 if (r > max) {
2486 s->float_exception_flags = orig_flags | float_flag_invalid;
2487 return max;
ab52f973 2488 }
2f6c74be 2489 return r;
ab52f973
AB
2490 default:
2491 g_assert_not_reached();
2492 }
2493}
2494
0d93d8ec
FC
2495uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2496 float_status *s)
2497{
2498 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2499 rmode, scale, UINT8_MAX, s);
2500}
2501
3dede407 2502uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2503 float_status *s)
2504{
2505 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2506 rmode, scale, UINT16_MAX, s);
2507}
2508
3dede407 2509uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2510 float_status *s)
2511{
2512 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2513 rmode, scale, UINT32_MAX, s);
2514}
2515
3dede407 2516uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2517 float_status *s)
2518{
2519 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2520 rmode, scale, UINT64_MAX, s);
2521}
2522
3dede407 2523uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2524 float_status *s)
2525{
2526 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2527 rmode, scale, UINT16_MAX, s);
2528}
2529
3dede407 2530uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2531 float_status *s)
2532{
2533 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2534 rmode, scale, UINT32_MAX, s);
2535}
2536
3dede407 2537uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2538 float_status *s)
2539{
2540 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2541 rmode, scale, UINT64_MAX, s);
2542}
2543
3dede407 2544uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2545 float_status *s)
2546{
2547 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2548 rmode, scale, UINT16_MAX, s);
2549}
2550
3dede407 2551uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2552 float_status *s)
2553{
2554 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2555 rmode, scale, UINT32_MAX, s);
2556}
2557
3dede407 2558uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2f6c74be
RH
2559 float_status *s)
2560{
2561 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2562 rmode, scale, UINT64_MAX, s);
2563}
2564
0d93d8ec
FC
2565uint8_t float16_to_uint8(float16 a, float_status *s)
2566{
2567 return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2568}
2569
2f6c74be
RH
2570uint16_t float16_to_uint16(float16 a, float_status *s)
2571{
2572 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2573}
2574
2575uint32_t float16_to_uint32(float16 a, float_status *s)
2576{
2577 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2578}
2579
2580uint64_t float16_to_uint64(float16 a, float_status *s)
2581{
2582 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2583}
2584
2585uint16_t float32_to_uint16(float32 a, float_status *s)
2586{
2587 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2588}
2589
2590uint32_t float32_to_uint32(float32 a, float_status *s)
2591{
2592 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2593}
2594
2595uint64_t float32_to_uint64(float32 a, float_status *s)
2596{
2597 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2598}
2599
2600uint16_t float64_to_uint16(float64 a, float_status *s)
2601{
2602 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2603}
2604
2605uint32_t float64_to_uint32(float64 a, float_status *s)
2606{
2607 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2608}
2609
2610uint64_t float64_to_uint64(float64 a, float_status *s)
2611{
2612 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2613}
2614
2615uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2616{
2617 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2618}
2619
2620uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2621{
2622 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2623}
2624
2625uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2626{
2627 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2628}
2629
2630uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2631{
2632 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2633}
2634
2635uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2636{
2637 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2638}
2639
2640uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2641{
2642 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2643}
2644
2645uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2646{
2647 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2648}
2649
2650uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2651{
2652 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2653}
2654
2655uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2656{
2657 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2658}
ab52f973 2659
34f0c0a9
LZ
2660/*
2661 * Returns the result of converting the bfloat16 value `a' to
2662 * the unsigned integer format.
2663 */
2664
2665uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2666 int scale, float_status *s)
2667{
2668 return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
2669 rmode, scale, UINT16_MAX, s);
2670}
2671
2672uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2673 int scale, float_status *s)
2674{
2675 return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
2676 rmode, scale, UINT32_MAX, s);
2677}
2678
2679uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
2680 int scale, float_status *s)
2681{
2682 return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
2683 rmode, scale, UINT64_MAX, s);
2684}
2685
2686uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
2687{
2688 return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2689}
2690
2691uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
2692{
2693 return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2694}
2695
2696uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
2697{
2698 return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2699}
2700
2701uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
2702{
2703 return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2704}
2705
2706uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
2707{
2708 return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2709}
2710
2711uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
2712{
2713 return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2714}
2715
c02e1fb8
AB
2716/*
2717 * Integer to float conversions
2718 *
2719 * Returns the result of converting the two's complement integer `a'
2720 * to the floating-point format. The conversion is performed according
2721 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2722 */
2723
2abdfe24 2724static FloatParts int_to_float(int64_t a, int scale, float_status *status)
c02e1fb8 2725{
2abdfe24
RH
2726 FloatParts r = { .sign = false };
2727
c02e1fb8
AB
2728 if (a == 0) {
2729 r.cls = float_class_zero;
c02e1fb8 2730 } else {
2abdfe24
RH
2731 uint64_t f = a;
2732 int shift;
2733
2734 r.cls = float_class_normal;
c02e1fb8 2735 if (a < 0) {
2abdfe24 2736 f = -f;
c02e1fb8 2737 r.sign = true;
c02e1fb8 2738 }
e99c4373 2739 shift = clz64(f);
2abdfe24
RH
2740 scale = MIN(MAX(scale, -0x10000), 0x10000);
2741
2742 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
e99c4373 2743 r.frac = f << shift;
c02e1fb8
AB
2744 }
2745
2746 return r;
2747}
2748
2abdfe24 2749float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2750{
2abdfe24 2751 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2752 return float16_round_pack_canonical(pa, status);
2753}
2754
2abdfe24
RH
2755float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2756{
2757 return int64_to_float16_scalbn(a, scale, status);
2758}
2759
2760float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2761{
2762 return int64_to_float16_scalbn(a, scale, status);
2763}
2764
2765float16 int64_to_float16(int64_t a, float_status *status)
2766{
2767 return int64_to_float16_scalbn(a, 0, status);
2768}
2769
c02e1fb8
AB
2770float16 int32_to_float16(int32_t a, float_status *status)
2771{
2abdfe24 2772 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2773}
2774
2775float16 int16_to_float16(int16_t a, float_status *status)
2776{
2abdfe24 2777 return int64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2778}
2779
0d93d8ec
FC
2780float16 int8_to_float16(int8_t a, float_status *status)
2781{
2782 return int64_to_float16_scalbn(a, 0, status);
2783}
2784
2abdfe24 2785float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2786{
2abdfe24 2787 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2788 return float32_round_pack_canonical(pa, status);
2789}
2790
2abdfe24
RH
2791float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
2792{
2793 return int64_to_float32_scalbn(a, scale, status);
2794}
2795
2796float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
2797{
2798 return int64_to_float32_scalbn(a, scale, status);
2799}
2800
2801float32 int64_to_float32(int64_t a, float_status *status)
2802{
2803 return int64_to_float32_scalbn(a, 0, status);
2804}
2805
c02e1fb8
AB
2806float32 int32_to_float32(int32_t a, float_status *status)
2807{
2abdfe24 2808 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2809}
2810
2811float32 int16_to_float32(int16_t a, float_status *status)
2812{
2abdfe24 2813 return int64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2814}
2815
2abdfe24 2816float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
c02e1fb8 2817{
2abdfe24 2818 FloatParts pa = int_to_float(a, scale, status);
c02e1fb8
AB
2819 return float64_round_pack_canonical(pa, status);
2820}
2821
2abdfe24
RH
2822float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
2823{
2824 return int64_to_float64_scalbn(a, scale, status);
2825}
2826
2827float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
2828{
2829 return int64_to_float64_scalbn(a, scale, status);
2830}
2831
2832float64 int64_to_float64(int64_t a, float_status *status)
2833{
2834 return int64_to_float64_scalbn(a, 0, status);
2835}
2836
c02e1fb8
AB
2837float64 int32_to_float64(int32_t a, float_status *status)
2838{
2abdfe24 2839 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2840}
2841
2842float64 int16_to_float64(int16_t a, float_status *status)
2843{
2abdfe24 2844 return int64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
2845}
2846
34f0c0a9
LZ
2847/*
2848 * Returns the result of converting the two's complement integer `a'
2849 * to the bfloat16 format.
2850 */
2851
2852bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
2853{
2854 FloatParts pa = int_to_float(a, scale, status);
2855 return bfloat16_round_pack_canonical(pa, status);
2856}
2857
2858bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
2859{
2860 return int64_to_bfloat16_scalbn(a, scale, status);
2861}
2862
2863bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
2864{
2865 return int64_to_bfloat16_scalbn(a, scale, status);
2866}
2867
2868bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
2869{
2870 return int64_to_bfloat16_scalbn(a, 0, status);
2871}
2872
2873bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
2874{
2875 return int64_to_bfloat16_scalbn(a, 0, status);
2876}
2877
2878bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
2879{
2880 return int64_to_bfloat16_scalbn(a, 0, status);
2881}
c02e1fb8
AB
2882
2883/*
2884 * Unsigned Integer to float conversions
2885 *
2886 * Returns the result of converting the unsigned integer `a' to the
2887 * floating-point format. The conversion is performed according to the
2888 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2889 */
2890
2abdfe24 2891static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
c02e1fb8 2892{
2abdfe24 2893 FloatParts r = { .sign = false };
e99c4373 2894 int shift;
c02e1fb8
AB
2895
2896 if (a == 0) {
2897 r.cls = float_class_zero;
2898 } else {
2abdfe24 2899 scale = MIN(MAX(scale, -0x10000), 0x10000);
e99c4373 2900 shift = clz64(a);
c02e1fb8 2901 r.cls = float_class_normal;
e99c4373
RH
2902 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2903 r.frac = a << shift;
c02e1fb8
AB
2904 }
2905
2906 return r;
2907}
2908
2abdfe24 2909float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2910{
2abdfe24 2911 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2912 return float16_round_pack_canonical(pa, status);
2913}
2914
2abdfe24
RH
2915float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
2916{
2917 return uint64_to_float16_scalbn(a, scale, status);
2918}
2919
2920float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
2921{
2922 return uint64_to_float16_scalbn(a, scale, status);
2923}
2924
2925float16 uint64_to_float16(uint64_t a, float_status *status)
2926{
2927 return uint64_to_float16_scalbn(a, 0, status);
2928}
2929
c02e1fb8
AB
2930float16 uint32_to_float16(uint32_t a, float_status *status)
2931{
2abdfe24 2932 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2933}
2934
2935float16 uint16_to_float16(uint16_t a, float_status *status)
2936{
2abdfe24 2937 return uint64_to_float16_scalbn(a, 0, status);
c02e1fb8
AB
2938}
2939
0d93d8ec
FC
2940float16 uint8_to_float16(uint8_t a, float_status *status)
2941{
2942 return uint64_to_float16_scalbn(a, 0, status);
2943}
2944
2abdfe24 2945float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2946{
2abdfe24 2947 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2948 return float32_round_pack_canonical(pa, status);
2949}
2950
2abdfe24
RH
2951float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
2952{
2953 return uint64_to_float32_scalbn(a, scale, status);
2954}
2955
2956float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
2957{
2958 return uint64_to_float32_scalbn(a, scale, status);
2959}
2960
2961float32 uint64_to_float32(uint64_t a, float_status *status)
2962{
2963 return uint64_to_float32_scalbn(a, 0, status);
2964}
2965
c02e1fb8
AB
2966float32 uint32_to_float32(uint32_t a, float_status *status)
2967{
2abdfe24 2968 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2969}
2970
2971float32 uint16_to_float32(uint16_t a, float_status *status)
2972{
2abdfe24 2973 return uint64_to_float32_scalbn(a, 0, status);
c02e1fb8
AB
2974}
2975
2abdfe24 2976float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
c02e1fb8 2977{
2abdfe24 2978 FloatParts pa = uint_to_float(a, scale, status);
c02e1fb8
AB
2979 return float64_round_pack_canonical(pa, status);
2980}
2981
2abdfe24
RH
2982float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
2983{
2984 return uint64_to_float64_scalbn(a, scale, status);
2985}
2986
2987float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
2988{
2989 return uint64_to_float64_scalbn(a, scale, status);
2990}
2991
2992float64 uint64_to_float64(uint64_t a, float_status *status)
2993{
2994 return uint64_to_float64_scalbn(a, 0, status);
2995}
2996
c02e1fb8
AB
2997float64 uint32_to_float64(uint32_t a, float_status *status)
2998{
2abdfe24 2999 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3000}
3001
3002float64 uint16_to_float64(uint16_t a, float_status *status)
3003{
2abdfe24 3004 return uint64_to_float64_scalbn(a, 0, status);
c02e1fb8
AB
3005}
3006
34f0c0a9
LZ
3007/*
3008 * Returns the result of converting the unsigned integer `a' to the
3009 * bfloat16 format.
3010 */
3011
3012bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3013{
3014 FloatParts pa = uint_to_float(a, scale, status);
3015 return bfloat16_round_pack_canonical(pa, status);
3016}
3017
3018bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3019{
3020 return uint64_to_bfloat16_scalbn(a, scale, status);
3021}
3022
3023bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3024{
3025 return uint64_to_bfloat16_scalbn(a, scale, status);
3026}
3027
3028bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3029{
3030 return uint64_to_bfloat16_scalbn(a, 0, status);
3031}
3032
3033bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3034{
3035 return uint64_to_bfloat16_scalbn(a, 0, status);
3036}
3037
3038bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3039{
3040 return uint64_to_bfloat16_scalbn(a, 0, status);
3041}
3042
89360067
AB
3043/* Float Min/Max */
3044/* min() and max() functions. These can't be implemented as
3045 * 'compare and pick one input' because that would mishandle
3046 * NaNs and +0 vs -0.
3047 *
3048 * minnum() and maxnum() functions. These are similar to the min()
3049 * and max() functions but if one of the arguments is a QNaN and
3050 * the other is numerical then the numerical argument is returned.
3051 * SNaNs will get quietened before being returned.
3052 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3053 * and maxNum() operations. min() and max() are the typical min/max
3054 * semantics provided by many CPUs which predate that specification.
3055 *
3056 * minnummag() and maxnummag() functions correspond to minNumMag()
3057 * and maxNumMag() from IEEE 754-2008.
3058 */
3059static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
3060 bool ieee, bool ismag, float_status *s)
3061{
3062 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
3063 if (ieee) {
3064 /* Takes two floating-point values `a' and `b', one of
3065 * which is a NaN, and returns the appropriate NaN
3066 * result. If either `a' or `b' is a signaling NaN,
3067 * the invalid exception is raised.
3068 */
3069 if (is_snan(a.cls) || is_snan(b.cls)) {
3070 return pick_nan(a, b, s);
3071 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
3072 return b;
3073 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
3074 return a;
3075 }
3076 }
3077 return pick_nan(a, b, s);
3078 } else {
3079 int a_exp, b_exp;
89360067
AB
3080
3081 switch (a.cls) {
3082 case float_class_normal:
3083 a_exp = a.exp;
3084 break;
3085 case float_class_inf:
3086 a_exp = INT_MAX;
3087 break;
3088 case float_class_zero:
3089 a_exp = INT_MIN;
3090 break;
3091 default:
3092 g_assert_not_reached();
3093 break;
3094 }
3095 switch (b.cls) {
3096 case float_class_normal:
3097 b_exp = b.exp;
3098 break;
3099 case float_class_inf:
3100 b_exp = INT_MAX;
3101 break;
3102 case float_class_zero:
3103 b_exp = INT_MIN;
3104 break;
3105 default:
3106 g_assert_not_reached();
3107 break;
3108 }
3109
6245327a
EC
3110 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
3111 bool a_less = a_exp < b_exp;
3112 if (a_exp == b_exp) {
3113 a_less = a.frac < b.frac;
3114 }
3115 return a_less ^ ismin ? b : a;
89360067
AB
3116 }
3117
6245327a 3118 if (a.sign == b.sign) {
89360067
AB
3119 bool a_less = a_exp < b_exp;
3120 if (a_exp == b_exp) {
3121 a_less = a.frac < b.frac;
3122 }
6245327a 3123 return a.sign ^ a_less ^ ismin ? b : a;
89360067 3124 } else {
6245327a 3125 return a.sign ^ ismin ? b : a;
89360067
AB
3126 }
3127 }
3128}
3129
3130#define MINMAX(sz, name, ismin, isiee, ismag) \
3131float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
3132 float_status *s) \
3133{ \
3134 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
3135 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
3136 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
3137 \
3138 return float ## sz ## _round_pack_canonical(pr, s); \
3139}
3140
3141MINMAX(16, min, true, false, false)
3142MINMAX(16, minnum, true, true, false)
3143MINMAX(16, minnummag, true, true, true)
3144MINMAX(16, max, false, false, false)
3145MINMAX(16, maxnum, false, true, false)
3146MINMAX(16, maxnummag, false, true, true)
3147
3148MINMAX(32, min, true, false, false)
3149MINMAX(32, minnum, true, true, false)
3150MINMAX(32, minnummag, true, true, true)
3151MINMAX(32, max, false, false, false)
3152MINMAX(32, maxnum, false, true, false)
3153MINMAX(32, maxnummag, false, true, true)
3154
3155MINMAX(64, min, true, false, false)
3156MINMAX(64, minnum, true, true, false)
3157MINMAX(64, minnummag, true, true, true)
3158MINMAX(64, max, false, false, false)
3159MINMAX(64, maxnum, false, true, false)
3160MINMAX(64, maxnummag, false, true, true)
3161
3162#undef MINMAX
3163
8282310d
LZ
3164#define BF16_MINMAX(name, ismin, isiee, ismag) \
3165bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s) \
3166{ \
3167 FloatParts pa = bfloat16_unpack_canonical(a, s); \
3168 FloatParts pb = bfloat16_unpack_canonical(b, s); \
3169 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
3170 \
3171 return bfloat16_round_pack_canonical(pr, s); \
3172}
3173
3174BF16_MINMAX(min, true, false, false)
3175BF16_MINMAX(minnum, true, true, false)
3176BF16_MINMAX(minnummag, true, true, true)
3177BF16_MINMAX(max, false, false, false)
3178BF16_MINMAX(maxnum, false, true, false)
3179BF16_MINMAX(maxnummag, false, true, true)
3180
3181#undef BF16_MINMAX
3182
0c4c9092 3183/* Floating point compare */
71bfd65c
RH
3184static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet,
3185 float_status *s)
0c4c9092
AB
3186{
3187 if (is_nan(a.cls) || is_nan(b.cls)) {
3188 if (!is_quiet ||
3189 a.cls == float_class_snan ||
3190 b.cls == float_class_snan) {
d82f3b2d 3191 float_raise(float_flag_invalid, s);
0c4c9092
AB
3192 }
3193 return float_relation_unordered;
3194 }
3195
3196 if (a.cls == float_class_zero) {
3197 if (b.cls == float_class_zero) {
3198 return float_relation_equal;
3199 }
3200 return b.sign ? float_relation_greater : float_relation_less;
3201 } else if (b.cls == float_class_zero) {
3202 return a.sign ? float_relation_less : float_relation_greater;
3203 }
3204
3205 /* The only really important thing about infinity is its sign. If
3206 * both are infinities the sign marks the smallest of the two.
3207 */
3208 if (a.cls == float_class_inf) {
3209 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3210 return float_relation_equal;
3211 }
3212 return a.sign ? float_relation_less : float_relation_greater;
3213 } else if (b.cls == float_class_inf) {
3214 return b.sign ? float_relation_greater : float_relation_less;
3215 }
3216
3217 if (a.sign != b.sign) {
3218 return a.sign ? float_relation_less : float_relation_greater;
3219 }
3220
3221 if (a.exp == b.exp) {
3222 if (a.frac == b.frac) {
3223 return float_relation_equal;
3224 }
3225 if (a.sign) {
3226 return a.frac > b.frac ?
3227 float_relation_less : float_relation_greater;
3228 } else {
3229 return a.frac > b.frac ?
3230 float_relation_greater : float_relation_less;
3231 }
3232 } else {
3233 if (a.sign) {
3234 return a.exp > b.exp ? float_relation_less : float_relation_greater;
3235 } else {
3236 return a.exp > b.exp ? float_relation_greater : float_relation_less;
3237 }
3238 }
3239}
3240
d9fe9db9
EC
3241#define COMPARE(name, attr, sz) \
3242static int attr \
3243name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
0c4c9092
AB
3244{ \
3245 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
3246 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
d9fe9db9 3247 return compare_floats(pa, pb, is_quiet, s); \
0c4c9092
AB
3248}
3249
d9fe9db9
EC
3250COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
3251COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
3252COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
0c4c9092
AB
3253
3254#undef COMPARE
3255
71bfd65c 3256FloatRelation float16_compare(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3257{
3258 return soft_f16_compare(a, b, false, s);
3259}
3260
71bfd65c 3261FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
d9fe9db9
EC
3262{
3263 return soft_f16_compare(a, b, true, s);
3264}
3265
71bfd65c 3266static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3267f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
3268{
3269 union_float32 ua, ub;
3270
3271 ua.s = xa;
3272 ub.s = xb;
3273
3274 if (QEMU_NO_HARDFLOAT) {
3275 goto soft;
3276 }
3277
3278 float32_input_flush2(&ua.s, &ub.s, s);
3279 if (isgreaterequal(ua.h, ub.h)) {
3280 if (isgreater(ua.h, ub.h)) {
3281 return float_relation_greater;
3282 }
3283 return float_relation_equal;
3284 }
3285 if (likely(isless(ua.h, ub.h))) {
3286 return float_relation_less;
3287 }
3288 /* The only condition remaining is unordered.
3289 * Fall through to set flags.
3290 */
3291 soft:
3292 return soft_f32_compare(ua.s, ub.s, is_quiet, s);
3293}
3294
71bfd65c 3295FloatRelation float32_compare(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3296{
3297 return f32_compare(a, b, false, s);
3298}
3299
71bfd65c 3300FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
d9fe9db9
EC
3301{
3302 return f32_compare(a, b, true, s);
3303}
3304
71bfd65c 3305static FloatRelation QEMU_FLATTEN
d9fe9db9
EC
3306f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3307{
3308 union_float64 ua, ub;
3309
3310 ua.s = xa;
3311 ub.s = xb;
3312
3313 if (QEMU_NO_HARDFLOAT) {
3314 goto soft;
3315 }
3316
3317 float64_input_flush2(&ua.s, &ub.s, s);
3318 if (isgreaterequal(ua.h, ub.h)) {
3319 if (isgreater(ua.h, ub.h)) {
3320 return float_relation_greater;
3321 }
3322 return float_relation_equal;
3323 }
3324 if (likely(isless(ua.h, ub.h))) {
3325 return float_relation_less;
3326 }
3327 /* The only condition remaining is unordered.
3328 * Fall through to set flags.
3329 */
3330 soft:
3331 return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3332}
3333
71bfd65c 3334FloatRelation float64_compare(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3335{
3336 return f64_compare(a, b, false, s);
3337}
3338
71bfd65c 3339FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
d9fe9db9
EC
3340{
3341 return f64_compare(a, b, true, s);
3342}
3343
8282310d
LZ
3344static FloatRelation QEMU_FLATTEN
3345soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
3346{
3347 FloatParts pa = bfloat16_unpack_canonical(a, s);
3348 FloatParts pb = bfloat16_unpack_canonical(b, s);
3349 return compare_floats(pa, pb, is_quiet, s);
3350}
3351
3352FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3353{
3354 return soft_bf16_compare(a, b, false, s);
3355}
3356
3357FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3358{
3359 return soft_bf16_compare(a, b, true, s);
3360}
3361
0bfc9f19
AB
3362/* Multiply A by 2 raised to the power N. */
3363static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
3364{
3365 if (unlikely(is_nan(a.cls))) {
3366 return return_nan(a, s);
3367 }
3368 if (a.cls == float_class_normal) {
ce8d4082
RH
3369 /* The largest float type (even though not supported by FloatParts)
3370 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
3371 * still allows rounding to infinity, without allowing overflow
3372 * within the int32_t that backs FloatParts.exp.
3373 */
3374 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
3375 a.exp += n;
3376 }
3377 return a;
3378}
3379
3380float16 float16_scalbn(float16 a, int n, float_status *status)
3381{
3382 FloatParts pa = float16_unpack_canonical(a, status);
3383 FloatParts pr = scalbn_decomposed(pa, n, status);
3384 return float16_round_pack_canonical(pr, status);
3385}
3386
3387float32 float32_scalbn(float32 a, int n, float_status *status)
3388{
3389 FloatParts pa = float32_unpack_canonical(a, status);
3390 FloatParts pr = scalbn_decomposed(pa, n, status);
3391 return float32_round_pack_canonical(pr, status);
3392}
3393
3394float64 float64_scalbn(float64 a, int n, float_status *status)
3395{
3396 FloatParts pa = float64_unpack_canonical(a, status);
3397 FloatParts pr = scalbn_decomposed(pa, n, status);
3398 return float64_round_pack_canonical(pr, status);
3399}
3400
8282310d
LZ
3401bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3402{
3403 FloatParts pa = bfloat16_unpack_canonical(a, status);
3404 FloatParts pr = scalbn_decomposed(pa, n, status);
3405 return bfloat16_round_pack_canonical(pr, status);
3406}
3407
c13bb2da
AB
3408/*
3409 * Square Root
3410 *
3411 * The old softfloat code did an approximation step before zeroing in
3412 * on the final result. However, for simplicity we just compute the
3413 * square root by iterating down from the implicit bit to enough extra
3414 * bits to ensure we get a correctly rounded result.
3415 *
3416 * This does mean however the calculation is slower than before,
3417 * especially for 64 bit floats.
3418 */
3419
3420static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
3421{
3422 uint64_t a_frac, r_frac, s_frac;
3423 int bit, last_bit;
3424
3425 if (is_nan(a.cls)) {
3426 return return_nan(a, s);
3427 }
3428 if (a.cls == float_class_zero) {
3429 return a; /* sqrt(+-0) = +-0 */
3430 }
3431 if (a.sign) {
d82f3b2d 3432 float_raise(float_flag_invalid, s);
f7e598e2 3433 return parts_default_nan(s);
c13bb2da
AB
3434 }
3435 if (a.cls == float_class_inf) {
3436 return a; /* sqrt(+inf) = +inf */
3437 }
3438
3439 assert(a.cls == float_class_normal);
3440
3441 /* We need two overflow bits at the top. Adding room for that is a
3442 * right shift. If the exponent is odd, we can discard the low bit
3443 * by multiplying the fraction by 2; that's a left shift. Combine
e99c4373 3444 * those and we shift right by 1 if the exponent is odd, otherwise 2.
c13bb2da 3445 */
e99c4373 3446 a_frac = a.frac >> (2 - (a.exp & 1));
c13bb2da
AB
3447 a.exp >>= 1;
3448
3449 /* Bit-by-bit computation of sqrt. */
3450 r_frac = 0;
3451 s_frac = 0;
3452
3453 /* Iterate from implicit bit down to the 3 extra bits to compute a
e99c4373
RH
3454 * properly rounded result. Remember we've inserted two more bits
3455 * at the top, so these positions are two less.
c13bb2da 3456 */
e99c4373 3457 bit = DECOMPOSED_BINARY_POINT - 2;
c13bb2da
AB
3458 last_bit = MAX(p->frac_shift - 4, 0);
3459 do {
3460 uint64_t q = 1ULL << bit;
3461 uint64_t t_frac = s_frac + q;
3462 if (t_frac <= a_frac) {
3463 s_frac = t_frac + q;
3464 a_frac -= t_frac;
3465 r_frac += q;
3466 }
3467 a_frac <<= 1;
3468 } while (--bit >= last_bit);
3469
3470 /* Undo the right shift done above. If there is any remaining
3471 * fraction, the result is inexact. Set the sticky bit.
3472 */
e99c4373 3473 a.frac = (r_frac << 2) + (a_frac != 0);
c13bb2da
AB
3474
3475 return a;
3476}
3477
97ff87c0 3478float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
c13bb2da
AB
3479{
3480 FloatParts pa = float16_unpack_canonical(a, status);
3481 FloatParts pr = sqrt_float(pa, status, &float16_params);
3482 return float16_round_pack_canonical(pr, status);
3483}
3484
f131bae8
EC
3485static float32 QEMU_SOFTFLOAT_ATTR
3486soft_f32_sqrt(float32 a, float_status *status)
c13bb2da
AB
3487{
3488 FloatParts pa = float32_unpack_canonical(a, status);
3489 FloatParts pr = sqrt_float(pa, status, &float32_params);
3490 return float32_round_pack_canonical(pr, status);
3491}
3492
f131bae8
EC
3493static float64 QEMU_SOFTFLOAT_ATTR
3494soft_f64_sqrt(float64 a, float_status *status)
c13bb2da
AB
3495{
3496 FloatParts pa = float64_unpack_canonical(a, status);
3497 FloatParts pr = sqrt_float(pa, status, &float64_params);
3498 return float64_round_pack_canonical(pr, status);
3499}
3500
f131bae8
EC
3501float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3502{
3503 union_float32 ua, ur;
3504
3505 ua.s = xa;
3506 if (unlikely(!can_use_fpu(s))) {
3507 goto soft;
3508 }
3509
3510 float32_input_flush1(&ua.s, s);
3511 if (QEMU_HARDFLOAT_1F32_USE_FP) {
3512 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3513 fpclassify(ua.h) == FP_ZERO) ||
3514 signbit(ua.h))) {
3515 goto soft;
3516 }
3517 } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3518 float32_is_neg(ua.s))) {
3519 goto soft;
3520 }
3521 ur.h = sqrtf(ua.h);
3522 return ur.s;
3523
3524 soft:
3525 return soft_f32_sqrt(ua.s, s);
3526}
3527
3528float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3529{
3530 union_float64 ua, ur;
3531
3532 ua.s = xa;
3533 if (unlikely(!can_use_fpu(s))) {
3534 goto soft;
3535 }
3536
3537 float64_input_flush1(&ua.s, s);
3538 if (QEMU_HARDFLOAT_1F64_USE_FP) {
3539 if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3540 fpclassify(ua.h) == FP_ZERO) ||
3541 signbit(ua.h))) {
3542 goto soft;
3543 }
3544 } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3545 float64_is_neg(ua.s))) {
3546 goto soft;
3547 }
3548 ur.h = sqrt(ua.h);
3549 return ur.s;
3550
3551 soft:
3552 return soft_f64_sqrt(ua.s, s);
3553}
3554
8282310d
LZ
3555bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3556{
3557 FloatParts pa = bfloat16_unpack_canonical(a, status);
3558 FloatParts pr = sqrt_float(pa, status, &bfloat16_params);
3559 return bfloat16_round_pack_canonical(pr, status);
3560}
3561
0218a16e
RH
3562/*----------------------------------------------------------------------------
3563| The pattern for a default generated NaN.
3564*----------------------------------------------------------------------------*/
3565
3566float16 float16_default_nan(float_status *status)
3567{
3568 FloatParts p = parts_default_nan(status);
3569 p.frac >>= float16_params.frac_shift;
3570 return float16_pack_raw(p);
3571}
3572
3573float32 float32_default_nan(float_status *status)
3574{
3575 FloatParts p = parts_default_nan(status);
3576 p.frac >>= float32_params.frac_shift;
3577 return float32_pack_raw(p);
3578}
3579
3580float64 float64_default_nan(float_status *status)
3581{
3582 FloatParts p = parts_default_nan(status);
3583 p.frac >>= float64_params.frac_shift;
3584 return float64_pack_raw(p);
3585}
3586
3587float128 float128_default_nan(float_status *status)
3588{
3589 FloatParts p = parts_default_nan(status);
3590 float128 r;
3591
3592 /* Extrapolate from the choices made by parts_default_nan to fill
3593 * in the quad-floating format. If the low bit is set, assume we
3594 * want to set all non-snan bits.
3595 */
3596 r.low = -(p.frac & 1);
3597 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
e9321124 3598 r.high |= UINT64_C(0x7FFF000000000000);
0218a16e
RH
3599 r.high |= (uint64_t)p.sign << 63;
3600
3601 return r;
3602}
c13bb2da 3603
8282310d
LZ
3604bfloat16 bfloat16_default_nan(float_status *status)
3605{
3606 FloatParts p = parts_default_nan(status);
3607 p.frac >>= bfloat16_params.frac_shift;
3608 return bfloat16_pack_raw(p);
3609}
3610
158142c2 3611/*----------------------------------------------------------------------------
377ed926
RH
3612| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3613*----------------------------------------------------------------------------*/
3614
3615float16 float16_silence_nan(float16 a, float_status *status)
3616{
3617 FloatParts p = float16_unpack_raw(a);
3618 p.frac <<= float16_params.frac_shift;
3619 p = parts_silence_nan(p, status);
3620 p.frac >>= float16_params.frac_shift;
3621 return float16_pack_raw(p);
3622}
3623
3624float32 float32_silence_nan(float32 a, float_status *status)
3625{
3626 FloatParts p = float32_unpack_raw(a);
3627 p.frac <<= float32_params.frac_shift;
3628 p = parts_silence_nan(p, status);
3629 p.frac >>= float32_params.frac_shift;
3630 return float32_pack_raw(p);
3631}
3632
3633float64 float64_silence_nan(float64 a, float_status *status)
3634{
3635 FloatParts p = float64_unpack_raw(a);
3636 p.frac <<= float64_params.frac_shift;
3637 p = parts_silence_nan(p, status);
3638 p.frac >>= float64_params.frac_shift;
3639 return float64_pack_raw(p);
3640}
3641
8282310d
LZ
3642bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3643{
3644 FloatParts p = bfloat16_unpack_raw(a);
3645 p.frac <<= bfloat16_params.frac_shift;
3646 p = parts_silence_nan(p, status);
3647 p.frac >>= bfloat16_params.frac_shift;
3648 return bfloat16_pack_raw(p);
3649}
e6b405fe
AB
3650
3651/*----------------------------------------------------------------------------
3652| If `a' is denormal and we are in flush-to-zero mode then set the
3653| input-denormal exception and return zero. Otherwise just return the value.
3654*----------------------------------------------------------------------------*/
3655
3656static bool parts_squash_denormal(FloatParts p, float_status *status)
3657{
3658 if (p.exp == 0 && p.frac != 0) {
3659 float_raise(float_flag_input_denormal, status);
3660 return true;
3661 }
3662
3663 return false;
3664}
3665
3666float16 float16_squash_input_denormal(float16 a, float_status *status)
3667{
3668 if (status->flush_inputs_to_zero) {
3669 FloatParts p = float16_unpack_raw(a);
3670 if (parts_squash_denormal(p, status)) {
3671 return float16_set_sign(float16_zero, p.sign);
3672 }
3673 }
3674 return a;
3675}
3676
3677float32 float32_squash_input_denormal(float32 a, float_status *status)
3678{
3679 if (status->flush_inputs_to_zero) {
3680 FloatParts p = float32_unpack_raw(a);
3681 if (parts_squash_denormal(p, status)) {
3682 return float32_set_sign(float32_zero, p.sign);
3683 }
3684 }
3685 return a;
3686}
3687
3688float64 float64_squash_input_denormal(float64 a, float_status *status)
3689{
3690 if (status->flush_inputs_to_zero) {
3691 FloatParts p = float64_unpack_raw(a);
3692 if (parts_squash_denormal(p, status)) {
3693 return float64_set_sign(float64_zero, p.sign);
3694 }
3695 }
3696 return a;
3697}
3698
8282310d
LZ
3699bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3700{
3701 if (status->flush_inputs_to_zero) {
3702 FloatParts p = bfloat16_unpack_raw(a);
3703 if (parts_squash_denormal(p, status)) {
3704 return bfloat16_set_sign(bfloat16_zero, p.sign);
3705 }
3706 }
3707 return a;
3708}
3709
377ed926 3710/*----------------------------------------------------------------------------
158142c2
FB
3711| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3712| and 7, and returns the properly rounded 32-bit integer corresponding to the
3713| input. If `zSign' is 1, the input is negated before being converted to an
3714| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3715| is simply rounded to an integer, with the inexact exception raised if the
3716| input cannot be represented exactly as an integer. However, if the fixed-
3717| point input is too large, the invalid exception is raised and the largest
3718| positive or negative integer is returned.
3719*----------------------------------------------------------------------------*/
3720
c120391c
RH
3721static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
3722 float_status *status)
158142c2 3723{
8f506c70 3724 int8_t roundingMode;
c120391c 3725 bool roundNearestEven;
8f506c70 3726 int8_t roundIncrement, roundBits;
760e1416 3727 int32_t z;
158142c2 3728
a2f2d288 3729 roundingMode = status->float_rounding_mode;
158142c2 3730 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3731 switch (roundingMode) {
3732 case float_round_nearest_even:
f9288a76 3733 case float_round_ties_away:
dc355b76
PM
3734 roundIncrement = 0x40;
3735 break;
3736 case float_round_to_zero:
3737 roundIncrement = 0;
3738 break;
3739 case float_round_up:
3740 roundIncrement = zSign ? 0 : 0x7f;
3741 break;
3742 case float_round_down:
3743 roundIncrement = zSign ? 0x7f : 0;
3744 break;
5d64abb3
RH
3745 case float_round_to_odd:
3746 roundIncrement = absZ & 0x80 ? 0 : 0x7f;
3747 break;
dc355b76
PM
3748 default:
3749 abort();
158142c2
FB
3750 }
3751 roundBits = absZ & 0x7F;
3752 absZ = ( absZ + roundIncrement )>>7;
40662886
PMD
3753 if (!(roundBits ^ 0x40) && roundNearestEven) {
3754 absZ &= ~1;
3755 }
158142c2
FB
3756 z = absZ;
3757 if ( zSign ) z = - z;
3758 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 3759 float_raise(float_flag_invalid, status);
2c217da0 3760 return zSign ? INT32_MIN : INT32_MAX;
158142c2 3761 }
a2f2d288 3762 if (roundBits) {
d82f3b2d 3763 float_raise(float_flag_inexact, status);
a2f2d288 3764 }
158142c2
FB
3765 return z;
3766
3767}
3768
3769/*----------------------------------------------------------------------------
3770| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3771| `absZ1', with binary point between bits 63 and 64 (between the input words),
3772| and returns the properly rounded 64-bit integer corresponding to the input.
3773| If `zSign' is 1, the input is negated before being converted to an integer.
3774| Ordinarily, the fixed-point input is simply rounded to an integer, with
3775| the inexact exception raised if the input cannot be represented exactly as
3776| an integer. However, if the fixed-point input is too large, the invalid
3777| exception is raised and the largest positive or negative integer is
3778| returned.
3779*----------------------------------------------------------------------------*/
3780
c120391c 3781static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 3782 float_status *status)
158142c2 3783{
8f506c70 3784 int8_t roundingMode;
c120391c 3785 bool roundNearestEven, increment;
760e1416 3786 int64_t z;
158142c2 3787
a2f2d288 3788 roundingMode = status->float_rounding_mode;
158142c2 3789 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3790 switch (roundingMode) {
3791 case float_round_nearest_even:
f9288a76 3792 case float_round_ties_away:
dc355b76
PM
3793 increment = ((int64_t) absZ1 < 0);
3794 break;
3795 case float_round_to_zero:
3796 increment = 0;
3797 break;
3798 case float_round_up:
3799 increment = !zSign && absZ1;
3800 break;
3801 case float_round_down:
3802 increment = zSign && absZ1;
3803 break;
5d64abb3
RH
3804 case float_round_to_odd:
3805 increment = !(absZ0 & 1) && absZ1;
3806 break;
dc355b76
PM
3807 default:
3808 abort();
158142c2
FB
3809 }
3810 if ( increment ) {
3811 ++absZ0;
3812 if ( absZ0 == 0 ) goto overflow;
40662886
PMD
3813 if (!(absZ1 << 1) && roundNearestEven) {
3814 absZ0 &= ~1;
3815 }
158142c2
FB
3816 }
3817 z = absZ0;
3818 if ( zSign ) z = - z;
3819 if ( z && ( ( z < 0 ) ^ zSign ) ) {
3820 overflow:
ff32e16e 3821 float_raise(float_flag_invalid, status);
2c217da0 3822 return zSign ? INT64_MIN : INT64_MAX;
158142c2 3823 }
a2f2d288 3824 if (absZ1) {
d82f3b2d 3825 float_raise(float_flag_inexact, status);
a2f2d288 3826 }
158142c2
FB
3827 return z;
3828
3829}
3830
fb3ea83a
TM
3831/*----------------------------------------------------------------------------
3832| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3833| `absZ1', with binary point between bits 63 and 64 (between the input words),
3834| and returns the properly rounded 64-bit unsigned integer corresponding to the
3835| input. Ordinarily, the fixed-point input is simply rounded to an integer,
3836| with the inexact exception raised if the input cannot be represented exactly
3837| as an integer. However, if the fixed-point input is too large, the invalid
3838| exception is raised and the largest unsigned integer is returned.
3839*----------------------------------------------------------------------------*/
3840
c120391c 3841static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
e5a41ffa 3842 uint64_t absZ1, float_status *status)
fb3ea83a 3843{
8f506c70 3844 int8_t roundingMode;
c120391c 3845 bool roundNearestEven, increment;
fb3ea83a 3846
a2f2d288 3847 roundingMode = status->float_rounding_mode;
fb3ea83a 3848 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
3849 switch (roundingMode) {
3850 case float_round_nearest_even:
f9288a76 3851 case float_round_ties_away:
dc355b76
PM
3852 increment = ((int64_t)absZ1 < 0);
3853 break;
3854 case float_round_to_zero:
3855 increment = 0;
3856 break;
3857 case float_round_up:
3858 increment = !zSign && absZ1;
3859 break;
3860 case float_round_down:
3861 increment = zSign && absZ1;
3862 break;
5d64abb3
RH
3863 case float_round_to_odd:
3864 increment = !(absZ0 & 1) && absZ1;
3865 break;
dc355b76
PM
3866 default:
3867 abort();
fb3ea83a
TM
3868 }
3869 if (increment) {
3870 ++absZ0;
3871 if (absZ0 == 0) {
ff32e16e 3872 float_raise(float_flag_invalid, status);
2c217da0 3873 return UINT64_MAX;
fb3ea83a 3874 }
40662886
PMD
3875 if (!(absZ1 << 1) && roundNearestEven) {
3876 absZ0 &= ~1;
3877 }
fb3ea83a
TM
3878 }
3879
3880 if (zSign && absZ0) {
ff32e16e 3881 float_raise(float_flag_invalid, status);
fb3ea83a
TM
3882 return 0;
3883 }
3884
3885 if (absZ1) {
d82f3b2d 3886 float_raise(float_flag_inexact, status);
fb3ea83a
TM
3887 }
3888 return absZ0;
3889}
3890
158142c2
FB
3891/*----------------------------------------------------------------------------
3892| Normalizes the subnormal single-precision floating-point value represented
3893| by the denormalized significand `aSig'. The normalized exponent and
3894| significand are stored at the locations pointed to by `zExpPtr' and
3895| `zSigPtr', respectively.
3896*----------------------------------------------------------------------------*/
3897
static void
normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    /* Left shift that leaves exactly 8 leading zero bits in aSig. */
    const int8_t shift = clz32(aSig) - 8;

    *zSigPtr = aSig << shift;
    *zExpPtr = 1 - shift;
}
3908
158142c2
FB
3909/*----------------------------------------------------------------------------
3910| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3911| and significand `zSig', and returns the proper single-precision floating-
3912| point value corresponding to the abstract input. Ordinarily, the abstract
3913| value is simply rounded and packed into the single-precision format, with
3914| the inexact exception raised if the abstract input cannot be represented
3915| exactly. However, if the abstract value is too large, the overflow and
3916| inexact exceptions are raised and an infinity or maximal finite value is
3917| returned. If the abstract value is too small, the input value is rounded to
3918| a subnormal number, and the underflow and inexact exceptions are raised if
3919| the abstract input cannot be represented exactly as a subnormal single-
3920| precision floating-point number.
3921| The input significand `zSig' has its binary point between bits 30
3922| and 29, which is 7 bits to the left of the usual location. This shifted
3923| significand must be normalized or smaller. If `zSig' is not normalized,
3924| `zExp' must be 0; in that case, the result returned is a subnormal number,
3925| and it must not require rounding. In the usual case that `zSig' is
3926| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3927| The handling of underflow and overflow follows the IEC/IEEE Standard for
3928| Binary Floating-Point Arithmetic.
3929*----------------------------------------------------------------------------*/
3930
c120391c 3931static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 3932 float_status *status)
158142c2 3933{
8f506c70 3934 int8_t roundingMode;
c120391c 3935 bool roundNearestEven;
8f506c70 3936 int8_t roundIncrement, roundBits;
c120391c 3937 bool isTiny;
158142c2 3938
a2f2d288 3939 roundingMode = status->float_rounding_mode;
158142c2 3940 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3941 switch (roundingMode) {
3942 case float_round_nearest_even:
f9288a76 3943 case float_round_ties_away:
dc355b76
PM
3944 roundIncrement = 0x40;
3945 break;
3946 case float_round_to_zero:
3947 roundIncrement = 0;
3948 break;
3949 case float_round_up:
3950 roundIncrement = zSign ? 0 : 0x7f;
3951 break;
3952 case float_round_down:
3953 roundIncrement = zSign ? 0x7f : 0;
3954 break;
5d64abb3
RH
3955 case float_round_to_odd:
3956 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
3957 break;
dc355b76
PM
3958 default:
3959 abort();
3960 break;
158142c2
FB
3961 }
3962 roundBits = zSig & 0x7F;
bb98fe42 3963 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
3964 if ( ( 0xFD < zExp )
3965 || ( ( zExp == 0xFD )
bb98fe42 3966 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 3967 ) {
5d64abb3
RH
3968 bool overflow_to_inf = roundingMode != float_round_to_odd &&
3969 roundIncrement != 0;
ff32e16e 3970 float_raise(float_flag_overflow | float_flag_inexact, status);
5d64abb3 3971 return packFloat32(zSign, 0xFF, -!overflow_to_inf);
158142c2
FB
3972 }
3973 if ( zExp < 0 ) {
a2f2d288 3974 if (status->flush_to_zero) {
ff32e16e 3975 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3976 return packFloat32(zSign, 0, 0);
3977 }
a828b373
RH
3978 isTiny = status->tininess_before_rounding
3979 || (zExp < -1)
3980 || (zSig + roundIncrement < 0x80000000);
158142c2
FB
3981 shift32RightJamming( zSig, - zExp, &zSig );
3982 zExp = 0;
3983 roundBits = zSig & 0x7F;
ff32e16e
PM
3984 if (isTiny && roundBits) {
3985 float_raise(float_flag_underflow, status);
3986 }
5d64abb3
RH
3987 if (roundingMode == float_round_to_odd) {
3988 /*
3989 * For round-to-odd case, the roundIncrement depends on
3990 * zSig which just changed.
3991 */
3992 roundIncrement = zSig & 0x80 ? 0 : 0x7f;
3993 }
158142c2
FB
3994 }
3995 }
a2f2d288 3996 if (roundBits) {
d82f3b2d 3997 float_raise(float_flag_inexact, status);
a2f2d288 3998 }
158142c2 3999 zSig = ( zSig + roundIncrement )>>7;
40662886
PMD
4000 if (!(roundBits ^ 0x40) && roundNearestEven) {
4001 zSig &= ~1;
4002 }
158142c2
FB
4003 if ( zSig == 0 ) zExp = 0;
4004 return packFloat32( zSign, zExp, zSig );
4005
4006}
4007
4008/*----------------------------------------------------------------------------
4009| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4010| and significand `zSig', and returns the proper single-precision floating-
4011| point value corresponding to the abstract input. This routine is just like
4012| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4013| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4014| floating-point exponent.
4015*----------------------------------------------------------------------------*/
4016
4017static float32
c120391c 4018 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
e5a41ffa 4019 float_status *status)
158142c2 4020{
8f506c70 4021 int8_t shiftCount;
158142c2 4022
0019d5c3 4023 shiftCount = clz32(zSig) - 1;
ff32e16e
PM
4024 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4025 status);
158142c2
FB
4026
4027}
4028
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the denormalized significand `aSig' of a subnormal
| double-precision value.  The normalized significand is stored through
| `zSigPtr' and the corresponding exponent through `zExpPtr'.
*----------------------------------------------------------------------------*/

static void normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr,
                                      uint64_t *zSigPtr)
{
    /* Left-shift distance: leading-zero count of aSig, less 11. */
    const int8_t lshift = clz64(aSig) - 11;

    *zSigPtr = aSig << lshift;
    *zExpPtr = 1 - lshift;
}
4046
4047/*----------------------------------------------------------------------------
4048| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4049| double-precision floating-point value, returning the result. After being
4050| shifted into the proper positions, the three fields are simply added
4051| together to form the result. This means that any integer portion of `zSig'
4052| will be added into the exponent. Since a properly normalized significand
4053| will have an integer portion equal to 1, the `zExp' input should be 1 less
4054| than the desired result exponent whenever `zSig' is a complete, normalized
4055| significand.
4056*----------------------------------------------------------------------------*/
4057
c120391c 4058static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
158142c2
FB
4059{
4060
f090c9d4 4061 return make_float64(
bb98fe42 4062 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
4063
4064}
4065
4066/*----------------------------------------------------------------------------
4067| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4068| and significand `zSig', and returns the proper double-precision floating-
4069| point value corresponding to the abstract input. Ordinarily, the abstract
4070| value is simply rounded and packed into the double-precision format, with
4071| the inexact exception raised if the abstract input cannot be represented
4072| exactly. However, if the abstract value is too large, the overflow and
4073| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
4074| returned. If the abstract value is too small, the input value is rounded to
4075| a subnormal number, and the underflow and inexact exceptions are raised if
4076| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
4077| precision floating-point number.
4078| The input significand `zSig' has its binary point between bits 62
4079| and 61, which is 10 bits to the left of the usual location. This shifted
4080| significand must be normalized or smaller. If `zSig' is not normalized,
4081| `zExp' must be 0; in that case, the result returned is a subnormal number,
4082| and it must not require rounding. In the usual case that `zSig' is
4083| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4084| The handling of underflow and overflow follows the IEC/IEEE Standard for
4085| Binary Floating-Point Arithmetic.
4086*----------------------------------------------------------------------------*/
4087
c120391c 4088static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4089 float_status *status)
158142c2 4090{
8f506c70 4091 int8_t roundingMode;
c120391c 4092 bool roundNearestEven;
0c48262d 4093 int roundIncrement, roundBits;
c120391c 4094 bool isTiny;
158142c2 4095
a2f2d288 4096 roundingMode = status->float_rounding_mode;
158142c2 4097 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4098 switch (roundingMode) {
4099 case float_round_nearest_even:
f9288a76 4100 case float_round_ties_away:
dc355b76
PM
4101 roundIncrement = 0x200;
4102 break;
4103 case float_round_to_zero:
4104 roundIncrement = 0;
4105 break;
4106 case float_round_up:
4107 roundIncrement = zSign ? 0 : 0x3ff;
4108 break;
4109 case float_round_down:
4110 roundIncrement = zSign ? 0x3ff : 0;
4111 break;
9ee6f678
BR
4112 case float_round_to_odd:
4113 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4114 break;
dc355b76
PM
4115 default:
4116 abort();
158142c2
FB
4117 }
4118 roundBits = zSig & 0x3FF;
bb98fe42 4119 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
4120 if ( ( 0x7FD < zExp )
4121 || ( ( zExp == 0x7FD )
bb98fe42 4122 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 4123 ) {
9ee6f678
BR
4124 bool overflow_to_inf = roundingMode != float_round_to_odd &&
4125 roundIncrement != 0;
ff32e16e 4126 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 4127 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
4128 }
4129 if ( zExp < 0 ) {
a2f2d288 4130 if (status->flush_to_zero) {
ff32e16e 4131 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4132 return packFloat64(zSign, 0, 0);
4133 }
a828b373
RH
4134 isTiny = status->tininess_before_rounding
4135 || (zExp < -1)
4136 || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
158142c2
FB
4137 shift64RightJamming( zSig, - zExp, &zSig );
4138 zExp = 0;
4139 roundBits = zSig & 0x3FF;
ff32e16e
PM
4140 if (isTiny && roundBits) {
4141 float_raise(float_flag_underflow, status);
4142 }
9ee6f678
BR
4143 if (roundingMode == float_round_to_odd) {
4144 /*
4145 * For round-to-odd case, the roundIncrement depends on
4146 * zSig which just changed.
4147 */
4148 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4149 }
158142c2
FB
4150 }
4151 }
a2f2d288 4152 if (roundBits) {
d82f3b2d 4153 float_raise(float_flag_inexact, status);
a2f2d288 4154 }
158142c2 4155 zSig = ( zSig + roundIncrement )>>10;
40662886
PMD
4156 if (!(roundBits ^ 0x200) && roundNearestEven) {
4157 zSig &= ~1;
4158 }
158142c2
FB
4159 if ( zSig == 0 ) zExp = 0;
4160 return packFloat64( zSign, zExp, zSig );
4161
4162}
4163
4164/*----------------------------------------------------------------------------
4165| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4166| and significand `zSig', and returns the proper double-precision floating-
4167| point value corresponding to the abstract input. This routine is just like
4168| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4169| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4170| floating-point exponent.
4171*----------------------------------------------------------------------------*/
4172
4173static float64
c120391c 4174 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
e5a41ffa 4175 float_status *status)
158142c2 4176{
8f506c70 4177 int8_t shiftCount;
158142c2 4178
0019d5c3 4179 shiftCount = clz64(zSig) - 1;
ff32e16e
PM
4180 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4181 status);
158142c2
FB
4182
4183}
4184
158142c2
FB
/*----------------------------------------------------------------------------
| Normalizes the denormalized significand `aSig' of a subnormal extended
| double-precision value.  The normalized significand is stored through
| `zSigPtr' and the corresponding exponent through `zExpPtr'.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    /* Left-shift distance: the leading-zero count of aSig. */
    const int8_t lshift = clz64(aSig);

    *zSigPtr = aSig << lshift;
    *zExpPtr = 1 - lshift;
}
4201
4202/*----------------------------------------------------------------------------
4203| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4204| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4205| and returns the proper extended double-precision floating-point value
4206| corresponding to the abstract input. Ordinarily, the abstract value is
4207| rounded and packed into the extended double-precision format, with the
4208| inexact exception raised if the abstract input cannot be represented
4209| exactly. However, if the abstract value is too large, the overflow and
4210| inexact exceptions are raised and an infinity or maximal finite value is
4211| returned. If the abstract value is too small, the input value is rounded to
4212| a subnormal number, and the underflow and inexact exceptions are raised if
4213| the abstract input cannot be represented exactly as a subnormal extended
4214| double-precision floating-point number.
4215| If `roundingPrecision' is 32 or 64, the result is rounded to the same
4216| number of bits as single or double precision, respectively. Otherwise, the
4217| result is rounded to the full precision of the extended double-precision
4218| format.
4219| The input significand must be normalized or smaller. If the input
4220| significand is not normalized, `zExp' must be 0; in that case, the result
4221| returned is a subnormal number, and it must not require rounding. The
4222| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4223| Floating-Point Arithmetic.
4224*----------------------------------------------------------------------------*/
4225
c120391c 4226floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
88857aca
LV
4227 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4228 float_status *status)
158142c2 4229{
8f506c70 4230 int8_t roundingMode;
c120391c 4231 bool roundNearestEven, increment, isTiny;
f42c2224 4232 int64_t roundIncrement, roundMask, roundBits;
158142c2 4233
a2f2d288 4234 roundingMode = status->float_rounding_mode;
158142c2
FB
4235 roundNearestEven = ( roundingMode == float_round_nearest_even );
4236 if ( roundingPrecision == 80 ) goto precision80;
4237 if ( roundingPrecision == 64 ) {
e9321124
AB
4238 roundIncrement = UINT64_C(0x0000000000000400);
4239 roundMask = UINT64_C(0x00000000000007FF);
158142c2
FB
4240 }
4241 else if ( roundingPrecision == 32 ) {
e9321124
AB
4242 roundIncrement = UINT64_C(0x0000008000000000);
4243 roundMask = UINT64_C(0x000000FFFFFFFFFF);
158142c2
FB
4244 }
4245 else {
4246 goto precision80;
4247 }
4248 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
4249 switch (roundingMode) {
4250 case float_round_nearest_even:
f9288a76 4251 case float_round_ties_away:
dc355b76
PM
4252 break;
4253 case float_round_to_zero:
4254 roundIncrement = 0;
4255 break;
4256 case float_round_up:
4257 roundIncrement = zSign ? 0 : roundMask;
4258 break;
4259 case float_round_down:
4260 roundIncrement = zSign ? roundMask : 0;
4261 break;
4262 default:
4263 abort();
158142c2
FB
4264 }
4265 roundBits = zSig0 & roundMask;
bb98fe42 4266 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4267 if ( ( 0x7FFE < zExp )
4268 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4269 ) {
4270 goto overflow;
4271 }
4272 if ( zExp <= 0 ) {
a2f2d288 4273 if (status->flush_to_zero) {
ff32e16e 4274 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4275 return packFloatx80(zSign, 0, 0);
4276 }
a828b373
RH
4277 isTiny = status->tininess_before_rounding
4278 || (zExp < 0 )
4279 || (zSig0 <= zSig0 + roundIncrement);
158142c2
FB
4280 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4281 zExp = 0;
4282 roundBits = zSig0 & roundMask;
ff32e16e
PM
4283 if (isTiny && roundBits) {
4284 float_raise(float_flag_underflow, status);
4285 }
a2f2d288 4286 if (roundBits) {
d82f3b2d 4287 float_raise(float_flag_inexact, status);
a2f2d288 4288 }
158142c2 4289 zSig0 += roundIncrement;
bb98fe42 4290 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4291 roundIncrement = roundMask + 1;
4292 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4293 roundMask |= roundIncrement;
4294 }
4295 zSig0 &= ~ roundMask;
4296 return packFloatx80( zSign, zExp, zSig0 );
4297 }
4298 }
a2f2d288 4299 if (roundBits) {
d82f3b2d 4300 float_raise(float_flag_inexact, status);
a2f2d288 4301 }
158142c2
FB
4302 zSig0 += roundIncrement;
4303 if ( zSig0 < roundIncrement ) {
4304 ++zExp;
e9321124 4305 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4306 }
4307 roundIncrement = roundMask + 1;
4308 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4309 roundMask |= roundIncrement;
4310 }
4311 zSig0 &= ~ roundMask;
4312 if ( zSig0 == 0 ) zExp = 0;
4313 return packFloatx80( zSign, zExp, zSig0 );
4314 precision80:
dc355b76
PM
4315 switch (roundingMode) {
4316 case float_round_nearest_even:
f9288a76 4317 case float_round_ties_away:
dc355b76
PM
4318 increment = ((int64_t)zSig1 < 0);
4319 break;
4320 case float_round_to_zero:
4321 increment = 0;
4322 break;
4323 case float_round_up:
4324 increment = !zSign && zSig1;
4325 break;
4326 case float_round_down:
4327 increment = zSign && zSig1;
4328 break;
4329 default:
4330 abort();
158142c2 4331 }
bb98fe42 4332 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
4333 if ( ( 0x7FFE < zExp )
4334 || ( ( zExp == 0x7FFE )
e9321124 4335 && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
158142c2
FB
4336 && increment
4337 )
4338 ) {
4339 roundMask = 0;
4340 overflow:
ff32e16e 4341 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4342 if ( ( roundingMode == float_round_to_zero )
4343 || ( zSign && ( roundingMode == float_round_up ) )
4344 || ( ! zSign && ( roundingMode == float_round_down ) )
4345 ) {
4346 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4347 }
0f605c88
LV
4348 return packFloatx80(zSign,
4349 floatx80_infinity_high,
4350 floatx80_infinity_low);
158142c2
FB
4351 }
4352 if ( zExp <= 0 ) {
a828b373
RH
4353 isTiny = status->tininess_before_rounding
4354 || (zExp < 0)
4355 || !increment
4356 || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4357 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4358 zExp = 0;
ff32e16e
PM
4359 if (isTiny && zSig1) {
4360 float_raise(float_flag_underflow, status);
4361 }
a2f2d288 4362 if (zSig1) {
d82f3b2d 4363 float_raise(float_flag_inexact, status);
a2f2d288 4364 }
dc355b76
PM
4365 switch (roundingMode) {
4366 case float_round_nearest_even:
f9288a76 4367 case float_round_ties_away:
dc355b76
PM
4368 increment = ((int64_t)zSig1 < 0);
4369 break;
4370 case float_round_to_zero:
4371 increment = 0;
4372 break;
4373 case float_round_up:
4374 increment = !zSign && zSig1;
4375 break;
4376 case float_round_down:
4377 increment = zSign && zSig1;
4378 break;
4379 default:
4380 abort();
158142c2
FB
4381 }
4382 if ( increment ) {
4383 ++zSig0;
40662886
PMD
4384 if (!(zSig1 << 1) && roundNearestEven) {
4385 zSig0 &= ~1;
4386 }
bb98fe42 4387 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
4388 }
4389 return packFloatx80( zSign, zExp, zSig0 );
4390 }
4391 }
a2f2d288 4392 if (zSig1) {
d82f3b2d 4393 float_raise(float_flag_inexact, status);
a2f2d288 4394 }
158142c2
FB
4395 if ( increment ) {
4396 ++zSig0;
4397 if ( zSig0 == 0 ) {
4398 ++zExp;
e9321124 4399 zSig0 = UINT64_C(0x8000000000000000);
158142c2
FB
4400 }
4401 else {
40662886
PMD
4402 if (!(zSig1 << 1) && roundNearestEven) {
4403 zSig0 &= ~1;
4404 }
158142c2
FB
4405 }
4406 }
4407 else {
4408 if ( zSig0 == 0 ) zExp = 0;
4409 }
4410 return packFloatx80( zSign, zExp, zSig0 );
4411
4412}
4413
4414/*----------------------------------------------------------------------------
4415| Takes an abstract floating-point value having sign `zSign', exponent
4416| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4417| and returns the proper extended double-precision floating-point value
4418| corresponding to the abstract input. This routine is just like
4419| `roundAndPackFloatx80' except that the input significand does not have to be
4420| normalized.
4421*----------------------------------------------------------------------------*/
4422
88857aca 4423floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
c120391c 4424 bool zSign, int32_t zExp,
88857aca
LV
4425 uint64_t zSig0, uint64_t zSig1,
4426 float_status *status)
158142c2 4427{
8f506c70 4428 int8_t shiftCount;
158142c2
FB
4429
4430 if ( zSig0 == 0 ) {
4431 zSig0 = zSig1;
4432 zSig1 = 0;
4433 zExp -= 64;
4434 }
0019d5c3 4435 shiftCount = clz64(zSig0);
158142c2
FB
4436 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4437 zExp -= shiftCount;
ff32e16e
PM
4438 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4439 zSig0, zSig1, status);
158142c2
FB
4440
4441}
4442
158142c2
FB
4443/*----------------------------------------------------------------------------
4444| Returns the least-significant 64 fraction bits of the quadruple-precision
4445| floating-point value `a'.
4446*----------------------------------------------------------------------------*/
4447
a49db98d 4448static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
4449{
4450
4451 return a.low;
4452
4453}
4454
4455/*----------------------------------------------------------------------------
4456| Returns the most-significant 48 fraction bits of the quadruple-precision
4457| floating-point value `a'.
4458*----------------------------------------------------------------------------*/
4459
a49db98d 4460static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
4461{
4462
e9321124 4463 return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
158142c2
FB
4464
4465}
4466
4467/*----------------------------------------------------------------------------
4468| Returns the exponent bits of the quadruple-precision floating-point value
4469| `a'.
4470*----------------------------------------------------------------------------*/
4471
f4014512 4472static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
4473{
4474
4475 return ( a.high>>48 ) & 0x7FFF;
4476
4477}
4478
4479/*----------------------------------------------------------------------------
4480| Returns the sign bit of the quadruple-precision floating-point value `a'.
4481*----------------------------------------------------------------------------*/
4482
c120391c 4483static inline bool extractFloat128Sign(float128 a)
158142c2 4484{
c120391c 4485 return a.high >> 63;
158142c2
FB
4486}
4487
/*----------------------------------------------------------------------------
| Normalizes the denormalized significand `aSig0':`aSig1' of a subnormal
| quadruple-precision value.  The normalized exponent is stored through
| `zExpPtr', the most significant 49 bits of the normalized significand
| through `zSig0Ptr', and the least significant 64 bits through `zSig1Ptr'.
*----------------------------------------------------------------------------*/

static void normalizeFloat128Subnormal(uint64_t aSig0, uint64_t aSig1,
                                       int32_t *zExpPtr,
                                       uint64_t *zSig0Ptr,
                                       uint64_t *zSig1Ptr)
{
    int8_t lshift;

    if (aSig0 != 0) {
        /* The leading bit is in the high word: one short 128-bit shift. */
        lshift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, lshift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - lshift;
        return;
    }

    /* High word is zero: the result is built from aSig1 alone. */
    lshift = clz64(aSig1) - 15;
    if (lshift >= 0) {
        *zSig0Ptr = aSig1 << lshift;
        *zSig1Ptr = 0;
    } else {
        /* Negative distance: aSig1 is split across both output words. */
        *zSig0Ptr = aSig1 >> (-lshift);
        *zSig1Ptr = aSig1 << (lshift & 63);
    }
    *zExpPtr = -lshift - 63;
}
4528
4529/*----------------------------------------------------------------------------
4530| Packs the sign `zSign', the exponent `zExp', and the significand formed
4531| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4532| floating-point value, returning the result. After being shifted into the
4533| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4534| added together to form the most significant 32 bits of the result. This
4535| means that any integer portion of `zSig0' will be added into the exponent.
4536| Since a properly normalized significand will have an integer portion equal
4537| to 1, the `zExp' input should be 1 less than the desired result exponent
4538| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4539| significand.
4540*----------------------------------------------------------------------------*/
4541
a49db98d 4542static inline float128
c120391c 4543packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
158142c2
FB
4544{
4545 float128 z;
4546
4547 z.low = zSig1;
c120391c 4548 z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
158142c2 4549 return z;
158142c2
FB
4550}
4551
4552/*----------------------------------------------------------------------------
4553| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4554| and extended significand formed by the concatenation of `zSig0', `zSig1',
4555| and `zSig2', and returns the proper quadruple-precision floating-point value
4556| corresponding to the abstract input. Ordinarily, the abstract value is
4557| simply rounded and packed into the quadruple-precision format, with the
4558| inexact exception raised if the abstract input cannot be represented
4559| exactly. However, if the abstract value is too large, the overflow and
4560| inexact exceptions are raised and an infinity or maximal finite value is
4561| returned. If the abstract value is too small, the input value is rounded to
4562| a subnormal number, and the underflow and inexact exceptions are raised if
4563| the abstract input cannot be represented exactly as a subnormal quadruple-
4564| precision floating-point number.
4565| The input significand must be normalized or smaller. If the input
4566| significand is not normalized, `zExp' must be 0; in that case, the result
4567| returned is a subnormal number, and it must not require rounding. In the
4568| usual case that the input significand is normalized, `zExp' must be 1 less
4569| than the ``true'' floating-point exponent. The handling of underflow and
4570| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4571*----------------------------------------------------------------------------*/
4572
c120391c 4573static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4574 uint64_t zSig0, uint64_t zSig1,
4575 uint64_t zSig2, float_status *status)
158142c2 4576{
8f506c70 4577 int8_t roundingMode;
c120391c 4578 bool roundNearestEven, increment, isTiny;
158142c2 4579
a2f2d288 4580 roundingMode = status->float_rounding_mode;
158142c2 4581 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
4582 switch (roundingMode) {
4583 case float_round_nearest_even:
f9288a76 4584 case float_round_ties_away:
dc355b76
PM
4585 increment = ((int64_t)zSig2 < 0);
4586 break;
4587 case float_round_to_zero:
4588 increment = 0;
4589 break;
4590 case float_round_up:
4591 increment = !zSign && zSig2;
4592 break;
4593 case float_round_down:
4594 increment = zSign && zSig2;
4595 break;
9ee6f678
BR
4596 case float_round_to_odd:
4597 increment = !(zSig1 & 0x1) && zSig2;
4598 break;
dc355b76
PM
4599 default:
4600 abort();
158142c2 4601 }
bb98fe42 4602 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
4603 if ( ( 0x7FFD < zExp )
4604 || ( ( zExp == 0x7FFD )
4605 && eq128(
e9321124
AB
4606 UINT64_C(0x0001FFFFFFFFFFFF),
4607 UINT64_C(0xFFFFFFFFFFFFFFFF),
158142c2
FB
4608 zSig0,
4609 zSig1
4610 )
4611 && increment
4612 )
4613 ) {
ff32e16e 4614 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
4615 if ( ( roundingMode == float_round_to_zero )
4616 || ( zSign && ( roundingMode == float_round_up ) )
4617 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 4618 || (roundingMode == float_round_to_odd)
158142c2
FB
4619 ) {
4620 return
4621 packFloat128(
4622 zSign,
4623 0x7FFE,
e9321124
AB
4624 UINT64_C(0x0000FFFFFFFFFFFF),
4625 UINT64_C(0xFFFFFFFFFFFFFFFF)
158142c2
FB
4626 );
4627 }
4628 return packFloat128( zSign, 0x7FFF, 0, 0 );
4629 }
4630 if ( zExp < 0 ) {
a2f2d288 4631 if (status->flush_to_zero) {
ff32e16e 4632 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
4633 return packFloat128(zSign, 0, 0, 0);
4634 }
a828b373
RH
4635 isTiny = status->tininess_before_rounding
4636 || (zExp < -1)
4637 || !increment
4638 || lt128(zSig0, zSig1,
4639 UINT64_C(0x0001FFFFFFFFFFFF),
4640 UINT64_C(0xFFFFFFFFFFFFFFFF));
158142c2
FB
4641 shift128ExtraRightJamming(
4642 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4643 zExp = 0;
ff32e16e
PM
4644 if (isTiny && zSig2) {
4645 float_raise(float_flag_underflow, status);
4646 }
dc355b76
PM
4647 switch (roundingMode) {
4648 case float_round_nearest_even:
f9288a76 4649 case float_round_ties_away:
dc355b76
PM
4650 increment = ((int64_t)zSig2 < 0);
4651 break;
4652 case float_round_to_zero:
4653 increment = 0;
4654 break;
4655 case float_round_up:
4656 increment = !zSign && zSig2;
4657 break;
4658 case float_round_down:
4659 increment = zSign && zSig2;
4660 break;
9ee6f678
BR
4661 case float_round_to_odd:
4662 increment = !(zSig1 & 0x1) && zSig2;
4663 break;
dc355b76
PM
4664 default:
4665 abort();
158142c2
FB
4666 }
4667 }
4668 }
a2f2d288 4669 if (zSig2) {
d82f3b2d 4670 float_raise(float_flag_inexact, status);
a2f2d288 4671 }
158142c2
FB
4672 if ( increment ) {
4673 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
40662886
PMD
4674 if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4675 zSig1 &= ~1;
4676 }
158142c2
FB
4677 }
4678 else {
4679 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4680 }
4681 return packFloat128( zSign, zExp, zSig0, zSig1 );
4682
4683}
4684
4685/*----------------------------------------------------------------------------
4686| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4687| and significand formed by the concatenation of `zSig0' and `zSig1', and
4688| returns the proper quadruple-precision floating-point value corresponding
4689| to the abstract input. This routine is just like `roundAndPackFloat128'
4690| except that the input significand has fewer bits and does not have to be
4691| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4692| point exponent.
4693*----------------------------------------------------------------------------*/
4694
c120391c 4695static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
e5a41ffa
PM
4696 uint64_t zSig0, uint64_t zSig1,
4697 float_status *status)
158142c2 4698{
8f506c70 4699 int8_t shiftCount;
bb98fe42 4700 uint64_t zSig2;
158142c2
FB
4701
4702 if ( zSig0 == 0 ) {
4703 zSig0 = zSig1;
4704 zSig1 = 0;
4705 zExp -= 64;
4706 }
0019d5c3 4707 shiftCount = clz64(zSig0) - 15;
158142c2
FB
4708 if ( 0 <= shiftCount ) {
4709 zSig2 = 0;
4710 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4711 }
4712 else {
4713 shift128ExtraRightJamming(
4714 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4715 }
4716 zExp -= shiftCount;
ff32e16e 4717 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
4718
4719}
4720
158142c2 4721
158142c2
FB
4722/*----------------------------------------------------------------------------
4723| Returns the result of converting the 32-bit two's complement integer `a'
4724| to the extended double-precision floating-point format. The conversion
4725| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4726| Arithmetic.
4727*----------------------------------------------------------------------------*/
4728
e5a41ffa 4729floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2 4730{
c120391c 4731 bool zSign;
3a87d009 4732 uint32_t absA;
8f506c70 4733 int8_t shiftCount;
bb98fe42 4734 uint64_t zSig;
158142c2
FB
4735
4736 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4737 zSign = ( a < 0 );
4738 absA = zSign ? - a : a;
0019d5c3 4739 shiftCount = clz32(absA) + 32;
158142c2
FB
4740 zSig = absA;
4741 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4742
4743}
4744
158142c2
FB
4745/*----------------------------------------------------------------------------
4746| Returns the result of converting the 32-bit two's complement integer `a' to
4747| the quadruple-precision floating-point format. The conversion is performed
4748| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4749*----------------------------------------------------------------------------*/
4750
e5a41ffa 4751float128 int32_to_float128(int32_t a, float_status *status)
158142c2 4752{
c120391c 4753 bool zSign;
3a87d009 4754 uint32_t absA;
8f506c70 4755 int8_t shiftCount;
bb98fe42 4756 uint64_t zSig0;
158142c2
FB
4757
4758 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4759 zSign = ( a < 0 );
4760 absA = zSign ? - a : a;
0019d5c3 4761 shiftCount = clz32(absA) + 17;
158142c2
FB
4762 zSig0 = absA;
4763 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
4764
4765}
4766
158142c2
FB
4767/*----------------------------------------------------------------------------
4768| Returns the result of converting the 64-bit two's complement integer `a'
4769| to the extended double-precision floating-point format. The conversion
4770| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4771| Arithmetic.
4772*----------------------------------------------------------------------------*/
4773
e5a41ffa 4774floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2 4775{
c120391c 4776 bool zSign;
182f42fd 4777 uint64_t absA;
8f506c70 4778 int8_t shiftCount;
158142c2
FB
4779
4780 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4781 zSign = ( a < 0 );
4782 absA = zSign ? - a : a;
0019d5c3 4783 shiftCount = clz64(absA);
158142c2
FB
4784 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4785
4786}
4787
158142c2
FB
4788/*----------------------------------------------------------------------------
4789| Returns the result of converting the 64-bit two's complement integer `a' to
4790| the quadruple-precision floating-point format. The conversion is performed
4791| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4792*----------------------------------------------------------------------------*/
4793
e5a41ffa 4794float128 int64_to_float128(int64_t a, float_status *status)
158142c2 4795{
c120391c 4796 bool zSign;
182f42fd 4797 uint64_t absA;
8f506c70 4798 int8_t shiftCount;
f4014512 4799 int32_t zExp;
bb98fe42 4800 uint64_t zSig0, zSig1;
158142c2
FB
4801
4802 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4803 zSign = ( a < 0 );
4804 absA = zSign ? - a : a;
0019d5c3 4805 shiftCount = clz64(absA) + 49;
158142c2
FB
4806 zExp = 0x406E - shiftCount;
4807 if ( 64 <= shiftCount ) {
4808 zSig1 = 0;
4809 zSig0 = absA;
4810 shiftCount -= 64;
4811 }
4812 else {
4813 zSig1 = absA;
4814 zSig0 = 0;
4815 }
4816 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4817 return packFloat128( zSign, zExp, zSig0, zSig1 );
4818
4819}
4820
6bb8e0f1
PM
4821/*----------------------------------------------------------------------------
4822| Returns the result of converting the 64-bit unsigned integer `a'
4823| to the quadruple-precision floating-point format. The conversion is performed
4824| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4825*----------------------------------------------------------------------------*/
4826
e5a41ffa 4827float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
4828{
4829 if (a == 0) {
4830 return float128_zero;
4831 }
6603d506 4832 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
4833}
4834
158142c2
FB
4835/*----------------------------------------------------------------------------
4836| Returns the result of converting the single-precision floating-point value
4837| `a' to the extended double-precision floating-point format. The conversion
4838| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4839| Arithmetic.
4840*----------------------------------------------------------------------------*/
4841
e5a41ffa 4842floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2 4843{
c120391c 4844 bool aSign;
0c48262d 4845 int aExp;
bb98fe42 4846 uint32_t aSig;
158142c2 4847
ff32e16e 4848 a = float32_squash_input_denormal(a, status);
158142c2
FB
4849 aSig = extractFloat32Frac( a );
4850 aExp = extractFloat32Exp( a );
4851 aSign = extractFloat32Sign( a );
4852 if ( aExp == 0xFF ) {
ff32e16e 4853 if (aSig) {
7537c2b4
JM
4854 floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
4855 status);
4856 return floatx80_silence_nan(res, status);
ff32e16e 4857 }
0f605c88
LV
4858 return packFloatx80(aSign,
4859 floatx80_infinity_high,
4860 floatx80_infinity_low);
158142c2
FB
4861 }
4862 if ( aExp == 0 ) {
4863 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4864 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4865 }
4866 aSig |= 0x00800000;
bb98fe42 4867 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
4868
4869}
4870
158142c2
FB
4871/*----------------------------------------------------------------------------
4872| Returns the result of converting the single-precision floating-point value
4873| `a' to the quadruple-precision floating-point format. The conversion is
4874| performed according to the IEC/IEEE Standard for Binary Floating-Point
4875| Arithmetic.
4876*----------------------------------------------------------------------------*/
4877
e5a41ffa 4878float128 float32_to_float128(float32 a, float_status *status)
158142c2 4879{
c120391c 4880 bool aSign;
0c48262d 4881 int aExp;
bb98fe42 4882 uint32_t aSig;
158142c2 4883
ff32e16e 4884 a = float32_squash_input_denormal(a, status);
158142c2
FB
4885 aSig = extractFloat32Frac( a );
4886 aExp = extractFloat32Exp( a );
4887 aSign = extractFloat32Sign( a );
4888 if ( aExp == 0xFF ) {
ff32e16e
PM
4889 if (aSig) {
4890 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
4891 }
158142c2
FB
4892 return packFloat128( aSign, 0x7FFF, 0, 0 );
4893 }
4894 if ( aExp == 0 ) {
4895 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4896 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4897 --aExp;
4898 }
bb98fe42 4899 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
4900
4901}
4902
158142c2
FB
4903/*----------------------------------------------------------------------------
4904| Returns the remainder of the single-precision floating-point value `a'
4905| with respect to the corresponding value `b'. The operation is performed
4906| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4907*----------------------------------------------------------------------------*/
4908
e5a41ffa 4909float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 4910{
c120391c 4911 bool aSign, zSign;
0c48262d 4912 int aExp, bExp, expDiff;
bb98fe42
AF
4913 uint32_t aSig, bSig;
4914 uint32_t q;
4915 uint64_t aSig64, bSig64, q64;
4916 uint32_t alternateASig;
4917 int32_t sigMean;
ff32e16e
PM
4918 a = float32_squash_input_denormal(a, status);
4919 b = float32_squash_input_denormal(b, status);
158142c2
FB
4920
4921 aSig = extractFloat32Frac( a );
4922 aExp = extractFloat32Exp( a );
4923 aSign = extractFloat32Sign( a );
4924 bSig = extractFloat32Frac( b );
4925 bExp = extractFloat32Exp( b );
158142c2
FB
4926 if ( aExp == 0xFF ) {
4927 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 4928 return propagateFloat32NaN(a, b, status);
158142c2 4929 }
ff32e16e 4930 float_raise(float_flag_invalid, status);
af39bc8c 4931 return float32_default_nan(status);
158142c2
FB
4932 }
4933 if ( bExp == 0xFF ) {
ff32e16e
PM
4934 if (bSig) {
4935 return propagateFloat32NaN(a, b, status);
4936 }
158142c2
FB
4937 return a;
4938 }
4939 if ( bExp == 0 ) {
4940 if ( bSig == 0 ) {
ff32e16e 4941 float_raise(float_flag_invalid, status);
af39bc8c 4942 return float32_default_nan(status);
158142c2
FB
4943 }
4944 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
4945 }
4946 if ( aExp == 0 ) {
4947 if ( aSig == 0 ) return a;
4948 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4949 }
4950 expDiff = aExp - bExp;
4951 aSig |= 0x00800000;
4952 bSig |= 0x00800000;
4953 if ( expDiff < 32 ) {
4954 aSig <<= 8;
4955 bSig <<= 8;
4956 if ( expDiff < 0 ) {
4957 if ( expDiff < -1 ) return a;
4958 aSig >>= 1;
4959 }
4960 q = ( bSig <= aSig );
4961 if ( q ) aSig -= bSig;
4962 if ( 0 < expDiff ) {
bb98fe42 4963 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
4964 q >>= 32 - expDiff;
4965 bSig >>= 2;
4966 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4967 }
4968 else {
4969 aSig >>= 2;
4970 bSig >>= 2;
4971 }
4972 }
4973 else {
4974 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
4975 aSig64 = ( (uint64_t) aSig )<<40;
4976 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
4977 expDiff -= 64;
4978 while ( 0 < expDiff ) {
4979 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
4980 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
4981 aSig64 = - ( ( bSig * q64 )<<38 );
4982 expDiff -= 62;
4983 }
4984 expDiff += 64;
4985 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
4986 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
4987 q = q64>>( 64 - expDiff );
4988 bSig <<= 6;
4989 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
4990 }
4991 do {
4992 alternateASig = aSig;
4993 ++q;
4994 aSig -= bSig;
bb98fe42 4995 } while ( 0 <= (int32_t) aSig );
158142c2
FB
4996 sigMean = aSig + alternateASig;
4997 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4998 aSig = alternateASig;
4999 }
bb98fe42 5000 zSign = ( (int32_t) aSig < 0 );
158142c2 5001 if ( zSign ) aSig = - aSig;
ff32e16e 5002 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5003}
5004
369be8f6 5005
158142c2 5006
8229c991
AJ
5007/*----------------------------------------------------------------------------
5008| Returns the binary exponential of the single-precision floating-point value
5009| `a'. The operation is performed according to the IEC/IEEE Standard for
5010| Binary Floating-Point Arithmetic.
5011|
5012| Uses the following identities:
5013|
5014| 1. -------------------------------------------------------------------------
5015|      x        x*ln(2)
5016|     2     =  e
5017|
5018| 2. -------------------------------------------------------------------------
5019|                      2     3     4     5           n
5020|      x        x     x     x     x     x           x
5021|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5022|               1!    2!    3!    4!    5!          n!
5023*----------------------------------------------------------------------------*/
5024
5025static const float64 float32_exp2_coefficients[15] =
5026{
d5138cf4
PM
5027 const_float64( 0x3ff0000000000000ll ), /* 1 */
5028 const_float64( 0x3fe0000000000000ll ), /* 2 */
5029 const_float64( 0x3fc5555555555555ll ), /* 3 */
5030 const_float64( 0x3fa5555555555555ll ), /* 4 */
5031 const_float64( 0x3f81111111111111ll ), /* 5 */
5032 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
5033 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
5034 const_float64( 0x3efa01a01a01a01all ), /* 8 */
5035 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
5036 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5037 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5038 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5039 const_float64( 0x3de6124613a86d09ll ), /* 13 */
5040 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5041 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
5042};
5043
e5a41ffa 5044float32 float32_exp2(float32 a, float_status *status)
8229c991 5045{
c120391c 5046 bool aSign;
0c48262d 5047 int aExp;
bb98fe42 5048 uint32_t aSig;
8229c991
AJ
5049 float64 r, x, xn;
5050 int i;
ff32e16e 5051 a = float32_squash_input_denormal(a, status);
8229c991
AJ
5052
5053 aSig = extractFloat32Frac( a );
5054 aExp = extractFloat32Exp( a );
5055 aSign = extractFloat32Sign( a );
5056
5057 if ( aExp == 0xFF) {
ff32e16e
PM
5058 if (aSig) {
5059 return propagateFloat32NaN(a, float32_zero, status);
5060 }
8229c991
AJ
5061 return (aSign) ? float32_zero : a;
5062 }
5063 if (aExp == 0) {
5064 if (aSig == 0) return float32_one;
5065 }
5066
ff32e16e 5067 float_raise(float_flag_inexact, status);
8229c991
AJ
5068
5069 /* ******************************* */
5070 /* using float64 for approximation */
5071 /* ******************************* */
ff32e16e
PM
5072 x = float32_to_float64(a, status);
5073 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
5074
5075 xn = x;
5076 r = float64_one;
5077 for (i = 0 ; i < 15 ; i++) {
5078 float64 f;
5079
ff32e16e
PM
5080 f = float64_mul(xn, float32_exp2_coefficients[i], status);
5081 r = float64_add(r, f, status);
8229c991 5082
ff32e16e 5083 xn = float64_mul(xn, x, status);
8229c991
AJ
5084 }
5085
5086 return float64_to_float32(r, status);
5087}
5088
374dfc33
AJ
5089/*----------------------------------------------------------------------------
5090| Returns the binary log of the single-precision floating-point value `a'.
5091| The operation is performed according to the IEC/IEEE Standard for Binary
5092| Floating-Point Arithmetic.
5093*----------------------------------------------------------------------------*/
e5a41ffa 5094float32 float32_log2(float32 a, float_status *status)
374dfc33 5095{
c120391c 5096 bool aSign, zSign;
0c48262d 5097 int aExp;
bb98fe42 5098 uint32_t aSig, zSig, i;
374dfc33 5099
ff32e16e 5100 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
5101 aSig = extractFloat32Frac( a );
5102 aExp = extractFloat32Exp( a );
5103 aSign = extractFloat32Sign( a );
5104
5105 if ( aExp == 0 ) {
5106 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5107 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5108 }
5109 if ( aSign ) {
ff32e16e 5110 float_raise(float_flag_invalid, status);
af39bc8c 5111 return float32_default_nan(status);
374dfc33
AJ
5112 }
5113 if ( aExp == 0xFF ) {
ff32e16e
PM
5114 if (aSig) {
5115 return propagateFloat32NaN(a, float32_zero, status);
5116 }
374dfc33
AJ
5117 return a;
5118 }
5119
5120 aExp -= 0x7F;
5121 aSig |= 0x00800000;
5122 zSign = aExp < 0;
5123 zSig = aExp << 23;
5124
5125 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 5126 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
5127 if ( aSig & 0x01000000 ) {
5128 aSig >>= 1;
5129 zSig |= i;
5130 }
5131 }
5132
5133 if ( zSign )
5134 zSig = -zSig;
5135
ff32e16e 5136 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
5137}
5138
158142c2 5139/*----------------------------------------------------------------------------
158142c2
FB
5140| Returns the result of converting the double-precision floating-point value
5141| `a' to the extended double-precision floating-point format. The conversion
5142| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5143| Arithmetic.
5144*----------------------------------------------------------------------------*/
5145
e5a41ffa 5146floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2 5147{
c120391c 5148 bool aSign;
0c48262d 5149 int aExp;
bb98fe42 5150 uint64_t aSig;
158142c2 5151
ff32e16e 5152 a = float64_squash_input_denormal(a, status);
158142c2
FB
5153 aSig = extractFloat64Frac( a );
5154 aExp = extractFloat64Exp( a );
5155 aSign = extractFloat64Sign( a );
5156 if ( aExp == 0x7FF ) {
ff32e16e 5157 if (aSig) {
7537c2b4
JM
5158 floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5159 status);
5160 return floatx80_silence_nan(res, status);
ff32e16e 5161 }
0f605c88
LV
5162 return packFloatx80(aSign,
5163 floatx80_infinity_high,
5164 floatx80_infinity_low);
158142c2
FB
5165 }
5166 if ( aExp == 0 ) {
5167 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5168 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5169 }
5170 return
5171 packFloatx80(
e9321124 5172 aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
158142c2
FB
5173
5174}
5175
158142c2
FB
5176/*----------------------------------------------------------------------------
5177| Returns the result of converting the double-precision floating-point value
5178| `a' to the quadruple-precision floating-point format. The conversion is
5179| performed according to the IEC/IEEE Standard for Binary Floating-Point
5180| Arithmetic.
5181*----------------------------------------------------------------------------*/
5182
e5a41ffa 5183float128 float64_to_float128(float64 a, float_status *status)
158142c2 5184{
c120391c 5185 bool aSign;
0c48262d 5186 int aExp;
bb98fe42 5187 uint64_t aSig, zSig0, zSig1;
158142c2 5188
ff32e16e 5189 a = float64_squash_input_denormal(a, status);
158142c2
FB
5190 aSig = extractFloat64Frac( a );
5191 aExp = extractFloat64Exp( a );
5192 aSign = extractFloat64Sign( a );
5193 if ( aExp == 0x7FF ) {
ff32e16e
PM
5194 if (aSig) {
5195 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5196 }
158142c2
FB
5197 return packFloat128( aSign, 0x7FFF, 0, 0 );
5198 }
5199 if ( aExp == 0 ) {
5200 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5201 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5202 --aExp;
5203 }
5204 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
5205 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
5206
5207}
5208
158142c2
FB
5209
5210/*----------------------------------------------------------------------------
5211| Returns the remainder of the double-precision floating-point value `a'
5212| with respect to the corresponding value `b'. The operation is performed
5213| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5214*----------------------------------------------------------------------------*/
5215
e5a41ffa 5216float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 5217{
c120391c 5218 bool aSign, zSign;
0c48262d 5219 int aExp, bExp, expDiff;
bb98fe42
AF
5220 uint64_t aSig, bSig;
5221 uint64_t q, alternateASig;
5222 int64_t sigMean;
158142c2 5223
ff32e16e
PM
5224 a = float64_squash_input_denormal(a, status);
5225 b = float64_squash_input_denormal(b, status);
158142c2
FB
5226 aSig = extractFloat64Frac( a );
5227 aExp = extractFloat64Exp( a );
5228 aSign = extractFloat64Sign( a );
5229 bSig = extractFloat64Frac( b );
5230 bExp = extractFloat64Exp( b );
158142c2
FB
5231 if ( aExp == 0x7FF ) {
5232 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 5233 return propagateFloat64NaN(a, b, status);
158142c2 5234 }
ff32e16e 5235 float_raise(float_flag_invalid, status);
af39bc8c 5236 return float64_default_nan(status);
158142c2
FB
5237 }
5238 if ( bExp == 0x7FF ) {
ff32e16e
PM
5239 if (bSig) {
5240 return propagateFloat64NaN(a, b, status);
5241 }
158142c2
FB
5242 return a;
5243 }
5244 if ( bExp == 0 ) {
5245 if ( bSig == 0 ) {
ff32e16e 5246 float_raise(float_flag_invalid, status);
af39bc8c 5247 return float64_default_nan(status);
158142c2
FB
5248 }
5249 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5250 }
5251 if ( aExp == 0 ) {
5252 if ( aSig == 0 ) return a;
5253 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5254 }
5255 expDiff = aExp - bExp;
e9321124
AB
5256 aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5257 bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
158142c2
FB
5258 if ( expDiff < 0 ) {
5259 if ( expDiff < -1 ) return a;
5260 aSig >>= 1;
5261 }
5262 q = ( bSig <= aSig );
5263 if ( q ) aSig -= bSig;
5264 expDiff -= 64;
5265 while ( 0 < expDiff ) {
5266 q = estimateDiv128To64( aSig, 0, bSig );
5267 q = ( 2 < q ) ? q - 2 : 0;
5268 aSig = - ( ( bSig>>2 ) * q );
5269 expDiff -= 62;
5270 }
5271 expDiff += 64;
5272 if ( 0 < expDiff ) {
5273 q = estimateDiv128To64( aSig, 0, bSig );
5274 q = ( 2 < q ) ? q - 2 : 0;
5275 q >>= 64 - expDiff;
5276 bSig >>= 2;
5277 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5278 }
5279 else {
5280 aSig >>= 2;
5281 bSig >>= 2;
5282 }
5283 do {
5284 alternateASig = aSig;
5285 ++q;
5286 aSig -= bSig;
bb98fe42 5287 } while ( 0 <= (int64_t) aSig );
158142c2
FB
5288 sigMean = aSig + alternateASig;
5289 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5290 aSig = alternateASig;
5291 }
bb98fe42 5292 zSign = ( (int64_t) aSig < 0 );
158142c2 5293 if ( zSign ) aSig = - aSig;
ff32e16e 5294 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
5295
5296}
5297
374dfc33
AJ
5298/*----------------------------------------------------------------------------
5299| Returns the binary log of the double-precision floating-point value `a'.
5300| The operation is performed according to the IEC/IEEE Standard for Binary
5301| Floating-Point Arithmetic.
5302*----------------------------------------------------------------------------*/
e5a41ffa 5303float64 float64_log2(float64 a, float_status *status)
374dfc33 5304{
c120391c 5305 bool aSign, zSign;
0c48262d 5306 int aExp;
bb98fe42 5307 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 5308 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
5309
5310 aSig = extractFloat64Frac( a );
5311 aExp = extractFloat64Exp( a );
5312 aSign = extractFloat64Sign( a );
5313
5314 if ( aExp == 0 ) {
5315 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5316 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5317 }
5318 if ( aSign ) {
ff32e16e 5319 float_raise(float_flag_invalid, status);
af39bc8c 5320 return float64_default_nan(status);
374dfc33
AJ
5321 }
5322 if ( aExp == 0x7FF ) {
ff32e16e
PM
5323 if (aSig) {
5324 return propagateFloat64NaN(a, float64_zero, status);
5325 }
374dfc33
AJ
5326 return a;
5327 }
5328
5329 aExp -= 0x3FF;
e9321124 5330 aSig |= UINT64_C(0x0010000000000000);
374dfc33 5331 zSign = aExp < 0;
bb98fe42 5332 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
5333 for (i = 1LL << 51; i > 0; i >>= 1) {
5334 mul64To128( aSig, aSig, &aSig0, &aSig1 );
5335 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
e9321124 5336 if ( aSig & UINT64_C(0x0020000000000000) ) {
374dfc33
AJ
5337 aSig >>= 1;
5338 zSig |= i;
5339 }
5340 }
5341
5342 if ( zSign )
5343 zSig = -zSig;
ff32e16e 5344 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
5345}
5346
158142c2
FB
5347/*----------------------------------------------------------------------------
5348| Returns the result of converting the extended double-precision floating-
5349| point value `a' to the 32-bit two's complement integer format. The
5350| conversion is performed according to the IEC/IEEE Standard for Binary
5351| Floating-Point Arithmetic---which means in particular that the conversion
5352| is rounded according to the current rounding mode. If `a' is a NaN, the
5353| largest positive integer is returned. Otherwise, if the conversion
5354| overflows, the largest integer with the same sign as `a' is returned.
5355*----------------------------------------------------------------------------*/
5356
f4014512 5357int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2 5358{
c120391c 5359 bool aSign;
f4014512 5360 int32_t aExp, shiftCount;
bb98fe42 5361 uint64_t aSig;
158142c2 5362
d1eb8f2a
AD
5363 if (floatx80_invalid_encoding(a)) {
5364 float_raise(float_flag_invalid, status);
5365 return 1 << 31;
5366 }
158142c2
FB
5367 aSig = extractFloatx80Frac( a );
5368 aExp = extractFloatx80Exp( a );
5369 aSign = extractFloatx80Sign( a );
bb98fe42 5370 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5371 shiftCount = 0x4037 - aExp;
5372 if ( shiftCount <= 0 ) shiftCount = 1;
5373 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 5374 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
5375
5376}
5377
5378/*----------------------------------------------------------------------------
5379| Returns the result of converting the extended double-precision floating-
5380| point value `a' to the 32-bit two's complement integer format. The
5381| conversion is performed according to the IEC/IEEE Standard for Binary
5382| Floating-Point Arithmetic, except that the conversion is always rounded
5383| toward zero. If `a' is a NaN, the largest positive integer is returned.
5384| Otherwise, if the conversion overflows, the largest integer with the same
5385| sign as `a' is returned.
5386*----------------------------------------------------------------------------*/
5387
f4014512 5388int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2 5389{
c120391c 5390 bool aSign;
f4014512 5391 int32_t aExp, shiftCount;
bb98fe42 5392 uint64_t aSig, savedASig;
b3a6a2e0 5393 int32_t z;
158142c2 5394
d1eb8f2a
AD
5395 if (floatx80_invalid_encoding(a)) {
5396 float_raise(float_flag_invalid, status);
5397 return 1 << 31;
5398 }
158142c2
FB
5399 aSig = extractFloatx80Frac( a );
5400 aExp = extractFloatx80Exp( a );
5401 aSign = extractFloatx80Sign( a );
5402 if ( 0x401E < aExp ) {
bb98fe42 5403 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
5404 goto invalid;
5405 }
5406 else if ( aExp < 0x3FFF ) {
a2f2d288 5407 if (aExp || aSig) {
d82f3b2d 5408 float_raise(float_flag_inexact, status);
a2f2d288 5409 }
158142c2
FB
5410 return 0;
5411 }
5412 shiftCount = 0x403E - aExp;
5413 savedASig = aSig;
5414 aSig >>= shiftCount;
5415 z = aSig;
5416 if ( aSign ) z = - z;
5417 if ( ( z < 0 ) ^ aSign ) {
5418 invalid:
ff32e16e 5419 float_raise(float_flag_invalid, status);
bb98fe42 5420 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5421 }
5422 if ( ( aSig<<shiftCount ) != savedASig ) {
d82f3b2d 5423 float_raise(float_flag_inexact, status);
158142c2
FB
5424 }
5425 return z;
5426
5427}
5428
5429/*----------------------------------------------------------------------------
5430| Returns the result of converting the extended double-precision floating-
5431| point value `a' to the 64-bit two's complement integer format. The
5432| conversion is performed according to the IEC/IEEE Standard for Binary
5433| Floating-Point Arithmetic---which means in particular that the conversion
5434| is rounded according to the current rounding mode. If `a' is a NaN,
5435| the largest positive integer is returned. Otherwise, if the conversion
5436| overflows, the largest integer with the same sign as `a' is returned.
5437*----------------------------------------------------------------------------*/
5438
f42c2224 5439int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2 5440{
c120391c 5441 bool aSign;
f4014512 5442 int32_t aExp, shiftCount;
bb98fe42 5443 uint64_t aSig, aSigExtra;
158142c2 5444
d1eb8f2a
AD
5445 if (floatx80_invalid_encoding(a)) {
5446 float_raise(float_flag_invalid, status);
5447 return 1ULL << 63;
5448 }
158142c2
FB
5449 aSig = extractFloatx80Frac( a );
5450 aExp = extractFloatx80Exp( a );
5451 aSign = extractFloatx80Sign( a );
5452 shiftCount = 0x403E - aExp;
5453 if ( shiftCount <= 0 ) {
5454 if ( shiftCount ) {
ff32e16e 5455 float_raise(float_flag_invalid, status);
0f605c88 5456 if (!aSign || floatx80_is_any_nan(a)) {
2c217da0 5457 return INT64_MAX;
158142c2 5458 }
2c217da0 5459 return INT64_MIN;
158142c2
FB
5460 }
5461 aSigExtra = 0;
5462 }
5463 else {
5464 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5465 }
ff32e16e 5466 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
5467
5468}
5469
5470/*----------------------------------------------------------------------------
5471| Returns the result of converting the extended double-precision floating-
5472| point value `a' to the 64-bit two's complement integer format. The
5473| conversion is performed according to the IEC/IEEE Standard for Binary
5474| Floating-Point Arithmetic, except that the conversion is always rounded
5475| toward zero. If `a' is a NaN, the largest positive integer is returned.
5476| Otherwise, if the conversion overflows, the largest integer with the same
5477| sign as `a' is returned.
5478*----------------------------------------------------------------------------*/
5479
f42c2224 5480int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2 5481{
c120391c 5482 bool aSign;
f4014512 5483 int32_t aExp, shiftCount;
bb98fe42 5484 uint64_t aSig;
f42c2224 5485 int64_t z;
158142c2 5486
d1eb8f2a
AD
5487 if (floatx80_invalid_encoding(a)) {
5488 float_raise(float_flag_invalid, status);
5489 return 1ULL << 63;
5490 }
158142c2
FB
5491 aSig = extractFloatx80Frac( a );
5492 aExp = extractFloatx80Exp( a );
5493 aSign = extractFloatx80Sign( a );
5494 shiftCount = aExp - 0x403E;
5495 if ( 0 <= shiftCount ) {
e9321124 5496 aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
158142c2 5497 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 5498 float_raise(float_flag_invalid, status);
158142c2 5499 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
2c217da0 5500 return INT64_MAX;
158142c2
FB
5501 }
5502 }
2c217da0 5503 return INT64_MIN;
158142c2
FB
5504 }
5505 else if ( aExp < 0x3FFF ) {
a2f2d288 5506 if (aExp | aSig) {
d82f3b2d 5507 float_raise(float_flag_inexact, status);
a2f2d288 5508 }
158142c2
FB
5509 return 0;
5510 }
5511 z = aSig>>( - shiftCount );
bb98fe42 5512 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
d82f3b2d 5513 float_raise(float_flag_inexact, status);
158142c2
FB
5514 }
5515 if ( aSign ) z = - z;
5516 return z;
5517
5518}
5519
5520/*----------------------------------------------------------------------------
5521| Returns the result of converting the extended double-precision floating-
5522| point value `a' to the single-precision floating-point format. The
5523| conversion is performed according to the IEC/IEEE Standard for Binary
5524| Floating-Point Arithmetic.
5525*----------------------------------------------------------------------------*/
5526
e5a41ffa 5527float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2 5528{
c120391c 5529 bool aSign;
f4014512 5530 int32_t aExp;
bb98fe42 5531 uint64_t aSig;
158142c2 5532
d1eb8f2a
AD
5533 if (floatx80_invalid_encoding(a)) {
5534 float_raise(float_flag_invalid, status);
5535 return float32_default_nan(status);
5536 }
158142c2
FB
5537 aSig = extractFloatx80Frac( a );
5538 aExp = extractFloatx80Exp( a );
5539 aSign = extractFloatx80Sign( a );
5540 if ( aExp == 0x7FFF ) {
bb98fe42 5541 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5542 float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5543 status);
5544 return float32_silence_nan(res, status);
158142c2
FB
5545 }
5546 return packFloat32( aSign, 0xFF, 0 );
5547 }
5548 shift64RightJamming( aSig, 33, &aSig );
5549 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 5550 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
5551
5552}
5553
5554/*----------------------------------------------------------------------------
5555| Returns the result of converting the extended double-precision floating-
5556| point value `a' to the double-precision floating-point format. The
5557| conversion is performed according to the IEC/IEEE Standard for Binary
5558| Floating-Point Arithmetic.
5559*----------------------------------------------------------------------------*/
5560
e5a41ffa 5561float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2 5562{
c120391c 5563 bool aSign;
f4014512 5564 int32_t aExp;
bb98fe42 5565 uint64_t aSig, zSig;
158142c2 5566
d1eb8f2a
AD
5567 if (floatx80_invalid_encoding(a)) {
5568 float_raise(float_flag_invalid, status);
5569 return float64_default_nan(status);
5570 }
158142c2
FB
5571 aSig = extractFloatx80Frac( a );
5572 aExp = extractFloatx80Exp( a );
5573 aSign = extractFloatx80Sign( a );
5574 if ( aExp == 0x7FFF ) {
bb98fe42 5575 if ( (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5576 float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5577 status);
5578 return float64_silence_nan(res, status);
158142c2
FB
5579 }
5580 return packFloat64( aSign, 0x7FF, 0 );
5581 }
5582 shift64RightJamming( aSig, 1, &zSig );
5583 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 5584 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
5585
5586}
5587
158142c2
FB
5588/*----------------------------------------------------------------------------
5589| Returns the result of converting the extended double-precision floating-
5590| point value `a' to the quadruple-precision floating-point format. The
5591| conversion is performed according to the IEC/IEEE Standard for Binary
5592| Floating-Point Arithmetic.
5593*----------------------------------------------------------------------------*/
5594
e5a41ffa 5595float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2 5596{
c120391c 5597 bool aSign;
0c48262d 5598 int aExp;
bb98fe42 5599 uint64_t aSig, zSig0, zSig1;
158142c2 5600
d1eb8f2a
AD
5601 if (floatx80_invalid_encoding(a)) {
5602 float_raise(float_flag_invalid, status);
5603 return float128_default_nan(status);
5604 }
158142c2
FB
5605 aSig = extractFloatx80Frac( a );
5606 aExp = extractFloatx80Exp( a );
5607 aSign = extractFloatx80Sign( a );
bb98fe42 5608 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
7537c2b4
JM
5609 float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5610 status);
5611 return float128_silence_nan(res, status);
158142c2
FB
5612 }
5613 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5614 return packFloat128( aSign, aExp, zSig0, zSig1 );
5615
5616}
5617
0f721292
LV
5618/*----------------------------------------------------------------------------
5619| Rounds the extended double-precision floating-point value `a'
5620| to the precision provided by floatx80_rounding_precision and returns the
5621| result as an extended double-precision floating-point value.
5622| The operation is performed according to the IEC/IEEE Standard for Binary
5623| Floating-Point Arithmetic.
5624*----------------------------------------------------------------------------*/
5625
5626floatx80 floatx80_round(floatx80 a, float_status *status)
5627{
5628 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5629 extractFloatx80Sign(a),
5630 extractFloatx80Exp(a),
5631 extractFloatx80Frac(a), 0, status);
5632}
5633
158142c2
FB
5634/*----------------------------------------------------------------------------
5635| Rounds the extended double-precision floating-point value `a' to an integer,
5636| and returns the result as an extended quadruple-precision floating-point
5637| value. The operation is performed according to the IEC/IEEE Standard for
5638| Binary Floating-Point Arithmetic.
5639*----------------------------------------------------------------------------*/
5640
e5a41ffa 5641floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2 5642{
c120391c 5643 bool aSign;
f4014512 5644 int32_t aExp;
bb98fe42 5645 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5646 floatx80 z;
5647
d1eb8f2a
AD
5648 if (floatx80_invalid_encoding(a)) {
5649 float_raise(float_flag_invalid, status);
5650 return floatx80_default_nan(status);
5651 }
158142c2
FB
5652 aExp = extractFloatx80Exp( a );
5653 if ( 0x403E <= aExp ) {
bb98fe42 5654 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 5655 return propagateFloatx80NaN(a, a, status);
158142c2
FB
5656 }
5657 return a;
5658 }
5659 if ( aExp < 0x3FFF ) {
5660 if ( ( aExp == 0 )
9ecaf5cc 5661 && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
158142c2
FB
5662 return a;
5663 }
d82f3b2d 5664 float_raise(float_flag_inexact, status);
158142c2 5665 aSign = extractFloatx80Sign( a );
a2f2d288 5666 switch (status->float_rounding_mode) {
158142c2 5667 case float_round_nearest_even:
bb98fe42 5668 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
5669 ) {
5670 return
e9321124 5671 packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
158142c2
FB
5672 }
5673 break;
f9288a76
PM
5674 case float_round_ties_away:
5675 if (aExp == 0x3FFE) {
e9321124 5676 return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
f9288a76
PM
5677 }
5678 break;
158142c2
FB
5679 case float_round_down:
5680 return
5681 aSign ?
e9321124 5682 packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
158142c2
FB
5683 : packFloatx80( 0, 0, 0 );
5684 case float_round_up:
5685 return
5686 aSign ? packFloatx80( 1, 0, 0 )
e9321124 5687 : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
3dede407
RH
5688
5689 case float_round_to_zero:
5690 break;
5691 default:
5692 g_assert_not_reached();
158142c2
FB
5693 }
5694 return packFloatx80( aSign, 0, 0 );
5695 }
5696 lastBitMask = 1;
5697 lastBitMask <<= 0x403E - aExp;
5698 roundBitsMask = lastBitMask - 1;
5699 z = a;
a2f2d288 5700 switch (status->float_rounding_mode) {
dc355b76 5701 case float_round_nearest_even:
158142c2 5702 z.low += lastBitMask>>1;
dc355b76
PM
5703 if ((z.low & roundBitsMask) == 0) {
5704 z.low &= ~lastBitMask;
5705 }
5706 break;
f9288a76
PM
5707 case float_round_ties_away:
5708 z.low += lastBitMask >> 1;
5709 break;
dc355b76
PM
5710 case float_round_to_zero:
5711 break;
5712 case float_round_up:
5713 if (!extractFloatx80Sign(z)) {
5714 z.low += roundBitsMask;
5715 }
5716 break;
5717 case float_round_down:
5718 if (extractFloatx80Sign(z)) {
158142c2
FB
5719 z.low += roundBitsMask;
5720 }
dc355b76
PM
5721 break;
5722 default:
5723 abort();
158142c2
FB
5724 }
5725 z.low &= ~ roundBitsMask;
5726 if ( z.low == 0 ) {
5727 ++z.high;
e9321124 5728 z.low = UINT64_C(0x8000000000000000);
158142c2 5729 }
a2f2d288 5730 if (z.low != a.low) {
d82f3b2d 5731 float_raise(float_flag_inexact, status);
a2f2d288 5732 }
158142c2
FB
5733 return z;
5734
5735}
5736
5737/*----------------------------------------------------------------------------
5738| Returns the result of adding the absolute values of the extended double-
5739| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5740| negated before being returned. `zSign' is ignored if the result is a NaN.
5741| The addition is performed according to the IEC/IEEE Standard for Binary
5742| Floating-Point Arithmetic.
5743*----------------------------------------------------------------------------*/
5744
c120391c 5745static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5746 float_status *status)
158142c2 5747{
f4014512 5748 int32_t aExp, bExp, zExp;
bb98fe42 5749 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5750 int32_t expDiff;
158142c2
FB
5751
5752 aSig = extractFloatx80Frac( a );
5753 aExp = extractFloatx80Exp( a );
5754 bSig = extractFloatx80Frac( b );
5755 bExp = extractFloatx80Exp( b );
5756 expDiff = aExp - bExp;
5757 if ( 0 < expDiff ) {
5758 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5759 if ((uint64_t)(aSig << 1)) {
5760 return propagateFloatx80NaN(a, b, status);
5761 }
158142c2
FB
5762 return a;
5763 }
5764 if ( bExp == 0 ) --expDiff;
5765 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5766 zExp = aExp;
5767 }
5768 else if ( expDiff < 0 ) {
5769 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5770 if ((uint64_t)(bSig << 1)) {
5771 return propagateFloatx80NaN(a, b, status);
5772 }
0f605c88
LV
5773 return packFloatx80(zSign,
5774 floatx80_infinity_high,
5775 floatx80_infinity_low);
158142c2
FB
5776 }
5777 if ( aExp == 0 ) ++expDiff;
5778 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5779 zExp = bExp;
5780 }
5781 else {
5782 if ( aExp == 0x7FFF ) {
bb98fe42 5783 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5784 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5785 }
5786 return a;
5787 }
5788 zSig1 = 0;
5789 zSig0 = aSig + bSig;
5790 if ( aExp == 0 ) {
41602807
JM
5791 if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
5792 /* At least one of the values is a pseudo-denormal,
5793 * and there is a carry out of the result. */
5794 zExp = 1;
5795 goto shiftRight1;
5796 }
2f311075
RH
5797 if (zSig0 == 0) {
5798 return packFloatx80(zSign, 0, 0);
5799 }
158142c2
FB
5800 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5801 goto roundAndPack;
5802 }
5803 zExp = aExp;
5804 goto shiftRight1;
5805 }
5806 zSig0 = aSig + bSig;
bb98fe42 5807 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
5808 shiftRight1:
5809 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
e9321124 5810 zSig0 |= UINT64_C(0x8000000000000000);
158142c2
FB
5811 ++zExp;
5812 roundAndPack:
a2f2d288 5813 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5814 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5815}
5816
5817/*----------------------------------------------------------------------------
5818| Returns the result of subtracting the absolute values of the extended
5819| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5820| difference is negated before being returned. `zSign' is ignored if the
5821| result is a NaN. The subtraction is performed according to the IEC/IEEE
5822| Standard for Binary Floating-Point Arithmetic.
5823*----------------------------------------------------------------------------*/
5824
c120391c 5825static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
e5a41ffa 5826 float_status *status)
158142c2 5827{
f4014512 5828 int32_t aExp, bExp, zExp;
bb98fe42 5829 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 5830 int32_t expDiff;
158142c2
FB
5831
5832 aSig = extractFloatx80Frac( a );
5833 aExp = extractFloatx80Exp( a );
5834 bSig = extractFloatx80Frac( b );
5835 bExp = extractFloatx80Exp( b );
5836 expDiff = aExp - bExp;
5837 if ( 0 < expDiff ) goto aExpBigger;
5838 if ( expDiff < 0 ) goto bExpBigger;
5839 if ( aExp == 0x7FFF ) {
bb98fe42 5840 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 5841 return propagateFloatx80NaN(a, b, status);
158142c2 5842 }
ff32e16e 5843 float_raise(float_flag_invalid, status);
af39bc8c 5844 return floatx80_default_nan(status);
158142c2
FB
5845 }
5846 if ( aExp == 0 ) {
5847 aExp = 1;
5848 bExp = 1;
5849 }
5850 zSig1 = 0;
5851 if ( bSig < aSig ) goto aBigger;
5852 if ( aSig < bSig ) goto bBigger;
a2f2d288 5853 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
5854 bExpBigger:
5855 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5856 if ((uint64_t)(bSig << 1)) {
5857 return propagateFloatx80NaN(a, b, status);
5858 }
0f605c88
LV
5859 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5860 floatx80_infinity_low);
158142c2
FB
5861 }
5862 if ( aExp == 0 ) ++expDiff;
5863 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5864 bBigger:
5865 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5866 zExp = bExp;
5867 zSign ^= 1;
5868 goto normalizeRoundAndPack;
5869 aExpBigger:
5870 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5871 if ((uint64_t)(aSig << 1)) {
5872 return propagateFloatx80NaN(a, b, status);
5873 }
158142c2
FB
5874 return a;
5875 }
5876 if ( bExp == 0 ) --expDiff;
5877 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5878 aBigger:
5879 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5880 zExp = aExp;
5881 normalizeRoundAndPack:
a2f2d288 5882 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5883 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5884}
5885
5886/*----------------------------------------------------------------------------
5887| Returns the result of adding the extended double-precision floating-point
5888| values `a' and `b'. The operation is performed according to the IEC/IEEE
5889| Standard for Binary Floating-Point Arithmetic.
5890*----------------------------------------------------------------------------*/
5891
e5a41ffa 5892floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2 5893{
c120391c 5894 bool aSign, bSign;
158142c2 5895
d1eb8f2a
AD
5896 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5897 float_raise(float_flag_invalid, status);
5898 return floatx80_default_nan(status);
5899 }
158142c2
FB
5900 aSign = extractFloatx80Sign( a );
5901 bSign = extractFloatx80Sign( b );
5902 if ( aSign == bSign ) {
ff32e16e 5903 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5904 }
5905 else {
ff32e16e 5906 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5907 }
5908
5909}
5910
5911/*----------------------------------------------------------------------------
5912| Returns the result of subtracting the extended double-precision floating-
5913| point values `a' and `b'. The operation is performed according to the
5914| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5915*----------------------------------------------------------------------------*/
5916
e5a41ffa 5917floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2 5918{
c120391c 5919 bool aSign, bSign;
158142c2 5920
d1eb8f2a
AD
5921 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5922 float_raise(float_flag_invalid, status);
5923 return floatx80_default_nan(status);
5924 }
158142c2
FB
5925 aSign = extractFloatx80Sign( a );
5926 bSign = extractFloatx80Sign( b );
5927 if ( aSign == bSign ) {
ff32e16e 5928 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5929 }
5930 else {
ff32e16e 5931 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
5932 }
5933
5934}
5935
5936/*----------------------------------------------------------------------------
5937| Returns the result of multiplying the extended double-precision floating-
5938| point values `a' and `b'. The operation is performed according to the
5939| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5940*----------------------------------------------------------------------------*/
5941
e5a41ffa 5942floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2 5943{
c120391c 5944 bool aSign, bSign, zSign;
f4014512 5945 int32_t aExp, bExp, zExp;
bb98fe42 5946 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 5947
d1eb8f2a
AD
5948 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5949 float_raise(float_flag_invalid, status);
5950 return floatx80_default_nan(status);
5951 }
158142c2
FB
5952 aSig = extractFloatx80Frac( a );
5953 aExp = extractFloatx80Exp( a );
5954 aSign = extractFloatx80Sign( a );
5955 bSig = extractFloatx80Frac( b );
5956 bExp = extractFloatx80Exp( b );
5957 bSign = extractFloatx80Sign( b );
5958 zSign = aSign ^ bSign;
5959 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5960 if ( (uint64_t) ( aSig<<1 )
5961 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5962 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5963 }
5964 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
5965 return packFloatx80(zSign, floatx80_infinity_high,
5966 floatx80_infinity_low);
158142c2
FB
5967 }
5968 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5969 if ((uint64_t)(bSig << 1)) {
5970 return propagateFloatx80NaN(a, b, status);
5971 }
158142c2
FB
5972 if ( ( aExp | aSig ) == 0 ) {
5973 invalid:
ff32e16e 5974 float_raise(float_flag_invalid, status);
af39bc8c 5975 return floatx80_default_nan(status);
158142c2 5976 }
0f605c88
LV
5977 return packFloatx80(zSign, floatx80_infinity_high,
5978 floatx80_infinity_low);
158142c2
FB
5979 }
5980 if ( aExp == 0 ) {
5981 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5982 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5983 }
5984 if ( bExp == 0 ) {
5985 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5986 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5987 }
5988 zExp = aExp + bExp - 0x3FFE;
5989 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 5990 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
5991 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5992 --zExp;
5993 }
a2f2d288 5994 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 5995 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
5996}
5997
5998/*----------------------------------------------------------------------------
5999| Returns the result of dividing the extended double-precision floating-point
6000| value `a' by the corresponding value `b'. The operation is performed
6001| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6002*----------------------------------------------------------------------------*/
6003
e5a41ffa 6004floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2 6005{
c120391c 6006 bool aSign, bSign, zSign;
f4014512 6007 int32_t aExp, bExp, zExp;
bb98fe42
AF
6008 uint64_t aSig, bSig, zSig0, zSig1;
6009 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 6010
d1eb8f2a
AD
6011 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6012 float_raise(float_flag_invalid, status);
6013 return floatx80_default_nan(status);
6014 }
158142c2
FB
6015 aSig = extractFloatx80Frac( a );
6016 aExp = extractFloatx80Exp( a );
6017 aSign = extractFloatx80Sign( a );
6018 bSig = extractFloatx80Frac( b );
6019 bExp = extractFloatx80Exp( b );
6020 bSign = extractFloatx80Sign( b );
6021 zSign = aSign ^ bSign;
6022 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6023 if ((uint64_t)(aSig << 1)) {
6024 return propagateFloatx80NaN(a, b, status);
6025 }
158142c2 6026 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6027 if ((uint64_t)(bSig << 1)) {
6028 return propagateFloatx80NaN(a, b, status);
6029 }
158142c2
FB
6030 goto invalid;
6031 }
0f605c88
LV
6032 return packFloatx80(zSign, floatx80_infinity_high,
6033 floatx80_infinity_low);
158142c2
FB
6034 }
6035 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6036 if ((uint64_t)(bSig << 1)) {
6037 return propagateFloatx80NaN(a, b, status);
6038 }
158142c2
FB
6039 return packFloatx80( zSign, 0, 0 );
6040 }
6041 if ( bExp == 0 ) {
6042 if ( bSig == 0 ) {
6043 if ( ( aExp | aSig ) == 0 ) {
6044 invalid:
ff32e16e 6045 float_raise(float_flag_invalid, status);
af39bc8c 6046 return floatx80_default_nan(status);
158142c2 6047 }
ff32e16e 6048 float_raise(float_flag_divbyzero, status);
0f605c88
LV
6049 return packFloatx80(zSign, floatx80_infinity_high,
6050 floatx80_infinity_low);
158142c2
FB
6051 }
6052 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6053 }
6054 if ( aExp == 0 ) {
6055 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6056 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6057 }
6058 zExp = aExp - bExp + 0x3FFE;
6059 rem1 = 0;
6060 if ( bSig <= aSig ) {
6061 shift128Right( aSig, 0, 1, &aSig, &rem1 );
6062 ++zExp;
6063 }
6064 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6065 mul64To128( bSig, zSig0, &term0, &term1 );
6066 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 6067 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6068 --zSig0;
6069 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6070 }
6071 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 6072 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
6073 mul64To128( bSig, zSig1, &term1, &term2 );
6074 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 6075 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6076 --zSig1;
6077 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6078 }
6079 zSig1 |= ( ( rem1 | rem2 ) != 0 );
6080 }
a2f2d288 6081 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 6082 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
6083}
6084
6085/*----------------------------------------------------------------------------
6086| Returns the remainder of the extended double-precision floating-point value
6087| `a' with respect to the corresponding value `b'. The operation is performed
6b8b0136
JM
6088| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6089| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
445810ec
JM
6090| the quotient toward zero instead. '*quotient' is set to the low 64 bits of
6091| the absolute value of the integer quotient.
158142c2
FB
6092*----------------------------------------------------------------------------*/
6093
445810ec 6094floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6b8b0136 6095 float_status *status)
158142c2 6096{
c120391c 6097 bool aSign, zSign;
b662495d 6098 int32_t aExp, bExp, expDiff, aExpOrig;
bb98fe42
AF
6099 uint64_t aSig0, aSig1, bSig;
6100 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 6101
445810ec 6102 *quotient = 0;
d1eb8f2a
AD
6103 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6104 float_raise(float_flag_invalid, status);
6105 return floatx80_default_nan(status);
6106 }
158142c2 6107 aSig0 = extractFloatx80Frac( a );
b662495d 6108 aExpOrig = aExp = extractFloatx80Exp( a );
158142c2
FB
6109 aSign = extractFloatx80Sign( a );
6110 bSig = extractFloatx80Frac( b );
6111 bExp = extractFloatx80Exp( b );
158142c2 6112 if ( aExp == 0x7FFF ) {
bb98fe42
AF
6113 if ( (uint64_t) ( aSig0<<1 )
6114 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 6115 return propagateFloatx80NaN(a, b, status);
158142c2
FB
6116 }
6117 goto invalid;
6118 }
6119 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6120 if ((uint64_t)(bSig << 1)) {
6121 return propagateFloatx80NaN(a, b, status);
6122 }
b662495d
JM
6123 if (aExp == 0 && aSig0 >> 63) {
6124 /*
6125 * Pseudo-denormal argument must be returned in normalized
6126 * form.
6127 */
6128 return packFloatx80(aSign, 1, aSig0);
6129 }
158142c2
FB
6130 return a;
6131 }
6132 if ( bExp == 0 ) {
6133 if ( bSig == 0 ) {
6134 invalid:
ff32e16e 6135 float_raise(float_flag_invalid, status);
af39bc8c 6136 return floatx80_default_nan(status);
158142c2
FB
6137 }
6138 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6139 }
6140 if ( aExp == 0 ) {
499a2f7b 6141 if ( aSig0 == 0 ) return a;
158142c2
FB
6142 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6143 }
158142c2
FB
6144 zSign = aSign;
6145 expDiff = aExp - bExp;
6146 aSig1 = 0;
6147 if ( expDiff < 0 ) {
b662495d
JM
6148 if ( mod || expDiff < -1 ) {
6149 if (aExp == 1 && aExpOrig == 0) {
6150 /*
6151 * Pseudo-denormal argument must be returned in
6152 * normalized form.
6153 */
6154 return packFloatx80(aSign, aExp, aSig0);
6155 }
6156 return a;
6157 }
158142c2
FB
6158 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6159 expDiff = 0;
6160 }
445810ec 6161 *quotient = q = ( bSig <= aSig0 );
158142c2
FB
6162 if ( q ) aSig0 -= bSig;
6163 expDiff -= 64;
6164 while ( 0 < expDiff ) {
6165 q = estimateDiv128To64( aSig0, aSig1, bSig );
6166 q = ( 2 < q ) ? q - 2 : 0;
6167 mul64To128( bSig, q, &term0, &term1 );
6168 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6169 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6170 expDiff -= 62;
445810ec
JM
6171 *quotient <<= 62;
6172 *quotient += q;
158142c2
FB
6173 }
6174 expDiff += 64;
6175 if ( 0 < expDiff ) {
6176 q = estimateDiv128To64( aSig0, aSig1, bSig );
6177 q = ( 2 < q ) ? q - 2 : 0;
6178 q >>= 64 - expDiff;
6179 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6180 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6181 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6182 while ( le128( term0, term1, aSig0, aSig1 ) ) {
6183 ++q;
6184 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6185 }
445810ec
JM
6186 if (expDiff < 64) {
6187 *quotient <<= expDiff;
6188 } else {
6189 *quotient = 0;
6190 }
6191 *quotient += q;
158142c2
FB
6192 }
6193 else {
6194 term1 = 0;
6195 term0 = bSig;
6196 }
6b8b0136
JM
6197 if (!mod) {
6198 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6199 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6200 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6201 && ( q & 1 ) )
6202 ) {
6203 aSig0 = alternateASig0;
6204 aSig1 = alternateASig1;
6205 zSign = ! zSign;
445810ec 6206 ++*quotient;
6b8b0136 6207 }
158142c2
FB
6208 }
6209 return
6210 normalizeRoundAndPackFloatx80(
ff32e16e 6211 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
6212
6213}
6214
6b8b0136
JM
6215/*----------------------------------------------------------------------------
6216| Returns the remainder of the extended double-precision floating-point value
6217| `a' with respect to the corresponding value `b'. The operation is performed
6218| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6219*----------------------------------------------------------------------------*/
6220
6221floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6222{
445810ec
JM
6223 uint64_t quotient;
6224 return floatx80_modrem(a, b, false, &quotient, status);
6b8b0136
JM
6225}
6226
6227/*----------------------------------------------------------------------------
6228| Returns the remainder of the extended double-precision floating-point value
6229| `a' with respect to the corresponding value `b', with the quotient truncated
6230| toward zero.
6231*----------------------------------------------------------------------------*/
6232
6233floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6234{
445810ec
JM
6235 uint64_t quotient;
6236 return floatx80_modrem(a, b, true, &quotient, status);
6b8b0136
JM
6237}
6238
158142c2
FB
6239/*----------------------------------------------------------------------------
6240| Returns the square root of the extended double-precision floating-point
6241| value `a'. The operation is performed according to the IEC/IEEE Standard
6242| for Binary Floating-Point Arithmetic.
6243*----------------------------------------------------------------------------*/
6244
e5a41ffa 6245floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2 6246{
c120391c 6247 bool aSign;
f4014512 6248 int32_t aExp, zExp;
bb98fe42
AF
6249 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6250 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 6251
d1eb8f2a
AD
6252 if (floatx80_invalid_encoding(a)) {
6253 float_raise(float_flag_invalid, status);
6254 return floatx80_default_nan(status);
6255 }
158142c2
FB
6256 aSig0 = extractFloatx80Frac( a );
6257 aExp = extractFloatx80Exp( a );
6258 aSign = extractFloatx80Sign( a );
6259 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6260 if ((uint64_t)(aSig0 << 1)) {
6261 return propagateFloatx80NaN(a, a, status);
6262 }
158142c2
FB
6263 if ( ! aSign ) return a;
6264 goto invalid;
6265 }
6266 if ( aSign ) {
6267 if ( ( aExp | aSig0 ) == 0 ) return a;
6268 invalid:
ff32e16e 6269 float_raise(float_flag_invalid, status);
af39bc8c 6270 return floatx80_default_nan(status);
158142c2
FB
6271 }
6272 if ( aExp == 0 ) {
6273 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6274 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6275 }
6276 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6277 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6278 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6279 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6280 doubleZSig0 = zSig0<<1;
6281 mul64To128( zSig0, zSig0, &term0, &term1 );
6282 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6283 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6284 --zSig0;
6285 doubleZSig0 -= 2;
6286 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6287 }
6288 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
e9321124 6289 if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
158142c2
FB
6290 if ( zSig1 == 0 ) zSig1 = 1;
6291 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6292 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6293 mul64To128( zSig1, zSig1, &term2, &term3 );
6294 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6295 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6296 --zSig1;
6297 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6298 term3 |= 1;
6299 term2 |= doubleZSig0;
6300 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6301 }
6302 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6303 }
6304 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6305 zSig0 |= doubleZSig0;
a2f2d288
PM
6306 return roundAndPackFloatx80(status->floatx80_rounding_precision,
6307 0, zExp, zSig0, zSig1, status);
158142c2
FB
6308}
6309
6310/*----------------------------------------------------------------------------
158142c2
FB
6311| Returns the result of converting the quadruple-precision floating-point
6312| value `a' to the 32-bit two's complement integer format. The conversion
6313| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6314| Arithmetic---which means in particular that the conversion is rounded
6315| according to the current rounding mode. If `a' is a NaN, the largest
6316| positive integer is returned. Otherwise, if the conversion overflows, the
6317| largest integer with the same sign as `a' is returned.
6318*----------------------------------------------------------------------------*/
6319
f4014512 6320int32_t float128_to_int32(float128 a, float_status *status)
158142c2 6321{
c120391c 6322 bool aSign;
f4014512 6323 int32_t aExp, shiftCount;
bb98fe42 6324 uint64_t aSig0, aSig1;
158142c2
FB
6325
6326 aSig1 = extractFloat128Frac1( a );
6327 aSig0 = extractFloat128Frac0( a );
6328 aExp = extractFloat128Exp( a );
6329 aSign = extractFloat128Sign( a );
6330 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
e9321124 6331 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6332 aSig0 |= ( aSig1 != 0 );
6333 shiftCount = 0x4028 - aExp;
6334 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 6335 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
6336
6337}
6338
6339/*----------------------------------------------------------------------------
6340| Returns the result of converting the quadruple-precision floating-point
6341| value `a' to the 32-bit two's complement integer format. The conversion
6342| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6343| Arithmetic, except that the conversion is always rounded toward zero. If
6344| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
6345| conversion overflows, the largest integer with the same sign as `a' is
6346| returned.
6347*----------------------------------------------------------------------------*/
6348
f4014512 6349int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2 6350{
c120391c 6351 bool aSign;
f4014512 6352 int32_t aExp, shiftCount;
bb98fe42 6353 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 6354 int32_t z;
158142c2
FB
6355
6356 aSig1 = extractFloat128Frac1( a );
6357 aSig0 = extractFloat128Frac0( a );
6358 aExp = extractFloat128Exp( a );
6359 aSign = extractFloat128Sign( a );
6360 aSig0 |= ( aSig1 != 0 );
6361 if ( 0x401E < aExp ) {
6362 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6363 goto invalid;
6364 }
6365 else if ( aExp < 0x3FFF ) {
a2f2d288 6366 if (aExp || aSig0) {
d82f3b2d 6367 float_raise(float_flag_inexact, status);
a2f2d288 6368 }
158142c2
FB
6369 return 0;
6370 }
e9321124 6371 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6372 shiftCount = 0x402F - aExp;
6373 savedASig = aSig0;
6374 aSig0 >>= shiftCount;
6375 z = aSig0;
6376 if ( aSign ) z = - z;
6377 if ( ( z < 0 ) ^ aSign ) {
6378 invalid:
ff32e16e 6379 float_raise(float_flag_invalid, status);
2c217da0 6380 return aSign ? INT32_MIN : INT32_MAX;
158142c2
FB
6381 }
6382 if ( ( aSig0<<shiftCount ) != savedASig ) {
d82f3b2d 6383 float_raise(float_flag_inexact, status);
158142c2
FB
6384 }
6385 return z;
6386
6387}
6388
6389/*----------------------------------------------------------------------------
6390| Returns the result of converting the quadruple-precision floating-point
6391| value `a' to the 64-bit two's complement integer format. The conversion
6392| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6393| Arithmetic---which means in particular that the conversion is rounded
6394| according to the current rounding mode. If `a' is a NaN, the largest
6395| positive integer is returned. Otherwise, if the conversion overflows, the
6396| largest integer with the same sign as `a' is returned.
6397*----------------------------------------------------------------------------*/
6398
f42c2224 6399int64_t float128_to_int64(float128 a, float_status *status)
158142c2 6400{
c120391c 6401 bool aSign;
f4014512 6402 int32_t aExp, shiftCount;
bb98fe42 6403 uint64_t aSig0, aSig1;
158142c2
FB
6404
6405 aSig1 = extractFloat128Frac1( a );
6406 aSig0 = extractFloat128Frac0( a );
6407 aExp = extractFloat128Exp( a );
6408 aSign = extractFloat128Sign( a );
e9321124 6409 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6410 shiftCount = 0x402F - aExp;
6411 if ( shiftCount <= 0 ) {
6412 if ( 0x403E < aExp ) {
ff32e16e 6413 float_raise(float_flag_invalid, status);
158142c2
FB
6414 if ( ! aSign
6415 || ( ( aExp == 0x7FFF )
e9321124 6416 && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
158142c2
FB
6417 )
6418 ) {
2c217da0 6419 return INT64_MAX;
158142c2 6420 }
2c217da0 6421 return INT64_MIN;
158142c2
FB
6422 }
6423 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6424 }
6425 else {
6426 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6427 }
ff32e16e 6428 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
6429
6430}
6431
6432/*----------------------------------------------------------------------------
6433| Returns the result of converting the quadruple-precision floating-point
6434| value `a' to the 64-bit two's complement integer format. The conversion
6435| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6436| Arithmetic, except that the conversion is always rounded toward zero.
6437| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
6438| the conversion overflows, the largest integer with the same sign as `a' is
6439| returned.
6440*----------------------------------------------------------------------------*/
6441
f42c2224 6442int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2 6443{
c120391c 6444 bool aSign;
f4014512 6445 int32_t aExp, shiftCount;
bb98fe42 6446 uint64_t aSig0, aSig1;
f42c2224 6447 int64_t z;
158142c2
FB
6448
6449 aSig1 = extractFloat128Frac1( a );
6450 aSig0 = extractFloat128Frac0( a );
6451 aExp = extractFloat128Exp( a );
6452 aSign = extractFloat128Sign( a );
e9321124 6453 if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6454 shiftCount = aExp - 0x402F;
6455 if ( 0 < shiftCount ) {
6456 if ( 0x403E <= aExp ) {
e9321124
AB
6457 aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
6458 if ( ( a.high == UINT64_C(0xC03E000000000000) )
6459 && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
a2f2d288 6460 if (aSig1) {
d82f3b2d 6461 float_raise(float_flag_inexact, status);
a2f2d288 6462 }
158142c2
FB
6463 }
6464 else {
ff32e16e 6465 float_raise(float_flag_invalid, status);
158142c2 6466 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
2c217da0 6467 return INT64_MAX;
158142c2
FB
6468 }
6469 }
2c217da0 6470 return INT64_MIN;
158142c2
FB
6471 }
6472 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 6473 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
d82f3b2d 6474 float_raise(float_flag_inexact, status);
158142c2
FB
6475 }
6476 }
6477 else {
6478 if ( aExp < 0x3FFF ) {
6479 if ( aExp | aSig0 | aSig1 ) {
d82f3b2d 6480 float_raise(float_flag_inexact, status);
158142c2
FB
6481 }
6482 return 0;
6483 }
6484 z = aSig0>>( - shiftCount );
6485 if ( aSig1
bb98fe42 6486 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
d82f3b2d 6487 float_raise(float_flag_inexact, status);
158142c2
FB
6488 }
6489 }
6490 if ( aSign ) z = - z;
6491 return z;
6492
6493}
6494
2e6d8568
BR
6495/*----------------------------------------------------------------------------
6496| Returns the result of converting the quadruple-precision floating-point value
6497| `a' to the 64-bit unsigned integer format. The conversion is
6498| performed according to the IEC/IEEE Standard for Binary Floating-Point
6499| Arithmetic---which means in particular that the conversion is rounded
6500| according to the current rounding mode. If `a' is a NaN, the largest
6501| positive integer is returned. If the conversion overflows, the
6502| largest unsigned integer is returned. If 'a' is negative, the value is
6503| rounded and zero is returned; negative values that do not round to zero
6504| will raise the inexact exception.
6505*----------------------------------------------------------------------------*/
6506
6507uint64_t float128_to_uint64(float128 a, float_status *status)
6508{
c120391c 6509 bool aSign;
2e6d8568
BR
6510 int aExp;
6511 int shiftCount;
6512 uint64_t aSig0, aSig1;
6513
6514 aSig0 = extractFloat128Frac0(a);
6515 aSig1 = extractFloat128Frac1(a);
6516 aExp = extractFloat128Exp(a);
6517 aSign = extractFloat128Sign(a);
6518 if (aSign && (aExp > 0x3FFE)) {
6519 float_raise(float_flag_invalid, status);
6520 if (float128_is_any_nan(a)) {
2c217da0 6521 return UINT64_MAX;
2e6d8568
BR
6522 } else {
6523 return 0;
6524 }
6525 }
6526 if (aExp) {
2c217da0 6527 aSig0 |= UINT64_C(0x0001000000000000);
2e6d8568
BR
6528 }
6529 shiftCount = 0x402F - aExp;
6530 if (shiftCount <= 0) {
6531 if (0x403E < aExp) {
6532 float_raise(float_flag_invalid, status);
2c217da0 6533 return UINT64_MAX;
2e6d8568
BR
6534 }
6535 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6536 } else {
6537 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6538 }
6539 return roundAndPackUint64(aSign, aSig0, aSig1, status);
6540}
6541
6542uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6543{
6544 uint64_t v;
6545 signed char current_rounding_mode = status->float_rounding_mode;
6546
6547 set_float_rounding_mode(float_round_to_zero, status);
6548 v = float128_to_uint64(a, status);
6549 set_float_rounding_mode(current_rounding_mode, status);
6550
6551 return v;
6552}
6553
158142c2
FB
6554/*----------------------------------------------------------------------------
6555| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
6556| value `a' to the 32-bit unsigned integer format. The conversion
6557| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6558| Arithmetic except that the conversion is always rounded toward zero.
6559| If `a' is a NaN, the largest positive integer is returned. Otherwise,
6560| if the conversion overflows, the largest unsigned integer is returned.
6561| If 'a' is negative, the value is rounded and zero is returned; negative
6562| values that do not round to zero will raise the inexact exception.
6563*----------------------------------------------------------------------------*/
6564
6565uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6566{
6567 uint64_t v;
6568 uint32_t res;
6569 int old_exc_flags = get_float_exception_flags(status);
6570
6571 v = float128_to_uint64_round_to_zero(a, status);
6572 if (v > 0xffffffff) {
6573 res = 0xffffffff;
6574 } else {
6575 return v;
6576 }
6577 set_float_exception_flags(old_exc_flags, status);
e45de992
DH
6578 float_raise(float_flag_invalid, status);
6579 return res;
6580}
6581
6582/*----------------------------------------------------------------------------
6583| Returns the result of converting the quadruple-precision floating-point value
6584| `a' to the 32-bit unsigned integer format. The conversion is
6585| performed according to the IEC/IEEE Standard for Binary Floating-Point
6586| Arithmetic---which means in particular that the conversion is rounded
6587| according to the current rounding mode. If `a' is a NaN, the largest
6588| positive integer is returned. If the conversion overflows, the
6589| largest unsigned integer is returned. If 'a' is negative, the value is
6590| rounded and zero is returned; negative values that do not round to zero
6591| will raise the inexact exception.
6592*----------------------------------------------------------------------------*/
6593
6594uint32_t float128_to_uint32(float128 a, float_status *status)
6595{
6596 uint64_t v;
6597 uint32_t res;
6598 int old_exc_flags = get_float_exception_flags(status);
6599
6600 v = float128_to_uint64(a, status);
6601 if (v > 0xffffffff) {
6602 res = 0xffffffff;
6603 } else {
6604 return v;
6605 }
6606 set_float_exception_flags(old_exc_flags, status);
fd425037
BR
6607 float_raise(float_flag_invalid, status);
6608 return res;
6609}
6610
6611/*----------------------------------------------------------------------------
6612| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
6613| value `a' to the single-precision floating-point format. The conversion
6614| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6615| Arithmetic.
6616*----------------------------------------------------------------------------*/
6617
e5a41ffa 6618float32 float128_to_float32(float128 a, float_status *status)
158142c2 6619{
c120391c 6620 bool aSign;
f4014512 6621 int32_t aExp;
bb98fe42
AF
6622 uint64_t aSig0, aSig1;
6623 uint32_t zSig;
158142c2
FB
6624
6625 aSig1 = extractFloat128Frac1( a );
6626 aSig0 = extractFloat128Frac0( a );
6627 aExp = extractFloat128Exp( a );
6628 aSign = extractFloat128Sign( a );
6629 if ( aExp == 0x7FFF ) {
6630 if ( aSig0 | aSig1 ) {
ff32e16e 6631 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
6632 }
6633 return packFloat32( aSign, 0xFF, 0 );
6634 }
6635 aSig0 |= ( aSig1 != 0 );
6636 shift64RightJamming( aSig0, 18, &aSig0 );
6637 zSig = aSig0;
6638 if ( aExp || zSig ) {
6639 zSig |= 0x40000000;
6640 aExp -= 0x3F81;
6641 }
ff32e16e 6642 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
6643
6644}
6645
6646/*----------------------------------------------------------------------------
6647| Returns the result of converting the quadruple-precision floating-point
6648| value `a' to the double-precision floating-point format. The conversion
6649| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6650| Arithmetic.
6651*----------------------------------------------------------------------------*/
6652
e5a41ffa 6653float64 float128_to_float64(float128 a, float_status *status)
158142c2 6654{
c120391c 6655 bool aSign;
f4014512 6656 int32_t aExp;
bb98fe42 6657 uint64_t aSig0, aSig1;
158142c2
FB
6658
6659 aSig1 = extractFloat128Frac1( a );
6660 aSig0 = extractFloat128Frac0( a );
6661 aExp = extractFloat128Exp( a );
6662 aSign = extractFloat128Sign( a );
6663 if ( aExp == 0x7FFF ) {
6664 if ( aSig0 | aSig1 ) {
ff32e16e 6665 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
6666 }
6667 return packFloat64( aSign, 0x7FF, 0 );
6668 }
6669 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6670 aSig0 |= ( aSig1 != 0 );
6671 if ( aExp || aSig0 ) {
e9321124 6672 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
6673 aExp -= 0x3C01;
6674 }
ff32e16e 6675 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
6676
6677}
6678
158142c2
FB
6679/*----------------------------------------------------------------------------
6680| Returns the result of converting the quadruple-precision floating-point
6681| value `a' to the extended double-precision floating-point format. The
6682| conversion is performed according to the IEC/IEEE Standard for Binary
6683| Floating-Point Arithmetic.
6684*----------------------------------------------------------------------------*/
6685
e5a41ffa 6686floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2 6687{
c120391c 6688 bool aSign;
f4014512 6689 int32_t aExp;
bb98fe42 6690 uint64_t aSig0, aSig1;
158142c2
FB
6691
6692 aSig1 = extractFloat128Frac1( a );
6693 aSig0 = extractFloat128Frac0( a );
6694 aExp = extractFloat128Exp( a );
6695 aSign = extractFloat128Sign( a );
6696 if ( aExp == 0x7FFF ) {
6697 if ( aSig0 | aSig1 ) {
7537c2b4
JM
6698 floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6699 status);
6700 return floatx80_silence_nan(res, status);
158142c2 6701 }
0f605c88
LV
6702 return packFloatx80(aSign, floatx80_infinity_high,
6703 floatx80_infinity_low);
158142c2
FB
6704 }
6705 if ( aExp == 0 ) {
6706 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6707 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6708 }
6709 else {
e9321124 6710 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6711 }
6712 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 6713 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
6714
6715}
6716
158142c2
FB
6717/*----------------------------------------------------------------------------
6718| Rounds the quadruple-precision floating-point value `a' to an integer, and
6719| returns the result as a quadruple-precision floating-point value. The
6720| operation is performed according to the IEC/IEEE Standard for Binary
6721| Floating-Point Arithmetic.
6722*----------------------------------------------------------------------------*/
6723
e5a41ffa 6724float128 float128_round_to_int(float128 a, float_status *status)
158142c2 6725{
c120391c 6726 bool aSign;
f4014512 6727 int32_t aExp;
bb98fe42 6728 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
6729 float128 z;
6730
6731 aExp = extractFloat128Exp( a );
6732 if ( 0x402F <= aExp ) {
6733 if ( 0x406F <= aExp ) {
6734 if ( ( aExp == 0x7FFF )
6735 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6736 ) {
ff32e16e 6737 return propagateFloat128NaN(a, a, status);
158142c2
FB
6738 }
6739 return a;
6740 }
6741 lastBitMask = 1;
6742 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6743 roundBitsMask = lastBitMask - 1;
6744 z = a;
a2f2d288 6745 switch (status->float_rounding_mode) {
dc355b76 6746 case float_round_nearest_even:
158142c2
FB
6747 if ( lastBitMask ) {
6748 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6749 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6750 }
6751 else {
bb98fe42 6752 if ( (int64_t) z.low < 0 ) {
158142c2 6753 ++z.high;
bb98fe42 6754 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
6755 }
6756 }
dc355b76 6757 break;
f9288a76
PM
6758 case float_round_ties_away:
6759 if (lastBitMask) {
6760 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6761 } else {
6762 if ((int64_t) z.low < 0) {
6763 ++z.high;
6764 }
6765 }
6766 break;
dc355b76
PM
6767 case float_round_to_zero:
6768 break;
6769 case float_round_up:
6770 if (!extractFloat128Sign(z)) {
6771 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6772 }
6773 break;
6774 case float_round_down:
6775 if (extractFloat128Sign(z)) {
6776 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 6777 }
dc355b76 6778 break;
5d64abb3
RH
6779 case float_round_to_odd:
6780 /*
6781 * Note that if lastBitMask == 0, the last bit is the lsb
6782 * of high, and roundBitsMask == -1.
6783 */
6784 if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
6785 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6786 }
6787 break;
dc355b76
PM
6788 default:
6789 abort();
158142c2
FB
6790 }
6791 z.low &= ~ roundBitsMask;
6792 }
6793 else {
6794 if ( aExp < 0x3FFF ) {
bb98fe42 6795 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
d82f3b2d 6796 float_raise(float_flag_inexact, status);
158142c2 6797 aSign = extractFloat128Sign( a );
a2f2d288 6798 switch (status->float_rounding_mode) {
5d64abb3 6799 case float_round_nearest_even:
158142c2
FB
6800 if ( ( aExp == 0x3FFE )
6801 && ( extractFloat128Frac0( a )
6802 | extractFloat128Frac1( a ) )
6803 ) {
6804 return packFloat128( aSign, 0x3FFF, 0, 0 );
6805 }
6806 break;
f9288a76
PM
6807 case float_round_ties_away:
6808 if (aExp == 0x3FFE) {
6809 return packFloat128(aSign, 0x3FFF, 0, 0);
6810 }
6811 break;
5d64abb3 6812 case float_round_down:
158142c2
FB
6813 return
6814 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6815 : packFloat128( 0, 0, 0, 0 );
5d64abb3 6816 case float_round_up:
158142c2
FB
6817 return
6818 aSign ? packFloat128( 1, 0, 0, 0 )
6819 : packFloat128( 0, 0x3FFF, 0, 0 );
5d64abb3
RH
6820
6821 case float_round_to_odd:
6822 return packFloat128(aSign, 0x3FFF, 0, 0);
3dede407
RH
6823
6824 case float_round_to_zero:
6825 break;
158142c2
FB
6826 }
6827 return packFloat128( aSign, 0, 0, 0 );
6828 }
6829 lastBitMask = 1;
6830 lastBitMask <<= 0x402F - aExp;
6831 roundBitsMask = lastBitMask - 1;
6832 z.low = 0;
6833 z.high = a.high;
a2f2d288 6834 switch (status->float_rounding_mode) {
dc355b76 6835 case float_round_nearest_even:
158142c2
FB
6836 z.high += lastBitMask>>1;
6837 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6838 z.high &= ~ lastBitMask;
6839 }
dc355b76 6840 break;
f9288a76
PM
6841 case float_round_ties_away:
6842 z.high += lastBitMask>>1;
6843 break;
dc355b76
PM
6844 case float_round_to_zero:
6845 break;
6846 case float_round_up:
6847 if (!extractFloat128Sign(z)) {
158142c2
FB
6848 z.high |= ( a.low != 0 );
6849 z.high += roundBitsMask;
6850 }
dc355b76
PM
6851 break;
6852 case float_round_down:
6853 if (extractFloat128Sign(z)) {
6854 z.high |= (a.low != 0);
6855 z.high += roundBitsMask;
6856 }
6857 break;
5d64abb3
RH
6858 case float_round_to_odd:
6859 if ((z.high & lastBitMask) == 0) {
6860 z.high |= (a.low != 0);
6861 z.high += roundBitsMask;
6862 }
6863 break;
dc355b76
PM
6864 default:
6865 abort();
158142c2
FB
6866 }
6867 z.high &= ~ roundBitsMask;
6868 }
6869 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
d82f3b2d 6870 float_raise(float_flag_inexact, status);
158142c2
FB
6871 }
6872 return z;
6873
6874}
6875
6876/*----------------------------------------------------------------------------
6877| Returns the result of adding the absolute values of the quadruple-precision
6878| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
6879| before being returned. `zSign' is ignored if the result is a NaN.
6880| The addition is performed according to the IEC/IEEE Standard for Binary
6881| Floating-Point Arithmetic.
6882*----------------------------------------------------------------------------*/
6883
c120391c 6884static float128 addFloat128Sigs(float128 a, float128 b, bool zSign,
e5a41ffa 6885 float_status *status)
158142c2 6886{
f4014512 6887 int32_t aExp, bExp, zExp;
bb98fe42 6888 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 6889 int32_t expDiff;
158142c2
FB
6890
6891 aSig1 = extractFloat128Frac1( a );
6892 aSig0 = extractFloat128Frac0( a );
6893 aExp = extractFloat128Exp( a );
6894 bSig1 = extractFloat128Frac1( b );
6895 bSig0 = extractFloat128Frac0( b );
6896 bExp = extractFloat128Exp( b );
6897 expDiff = aExp - bExp;
6898 if ( 0 < expDiff ) {
6899 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6900 if (aSig0 | aSig1) {
6901 return propagateFloat128NaN(a, b, status);
6902 }
158142c2
FB
6903 return a;
6904 }
6905 if ( bExp == 0 ) {
6906 --expDiff;
6907 }
6908 else {
e9321124 6909 bSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6910 }
6911 shift128ExtraRightJamming(
6912 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6913 zExp = aExp;
6914 }
6915 else if ( expDiff < 0 ) {
6916 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6917 if (bSig0 | bSig1) {
6918 return propagateFloat128NaN(a, b, status);
6919 }
158142c2
FB
6920 return packFloat128( zSign, 0x7FFF, 0, 0 );
6921 }
6922 if ( aExp == 0 ) {
6923 ++expDiff;
6924 }
6925 else {
e9321124 6926 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6927 }
6928 shift128ExtraRightJamming(
6929 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6930 zExp = bExp;
6931 }
6932 else {
6933 if ( aExp == 0x7FFF ) {
6934 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6935 return propagateFloat128NaN(a, b, status);
158142c2
FB
6936 }
6937 return a;
6938 }
6939 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 6940 if ( aExp == 0 ) {
a2f2d288 6941 if (status->flush_to_zero) {
e6afc87f 6942 if (zSig0 | zSig1) {
ff32e16e 6943 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
6944 }
6945 return packFloat128(zSign, 0, 0, 0);
6946 }
fe76d976
PB
6947 return packFloat128( zSign, 0, zSig0, zSig1 );
6948 }
158142c2 6949 zSig2 = 0;
e9321124 6950 zSig0 |= UINT64_C(0x0002000000000000);
158142c2
FB
6951 zExp = aExp;
6952 goto shiftRight1;
6953 }
e9321124 6954 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
6955 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6956 --zExp;
e9321124 6957 if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack;
158142c2
FB
6958 ++zExp;
6959 shiftRight1:
6960 shift128ExtraRightJamming(
6961 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6962 roundAndPack:
ff32e16e 6963 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6964
6965}
6966
6967/*----------------------------------------------------------------------------
6968| Returns the result of subtracting the absolute values of the quadruple-
6969| precision floating-point values `a' and `b'. If `zSign' is 1, the
6970| difference is negated before being returned. `zSign' is ignored if the
6971| result is a NaN. The subtraction is performed according to the IEC/IEEE
6972| Standard for Binary Floating-Point Arithmetic.
6973*----------------------------------------------------------------------------*/
6974
c120391c 6975static float128 subFloat128Sigs(float128 a, float128 b, bool zSign,
e5a41ffa 6976 float_status *status)
158142c2 6977{
f4014512 6978 int32_t aExp, bExp, zExp;
bb98fe42 6979 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 6980 int32_t expDiff;
158142c2
FB
6981
6982 aSig1 = extractFloat128Frac1( a );
6983 aSig0 = extractFloat128Frac0( a );
6984 aExp = extractFloat128Exp( a );
6985 bSig1 = extractFloat128Frac1( b );
6986 bSig0 = extractFloat128Frac0( b );
6987 bExp = extractFloat128Exp( b );
6988 expDiff = aExp - bExp;
6989 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6990 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
6991 if ( 0 < expDiff ) goto aExpBigger;
6992 if ( expDiff < 0 ) goto bExpBigger;
6993 if ( aExp == 0x7FFF ) {
6994 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6995 return propagateFloat128NaN(a, b, status);
158142c2 6996 }
ff32e16e 6997 float_raise(float_flag_invalid, status);
af39bc8c 6998 return float128_default_nan(status);
158142c2
FB
6999 }
7000 if ( aExp == 0 ) {
7001 aExp = 1;
7002 bExp = 1;
7003 }
7004 if ( bSig0 < aSig0 ) goto aBigger;
7005 if ( aSig0 < bSig0 ) goto bBigger;
7006 if ( bSig1 < aSig1 ) goto aBigger;
7007 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
7008 return packFloat128(status->float_rounding_mode == float_round_down,
7009 0, 0, 0);
158142c2
FB
7010 bExpBigger:
7011 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7012 if (bSig0 | bSig1) {
7013 return propagateFloat128NaN(a, b, status);
7014 }
158142c2
FB
7015 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
7016 }
7017 if ( aExp == 0 ) {
7018 ++expDiff;
7019 }
7020 else {
e9321124 7021 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7022 }
7023 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
e9321124 7024 bSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7025 bBigger:
7026 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
7027 zExp = bExp;
7028 zSign ^= 1;
7029 goto normalizeRoundAndPack;
7030 aExpBigger:
7031 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7032 if (aSig0 | aSig1) {
7033 return propagateFloat128NaN(a, b, status);
7034 }
158142c2
FB
7035 return a;
7036 }
7037 if ( bExp == 0 ) {
7038 --expDiff;
7039 }
7040 else {
e9321124 7041 bSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7042 }
7043 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
e9321124 7044 aSig0 |= UINT64_C(0x4000000000000000);
158142c2
FB
7045 aBigger:
7046 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7047 zExp = aExp;
7048 normalizeRoundAndPack:
7049 --zExp;
ff32e16e
PM
7050 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
7051 status);
158142c2
FB
7052
7053}
7054
7055/*----------------------------------------------------------------------------
7056| Returns the result of adding the quadruple-precision floating-point values
7057| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
7058| for Binary Floating-Point Arithmetic.
7059*----------------------------------------------------------------------------*/
7060
e5a41ffa 7061float128 float128_add(float128 a, float128 b, float_status *status)
158142c2 7062{
c120391c 7063 bool aSign, bSign;
158142c2
FB
7064
7065 aSign = extractFloat128Sign( a );
7066 bSign = extractFloat128Sign( b );
7067 if ( aSign == bSign ) {
ff32e16e 7068 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7069 }
7070 else {
ff32e16e 7071 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7072 }
7073
7074}
7075
7076/*----------------------------------------------------------------------------
7077| Returns the result of subtracting the quadruple-precision floating-point
7078| values `a' and `b'. The operation is performed according to the IEC/IEEE
7079| Standard for Binary Floating-Point Arithmetic.
7080*----------------------------------------------------------------------------*/
7081
e5a41ffa 7082float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2 7083{
c120391c 7084 bool aSign, bSign;
158142c2
FB
7085
7086 aSign = extractFloat128Sign( a );
7087 bSign = extractFloat128Sign( b );
7088 if ( aSign == bSign ) {
ff32e16e 7089 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
7090 }
7091 else {
ff32e16e 7092 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
7093 }
7094
7095}
7096
7097/*----------------------------------------------------------------------------
7098| Returns the result of multiplying the quadruple-precision floating-point
7099| values `a' and `b'. The operation is performed according to the IEC/IEEE
7100| Standard for Binary Floating-Point Arithmetic.
7101*----------------------------------------------------------------------------*/
7102
e5a41ffa 7103float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2 7104{
c120391c 7105 bool aSign, bSign, zSign;
f4014512 7106 int32_t aExp, bExp, zExp;
bb98fe42 7107 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
7108
7109 aSig1 = extractFloat128Frac1( a );
7110 aSig0 = extractFloat128Frac0( a );
7111 aExp = extractFloat128Exp( a );
7112 aSign = extractFloat128Sign( a );
7113 bSig1 = extractFloat128Frac1( b );
7114 bSig0 = extractFloat128Frac0( b );
7115 bExp = extractFloat128Exp( b );
7116 bSign = extractFloat128Sign( b );
7117 zSign = aSign ^ bSign;
7118 if ( aExp == 0x7FFF ) {
7119 if ( ( aSig0 | aSig1 )
7120 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7121 return propagateFloat128NaN(a, b, status);
158142c2
FB
7122 }
7123 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
7124 return packFloat128( zSign, 0x7FFF, 0, 0 );
7125 }
7126 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7127 if (bSig0 | bSig1) {
7128 return propagateFloat128NaN(a, b, status);
7129 }
158142c2
FB
7130 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7131 invalid:
ff32e16e 7132 float_raise(float_flag_invalid, status);
af39bc8c 7133 return float128_default_nan(status);
158142c2
FB
7134 }
7135 return packFloat128( zSign, 0x7FFF, 0, 0 );
7136 }
7137 if ( aExp == 0 ) {
7138 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7139 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7140 }
7141 if ( bExp == 0 ) {
7142 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7143 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7144 }
7145 zExp = aExp + bExp - 0x4000;
e9321124 7146 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
7147 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
7148 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
7149 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
7150 zSig2 |= ( zSig3 != 0 );
e9321124 7151 if (UINT64_C( 0x0002000000000000) <= zSig0 ) {
158142c2
FB
7152 shift128ExtraRightJamming(
7153 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7154 ++zExp;
7155 }
ff32e16e 7156 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7157
7158}
7159
7160/*----------------------------------------------------------------------------
7161| Returns the result of dividing the quadruple-precision floating-point value
7162| `a' by the corresponding value `b'. The operation is performed according to
7163| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7164*----------------------------------------------------------------------------*/
7165
e5a41ffa 7166float128 float128_div(float128 a, float128 b, float_status *status)
158142c2 7167{
c120391c 7168 bool aSign, bSign, zSign;
f4014512 7169 int32_t aExp, bExp, zExp;
bb98fe42
AF
7170 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
7171 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7172
7173 aSig1 = extractFloat128Frac1( a );
7174 aSig0 = extractFloat128Frac0( a );
7175 aExp = extractFloat128Exp( a );
7176 aSign = extractFloat128Sign( a );
7177 bSig1 = extractFloat128Frac1( b );
7178 bSig0 = extractFloat128Frac0( b );
7179 bExp = extractFloat128Exp( b );
7180 bSign = extractFloat128Sign( b );
7181 zSign = aSign ^ bSign;
7182 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7183 if (aSig0 | aSig1) {
7184 return propagateFloat128NaN(a, b, status);
7185 }
158142c2 7186 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7187 if (bSig0 | bSig1) {
7188 return propagateFloat128NaN(a, b, status);
7189 }
158142c2
FB
7190 goto invalid;
7191 }
7192 return packFloat128( zSign, 0x7FFF, 0, 0 );
7193 }
7194 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7195 if (bSig0 | bSig1) {
7196 return propagateFloat128NaN(a, b, status);
7197 }
158142c2
FB
7198 return packFloat128( zSign, 0, 0, 0 );
7199 }
7200 if ( bExp == 0 ) {
7201 if ( ( bSig0 | bSig1 ) == 0 ) {
7202 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7203 invalid:
ff32e16e 7204 float_raise(float_flag_invalid, status);
af39bc8c 7205 return float128_default_nan(status);
158142c2 7206 }
ff32e16e 7207 float_raise(float_flag_divbyzero, status);
158142c2
FB
7208 return packFloat128( zSign, 0x7FFF, 0, 0 );
7209 }
7210 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7211 }
7212 if ( aExp == 0 ) {
7213 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7214 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7215 }
7216 zExp = aExp - bExp + 0x3FFD;
7217 shortShift128Left(
e9321124 7218 aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 );
158142c2 7219 shortShift128Left(
e9321124 7220 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
7221 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
7222 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
7223 ++zExp;
7224 }
7225 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
7226 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
7227 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 7228 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
7229 --zSig0;
7230 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
7231 }
7232 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
7233 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
7234 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
7235 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7236 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
7237 --zSig1;
7238 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
7239 }
7240 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7241 }
7242 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 7243 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7244
7245}
7246
7247/*----------------------------------------------------------------------------
7248| Returns the remainder of the quadruple-precision floating-point value `a'
7249| with respect to the corresponding value `b'. The operation is performed
7250| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7251*----------------------------------------------------------------------------*/
7252
/*
 * Cases decided before the main computation, in the order they are tested:
 *  1. `a' has exponent 0x7FFF: if `a' has a nonzero fraction, or `b' has
 *     exponent 0x7FFF with a nonzero fraction, the result comes from
 *     propagateFloat128NaN(); otherwise (`a' is an infinity) the operation
 *     is invalid.
 *  2. `b' has exponent 0x7FFF: a nonzero fraction goes through
 *     propagateFloat128NaN(); otherwise (`b' is an infinity) `a' is returned
 *     unchanged.
 *  3. `b' is zero: the operation is invalid.
 *  4. `a' is zero: `a' is returned unchanged.
 *  5. The exponent of `a' is more than one below that of `b': `a' is
 *     returned unchanged.
 * An invalid operation raises float_flag_invalid and returns
 * float128_default_nan(status).
 */
e5a41ffa 7253float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 7254{
c120391c 7255 bool aSign, zSign;
f4014512 7256 int32_t aExp, bExp, expDiff;
bb98fe42
AF
7257 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7258 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
/* Signed so that the sign of the 128-bit sum computed below can be tested. */
7259 int64_t sigMean0;
158142c2
FB
7260
/* Split both operands into fraction words, exponent and (for `a') sign. */
7261 aSig1 = extractFloat128Frac1( a );
7262 aSig0 = extractFloat128Frac0( a );
7263 aExp = extractFloat128Exp( a );
7264 aSign = extractFloat128Sign( a );
7265 bSig1 = extractFloat128Frac1( b );
7266 bSig0 = extractFloat128Frac0( b );
7267 bExp = extractFloat128Exp( b );
158142c2
FB
7268 if ( aExp == 0x7FFF ) {
7269 if ( ( aSig0 | aSig1 )
7270 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 7271 return propagateFloat128NaN(a, b, status);
158142c2
FB
7272 }
/* `a' has exponent 0x7FFF and a zero fraction: no remainder is defined. */
7273 goto invalid;
7274 }
7275 if ( bExp == 0x7FFF ) {
ff32e16e
PM
7276 if (bSig0 | bSig1) {
7277 return propagateFloat128NaN(a, b, status);
7278 }
158142c2
FB
/* `b' has exponent 0x7FFF and a zero fraction: `a' is its own remainder. */
7279 return a;
7280 }
7281 if ( bExp == 0 ) {
7282 if ( ( bSig0 | bSig1 ) == 0 ) {
/* Also reached by the `goto invalid' above. */
7283 invalid:
ff32e16e 7284 float_raise(float_flag_invalid, status);
af39bc8c 7285 return float128_default_nan(status);
158142c2
FB
7286 }
/* Nonzero fraction with a zero exponent: renormalize before use. */
7287 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7288 }
7289 if ( aExp == 0 ) {
7290 if ( ( aSig0 | aSig1 ) == 0 ) return a;
7291 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7292 }
7293 expDiff = aExp - bExp;
7294 if ( expDiff < -1 ) return a;
/*
 * Set bit 48 of the high word (presumably the implicit leading one -- TODO
 * confirm against the extractFloat128Frac0 layout) and left-align both
 * significands. `a' is shifted one place less when expDiff is -1.
 */
7295 shortShift128Left(
e9321124 7296 aSig0 | UINT64_C(0x0001000000000000),
158142c2
FB
7297 aSig1,
7298 15 - ( expDiff < 0 ),
7299 &aSig0,
7300 &aSig1
7301 );
7302 shortShift128Left(
e9321124 7303 bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
158142c2
FB
/* First quotient bit: 1 when b <= a, in which case b is subtracted once. */
7304 q = le128( bSig0, bSig1, aSig0, aSig1 );
7305 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7306 expDiff -= 64;
/*
 * Reduce the running remainder 61 exponent steps per iteration. The quotient
 * estimate is lowered by 4 (clamped at 0), presumably so that q * b never
 * exceeds the running remainder -- TODO confirm against estimateDiv128To64.
 */
7307 while ( 0 < expDiff ) {
7308 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7309 q = ( 4 < q ) ? q - 4 : 0;
7310 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7311 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
7312 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
7313 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
7314 expDiff -= 61;
7315 }
/* Final partial step: fewer than 64 quotient bits remain to be produced. */
7316 if ( -64 < expDiff ) {
7317 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7318 q = ( 4 < q ) ? q - 4 : 0;
/* Keep only the quotient bits that belong to the integer quotient. */
7319 q >>= - expDiff;
7320 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7321 expDiff += 52;
7322 if ( expDiff < 0 ) {
7323 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7324 }
7325 else {
7326 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7327 }
7328 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7329 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7330 }
7331 else {
/* No further quotient bits: only realign both values by 12 bits. */
7332 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
7333 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7334 }
/*
 * Keep subtracting b, counting each step in q, until the remainder turns
 * negative. alternateASig holds the value from just before the last
 * subtraction, i.e. the last non-negative remainder.
 */
7335 do {
7336 alternateASig0 = aSig0;
7337 alternateASig1 = aSig1;
7338 ++q;
7339 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 7340 } while ( 0 <= (int64_t) aSig0 );
/*
 * The sum of the negative and the non-negative candidate tells which one has
 * the smaller magnitude: a negative sum means the non-negative candidate is
 * closer to zero. On an exact tie the non-negative candidate is taken only
 * when q is odd, which corresponds to an even quotient for that candidate.
 */
158142c2 7341 add128(
bb98fe42 7342 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
7343 if ( ( sigMean0 < 0 )
7344 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
7345 aSig0 = alternateASig0;
7346 aSig1 = alternateASig1;
7347 }
/* Convert a negative remainder to sign + magnitude form. */
bb98fe42 7348 zSign = ( (int64_t) aSig0 < 0 );
158142c2 7349 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
/* The result sign is the sign of `a', flipped when the remainder was negative. */
7350 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
7351 status);
158142c2
FB
7352}
7353
7354/*----------------------------------------------------------------------------
7355| Returns the square root of the quadruple-precision floating-point value `a'.
7356| The operation is performed according to the IEC/IEEE Standard for Binary
7357| Floating-Point Arithmetic.
7358*----------------------------------------------------------------------------*/
7359
/*
 * Cases decided before the main computation, in the order they are tested:
 *  1. Exponent 0x7FFF: a nonzero fraction goes through
 *     propagateFloat128NaN(); a zero fraction with the sign clear returns
 *     `a' unchanged; a zero fraction with the sign set is invalid.
 *  2. Sign set: if exponent and fraction are all zero, `a' is returned
 *     unchanged; any other negative input is invalid.
 *  3. Exponent 0 with zero fraction: packFloat128(0, 0, 0, 0) is returned.
 * An invalid operation raises float_flag_invalid and returns
 * float128_default_nan(status).
 */
e5a41ffa 7360float128 float128_sqrt(float128 a, float_status *status)
158142c2 7361{
c120391c 7362 bool aSign;
f4014512 7363 int32_t aExp, zExp;
bb98fe42
AF
7364 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7365 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
7366
7367 aSig1 = extractFloat128Frac1( a );
7368 aSig0 = extractFloat128Frac0( a );
7369 aExp = extractFloat128Exp( a );
7370 aSign = extractFloat128Sign( a );
7371 if ( aExp == 0x7FFF ) {
ff32e16e
PM
7372 if (aSig0 | aSig1) {
7373 return propagateFloat128NaN(a, a, status);
7374 }
158142c2
FB
7375 if ( ! aSign ) return a;
7376 goto invalid;
7377 }
7378 if ( aSign ) {
/* Sign set with every other bit clear: returned as is. */
7379 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
/* Also reached by the `goto invalid' above. */
7380 invalid:
ff32e16e 7381 float_raise(float_flag_invalid, status);
af39bc8c 7382 return float128_default_nan(status);
158142c2
FB
7383 }
7384 if ( aExp == 0 ) {
7385 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
/* Nonzero fraction with a zero exponent: renormalize before use. */
7386 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7387 }
/* Result exponent: half of the input exponent's offset from 0x3FFF, re-offset by 0x3FFE. */
7388 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
/* Set bit 48 of the high word (presumably the implicit leading one -- TODO confirm). */
e9321124 7389 aSig0 |= UINT64_C(0x0001000000000000);
158142c2
FB
/* Initial estimate from estimateSqrt32, then refined using one estimated division. */
7390 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
/* The shift is one place shorter when the exponent is odd. */
7391 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
7392 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
7393 doubleZSig0 = zSig0<<1;
/* rem = a - zSig0^2; a negative remainder means the estimate is too large. */
7394 mul64To128( zSig0, zSig0, &term0, &term1 );
7395 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 7396 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
/* Step zSig0 down by one and add back 2 * zSig0 + 1 (for the new zSig0). */
7397 --zSig0;
7398 doubleZSig0 -= 2;
7399 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
7400 }
/* Second result word, estimated as remainder / (2 * zSig0). */
7401 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
/*
 * The exact remainder is computed only when the low 13 bits of the estimate
 * are at most 5; presumably other estimates cannot affect rounding -- TODO
 * confirm against the error bound of estimateDiv128To64.
 */
7402 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7403 if ( zSig1 == 0 ) zSig1 = 1;
7404 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7405 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7406 mul64To128( zSig1, zSig1, &term2, &term3 );
7407 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 7408 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
/* Estimate too large: step zSig1 down and add back the matching term. */
7409 --zSig1;
7410 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7411 term3 |= 1;
7412 term2 |= doubleZSig0;
7413 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7414 }
/* Record a nonzero remainder in the lowest bit of zSig1. */
7415 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7416 }
7417 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
/* The sign argument is always 0 here. */
ff32e16e 7418 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
7419
7420}
7421
71bfd65c
RH
7422static inline FloatRelation
7423floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
7424 float_status *status)
f6714d36 7425{
c120391c 7426 bool aSign, bSign;
f6714d36 7427
d1eb8f2a
AD
7428 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7429 float_raise(float_flag_invalid, status);
7430 return float_relation_unordered;
7431 }
f6714d36
AJ
7432 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7433 ( extractFloatx80Frac( a )<<1 ) ) ||
7434 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7435 ( extractFloatx80Frac( b )<<1 ) )) {
7436 if (!is_quiet ||
af39bc8c
AM
7437 floatx80_is_signaling_nan(a, status) ||
7438 floatx80_is_signaling_nan(b, status)) {
ff32e16e 7439 float_raise(float_flag_invalid, status);
f6714d36
AJ
7440 }
7441 return float_relation_unordered;
7442 }
7443 aSign = extractFloatx80Sign( a );
7444 bSign = extractFloatx80Sign( b );
7445 if ( aSign != bSign ) {
7446
7447 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7448 ( ( a.low | b.low ) == 0 ) ) {
7449 /* zero case */
7450 return float_relation_equal;
7451 } else {
7452 return 1 - (2 * aSign);
7453 }
7454 } else {
be53fa78
JM
7455 /* Normalize pseudo-denormals before comparison. */
7456 if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
7457 ++a.high;
7458 }
7459 if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
7460 ++b.high;
7461 }
f6714d36
AJ
7462 if (a.low == b.low && a.high == b.high) {
7463 return float_relation_equal;
7464 } else {
7465 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7466 }
7467 }
7468}
7469
71bfd65c 7470FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 7471{
ff32e16e 7472 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
7473}
7474
71bfd65c
RH
7475FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
7476 float_status *status)
f6714d36 7477{
ff32e16e 7478 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
7479}
7480
71bfd65c
RH
7481static inline FloatRelation
7482float128_compare_internal(float128 a, float128 b, bool is_quiet,
7483 float_status *status)
1f587329 7484{
c120391c 7485 bool aSign, bSign;
1f587329
BS
7486
7487 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7488 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7489 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7490 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7491 if (!is_quiet ||
af39bc8c
AM
7492 float128_is_signaling_nan(a, status) ||
7493 float128_is_signaling_nan(b, status)) {
ff32e16e 7494 float_raise(float_flag_invalid, status);
1f587329
BS
7495 }
7496 return float_relation_unordered;
7497 }
7498 aSign = extractFloat128Sign( a );
7499 bSign = extractFloat128Sign( b );
7500 if ( aSign != bSign ) {
7501 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7502 /* zero case */
7503 return float_relation_equal;
7504 } else {
7505 return 1 - (2 * aSign);
7506 }
7507 } else {
7508 if (a.low == b.low && a.high == b.high) {
7509 return float_relation_equal;
7510 } else {
7511 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7512 }
7513 }
7514}
7515
71bfd65c 7516FloatRelation float128_compare(float128 a, float128 b, float_status *status)
1f587329 7517{
ff32e16e 7518 return float128_compare_internal(a, b, 0, status);
1f587329
BS
7519}
7520
71bfd65c
RH
7521FloatRelation float128_compare_quiet(float128 a, float128 b,
7522 float_status *status)
1f587329 7523{
ff32e16e 7524 return float128_compare_internal(a, b, 1, status);
1f587329
BS
7525}
7526
e5a41ffa 7527floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb 7528{
c120391c 7529 bool aSign;
326b9e98 7530 int32_t aExp;
bb98fe42 7531 uint64_t aSig;
9ee6e8bb 7532
d1eb8f2a
AD
7533 if (floatx80_invalid_encoding(a)) {
7534 float_raise(float_flag_invalid, status);
7535 return floatx80_default_nan(status);
7536 }
9ee6e8bb
PB
7537 aSig = extractFloatx80Frac( a );
7538 aExp = extractFloatx80Exp( a );
7539 aSign = extractFloatx80Sign( a );
7540
326b9e98
AJ
7541 if ( aExp == 0x7FFF ) {
7542 if ( aSig<<1 ) {
ff32e16e 7543 return propagateFloatx80NaN(a, a, status);
326b9e98 7544 }
9ee6e8bb
PB
7545 return a;
7546 }
326b9e98 7547
3c85c37f
PM
7548 if (aExp == 0) {
7549 if (aSig == 0) {
7550 return a;
7551 }
7552 aExp++;
7553 }
69397542 7554
326b9e98
AJ
7555 if (n > 0x10000) {
7556 n = 0x10000;
7557 } else if (n < -0x10000) {
7558 n = -0x10000;
7559 }
7560
9ee6e8bb 7561 aExp += n;
a2f2d288
PM
7562 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7563 aSign, aExp, aSig, 0, status);
9ee6e8bb 7564}
9ee6e8bb 7565
e5a41ffa 7566float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb 7567{
c120391c 7568 bool aSign;
326b9e98 7569 int32_t aExp;
bb98fe42 7570 uint64_t aSig0, aSig1;
9ee6e8bb
PB
7571
7572 aSig1 = extractFloat128Frac1( a );
7573 aSig0 = extractFloat128Frac0( a );
7574 aExp = extractFloat128Exp( a );
7575 aSign = extractFloat128Sign( a );
7576 if ( aExp == 0x7FFF ) {
326b9e98 7577 if ( aSig0 | aSig1 ) {
ff32e16e 7578 return propagateFloat128NaN(a, a, status);
326b9e98 7579 }
9ee6e8bb
PB
7580 return a;
7581 }
3c85c37f 7582 if (aExp != 0) {
e9321124 7583 aSig0 |= UINT64_C(0x0001000000000000);
3c85c37f 7584 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 7585 return a;
3c85c37f
PM
7586 } else {
7587 aExp++;
7588 }
69397542 7589
326b9e98
AJ
7590 if (n > 0x10000) {
7591 n = 0x10000;
7592 } else if (n < -0x10000) {
7593 n = -0x10000;
7594 }
7595
69397542
PB
7596 aExp += n - 1;
7597 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 7598 , status);
9ee6e8bb
PB
7599
7600}
f6b3b108
EC
7601
7602static void __attribute__((constructor)) softfloat_init(void)
7603{
7604 union_float64 ua, ub, uc, ur;
7605
7606 if (QEMU_NO_HARDFLOAT) {
7607 return;
7608 }
7609 /*
7610 * Test that the host's FMA is not obviously broken. For example,
7611 * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
7612 * https://sourceware.org/bugzilla/show_bug.cgi?id=13304
7613 */
7614 ua.s = 0x0020000000000001ULL;
7615 ub.s = 0x3ca0000000000000ULL;
7616 uc.s = 0x0020000000000000ULL;
7617 ur.h = fma(ua.h, ub.h, uc.h);
7618 if (ur.s != 0x0020000000000001ULL) {
7619 force_soft_fma = true;
7620 }
7621}